\begin{thebibliography}{10}
% Reference list. The argument {10} is the widest-label sample LaTeX uses to
% size the label column.
% NOTE(review): this looks like BibTeX-generated output -- presumably the
% corrections made here should be mirrored in the .bib source; confirm.

% Fallback definitions: \providecommand only defines a macro that is not
% already defined, so packages loaded by the document take precedence.
\providecommand{\url}[1]{\texttt{#1}}
\providecommand{\urlprefix}{URL }
% \doi: when \urlstyle is undefined, print the DOI as plain text; otherwise
% hand the DOI to \Url in roman style.
\expandafter\ifx\csname urlstyle\endcsname\relax
  \providecommand{\doi}[1]{doi:\discretionary{}{}{}#1}\else
  \providecommand{\doi}{doi:\discretionary{}{}{}\begingroup
  \urlstyle{rm}\Url}\fi
\providecommand{\selectlanguage}[1]{\relax}
\providecommand{\eprint}[2][]{\url{#2}}

% NOTE(review): the author list below looks garbled (initials where a second
% surname is expected) -- verify against the .bib source.
\bibitem{7_lq_methods}
Anderson, M.~J., B.D.O.: Optimal Control - Linear Quadratic Methods.
  \emph{Prentice Hall, Englewood Cliffs, NJ}, 1990.

\bibitem{dynamic_programming}
Bellman, R.: Dynamic programming. \emph{Princeton University Press}, 1957.

\bibitem{2_int_a_in_dec}
Ferreira, E.; Subrahmanian, E.; Manstetten, D.: Intelligent agents in
  decentralized traffic control. \emph{Intelligent Transportation Systems},
  2001.

\bibitem{4_rmm_formalization}
Gmytrasiewicz, P.~J.; Durfee, E.~H.: A rigorous, operational formalization of
  recursive modeling. \emph{First International Conference on Multiagent
  Systems}, 1995.

\bibitem{5_bayes_learn}
Nagy, I.; Nedoma, P.; Ettler, P.; aj.: O bayesovsk{\'e}m u\v{c}en{\'i}.
  \emph{Automa}, 2002.

\bibitem{1_rmm_bayes_learning}
Ou, H.; Zhang, W.; Xu, X.: Urban traffic multi-agent system based on RMM and
  Bayesian learning. \emph{American Control Conference}, 2002.

% NOTE(review): the author list below looks garbled (only initials for the
% first two authors) -- verify against the .bib source.
\bibitem{17_fronta}
P., P.; J., D.; Fl{\'i}dr: Modelling and Simultaneous Estimation of State and
  Parameters of Traffic System. \emph{Robotics, Automation and Control}, 2008.

\bibitem{learning_to_predict}
Sutton, R.~S.: Learning to predict by the methods of temporal differences.
  \emph{Machine Learning}, 1988.

\bibitem{tlc_using_sarsa}
Thorpe, T.: Vehicle traffic light control using SARSA. \emph{Master's thesis,
  Department of Computer Science, Colorado State University}, 1997.

% NOTE(review): the author list below looks garbled (given names and surnames
% appear mixed) -- verify against the .bib source.
\bibitem{6_tuc_lq}
Vaya~Dinopoulou, M.~P., Christina~Diakaki: Applications of the urban traffic
  control strategy TUC. \emph{European Journal of Operational Research}, 2005.

% The misspelled citation key is kept on purpose: \cite commands elsewhere
% refer to it, so only the printed title is corrected.
\bibitem{leraning_from_delayed_rewards}
Watkins, C. J. C.~H.: Learning from Delayed Rewards. \emph{PhD thesis, King's
  College, Cambridge, England}, 1989.

\bibitem{q_learning}
Watkins, C. J. C.~H.; Dayan, P.: Q-learning. \emph{Machine Learning}, 1992.

\bibitem{3_i_traff_light_c}
Wiering, M.; {Van Veenen}, J.; Vreeken, J.; aj.: Intelligent traffic light
  control. \emph{European Research Consortium for Informatics and
  Mathematics}, 2003.

% The month name is written with an accent macro, matching the escapes used
% in the other entries, so it does not depend on the input encoding.
\bibitem{wooldridge}
Wooldridge, M.: \emph{Multi Agent Systems}. MIT Press, B\v{r}ezen 2005.

\end{thebibliography}