\begin{thebibliography}{10}
% Fallback definitions for macros the entries below may use. Each
% \providecommand only takes effect if the macro is not already defined.
%
% \url: typeset its argument in typewriter type.
\providecommand{\url}[1]{\texttt{#1}}
% \urlprefix: text printed in front of a URL.
\providecommand{\urlprefix}{URL }
% \doi: the \ifx test is true when \urlstyle is undefined (or equal to \relax).
%   - true:  print "doi:" followed by the argument as ordinary text, with an
%            empty \discretionary (a permitted break point) after the colon.
%   - false: print "doi:", open a group, select the rm URL style and hand over
%            to \Url. The matching \endgroup is not in this file -- presumably
%            issued by \Url itself; NOTE(review): confirm against the url package.
\expandafter\ifx\csname urlstyle\endcsname\relax
  \providecommand{\doi}[1]{doi:\discretionary{}{}{}#1}\else
  \providecommand{\doi}{doi:\discretionary{}{}{}\begingroup
  \urlstyle{rm}\Url}\fi
% \selectlanguage: no-op fallback; the argument is discarded.
\providecommand{\selectlanguage}[1]{\relax}
% \eprint: the optional first argument is ignored; the mandatory argument is
% passed to \url.
\providecommand{\eprint}[2][]{\url{#2}}

\bibitem{7_lq_methods}
Anderson, B. D.~O.; Moore, J.~B.: Optimal Control: Linear Quadratic Methods.
  \emph{Prentice Hall, Englewood Cliffs, NJ}, 1990.

\bibitem{dynamic_programming}
Bellman, R.: Dynamic programming. \emph{Princeton University Press}, 1957.

\bibitem{2_int_a_in_dec}
Ferreira, E.; Subrahmanian, E.; Manstetten, D.: Intelligent agents in
  decentralized traffic control. \emph{Intelligent Transportation Systems},
  2001.

\bibitem{4_rmm_formalization}
Gmytrasiewicz, P.~J.; Durfee, E.~H.: A rigorous, operational formalization of
  recursive modeling. \emph{First International Conference on Multiagent
  Systems}, 1995.

\bibitem{5_bayes_learn}
Nagy, I.; Nedoma, P.; Ettler, P.; aj.: O bayesovsk{\'e}m u\v{c}en{\'i}.
  \emph{Automa}, 2002.

\bibitem{1_rmm_bayes_learning}
Ou, H.; Zhang, W.; Xu, X.: Urban traffic multi-agent system based on RMM and
  Bayesian learning. \emph{American Control Conference}, 2002.

\bibitem{17_fronta}
Pecherkov{\'a}, P.; Dun{\'i}k, J.; Fl{\'i}dr, M.: Modelling and Simultaneous Estimation of State and
  Parameters of Traffic System. \emph{Robotics, Automation and Control}, 2008.

\bibitem{learning_to_predict}
Sutton, R.~S.: Learning to predict by the methods of temporal differences.
  \emph{Machine Learning}, 1988.

\bibitem{tlc_using_sarsa}
Thorpe, T.: Vehicle traffic light control using SARSA. \emph{Master's thesis,
  Department of Computer Science, Colorado State University}, 1997.

\bibitem{6_tuc_lq}
Dinopoulou, V.; Diakaki, C.; Papageorgiou, M.: Applications of the urban traffic
  control strategy TUC. \emph{European Journal of Operational Research}, 2005.

\bibitem{leraning_from_delayed_rewards}
Watkins, C. J. C.~H.: Learning from Delayed Rewards. \emph{PhD thesis, King's
  College, Cambridge, England}, 1989.

\bibitem{q_learning}
Watkins, C. J. C.~H.; Dayan, P.: Q-learning. \emph{Machine Learning}, 1992.

\bibitem{3_i_traff_light_c}
Wiering, M.; {Van Veenen}, J.; Vreeken, J.; aj.: Intelligent traffic light
  control. \emph{European Research Consortium for Informatics and Mathematics},
  2003.

\bibitem{wooldridge}
Wooldridge, M.: \emph{Multi Agent Systems}. MIT Press, Březen 2005.

\end{thebibliography}