% This file was created with JabRef 2.7b.
% Encoding: UTF-8

% Bibliography database: one entry per cited work, one field per line.
% Citation keys are referenced from the documents and must not be renamed
% (this includes the misspelt key "leraning_from_delayed_rewards").
% Author lists use "Last, First and Last, First"; acronyms and proper nouns
% in titles are brace-protected so that styles cannot lowercase them.

@BOOK{7_lq_methods,
  author = {Anderson, B. D. O. and Moore, J. B.},
  title = {Optimal Control - Linear Quadratic Methods},
  publisher = {Prentice Hall},
  address = {Englewood Cliffs, NJ},
  year = {1990},
  owner = {jabu},
  timestamp = {2011.11.24}
}

@BOOK{dynamic_programming,
  author = {R. Bellman},
  title = {Dynamic programming},
  publisher = {Princeton University Press},
  year = {1957}
}

@ARTICLE{2_int_a_in_dec,
  author = {E.D. Ferreira and E. Subrahmanian and D. Manstetten},
  title = {Intelligent agents in decentralized traffic control},
  journal = {Intelligent Transportation Systems},
  year = {2001}
}

@INPROCEEDINGS{4_rmm_formalization,
  author = {P. J. Gmytrasiewicz and E. H. Durfee},
  title = {A rigorous, operational formalization of recursive modeling},
  booktitle = {First International Conference on Multiagent Systems},
  year = {1995}
}

% NOTE(review): the author list below originally ended with the placeholder
% token "NewAuthor2"; it is rendered as "and others" until the real name is
% confirmed against the published article.
@ARTICLE{5_bayes_learn,
  author = {I. Nagy and P. Nedoma and P. Ettler and L. Pavelkov{\'a} and others},
  title = {O bayesovsk{\'e}m u\v{c}en{\'i}},
  journal = {Automa},
  year = {2002}
}

@INPROCEEDINGS{1_rmm_bayes_learning,
  author = {H. Ou and W. Zhang and X. Xu},
  title = {Urban traffic multi-agent system based on {RMM} and {Bayesian} learning},
  booktitle = {American Control Conference},
  year = {2002}
}

@ARTICLE{17_fronta,
  author = {Pecherkov{\'a}, P. and Dun{\'i}k, J. and Fl{\'i}dr},
  title = {Modelling and Simultaneous Estimation of State and Parameters of Traffic System},
  journal = {Robotics, Automation and Control},
  year = {2008},
  owner = {jabu},
  timestamp = {2011.12.31}
}

@ARTICLE{learning_to_predict,
  author = {R. S. Sutton},
  title = {Learning to predict by the methods of temporal differences},
  journal = {Machine Learning},
  year = {1988}
}

@MASTERSTHESIS{tlc_using_sarsa,
  author = {T. Thorpe},
  title = {Vehicle traffic light control using {SARSA}},
  school = {Department of Computer Science, Colorado State University},
  year = {1997}
}

@ARTICLE{6_tuc_lq,
  author = {Dinopoulou, Vaya and Diakaki, Christina and Papageorgiou, Markos},
  title = {Applications of the urban traffic control strategy {TUC}},
  journal = {European Journal of Operational Research},
  year = {2005},
  owner = {jabu},
  timestamp = {2011.11.24}
}

@PHDTHESIS{leraning_from_delayed_rewards,
  author = {C. J. C. H. Watkins},
  title = {Learning from Delayed Rewards},
  school = {King's College},
  address = {Cambridge, England},
  year = {1989}
}

@ARTICLE{q_learning,
  author = {C. J. C. H. Watkins and P. Dayan},
  title = {{Q}-learning},
  journal = {Machine Learning},
  year = {1992}
}

@ARTICLE{3_i_traff_light_c,
  author = {M. Wiering and J. {Van Veenen} and J. Vreeken and A. Koopman},
  title = {Intelligent traffic light control},
  journal = {European Research Consortium for Informatics and Mathematics},
  year = {2003}
}

@BOOK{wooldridge,
  title = {Multi Agent Systems},
  publisher = {MIT Press},
  year = {2005},
  author = {Michael Wooldridge},
  month = mar,
  key = {wooldridge}
}