% Book reference by Michael Wooldridge, cited under the key wooldridge.
% The explicit key field is retained from the original record.
% NOTE(review): title and publisher are reproduced exactly as recorded;
% confirm them against the book itself before publication.
@book{wooldridge,
  author    = {Wooldridge, Michael},
  title     = {Multi Agent Systems},
  publisher = {MIT Press},
  year      = {2005},
  month     = mar,
  key       = {wooldridge},
}
---|
| 9 | |
---|
% Conference paper on an urban-traffic multi-agent system.
% The venue is a conference, so it is recorded as booktitle of an
% inproceedings entry rather than as a journal.
% Acronyms and proper nouns in the title are braced so sentence-casing
% styles do not lowercase them.
% NOTE(review): confirm the author list and year against the proceedings record.
@inproceedings{1_rmm_bayes_learning,
  author    = {Ou, H. and Zhang, W. and Xu, X.},
  title     = {Urban traffic multi-agent system based on {RMM} and {Bayesian} learning},
  booktitle = {American Control Conference},
  year      = {2002},
}
---|
| 16 | |
---|
% Paper on agent-based decentralized traffic control.
% Initials are separated with a space so BibTeX parses them as two
% given-name tokens.
% NOTE(review): the venue is presumed to be the IEEE Intelligent
% Transportation Systems conference proceedings rather than a journal;
% confirm the exact proceedings title and add pages when available.
@inproceedings{2_int_a_in_dec,
  author    = {Ferreira, E. D. and Subrahmanian, E. and Manstetten, D.},
  title     = {Intelligent agents in decentralized traffic control},
  booktitle = {{IEEE} Intelligent Transportation Systems Proceedings},
  year      = {2001},
}
---|
| 23 | |
---|
% Article on intelligent traffic light control.
% The second author's surname is braced so that it is treated as a single
% indivisible last name.
% NOTE(review): the journal field holds an organisation name; this is
% presumably the consortium's periodical -- confirm the periodical title,
% issue and pages against the publication.
@article{3_i_traff_light_c,
  author  = {Wiering, M. and {Van Veenen}, J. and Vreeken, J. and Koopman, A.},
  title   = {Intelligent traffic light control},
  journal = {European Research Consortium for Informatics and Mathematics},
  year    = {2003},
}
---|
| 30 | |
---|
% Conference paper formalizing the recursive modeling method.
% The venue is a conference, so it is stored in booktitle of an
% inproceedings entry; the stray leading space in the venue name is removed.
@inproceedings{4_rmm_formalization,
  author    = {Gmytrasiewicz, P. J. and Durfee, E. H.},
  title     = {A rigorous, operational formalization of recursive modeling},
  booktitle = {Proceedings of the First International Conference on Multiagent Systems},
  year      = {1995},
}
---|
| 37 | |
---|
% Czech-language journal article on Bayesian learning.
% Accented letters use BibTeX special-character form (a brace group whose
% first token is the accent command) so sorting and label generation treat
% each one as a single letter.
% NOTE(review): the placeholder author "NewAuthor2" was removed because it is
% not a real name; confirm the complete author list against the article.
@article{5_bayes_learn,
  author  = {Nagy, I. and Nedoma, P. and Ettler, P. and Pavelkov{\'a}, L.},
  title   = {O bayesovsk{\'e}m u{\v{c}}en{\'i}},
  journal = {Automa},
  year    = {2002},
}
---|
| 44 | |
---|
% Journal article introducing temporal-difference learning.
% The title misspelling ("didffrences") is corrected, and volume, issue and
% page range are supplied so the reference is complete for an article entry.
@article{learning_to_predict,
  author  = {Sutton, R. S.},
  title   = {Learning to predict by the methods of temporal differences},
  journal = {Machine Learning},
  volume  = {3},
  number  = {1},
  pages   = {9--44},
  year    = {1988},
}
---|
| 51 | |
---|
% Monograph on dynamic programming.
% This is a book issued by a university press, so it is recorded as a book
% entry with the press in the publisher field instead of the journal field.
@book{dynamic_programming,
  author    = {Bellman, R.},
  title     = {Dynamic Programming},
  publisher = {Princeton University Press},
  year      = {1957},
}
---|
| 58 | |
---|
% Master's thesis on traffic light control with the SARSA algorithm.
% Recorded as a mastersthesis entry: the style supplies the thesis label, so
% only the granting department and university go in the school field.
% The missing space in the title ("controlusing") is restored and the
% acronym is braced to survive sentence-casing styles.
@mastersthesis{tlc_using_sarsa,
  author = {Thorpe, T.},
  title  = {Vehicle traffic light control using {SARSA}},
  school = {Department of Computer Science, Colorado State University},
  year   = {1997},
}
---|
| 65 | |
---|
% PhD thesis on learning from delayed rewards.
% Recorded as a phdthesis entry: the style supplies the thesis label, the
% college goes in school and the location in address.
% The title misspelling ("Leraning") is corrected. The citation key keeps its
% original spelling on purpose so that existing citations continue to resolve.
@phdthesis{leraning_from_delayed_rewards,
  author  = {Watkins, C. J. C. H.},
  title   = {Learning from Delayed Rewards},
  school  = {King's College, University of Cambridge},
  address = {Cambridge, England},
  year    = {1989},
}
---|
| 72 | |
---|
% Journal article presenting the Q-learning algorithm.
% The title misspelling ("Q-leraning") is corrected, and volume, issue and
% page range are supplied so the reference is complete for an article entry.
@article{q_learning,
  author  = {Watkins, C. J. C. H. and Dayan, P.},
  title   = {Q-learning},
  journal = {Machine Learning},
  volume  = {8},
  number  = {3--4},
  pages   = {279--292},
  year    = {1992},
}
---|