% Book by Michael Wooldridge on multi-agent systems (MIT Press, March 2005).
% The key field is only a sorting/label fallback; it mirrors the citation key.
@book{wooldridge,
  author    = {Wooldridge, Michael},
  title     = {Multi Agent Systems},
  publisher = {MIT Press},
  year      = 2005,
  month     = mar,
  key       = {wooldridge},
}

% Conference paper on an urban-traffic multi-agent system.
% Filed as inproceedings because the venue is a conference, so the venue
% belongs in booktitle rather than journal.
% Braces keep the acronym RMM and the proper adjective Bayesian capitalised
% under styles that sentence-case titles.
@inproceedings{1_rmm_bayes_learning,
  author    = {Ou, H. and Zhang, W. and Xu, X.},
  title     = {Urban traffic multi-agent system based on {RMM} and {Bayesian} learning},
  booktitle = {American Control Conference},
  year      = 2002,
}

% Paper on agent-based decentralized traffic control.
% Initials are written with a space between them ("E. D.") so BibTeX parses
% them as two separate given-name tokens.
% NOTE(review): the venue may be a conference proceedings rather than a
% journal; if so, switch the entry type to inproceedings and move the venue
% to booktitle. Confirm against the publication.
@article{2_int_a_in_dec,
  author  = {Ferreira, E. D. and Subrahmanian, E. and Manstetten, D.},
  title   = {Intelligent agents in decentralized traffic control},
  journal = {Intelligent Transportation Systems},
  year    = 2001,
}

% Article on intelligent traffic light control.
% The surname "Van Veenen" is braced so it is treated as one indivisible
% last name.
% NOTE(review): the journal value names an organisation rather than a
% periodical; confirm the actual publication title.
@article{3_i_traff_light_c,
  author  = {Wiering, M. and {Van Veenen}, J. and Vreeken, J. and Koopman, A.},
  title   = {Intelligent traffic light control},
  journal = {European Research Consortium for Informatics and Mathematics},
  year    = 2003,
}

% Conference paper formalizing recursive modeling.
% Filed as inproceedings because the venue is a conference, so the venue
% belongs in booktitle rather than journal. The stray leading space in the
% venue name has been removed.
@inproceedings{4_rmm_formalization,
  author    = {Gmytrasiewicz, P. J. and Durfee, E. H.},
  title     = {A rigorous, operational formalization of recursive modeling},
  booktitle = {First International Conference on Multiagent Systems},
  year      = 1995,
}

% Czech-language article on Bayesian learning, published in Automa (2002).
% Accented letters use the brace-wrapped form ({\'e}, {\v{c}}) so BibTeX
% treats each as a single character for sorting and case changes.
% NOTE(review): the original author list ended with the placeholder token
% "NewAuthor2", which would have been printed as a real author; it has been
% dropped. If an author is genuinely missing, add the real name here.
@article{5_bayes_learn,
  author  = {Nagy, I. and Nedoma, P. and Ettler, P. and Pavelkov{\'a}, L.},
  title   = {O bayesovsk{\'e}m u{\v{c}}en{\'i}},
  journal = {Automa},
  year    = 2002,
}

% Sutton's 1988 Machine Learning article on temporal-difference methods.
% Title spelling corrected ("differences").
@article{learning_to_predict,
  author  = {Sutton, R. S.},
  title   = {Learning to predict by the methods of temporal differences},
  journal = {Machine Learning},
  year    = 1988,
}

% Bellman's 1957 book on dynamic programming.
% Filed as a book: Princeton University Press is the publisher, not a journal.
@book{dynamic_programming,
  author    = {Bellman, R.},
  title     = {Dynamic programming},
  publisher = {Princeton University Press},
  year      = 1957,
}

% Thorpe's 1997 master's thesis on traffic light control with SARSA.
% Filed as mastersthesis so the style supplies the thesis label; the
% institution goes in school instead of being packed into journal.
% Title fixed: missing space restored ("control using") and the acronym
% SARSA braced so styles cannot lowercase it.
@mastersthesis{tlc_using_sarsa,
  author = {Thorpe, T.},
  title  = {Vehicle traffic light control using {SARSA}},
  school = {Department of Computer Science, Colorado State University},
  year   = 1997,
}

% Watkins' 1989 PhD thesis on learning from delayed rewards.
% Filed as phdthesis so the style supplies the thesis label; the institution
% goes in school instead of being packed into journal.
% Title spelling corrected ("Learning"). The citation key keeps its original
% misspelling on purpose so existing citations continue to resolve.
@phdthesis{leraning_from_delayed_rewards,
  author = {Watkins, C. J. C. H.},
  title  = {Learning from Delayed Rewards},
  school = {King's College, Cambridge, England},
  year   = 1989,
}

% Watkins and Dayan's 1992 Machine Learning article on Q-learning.
% Title spelling corrected ("learning"); the letter Q is braced so styles
% that sentence-case titles keep it capitalised.
@article{q_learning,
  author  = {Watkins, C. J. C. H. and Dayan, P.},
  title   = {{Q}-learning},
  journal = {Machine Learning},
  year    = 1992,
}