
Department of Computer Science
Birkbeck College
Malet Street
London WC1E 7HX
E-Mail: tom@dcs.bbk.ac.uk
Telephone: 020 7631 6721
Research: Reward Schemes for Learning Systems
The credit assignment problem for production systems in delayed
payoff situations has been formalized in a conceptual model in
which the environment of the system is a finite automaton. A reward
scheme has been exhibited which avoids detrimental
biases even when eligibility sets overlap. The demonstration that
such biases are avoided is an analogue of the proof of Fisher's
fundamental theorem of natural selection.
To help provide a conceptual testbed for reward schemes, a new
cascade
decomposition of finite automata has been obtained.
Sometimes current payoff is due to a system
action taken a long time ago. The question is, how long ago
might such an action have occurred?
This question can be usefully re-formulated if the environment
is decomposed using the
cascade decomposition. In the re-formulation it is often
possible to ignore all but the first component of the
decomposition.
Subgoal reward schemes avoid the
necessity of explicitly remembering past system actions. Current project
work is investigating the differences between genetic schemes
and subgoal reward schemes. Holland's Bucket Brigade is a
subgoal reward scheme particularly suited to the investigation
of these differences.
It is hoped that a point of view can be found from which the
Bucket Brigade approach and the genetic approach will not look as
different as they currently appear.
Subgoal reward schemes are vulnerable to freeloaders. Obviously, group
selection would combat freeloading. We have indicated how group selection
might occur when the bucket brigade is used.
Classifier systems can combine subgoal reward schemes and genetic schemes, and
so can highlight the differences between schemes. We have
made a tentative start at investigating the interaction between these two
types of schemes in classifier systems.
Q-learning is another subgoal reward scheme. We have shown that it has some
disadvantages vis a vis the bucket brigade.
% Journal article: Evolutionary Computation, volume 9, number 3 (2001).
@article{recomb,
  author  = {Westerdale, T. H.},
  title   = {Local Reinforcement and Recombination in Classifier Systems},
  journal = {Evolutionary Computation},
  year    = {2001},
  volume  = {9},
  number  = {3},
  pages   = {259--281}
}
% Conference paper: Second Asia-Pacific Conference on Genetic Algorithms
% and Applications (2000).
@inproceedings{groups,
  author    = {Westerdale, Tom},
  title     = {Implicit Group Selection in a {Michigan} Classifier System},
  booktitle = {Proceedings of the Second Asia-Pacific Conference on
               Genetic Algorithms and Applications},
  editor    = {Kin Keung Lai and Osamu Katai and Mitsuo Gen and Baoding Liu},
  publisher = {Global-Link Publishing Company},
  address   = {Hong Kong},
  year      = {2000},
  pages     = {258--266}
}
% Conference paper: Second Asia-Pacific Conference on Genetic Algorithms
% and Applications (2000). The proper noun in the title is braced as a
% whole word so that sentence-casing styles keep its capital.
@inproceedings{falsepeaks,
  author    = {Westerdale, Tom},
  title     = {The Bucket Brigade on {Markov} chains has no False Peaks},
  booktitle = {Proceedings of the Second Asia-Pacific Conference on
               Genetic Algorithms and Applications},
  editor    = {Kin Keung Lai and Osamu Katai and Mitsuo Gen and Baoding Liu},
  publisher = {Global-Link Publishing Company},
  address   = {Hong Kong},
  year      = {2000},
  pages     = {359--367}
}
% Journal article: Complexity, volume 4, number 2 (1999). The note records
% the longer electronic version exactly as given by the author.
@article{approach,
  author  = {Westerdale, T. H.},
  title   = {An Approach to Credit Assignment in Classifier Systems},
  journal = {Complexity},
  year    = {1999},
  volume  = {4},
  number  = {2},
  pages   = {49--52},
  note    = {electronic version, 40 pages,
             http://journals.wiley.com/complexity}
}
% Workshop paper: 1999 Genetic and Evolutionary Computation Conference
% Workshop Program. The editor field holds only the personal name so that
% BibTeX can parse it; the affiliation that used to be appended to the name
% is kept in annote, which the standard styles do not print.
@inproceedings{stewartsworkshop,
  author    = {Westerdale, Tom},
  title     = {Wilson's Error Measurement and the {Markov}
               Property -- {Identifying} Detrimental Classifiers},
  booktitle = {Proceedings of the 1999 Genetic and Evolutionary
               Computation Conference Workshop Program},
  editor    = {Wu, Annie S.},
  address   = {Orlando, Florida},
  year      = {1999},
  pages     = {314--321},
  annote    = {Editor affiliation: Naval Research Laboratory}
}
% Conference paper: Second Annual Conference on Genetic Programming (1997).
% The word after the dash is braced so that sentence-casing styles keep its
% capital.
@inproceedings{nowonder,
  author    = {Westerdale, T. H.},
  title     = {Classifier Systems -- {No} Wonder They Don't Work},
  booktitle = {Proceedings of the Second Annual Conference on Genetic Programming},
  editor    = {John R. Koza and others},
  publisher = {Morgan Kaufmann},
  address   = {San Francisco, CA},
  year      = {1997},
  pages     = {529--537}
}
% Paper in the collection Foundations of Genetic Algorithms (1991).
% The word after the question mark is braced so that sentence-casing styles
% keep its capital.
@inproceedings{queasy,
  author    = {Westerdale, T. H.},
  title     = {Quasimorphisms or Queasymorphisms? {Modeling} Finite
               Automaton Environments},
  booktitle = {Foundations of Genetic Algorithms},
  editor    = {Gregory J. E. Rawlins},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1991},
  pages     = {128--147}
}
% Conference paper: Fourth International Conference on Genetic Algorithms
% (1991).
@inproceedings{prokaryote,
  author    = {Westerdale, T. H.},
  title     = {Redundant Classifiers and Prokaryote Genomes},
  booktitle = {Proceedings of the Fourth International Conference on Genetic Algorithms},
  editor    = {Richard K. Belew and Lashon B. Booker},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1991},
  pages     = {354--360}
}
% Conference paper: Third International Conference on Genetic Algorithms
% (1989).
@inproceedings{defense,
  author    = {Westerdale, T. H.},
  title     = {A Defense of the Bucket Brigade},
  booktitle = {Proceedings of the Third International Conference on Genetic Algorithms},
  editor    = {J. David Schaffer},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1989},
  pages     = {282--290}
}
% Journal article: Information and Computation, volume 77, number 3 (1988).
@article{decomp,
  author  = {Westerdale, T. H.},
  title   = {An Automaton Decomposition for Learning System Environments},
  journal = {Information and Computation},
  year    = {1988},
  volume  = {77},
  number  = {3},
  pages   = {179--191}
}
% Conference paper: Second International Conference on Genetic Algorithms
% and their Applications (1987).
@inproceedings{altruism,
  author    = {Westerdale, T. H.},
  title     = {Altruism in the Bucket Brigade},
  booktitle = {Proceedings of the Second International Conference on Genetic Algorithms and their Applications},
  editor    = {John J. Grefenstette},
  publisher = {Lawrence Erlbaum Associates},
  address   = {Hillsdale, N.J.},
  year      = {1987},
  pages     = {22--26}
}
% Journal article: IEEE Trans. Syst., Man, Cybern., volume SMC-16,
% number 3 (1986). The journal name is kept in the abbreviated form the
% author supplied.
@article{IEEE,
  author  = {Westerdale, T. H.},
  title   = {A Reward Scheme for Production Systems with Overlapping Conflict Sets},
  journal = {IEEE Trans. Syst., Man, Cybern.},
  year    = {1986},
  volume  = {SMC-16},
  number  = {3},
  pages   = {369--383}
}
% Conference paper: an International Conference on Genetic Algorithms and
% their Applications (1985). No publisher is recorded for this entry.
@inproceedings{bucketnotgenetic,
  author    = {Westerdale, T. H.},
  title     = {The Bucket Brigade is Not Genetic},
  booktitle = {Proceedings of an International Conference on Genetic Algorithms and their Applications},
  editor    = {John J. Grefenstette},
  address   = {Carnegie-Mellon University, Pittsburgh},
  year      = {1985},
  pages     = {45--59}
}