Tom Westerdale's Home Page

Contact information

 
Department of Computer Science
Birkbeck College
Malet Street
London WC1E 7HX

E-Mail: tom@dcs.bbk.ac.uk
Telephone: 020 7631 6721

Research

 
Research:  Reward Schemes for Learning Systems
 
The credit assignment problem for production systems in delayed
payoff situations has been formalized in a conceptual model in
which the environment of the system is a finite automaton.  A
reward scheme has been exhibited which avoids detrimental
biases even when eligibility sets overlap.  The demonstration that
such biases are avoided is an analogue of the proof of Fisher's
fundamental theorem of natural selection.
 
To help provide a conceptual testbed for reward schemes, a new
cascade
decomposition of finite automata has been obtained.
Sometimes current payoff is due to a system
action taken a long time ago.  The question is, how long ago
might such an action have occurred?
This question can be usefully re-formulated if the environment
is decomposed using the
cascade decomposition.  In the re-formulation it is often
possible to ignore all but the first component of the
decomposition.
 
Subgoal reward schemes avoid the
necessity of explicitly remembering past system actions.  Current project
work is investigating the differences between genetic schemes
and subgoal reward schemes.  Holland's Bucket Brigade is a
subgoal reward scheme particularly suited to the investigation
of these differences. 
It is hoped that a point of view can be found from which the
Bucket Brigade approach and the genetic approach will not look as
different as they currently appear.
 
Subgoal reward schemes are vulnerable to freeloaders.  Obviously, group
selection would combat freeloading.  We have indicated how group selection
might occur when the Bucket Brigade is used.
 
Classifier systems can combine subgoal reward schemes and genetic schemes, and
so can highlight the differences between schemes.  We have
made a tentative start at investigating the interaction between these two
types of schemes in classifier systems.
 
Q-learning is another subgoal reward scheme.  We have shown that it has some
disadvantages compared with the Bucket Brigade.

Publications

 
@article{recomb,
        author =       "Westerdale, T. H.",
        title =        "Local Reinforcement and Recombination in Classifier Systems",
        journal =      "Evolutionary Computation",
        volume =       "9",
        number =       "3",
        pages =        "259--281",
        year =         "2001"
}
 
@inproceedings{groups,
        author =       "Westerdale, Tom",
        title =        "Implicit Group Selection in a {Michigan} Classifier
                       System",
        booktitle =    "Proceedings of the Second Asia-Pacific Conference on
                       Genetic Algorithms and Applications",
        editor =       "Lai, Kin Keung and Katai, Osamu and Gen, Mitsuo and Liu, Baoding",
        publisher =    "Global-Link Publishing Company",
        address =      "Hong Kong",
        pages =        "258--266",
        year =         "2000"
}
 
@inproceedings{falsepeaks,
        author =       "Westerdale, Tom",
        title =        "The Bucket Brigade on {Markov} chains has no False
                       Peaks",
        booktitle =    "Proceedings of the Second Asia-Pacific Conference on
                       Genetic Algorithms and Applications",
        editor =       "Lai, Kin Keung and Katai, Osamu and Gen, Mitsuo and Liu, Baoding",
        publisher =    "Global-Link Publishing Company",
        address =      "Hong Kong",
        pages =        "359--367",
        year =         "2000"
}
 
@article{approach,
        author =       "Westerdale, T. H.",
        title =        "An Approach to Credit Assignment in Classifier Systems",
        journal =      "Complexity",
        volume =       "4",
        number =       "2",
        pages =        "49--52",
        year =         "1999",
        note =         "electronic version, 40 pages, 
                         http://journals.wiley.com/complexity"
}
 
@inproceedings{stewartsworkshop,
        author =       "Westerdale, Tom",
        title =        "Wilson's Error Measurement and the {Markov}
                       Property -- {Identifying} Detrimental Classifiers",
        booktitle =    "Proceedings of the 1999 Genetic and Evolutionary
                       Computation Conference Workshop Program",
        editor =       "Wu, Annie S.",
        editoraffiliation = "Naval Research Laboratory",
        address =      "Orlando, Florida",
        pages =        "314--321",
        year =         "1999"
}
 
@inproceedings{nowonder,
        author =       "Westerdale, T. H.",
        title =        "Classifier Systems -- {No} Wonder They Don't Work",
        booktitle =    "Proceedings of the Second Annual Conference on Genetic Programming",
        editor =       "Koza, John R. and others",
        publisher =    "Morgan Kaufmann",
        address =      "San Francisco, CA",
        pages =        "529--537",
        year =         "1997"
}
 
@inproceedings{queasy,
        author =       "Westerdale, T. H.",
        title =        "Quasimorphisms or Queasymorphisms? {Modeling} Finite
                       Automaton Environments",
        booktitle =    "Foundations of Genetic Algorithms",
        editor =       "Rawlins, Gregory J. E.",
        publisher =    "Morgan Kaufmann",
        address =      "San Mateo, CA",
        pages =        "128--147",
        year =         "1991"
}
 
@inproceedings{prokaryote,
        author =       "Westerdale, T. H.",
        title =        "Redundant Classifiers and Prokaryote Genomes",
        booktitle =    "Proceedings of the Fourth International Conference on Genetic Algorithms",
        editor =       "Belew, Richard K. and Booker, Lashon B.",
        publisher =    "Morgan Kaufmann",
        address =      "San Mateo, CA",
        pages =        "354--360",
        year =         "1991"
}
 
@inproceedings{defense,
        author =       "Westerdale, T. H.",
        title =        "A Defense of the Bucket Brigade",
        booktitle =    "Proceedings of the Third International Conference on Genetic Algorithms",
        editor =       "Schaffer, J. David",
        publisher =    "Morgan Kaufmann",
        address =      "San Mateo, CA",
        pages =        "282--290",
        year =         "1989"
}
 
@article{decomp,
        author =       "Westerdale, T. H.",
        title =        "An Automaton Decomposition for Learning System Environments",
        journal =      "Information and Computation",
        volume =       "77",
        number =       "3",
        pages =        "179--191",
        year =         "1988"
}
 
@inproceedings{altruism,
        author =       "Westerdale, T. H.",
        title =        "Altruism in the Bucket Brigade",
        booktitle =    "Proceedings of the Second International Conference on Genetic Algorithms and their Applications",
        editor =       "Grefenstette, John J.",
        publisher =    "Lawrence Erlbaum Associates",
        address =      "Hillsdale, N.J.",
        pages =        "22--26",
        year =         "1987"
}
 
@article{IEEE,
        author =       "Westerdale, T. H.",
        title =        "A Reward Scheme for Production Systems with Overlapping Conflict Sets",
        journal =      "{IEEE} Transactions on Systems, Man, and Cybernetics",
        volume =       "SMC-16",
        number =       "3",
        pages =        "369--383",
        year =         "1986"
}
 
@inproceedings{bucketnotgenetic,
        author =       "Westerdale, T. H.",
        title =        "The Bucket Brigade is Not Genetic",
        booktitle =    "Proceedings of an International Conference on Genetic Algorithms and their Applications",
        editor =       "Grefenstette, John J.",
        address =      "Carnegie-Mellon University, Pittsburgh",
        pages =        "45--59",
        year =         "1985"
}