A statistical learning theory approach of bloat
Created by W.Langdon from
gpbibliography.bib Revision:1.7802
 @InProceedings{1068309,

author = "Sylvain Gelly and Olivier Teytaud and
Nicolas Bredeche and Marc Schoenauer",

title = "A statistical learning theory approach of bloat",

booktitle = "{GECCO 2005}: Proceedings of the 2005 conference on
Genetic and evolutionary computation",

year = "2005",

editor = "HansGeorg Beyer and UnaMay O'Reilly and
Dirk V. Arnold and Wolfgang Banzhaf and Christian Blum and
Eric W. Bonabeau and Erick CantuPaz and
Dipankar Dasgupta and Kalyanmoy Deb and James A. Foster and
Edwin D. {de Jong} and Hod Lipson and Xavier Llora and
Spiros Mancoridis and Martin Pelikan and Guenther R. Raidl and
Terence Soule and Andy M. Tyrrell and
JeanPaul Watson and Eckart Zitzler",

volume = "2",

ISBN = "1595930108",

pages = "17831784",

address = "Washington DC, USA",

URL = "http://gpbib.cs.ucl.ac.uk/gecco2005/docs/p1783.pdf",

URL = "http://www.lri.fr/~teytaud/eabloat.pdf",

broken = "http://www.lri.fr/~teytaud/eabloat/eabloat.html",

DOI = "doi:10.1145/1068009.1068309",

publisher = "ACM Press",

publisher_address = "New York, NY, 102861405, USA",

month = "2529 " # jun,

organisation = "ACM SIGEVO (formerly ISGEC)",

keywords = "genetic algorithms, genetic programming, Poster, code
bloat, code growth, reliability, statistical learning
theory, theory",

size = "2 pages",

abstract = "Code bloat, the excessive increase of code size, is an
important issue in Genetic Programming (GP). This paper
proposes a theoretical analysis of code bloat in the
framework of symbolic regression in GP, from the
viewpoint of Statistical Learning Theory, a well
grounded mathematical toolbox for Machine Learning. Two
kinds of bloat must be distinguished in that context,
depending whether the target function lies in the
search space or not. Then, important mathematical
results are proved using classical results from
Statistical Learning. Namely, the VapnikChervonenkis
dimension of programs is computed, and further results
from Statistical Learning allow to prove that a
parsimonious fitness ensures Universal Consistency (the
solution minimising the empirical error does converge
to the best possible error when the number of examples
goes to infinity). However, it is proved that the
standard method consisting in choosing a maximal
program size depending on the number of examples might
still result in programs of infinitely increasing size
with their accuracy; a more complicated modification of
the fitness is proposed that theoretically avoids
unnecessary bloat while nevertheless preserving the
Universal Consistency.
Full paper available at
http://www.lri.fr/~teytaud/longBloat.pdf
\cite{gelly:2005:longBloat}",

notes = "GECCO2005 A joint meeting of the fourteenth
international conference on genetic algorithms
(ICGA2005) and the tenth annual genetic programming
conference (GP2005).
ACM Order Number 910052
eabloat.pdf is substantially more complete than poster
in GECCO proceedings",
 }
Genetic Programming entries for
Sylvain Gelly
Olivier Teytaud
Nicolas Bredeche
Marc Schoenauer
Citations