Lexidate: Model Evaluation and Selection with Lexicase
Created by W.Langdon from
gp-bibliography.bib Revision:1.8051
@comment{Conference-companion paper record. The DOI field holds the bare
  DOI (no resolver prefix). Non-standard fields (organisation,
  publisher_address, isbn13, size, notes) are kept as found; standard
  styles ignore field names they do not recognise.}
@InProceedings{hernandez:2024:GECCOcomp,
  author =       "Jose Guadalupe Hernandez and Anil Kumar Saini and
                 Jason H Moore",
  title =        "Lexidate: Model Evaluation and Selection with
                 Lexicase",
  booktitle =    "Proceedings of the 2024 Genetic and Evolutionary
                 Computation Conference Companion",
  year =         "2024",
  editor =       "Jean-Baptiste Mouret and Kai Qin",
  pages =        "279--282",
  address =      "Melbourne, Australia",
  series =       "GECCO '24",
  month =        "14-18 " # jul,
  organisation = "SIGEVO",
  publisher =    "Association for Computing Machinery",
  publisher_address = "New York, NY, USA",
  keywords =     "genetic algorithms, genetic programming, lexicase
                 selection, AutoML, cross-validation, model evaluation,
                 model selection, model complexity, Evolutionary Machine
                 Learning: Poster",
  isbn13 =       "979-8-4007-0495-6",
  DOI =          "10.1145/3638530.3654265",
  size =         "4 pages",
  abstract =     "Automated machine learning streamlines the task of
                 finding effective machine learning pipelines by
                 automating model training, evaluation, and selection.
                 Traditional evaluation strategies, like
                 cross-validation (CV), generate one value that averages
                 the accuracy of a pipeline's predictions. This single
                 value, however, may not fully describe the
                 generalizability of the pipeline. Here, we present
                 Lexicase-based Validation (lexidate), a method that
                 uses multiple, independent prediction values for
                 selection. Lexidate splits training data into a
                 learning set and a selection set. Pipelines are trained
                 on the learning set and make predictions on the
                 selection set. The predictions are graded for
                 correctness and used by lexicase selection to identify
                 parent pipelines. Compared to 10-fold CV, lexicase
                 reduces the training time. We test the effectiveness of
                 three lexidate configurations within the Tree-based
                 Pipeline Optimization Tool 2 (TPOT2) package on six
                 OpenML classification tasks. In one configuration, we
                 detected no difference in the accuracy of the final
                 model returned from TPOT2 on most tasks compared to
                 10-fold CV. All configurations studied here returned
                 similar or less complex final pipelines compared to
                 10-fold CV.",
  notes =        "GECCO-2024 EML A Recombination of the 33rd
                 International Conference on Genetic Algorithms (ICGA)
                 and the 29th Annual Genetic Programming Conference
                 (GP)",
}
Genetic Programming entries for
Jose Guadalupe Hernandez
Anil Kumar Saini
Jason H Moore
Citations