Inexact Simplification of Symbolic Regression Expressions with Locality-sensitive Hashing
Created by W.Langdon from
gp-bibliography.bib Revision:1.7906
- @InProceedings{imai-aldeia:2024:GECCO,
-
author = "Guilherme Seidyo {Imai Aldeia} and
Fabricio Olivetti {De Franca} and William G. {La Cava}",
-
title = "Inexact Simplification of Symbolic Regression
Expressions with Locality-sensitive Hashing",
-
booktitle = "Proceedings of the 2024 Genetic and Evolutionary
Computation Conference",
-
year = "2024",
-
editor = "Ting Hu and Aniko Ekart and Julia Handl and
Xiaodong Li and Markus Wagner and Mario Garza-Fabre and
Kate Smith-Miles and Richard Allmendinger and Ying Bi and
Grant Dick and Amir H Gandomi and
Marcella Scoczynski Ribeiro Martins and Hirad Assimi and
Nadarajen Veerapen and Yuan Sun and Mario Andres Munyoz and
Ahmed Kheiri and Nguyen Su and Dhananjay Thiruvady and Andy Song and
Frank Neumann and Carla Silva",
-
pages = "896--904",
-
address = "Melbourne, Australia",
-
series = "GECCO '24",
-
month = "14-18 " # jul,
-
organisation = "SIGEVO",
-
publisher = "Association for Computing Machinery",
-
publisher_address = "New York, NY, USA",
-
keywords = "genetic algorithms, genetic programming, locality
sensitive hashing, simplification, symbolic
regression",
-
isbn13 = "979-8-4007-0494-9",
-
DOI = "doi:10.1145/3638529.3654147",
-
size = "9 pages",
-
abstract = "Symbolic regression (SR) searches for parametric
models that accurately fit a dataset, prioritizing
simplicity and interpretability. Despite this secondary
objective, studies point out that the models are often
overly complex due to redundant operations, introns,
and bloat that arise during the iterative process, and
can hinder the search with repeated exploration of
bloated segments. Applying a fast heuristic algebraic
simplification may not fully simplify the expression
and exact methods can be infeasible depending on size
or complexity of the expressions. We propose a novel
agnostic simplification and bloat control for SR
employing an efficient memoization with
locality-sensitive hashing (LHS). The idea is that
expressions and their sub-expressions traversed during
the iterative simplification process are stored in a
dictionary using LHS, enabling efficient retrieval of
similar structures. We iterate through the expression,
replacing subtrees with others of same hash if they
result in a smaller expression. Empirical results shows
that applying this simplification during evolution
performs equal or better than without simplification in
minimization of error, significantly reducing the
number of nonlinear functions. This technique can learn
simplification rules that work in general or for a
specific problem, and improves convergence while
reducing model complexity.",
-
notes = "GECCO-2024 GP A Recombination of the 33rd
International Conference on Genetic Algorithms (ICGA)
and the 29th Annual Genetic Programming Conference
(GP)",
- }
Genetic Programming entries for
Guilherme Seidyo Imai Aldeia
Fabricio Olivetti de Franca
William La Cava
Citations