Learning Text Patterns using Separate-and-Conquer Genetic Programming
Created by W. Langdon from
gp-bibliography.bib Revision:1.7954
Conference paper: 18th European Conference on Genetic Programming (2015),
Lecture Notes in Computer Science volume 9025, pages 16--27.
(Text outside an entry is ignored by BibTeX, so these lines act as a comment.)

@InProceedings{Bartoli:2015:EuroGP,
  author =       "Alberto Bartoli and Andrea {De Lorenzo} and
                 Eric Medvet and Fabiano Tarlao",
  title =        "Learning Text Patterns using Separate-and-Conquer
                 Genetic Programming",
  booktitle =    "18th European Conference on Genetic Programming",
  year =         "2015",
  editor =       "Penousal Machado and Malcolm I. Heywood and
                 James McDermott and Mauro Castelli and
                 Pablo Garcia-Sanchez and Paolo Burelli and
                 Sebastian Risi and Kevin Sim",
  series =       "Lecture Notes in Computer Science",
  volume =       "9025",
  publisher =    "Springer",
  pages =        "16--27",
  address =      "Copenhagen",
  month =        "8--10 " # apr,
  organisation = "EvoStar",
  keywords =     "genetic algorithms, genetic programming, Regular
                 expressions, Multiple pattern, Programming by example,
                 Text extraction",
  isbn13 =       "978-3-319-16500-4",
  DOI =          "10.1007/978-3-319-16501-1_2",
  abstract =     "The problem of extracting knowledge from large volumes
                 of unstructured textual information has become
                 increasingly important. We consider the problem of
                 extracting text slices that adhere to a syntactic
                 pattern and propose an approach capable of generating
                 the desired pattern automatically, from a few annotated
                 examples. Our approach is based on Genetic Programming
                 and generates extraction patterns in the form of
                 regular expressions that may be input to existing
                 engines without any post-processing. Key feature of our
                 proposal is its ability of discovering automatically
                 whether the extraction task may be solved by a single
                 pattern, or rather a set of multiple patterns is
                 required. We obtain this property by means of a
                 separate-and-conquer strategy: once a candidate pattern
                 provides adequate performance on a subset of the
                 examples, the pattern is inserted into the set of final
                 solutions and the evolutionary search continues on a
                 smaller set of examples including only those not yet
                 solved adequately. Our proposal outperforms an earlier
                 state-of-the-art approach on three challenging
                 datasets.",
  notes =        "Part of \cite{Machado:2015:GP} EuroGP'2015 held in
                 conjunction with EvoCOP2015, EvoMusArt2015 and
                 EvoApplications2015",
}
Genetic Programming entries for
Alberto Bartoli
Andrea De Lorenzo
Eric Medvet
Fabiano Tarlao
Citations