Evolved Apache Lucene SpanFirst queries are good text classifiers
Created by W. Langdon from
gp-bibliography.bib Revision:1.8081
@Comment{Hirsch:2010:cec -- conference paper in the proceedings of IEEE
  CEC 2010 (part of WCCI 2010). The "notes" and "isbn13" fields are not
  standard BibTeX fields; they are kept as annotations.}
@InProceedings{Hirsch:2010:cec,
  author    = "Hirsch, Laurie",
  title     = "Evolved {Apache} {Lucene} {SpanFirst} queries are good text
               classifiers",
  booktitle = "IEEE Congress on Evolutionary Computation (CEC 2010)",
  year      = "2010",
  address   = "Barcelona, Spain",
  month     = "18-23 " # jul,
  publisher = "IEEE Press",
  keywords  = "genetic algorithms, genetic programming",
  isbn13    = "978-1-4244-6910-9",
  abstract  = "Human readable text classifiers have a number of
               advantages over classifiers based on complex and opaque
               mathematical models. For some time now search queries
               or rules have been used for classification purposes,
               either constructed manually or automatically. We have
               performed experiments using genetic algorithms to
               evolve text classifiers in search query format with the
               combined objective of classifier accuracy and
               classifier readability. We have found that a small set
               of disjunct Lucene SpanFirst queries effectively meet
               both goals. This kind of query evaluates to true for a
               document if a particular word occurs within the first N
               words of a document. Previously researched classifiers
               based on queries using combinations of words connected
               with OR, AND and NOT were found to be generally less
               accurate and (arguably) less readable. The approach is
               evaluated using standard test sets Reuters-21578 and
               Ohsumed and compared against several classification
               algorithms.",
  DOI       = "10.1109/CEC.2010.5585955",
  notes     = "WCCI 2010. Also known as \cite{5585955}",
}
Genetic Programming entries for
Laurence Hirsch
Citations