The Impact of Parameters Setup on a Genetic Programming Approach to Record Deduplication
Created by W.Langdon from
gp-bibliography.bib Revision:1.8051
% Conference paper from SBBD 2008 on how parameter choices affect a
% genetic-programming approach to record deduplication.
% (Text outside an @entry, such as these lines, is ignored by BibTeX.)
@InProceedings{conf/sbbd/CarvalhoLGP08,
  title =        "The Impact of Parameters Setup on a Genetic
                 Programming Approach to Record Deduplication",
  author =       "Moises G. {de Carvalho} and Alberto H. F. Laender and
                 Marcos Andre Goncalves and Thiago C. Porto",
  bibdate =      "2009-03-02",
  bibsource =    "DBLP,
                 http://dblp.uni-trier.de/db/conf/sbbd/sbbd2008.html#CarvalhoLGP08",
  booktitle =    "{XXIII} Simp{\'o}sio Brasileiro de Banco de Dados",
  publisher =    "SBC",
  year =         "2008",
  editor =       "Sandra de Amo",
  isbn13 =       "978-85-7669-205-8",
  pages =        "91--105",
  URL =          "http://www.lbd.dcc.ufmg.br:8080/colecoes/sbbd/2008/007.pdf",
  address =      "Campinas, {S}{\~a}o Paulo, Brasil",
  month =        "13--15 " # oct,
  keywords =     "genetic algorithms, genetic programming",
  size =         "15 pages",
  abstract =     "Several systems that rely on the integrity of the data
                 in order to offer high quality services, such as
                 digital libraries and e-commerce brokers, may be
                 affected by the existence of duplicates,
                 quasi-replicas, or near-duplicates entries in their
                 repositories. Because of that, there has been a huge
                 effort from private and government organizations in
                 developing effective methods for removing replicas from
                 large data repositories. This is due to the fact that
                 cleaned, replica-free repositories not only allow the
                 retrieval of higher-quality information but also lead
                 to a more concise data representation and to potential
                 savings in computational time and resources to process
                 this data. In this work, we extend the results of a
                 GP-based approach we proposed to record deduplication
                 by performing a comprehensive set of experiments
                 regarding its parameterization setup. Our experiments
                 show that some parameter choices can improve the
                 results up to 30\%. Thus, the obtained results can
                 be used as guidelines to suggest the most effective way
                 to set up the parameters of our GP-based approach to
                 record deduplication.",
  notes =        "SDG SBBD 2008.",
}
Genetic Programming entries for
Moises G de Carvalho
Alberto H F Laender
Marcos Andre Goncalves
Thiago Costa Porto
Citations