Impact of Imputation of Missing Values on Genetic Programming based Multiple Feature Construction for Classification
Created by W.Langdon from
gp-bibliography.bib Revision:1.7917
% Conference paper (IEEE CEC 2015). DOI is stored bare (no "doi:" / resolver
% prefix) so that styles can build the link themselves. The "notes" field is
% not a standard BibTeX field and is ignored by standard styles.
@InProceedings{Tran:2015:CEC,
  author =       "Cao Truong Tran and Peter Andreae and Mengjie Zhang",
  title =        "Impact of Imputation of Missing Values on Genetic
                 Programming based Multiple Feature Construction for
                 Classification",
  booktitle =    "Proceedings of 2015 IEEE Congress on Evolutionary
                 Computation (CEC 2015)",
  year =         "2015",
  editor =       "Yadahiko Murata",
  pages =        "2398--2405",
  address =      "Sendai, Japan",
  month =        "25--28 " # may,
  publisher =    "IEEE Press",
  keywords =     "genetic algorithms, genetic programming",
  DOI =          "10.1109/CEC.2015.7257182",
  abstract =     "Missing values are a common problem in many real world
                 databases. A common way to cope with this problem is to
                 use imputation methods to fill missing values with
                 plausible values. Genetic programming-based multiple
                 feature construction (GPMFC) is a filter approach to
                 multiple feature construction for classifiers using
                 Genetic programming. The GPMFC algorithm has been
                 demonstrated to improve classification performance in
                 decision tree and rule-based classifiers for complete
                 data, but it has not been tested on imputed data. This
                 paper studies the effect of GPMFC on classification
                 accuracy with imputed data and how the choice of
                 different imputation methods (mean imputation, hot deck
                 imputation, Knn imputation, EM imputation and MICE
                 imputation) affects classifiers using constructed
                 features. Results show that GPMFC improves
                 classification performance for datasets with a small
                 amount of missing values. The combination of GPMFC and
                 MICE imputation, in most cases, enhances classification
                 performance for datasets with varying amounts of
                 missing values and obtains the best classification
                 accuracy.",
  notes =        "0950 hrs 15225 CEC2015",
}
Genetic Programming entries for
Cao Truong Tran
Peter Andreae
Mengjie Zhang
Citations