% Conference paper from ESWC 2014, published in the proceedings volume
% "The Semantic Web" (Lecture Notes in Computer Science series).
% The citation key is the exporter's opaque identifier; it is kept unchanged
% so existing \cite commands continue to resolve.
@inproceedings{1f275b3ee9164c79b1c3c1991beca1c2,
  author    = {Guo, Minyi and Liu, Yi and Li, Jie and Li, Huakang and Xu, Bei},
  title     = {A knowledge based approach for tackling mislabeled multi-class big social data},
  booktitle = {The Semantic Web},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  year      = {2014},
  pages     = {349--363},
  isbn      = {9783319074429},
  doi       = {10.1007/978-3-319-07443-6_24},
  language  = {English},
  keywords  = {classification, knowledge graph, label correction, label imperfection},
  abstract  = {The performance of classification models extremely relies on the quality of training data. However, label imperfection is an inherent fault of training data, which is impossible manually handled in big data environment. Various methods have been proposed to remove label noises in order to improve classification quality, with the side effect of cutting down data bulk. In this paper, we propose a knowledge based approach for tackling mislabeled multi-class big data, in which knowledge graph technique is combined with other data correction method to perceive and correct the error labels in big data. The knowledge graph is built with the medical concepts extracted from online health consulting and medical guidance. Experimental results show our knowledge graph based approach can effectively improve data quality and classification accuracy. Furthermore, this approach can be applied in other data mining tasks requiring deep understanding.},
  note      = {11th International Conference on Semantic Web: Trends and Challenges, ESWC 2014; conference date: 25--29 May 2014},
}