% Conference paper: Xu, Wang, Yang, Han -- hybrid-sampling method for imbalanced data (BDAI 2021).
% Cleaned from an automated export; the citation key is kept so existing \cite calls still resolve.
%  - The exported "series" field was a verbatim copy of "booktitle" (not a real book series)
%    and has been dropped so styles that print a series do not repeat the proceedings name.
%  - "note" no longer repeats the conference name; the conference dates (2 to 4 July 2021,
%    matching the month/day fields) are written unambiguously.
%  - Names are in "Last, First" form; acronyms in booktitle are brace-protected.
%  - "day" is not a standard BibTeX field; most classic styles ignore it. Kept as metadata.
@inproceedings{a083e1ad20a749b5977113d105db2b9c,
  author    = {Xu, Biru and Wang, Wenjia and Yang, Rui and Han, Qi},
  title     = {An Improved Unbalanced Data Classification Method Based on Hybrid Sampling Approach},
  booktitle = {2021 {IEEE} 4th International Conference on Big Data and Artificial Intelligence, {BDAI} 2021},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {125--129},
  year      = {2021},
  month     = jul,
  day       = {2},
  doi       = {10.1109/BDAI52447.2021.9515306},
  language  = {English},
  keywords  = {data mining, hybrid sampling, imbalanced dataset, SMOTE},
  abstract  = {The problem of data imbalance has received far-reaching concerns since they could affect the accuracy of classification problem in the area of machine learning. As the minority class instances can be ignored by traditional classifiers, it is necessary to improve the recognition rate of minority instances. Therefore, the paper proposes a new hybrid sampling method to solve the data imbalance problem by enlarging the proportion of minority instances. For the oversampling part, a variant of SMOTE is provided combining methods of LR-SMOTE and CCR (Combined Cleaning and Resampling Algorithm); for the under-sampling part, the Tomek-link method is utilized to complete the task. After the pre-processing stage, the data set is classified by Random Forest (RF). Experimental results show that the novel algorithm effectively enhances the performance of RF on the data set with a higher accuracy.},
  note      = {Publisher Copyright: {\textcopyright} 2021 IEEE. Conference date: 2--4 July 2021},
}