% Conference paper from PRCV 2018 (Lecture Notes in Computer Science):
% a deep CNN combined with mixup for environmental sound classification.
% Literal percent signs inside field values are written as \% so that the
% text stays valid LaTeX if a style prints or exports the field.
@inproceedings{caa58e9c0a4b452982bdad5d8944c25b,
  author    = {Zhang, Zhichao and Xu, Shugong and Cao, Shan and Zhang, Shunqing},
  title     = {Deep convolutional neural network with mixup for environmental sound classification},
  booktitle = {Pattern Recognition and Computer Vision -- First Chinese Conference, {PRCV} 2018, Proceedings},
  editor    = {Liu, Cheng-Lin and Tan, Tieniu and Zhou, Jie and Lai, Jian-Huang and Chen, Xilin and Zheng, Nanning and Zha, Hongbin},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  year      = {2018},
  pages     = {356--367},
  doi       = {10.1007/978-3-030-03335-4_31},
  isbn      = {9783030033347},
  language  = {English},
  keywords  = {Convolutional neural network, Environmental sound classification, Mixup},
  abstract  = {Environmental sound classification (ESC) is an important and challenging problem. In contrast to speech, sound events have noise-like nature and may be produced by a wide variety of sources. In this paper, we propose to use a novel deep convolutional neural network for ESC tasks. Our network architecture uses stacked convolutional and pooling layers to extract high-level feature representations from spectrogram-like features. Furthermore, we apply mixup to ESC tasks and explore its impacts on classification performance and feature distribution. Experiments were conducted on UrbanSound8K, ESC-50 and ESC-10 datasets. Our experimental results demonstrated that our ESC system has achieved the state-of-the-art performance (83.7\%) on UrbanSound8K and competitive performance on ESC-50 and ESC-10.},
  note      = {1st Chinese Conference on Pattern Recognition and Computer Vision, PRCV 2018; conference date: 23--26 November 2018. Publisher copyright: {\textcopyright} Springer Nature Switzerland AG 2018.},
}