@inproceedings{8067e6c1ee184a039726de5996e71e22,
  author        = {Zhu, Jiayi and Ni, Pin and Li, Yuming and Peng, Junkun and Dai, Zhenjin and Li, Gangmin and Bai, Xuming},
  title         = {An {Word2vec} based on {Chinese} Medical Knowledge},
  internal-note = {Title grammar ("An Word2vec") is [sic] per the published paper; do not "correct" it.},
  booktitle     = {Proceedings - 2019 IEEE International Conference on Big Data, Big Data 2019},
  editor        = {Baru, Chaitanya and Huan, Jun and Khan, Latifur and Hu, {Xiaohua Tony} and Ak, Ronay and Tian, Yuanyuan and Barga, Roger and Zaniolo, Carlo and Lee, Kisung and Ye, {Yanfang Fanny}},
  series        = {Proceedings - 2019 IEEE International Conference on Big Data, Big Data 2019},
  pages         = {6263--6265},
  publisher     = {Institute of Electrical and Electronics Engineers Inc.},
  year          = {2019},
  month         = dec,
  doi           = {10.1109/BigData47090.2019.9005510},
  language      = {English},
  keywords      = {EMR, Language Model, Word Embedding},
  abstract      = {Introducing a large amount of external prior domain knowledge will effectively improve the performance of the word embedded language model in downstream NLP tasks. Based on this assumption, we collect and collate a medical corpus data with about 36M (Million) characters and use the data of CCKS2019 as the test set to carry out multiple classifications and named entity recognition (NER) tasks with the generated word and character vectors. Compared with the results of BERT, our models obtained the ideal performance and efficiency results.},
  note          = {Funding Information: VI. ACKNOWLEDGEMENT This work is partially supported by the AI University Research Centre (AI-URC) through XJTLU Key Programme Special Fund (KSF-P-02) and KSF-A-17. And it is also partially supported by Suzhou Science and Technology Programme Key Industrial Technology Innovation programme with project code SYG201840. We appreciate their support and guidance. Publisher Copyright: {\textcopyright} 2019 IEEE.; 2019 IEEE International Conference on Big Data, Big Data 2019 ; Conference date: 09-12-2019 Through 12-12-2019},
}