% Dong and Herbert, "Compressed domain-specific data processing and analysis":
% conference paper in the proceedings of IEEE Big Data 2017, pages 325--330.
% NOTE(review): month/day below (1 Jul) do not match the conference dates
% recorded in the note field (11--14 Dec 2017) -- confirm against the
% publisher record before relying on them.
@inproceedings{063cdae043b148278c6bc614b34165b2,
  author    = {Dong, Dapeng and Herbert, John},
  title     = {Compressed domain-specific data processing and analysis},
  booktitle = {Proceedings - 2017 IEEE International Conference on Big Data, Big Data 2017},
  editor    = {Nie, Jian-Yun and Obradovic, Zoran and Suzumura, Toyotaro and Ghosh, Rumi and Nambiar, Raghunath and Wang, Chonggang and Zang, Hui and Baeza-Yates, Ricardo and Hu, Xiaohua and Kepner, Jeremy and Cuzzocrea, Alfredo and Tang, Jian and Toyoda, Masashi},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {325--330},
  year      = {2017},
  month     = jul,
  day       = {1},
  doi       = {10.1109/BigData.2017.8257941},
  language  = {English},
  keywords  = {Algorithm, Big Data, Compression, MapReduce},
  abstract  = {Domain specific data such as sensor outputs and server trace logs have low levels of symbol richness, and so they can be represented in a very compact format. In this paper, we present a bit-oriented compression scheme designed not only to represent the data compactly but also to allow MapReduce programs to perform analysis and processing directly on the compressed data, and to do so in parallel. The core of the compression scheme is a novel hybrid data structure supporting bit pattern searching in constant time, and a scheme for making a block-splittable compressed file. Supporting software allows developers to work transparently with the compressed data. Experimental results demonstrate that the proposed compression scheme can significantly reduce data size and improve MapReduce analysis performance.},
  note      = {Publisher Copyright: {\textcopyright} 2017 IEEE.; 5th IEEE International Conference on Big Data, Big Data 2017 ; Conference date: 11-12-2017 Through 14-12-2017},
}