% Conference paper from the VISAPP track of VISIGRAPP 2021 (SciTePress).
% Names use the unambiguous `Last, First` form; `booktitle` holds the full
% proceedings title and `series` the track acronym. Non-ASCII symbols are
% written as LaTeX so the entry also works with classic 8-bit BibTeX.
@inproceedings{17951006859c44299b3cdae85591a4fa,
  author    = {Deng, Yong and Xiao, Jimin and Zhou, Steven Zhiying},
  title     = {A lightweight real-time stereo depth estimation network with dynamic upsampling modules},
  editor    = {Farinella, Giovanni Maria and Radeva, Petia and Braz, Jose and Bouatouch, Kadi},
  booktitle = {{VISIGRAPP} 2021 -- Proceedings of the 16th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications},
  series    = {{VISAPP}},
  pages     = {701--710},
  publisher = {SciTePress},
  year      = {2021},
  language  = {English},
  keywords  = {Deep learning, Depth estimation, Dynamic upsampling, Stereo matching},
  note      = {Publisher Copyright: Copyright {\textcopyright} 2021 by SCITEPRESS -- Science and Technology Publications, Lda. All rights reserved.; 16th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications, VISIGRAPP 2021 ; Conference date: 08-02-2021 Through 10-02-2021},
  abstract  = {Deep learning based stereo matching networks achieve great success in the depth estimation from stereo image pairs. However, current state-of-the-art methods usually are computationally intensive, which prevents them from being applied in real-time scenarios or on mobile platforms with limited computational resources. In order to tackle this shortcoming, we propose a lightweight real-time stereo matching network for disparity estimation. Our network adopts the efficient hierarchical Coarse-To-Fine (CTF) matching scheme, which starts matching from the low-resolution feature maps, and then upsamples and refines the previous disparity stage by stage until the full resolution. We can take the result of any stage as output to trade off accuracy and runtime. We propose an efficient hourglass-shaped feature extractor based on the latest MobileNet V3 to extract multi-resolution feature maps from stereo image pairs. We also propose to replace the traditional upsampling method in the CTF matching scheme with the learning-based dynamic upsampling modules to avoid blurring effects caused by conventional upsampling methods. Our model can process 1242 {$\times$} 375 resolution images with 35-68 FPS on a GeForce GTX 1660 GPU, and outperforms all competitive baselines with comparable runtime on the KITTI 2012/2015 datasets.},
}