% Conference paper (SSPS 2022, ACM International Conference Proceeding Series).
% Acronyms are brace-protected so sentence-casing styles keep their capitals;
% author names use the unambiguous "Last, First" form.
@inproceedings{ea49dbc819694a45a0babe7b2135923c,
  author    = {Yang, Yali and Xu, Yuanping and Zhang, Chaolong and Xu, Zhijie and Huang, Jian},
  title     = {Hierarchical Vision Transformer with Channel Attention for {RGB-D} Image Segmentation},
  booktitle = {{SSPS} 2022 - 2022 4th International Symposium on Signal Processing Systems},
  series    = {{ACM} International Conference Proceeding Series},
  publisher = {Association for Computing Machinery},
  year      = {2022},
  month     = mar,
  day       = {25},
  pages     = {68--73},
  doi       = {10.1145/3532342.3532352},
  language  = {English},
  keywords  = {Channel attention, Depth images, Multi-modal, Segmentation, Swin Transformer},
  abstract  = {Although convolutional neural networks (CNNs) have become the mainstream for image processing and achieved great success in the past decade, due to the local characteristics, CNN is difficult to obtain global and long-range semantical information. Moreover, in some scenes, the pure RGB image-based model is difficult to accurately identify the pixel classification and finely segment the edge of objects. This study presents a hierarchical vision Transformer model named Swin-RGB-D to incorporate and exploit the depth information in depth images to supplement and enhance the ambiguous and obscure features in RGB images. In this design, RGB and depth images are used as the two inputs of the two-branch network. The upstream branch applies the Swin Transform which is capable of learning global continuous information from RGB images for segmentation; the other branch performs channel attention on depth image to abstract the feature correlation and dependency between channels and generates a weight matrix. Then matrix multiplication on the feature maps in each stage of the down-sampling process is performed for weighted multi-modal feature extraction. Then this study adds the fused maps to the up-sampled feature maps of the corresponding size, which sufficiently compensates for the distortion of feature in the sampling process. The experiment results on the two benchmark datasets show that the proposed model makes the network more sensitive to edge information.},
  note      = {Publisher Copyright: {\textcopyright} 2022 ACM.; 4th International Symposium on Signal Processing Systems, SSPS 2022 ; Conference date: 25-03-2022 Through 27-03-2022},
}