% Conference paper in the PRCV 2025 proceedings (Lecture Notes in Computer Science).
% The DOI is stored bare (no LaTeX escaping); case-sensitive tokens in the title and
% booktitle are brace-protected so sentence-casing styles keep them intact.
@inproceedings{d7b4e69a56b749ad85b96d3dfd0b4155,
  author    = {Deng, Bowen and Rao, Yutao and Wu, Fangyu and Zhang, Junjie},
  title     = {{CostDiff}: Residual Diffusion-Based Cost Map Refinement for Open-Vocabulary Semantic Segmentation},
  booktitle = {Pattern Recognition and Computer Vision - 8th Chinese Conference, {PRCV} 2025, Proceedings},
  editor    = {Kittler, Josef and Xiong, Hongkai and Lin, Weiyao and Yang, Jian and Chen, Xilin and Lu, Jiwen and Yu, Jingyi and Zheng, Weishi},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  year      = {2026},
  pages     = {120--134},
  isbn      = {9789819557608},
  doi       = {10.1007/978-981-95-5761-5_9},
  language  = {English},
  keywords  = {Cost Map Refinement, Open-Vocabulary Semantic Segmentation, Residual Diffusion},
  abstract  = {Open-Vocabulary Semantic Segmentation (OVSS) empowers models to recognize novel classes beyond predefined categories. While contrastive Vision-Language Models (VLMs) like CLIP enable open-vocabulary learning, they struggle with pixel-level semantic localization due to image-level pretraining. We propose a residual diffusion-based cost map refinement strategy to address these challenges. By treating CLIP{\textquoteright}s coarse-grained classification maps as initial cost maps, our method iteratively refines them via a multi-step diffusion process, bridging the gap between high-level semantics and low-level spatial details. This enhances pixel-wise discriminative ability without retraining VLMs. Experiments on standard benchmarks demonstrate promising improvements in both quantitative accuracy and qualitative boundary precision, verifying the effectiveness of integrating diffusion for OVSS. Our approach offers a novel paradigm for advancing open-vocabulary visual understanding via foundation model refinement.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2026.; 8th Chinese Conference on Pattern Recognition and Computer Vision, PRCV 2025 ; Conference date: 15-10-2025 Through 18-10-2025},
}