@InProceedings{10.1007/978-981-95-2725-0_14,
  author="Wang, Yile and Huang, Hui",
  editor="Sun, Maosong and Duan, Peiyong and Liu, Zhiyuan and Xu, Ruifeng and Sun, Weiwei and Chen, Yubo and Tian, Zhiliang and Liu, Zhenghao",
  title="Improving Abstract Reasoning Ability of Large Language Models Through Mixture Program-Based Data Synthesis",
  booktitle="Chinese Computational Linguistics",
  year="2026",
  publisher="Springer Nature Singapore",
  address="Singapore",
  pages="208--228",
  abstract="Abstract reasoning is a challenging task that involves identifying patterns from limited input-output grids and applying them to new grids. With the development of large language models (LLMs), recent studies attempt to transfer the problems to textual format and tackle abstract reasoning tasks using models such as GPT-4. However, the overall accuracy remains low, which also results in the poor quality of abstract reasoning data directly synthesized by GPT-4, making it unsuitable as effective fine-tuning data. In this paper, we propose mixture program-based data synthesis strategies, including low-level code-based synthesis, high-level DSL-based synthesis, and shuffle-based synthesis. Through these strategies, we construct diverse and valid abstract reasoning instruction data to help improve the general abstract reasoning ability of LLMs across multiple datasets. Experimental results show that, by supervised fine-tuning of Qwen-2.5-7B on our synthesized instruction data, the resulting model shows improved abstract reasoning ability and outperforms various strong baseline LLMs, including the closed-source model GPT-4 and open-source models such as LLaMA-3 and Qwen-2.5. We release the logs from GPT and our model at https://github.com/szu-tera/ARC.",
  isbn="978-981-95-2725-0"
}