@InProceedings{10.1007/978-981-95-2725-0_14,
  author="Wang, Yile and Huang, Hui",
  editor="Sun, Maosong and Duan, Peiyong and Liu, Zhiyuan and Xu, Ruifeng and Sun, Weiwei and Chen, Yubo and Tian, Zhiliang and Liu, Zhenghao",
  title="Improving Abstract Reasoning Ability of Large Language Models Through Mixture Program-Based Data Synthesis",
  booktitle="Chinese Computational Linguistics",
  year="2026",
  publisher="Springer Nature Singapore",
  address="Singapore",
  pages="208--228",
  abstract="Abstract reasoning is a challenging task that involves identifying patterns from limited input-output grids and applying them to new grids. With the development of large language models (LLMs), recent studies attempt to transfer the problems to textual format and tackle abstract reasoning tasks using models such as GPT-4. However, the overall accuracy remains low, which also results in the poor quality of abstract reasoning data directly synthesized by GPT-4, making it unsuitable as effective fine-tuning data. In this paper, we propose mixture program-based data synthesis strategies, including low-level code-based synthesis, high-level DSL-based synthesis, and shuffle-based synthesis. Through these strategies, we construct diverse and valid abstract reasoning instruction data to help improve the general abstract reasoning ability of LLMs across multiple datasets. Experimental results show that, by supervised fine-tuning of Qwen-2.5-7B on our synthesized instruction data, the resulting model shows improved abstract reasoning ability and outperforms various strong baseline LLMs, including the closed-source model GPT-4 and open-source models such as LLaMA-3 and Qwen-2.5. We release the logs from GPT and our model at https://github.com/szu-tera/ARC.",
  isbn="978-981-95-2725-0"
}