% arXiv preprint (arXiv:2512.15146). Four authors, each separated by " and ".
@article{wang2025beyond,
  author  = {Wang, Weiqin and Wang, Yile and Chen, Kehao and Huang, Hui},
  title   = {Beyond Majority Voting: Towards Fine-grained and More Reliable Reward Signal for Test-Time Reinforcement Learning},
  journal = {arXiv preprint arXiv:2512.15146},
  year    = {2025},
}