Recent work has demonstrated the potential of large language models (LLMs) for program optimization, a key challenge in programming languages. We propose a blackbox adaptation method called Retrieval Augmented Search (RAS) that performs beam search over candidate optimizations; at each step, it retrieves in-context examples from a given training dataset of slow-fast program pairs to guide the LLM. Critically, we find that performing contextual retrieval based on an LLM-generated natural language description significantly outperforms retrieval based on the source code. We also propose AEGIS, a method for improving interpretability by decomposing training examples into ``atomic edits'' that are significantly more incremental in nature. We show that RAS performs up to 2.06\(\times\) better than prior state-of-the-art blackbox adaptation strategies on optimizing C++ programs, and that AEGIS performs up to 1.37\(\times\) better while making significantly smaller edits. We also show that using RAS improves the mean runtime percentile of Python programs by 10.27 compared to baselines.
% Findings of ACL is a proceedings volume, so the venue is recorded in booktitle
% of an inproceedings entry; the arXiv identifier from the URL is also exposed
% through the eprint fields so eprint-aware styles can render it.
@inproceedings{anupam2026llm,
  author        = {Anupam, Sagnik and Shypula, Alexander and Bastani, Osbert},
  title         = {{LLM} Program Optimization via Retrieval Augmented Search},
  booktitle     = {Findings of the Association for Computational Linguistics: {ACL} 2026},
  year          = {2026},
  eprint        = {2501.18916},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2501.18916},
}