% arXiv preprint. The identifier is stored in structured form (eprint +
% archiveprefix) for eprint-aware styles. The journal field is kept because
% @article requires it under classic BibTeX styles, which ignore eprint.
@article{fiegel2026optimal,
  author        = {Fiegel, C{\^o}me and M{\'e}nard, Pierre and Kozuno, Tadashi and Valko, Michal and Perchet, Vianney},
  title         = {Optimal last-iterate convergence in matrix games with bandit feedback using the log-barrier},
  journal       = {arXiv preprint arXiv:2604.15242},
  year          = {2026},
  eprint        = {2604.15242},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2604.15242},
  abstract      = {We address the problem of minimax policy learning in zero-sum matrix games with bandit feedback. We demonstrate that employing log-barrier regularization with a dual-focused analysis achieves a convergence rate of $\tilde{O}(t^{-1/4})$ on the exploitability gap, matching the previously established lower bound. Our method extends to extensive-form games with equivalent performance guarantees.},
}