| 1 | google/gemini-3.1-pro-preview | | 95.2% | 20 | 1 | 21 |
| 2 | z-ai/glm-5 | | 84.4% | 200 | 37 | 237 |
| 3 | moonshotai/kimi-k2.5 | | 78.5% | 186 | 51 | 237 |
| 4 | x-ai/grok-3-mini-beta | | 57.2% | 115 | 86 | 201 |
| 5 | minimax/minimax-m2.5 | | 45.1% | 107 | 130 | 237 |
| 6 | qwen/qwen3-235b-a22b | | 38.9% | 44 | 69 | 113 |
| 7 | google/gemini-3-flash-preview | | 24.5% | 58 | 179 | 237 |
| 8 | anthropic/claude-sonnet-4.6 | | 13.5% | 32 | 205 | 237 |
| 9 | meta-llama/llama-3.3-70b-instruct | | 12.5% | 4 | 28 | 32 |
| 10 | anthropic/claude-3.7-sonnet | | 7.1% | 14 | 182 | 196 |
| 11 | google/gemini-2.0-flash-001 | | 4.6% | 10 | 208 | 218 |
| 12 | deepseek/deepseek-chat-v3-0324 | | 3.4% | 8 | 229 | 237 |
| 13 | openai/gpt-4o | | 3.4% | 8 | 229 | 237 |
| 14 | mistralai/mistral-medium-3 | | 0% | 0 | 237 | 237 |