| 1 | google/gemini-3.1-pro-preview | | 95.2% | 20 | 1 | 21 |
| 2 | z-ai/glm-5 | | 85.4% | 164 | 28 | 192 |
| 3 | moonshotai/kimi-k2.5 | | 78.1% | 150 | 42 | 192 |
| 4 | x-ai/grok-3-mini-beta | | 56.8% | 109 | 83 | 192 |
| 5 | minimax/minimax-m2.5 | | 40.6% | 78 | 114 | 192 |
| 6 | qwen/qwen3-235b-a22b | | 38.9% | 44 | 69 | 113 |
| 7 | google/gemini-3-flash-preview | | 22.9% | 44 | 148 | 192 |
| 8 | anthropic/claude-sonnet-4.6 | | 13% | 25 | 167 | 192 |
| 9 | meta-llama/llama-3.3-70b-instruct | | 12.5% | 4 | 28 | 32 |
| 10 | anthropic/claude-3.7-sonnet | | 7.3% | 14 | 178 | 192 |
| 11 | google/gemini-2.0-flash-001 | | 5.2% | 10 | 182 | 192 |
| 12 | deepseek/deepseek-chat-v3-0324 | | 4.2% | 8 | 184 | 192 |
| 13 | openai/gpt-4o | | 4.2% | 8 | 184 | 192 |
| 14 | mistralai/mistral-medium-3 | | 0% | 0 | 192 | 192 |