| { | |
| "llama-3.2-3b-instruct": { | |
| "success_rate": 0.18239999999999998, | |
| "relative_action_count": 0.8956800000000001 | |
| }, | |
| "qwen2.5-3b-instruct": { | |
| "success_rate": 0.184, | |
| "relative_action_count": 0.8255466666666665 | |
| }, | |
| "gpt-3.5-turbo": { | |
| "success_rate": 0.272, | |
| "relative_action_count": 0.8743314285714285 | |
| }, | |
| "qwen2.5-7b-instruct": { | |
| "success_rate": 0.36639999999999995, | |
| "relative_action_count": 0.8682133333333335 | |
| }, | |
| "gemma-2-9b-it": { | |
| "success_rate": 0.392, | |
| "relative_action_count": 0.8522190476190475 | |
| }, | |
| "llama-3.1-8b-instruct": { | |
| "success_rate": 0.4424, | |
| "relative_action_count": 0.8441104761904763 | |
| }, | |
| "gemma-2-27b-it": { | |
| "success_rate": 0.548, | |
| "relative_action_count": 0.6583142857142856 | |
| }, | |
| "yi-lightning": { | |
| "success_rate": 0.6728, | |
| "relative_action_count": 0.5962819047619048 | |
| }, | |
| "llama-3.1-70b-instruct": { | |
| "success_rate": 0.696, | |
| "relative_action_count": 0.5514495238095238 | |
| }, | |
| "llama-3.3-70b-instruct": { | |
| "success_rate": 0.712, | |
| "relative_action_count": 0.5916438095238095 | |
| }, | |
| "gpt-4o-mini": { | |
| "success_rate": 0.7239999999999999, | |
| "relative_action_count": 0.5270952380952381 | |
| }, | |
| "gemini-1.5-pro": { | |
| "success_rate": 0.7256, | |
| "relative_action_count": 0.5686514285714285 | |
| }, | |
| "claude-3.5-haiku": { | |
| "success_rate": 0.7343999999999999, | |
| "relative_action_count": 0.757095238095238 | |
| }, | |
| "qwen2.5-14b-instruct": { | |
| "success_rate": 0.756, | |
| "relative_action_count": 0.5723257142857143 | |
| }, | |
| "qwen2.5-72b-instruct": { | |
| "success_rate": 0.7584, | |
| "relative_action_count": 0.5753561904761904 | |
| }, | |
| "gpt-4o": { | |
| "success_rate": 0.7856000000000002, | |
| "relative_action_count": 0.506207619047619 | |
| }, | |
| "qwen2.5-32b-instruct": { | |
| "success_rate": 0.7879999999999999, | |
| "relative_action_count": 0.5955619047619047 | |
| }, | |
| "mistral-large-instruct-2411": { | |
| "success_rate": 0.7879999999999999, | |
| "relative_action_count": 0.5365238095238094 | |
| }, | |
| "claude-3.5-sonnet": { | |
| "success_rate": 0.8263999999999999, | |
| "relative_action_count": 0.46185714285714285 | |
| }, | |
| "deepseek-r1": { | |
| "success_rate": 0.8712, | |
| "relative_action_count": 0.51432 | |
| }, | |
| "o1-mini": { | |
| "success_rate": 0.8784000000000001, | |
| "relative_action_count": 0.46449523809523807 | |
| }, | |
| "deepseek-v3": { | |
| "success_rate": 0.8928, | |
| "relative_action_count": 0.5308400000000001 | |
| }, | |
| "qwq-32b": { | |
| "success_rate": 0.9032, | |
| "relative_action_count": 0.5338533333333333 | |
| } | |
| } | |