Spaces:
Sleeping
Sleeping
"""Shared limits and scoring helpers for explainer episodes."""
# Step limits. MAX_EXPLORE_STEPS also scales the explore term of the
# denominator used by normalized_episode_score().
MAX_EXPLORE_STEPS = 6
MAX_REPAIR_STEPS = 3

# Names of the tools; kept as a tuple so the collection cannot be mutated
# in place.
AVAILABLE_TOOLS = (
    "search_wikipedia",
    "search_hf_papers",
    "search_arxiv",
    "search_scholar",
    "fetch_docs",
    "search_hf_hub",
)

# Reward ceilings per phase. The explore and generate ceilings feed the
# denominator of normalized_episode_score(); the repair ceiling does not.
MAX_EXPLORE_REWARD = 1.0
MAX_GENERATE_REWARD = 1.0
MAX_REPAIR_REWARD = 1.0

# NOTE(review): presumably the minimum normalized episode score that counts
# as a success -- confirm against the caller that reads it.
SUCCESS_SCORE_THRESHOLD = 0.3
def clamp_action_reward(value: float) -> float:
    """Clamp any single action reward to the required [0, 1] range.

    Args:
        value: Raw reward for one action.

    Returns:
        ``value`` limited to ``[0.0, 1.0]``. A NaN input yields ``0.0``.
    """
    # Imported locally so this block does not depend on a module-level import.
    import math

    # Comparisons against NaN are always false, so min()/max() would hand the
    # NaN straight back and break the [0, 1] guarantee; score it as zero.
    if math.isnan(value):
        return 0.0
    return min(max(value, 0.0), 1.0)
def normalized_episode_score(total_reward: float) -> float:
    """Normalize an episode's accumulated reward to the required [0, 1] range.

    The reward is divided by
    ``MAX_EXPLORE_STEPS * MAX_EXPLORE_REWARD + MAX_GENERATE_REWARD`` and the
    quotient is clamped to ``[0.0, 1.0]``.

    Repair is intentionally not added to the denominator: repair rewards are
    discounted so a failed generate + successful repair should not beat a clean
    first-pass generation.

    Args:
        total_reward: Reward accumulated over the whole episode.

    Returns:
        The normalized score in ``[0.0, 1.0]``. Returns ``0.0`` when the
        denominator is not positive or when the score would be NaN.
    """
    # Imported locally so this block does not depend on a module-level import.
    import math

    max_possible = MAX_EXPLORE_STEPS * MAX_EXPLORE_REWARD + MAX_GENERATE_REWARD
    score = total_reward / max_possible if max_possible > 0 else 0.0
    # Comparisons against NaN are always false, so min()/max() would return
    # the NaN unchanged; treat an undefined score as zero instead.
    if math.isnan(score):
        return 0.0
    return min(max(score, 0.0), 1.0)