manueldeprada HF Staff committed on
Commit
fda0540
·
verified ·
1 Parent(s): 0c04c7f

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. data.py +27 -10
data.py CHANGED
@@ -102,12 +102,12 @@ def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame,
102
  def get_available_dates() -> List[str]:
103
  """Get list of available dates from both AMD and NVIDIA datasets."""
104
  try:
105
- # Get AMD dates - the path structure is: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
106
- amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
107
  files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
108
  logger.info(f"Found {len(files_amd)} AMD files")
109
 
110
- # Get NVIDIA dates - same structure
111
  nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
112
  files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
113
  logger.info(f"Found {len(files_nvidia)} NVIDIA files")
@@ -115,15 +115,22 @@ def get_available_dates() -> List[str]:
115
  # Extract dates from file paths
116
  amd_dates = set()
117
  for file_path in files_amd:
118
- # Pattern to match the date in the path: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
119
- pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
120
  match = re.search(pattern, file_path)
121
  if match:
122
  amd_dates.add(match.group(1))
 
 
 
 
 
 
 
123
 
124
  nvidia_dates = set()
125
  for file_path in files_nvidia:
126
- # Same pattern for NVIDIA
127
  pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
128
  match = re.search(pattern, file_path)
129
  if match:
@@ -146,12 +153,22 @@ def get_available_dates() -> List[str]:
146
  def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
147
  """Get data for a specific date."""
148
  try:
149
- # Construct paths for specific date - correct structure: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
150
- amd_src = f"hf://datasets/optimum-amd/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
 
 
 
 
 
 
 
 
 
 
151
  nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
152
 
153
- # Read dataframes directly
154
- df_amd, _ = read_one_dataframe(amd_src, "amd")
155
  df_nvidia, _ = read_one_dataframe(nvidia_src, "nvidia")
156
 
157
  # Join both dataframes
 
102
  def get_available_dates() -> List[str]:
103
  """Get list of available dates from both AMD and NVIDIA datasets."""
104
  try:
105
+ # Get AMD dates - the path structure is: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
106
+ amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
107
  files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
108
  logger.info(f"Found {len(files_amd)} AMD files")
109
 
110
+ # Get NVIDIA dates - structure is: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
111
  nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
112
  files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
113
  logger.info(f"Found {len(files_nvidia)} NVIDIA files")
 
115
  # Extract dates from file paths
116
  amd_dates = set()
117
  for file_path in files_amd:
118
+ # Pattern to match the date in the AMD path: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
119
+ pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/runs/[^/]+/ci_results_run_models_gpu/model_results\.json'
120
  match = re.search(pattern, file_path)
121
  if match:
122
  amd_dates.add(match.group(1))
123
+ else:
124
+ # Log unmatched paths for debugging
125
+ logger.debug(f"AMD file path didn't match pattern: {file_path}")
126
+
127
+ # Log a few example AMD file paths for debugging
128
+ if files_amd:
129
+ logger.info(f"Example AMD file paths: {files_amd[:3]}")
130
 
131
  nvidia_dates = set()
132
  for file_path in files_nvidia:
133
+ # Pattern to match the date in the NVIDIA path: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
134
  pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
135
  match = re.search(pattern, file_path)
136
  if match:
 
153
  def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
154
  """Get data for a specific date."""
155
  try:
156
+ # For AMD, we need to find the specific run file for the date
157
+ # AMD structure: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
158
+ amd_src = f"hf://datasets/optimum-amd/transformers_daily_ci/{target_date}/runs/*/ci_results_run_models_gpu/model_results.json"
159
+ amd_files = fs.glob(amd_src, refresh=True)
160
+
161
+ if not amd_files:
162
+ raise FileNotFoundError(f"No AMD data found for date {target_date}")
163
+
164
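+ # Use the first run returned by the glob for this date (NOTE: glob order is not guaranteed to be most-recent-first; sort explicitly if the latest run is required)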
165
+ amd_file = amd_files[0]
166
+
167
+ # NVIDIA structure: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
168
  nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
169
 
170
+ # Read dataframes
171
+ df_amd, _ = read_one_dataframe(amd_file, "amd")
172
  df_nvidia, _ = read_one_dataframe(nvidia_src, "nvidia")
173
 
174
  # Join both dataframes