Spaces:

manueldeprada
/

Tcid

Sleeping

App Files Files Community

manueldeprada HF Staff committed on Sep 13

Commit

fda0540

verified ·

1 Parent(s): 0c04c7f

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

data.py +27 -10

data.py CHANGED Viewed

@@ -102,12 +102,12 @@ def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame,
 def get_available_dates() -> List[str]:
     """Get list of available dates from both AMD and NVIDIA datasets."""
     try:
-        # Get AMD dates - the path structure is: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
-        amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
         files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
         logger.info(f"Found {len(files_amd)} AMD files")
-        # Get NVIDIA dates - same structure
         nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
         files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
         logger.info(f"Found {len(files_nvidia)} NVIDIA files")
@@ -115,15 +115,22 @@ def get_available_dates() -> List[str]:
         # Extract dates from file paths
         amd_dates = set()
         for file_path in files_amd:
-            # Pattern to match the date in the path: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
-            pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
             match = re.search(pattern, file_path)
             if match:
                 amd_dates.add(match.group(1))
         nvidia_dates = set()
         for file_path in files_nvidia:
-            # Same pattern for NVIDIA
             pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
             match = re.search(pattern, file_path)
             if match:
@@ -146,12 +153,22 @@ def get_available_dates() -> List[str]:
 def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
     """Get data for a specific date."""
     try:
-        # Construct paths for specific date - correct structure: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
-        amd_src = f"hf://datasets/optimum-amd/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
         nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
-        # Read dataframes directly
-        df_amd, _ = read_one_dataframe(amd_src, "amd")
         df_nvidia, _ = read_one_dataframe(nvidia_src, "nvidia")
         # Join both dataframes

 def get_available_dates() -> List[str]:
     """Get list of available dates from both AMD and NVIDIA datasets."""
     try:
+        # Get AMD dates - the path structure is: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
+        amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
         files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
         logger.info(f"Found {len(files_amd)} AMD files")
+        # Get NVIDIA dates - structure is: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
         nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
         files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
         logger.info(f"Found {len(files_nvidia)} NVIDIA files")
         # Extract dates from file paths
         amd_dates = set()
         for file_path in files_amd:
+            # Pattern to match the date in the AMD path: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
+            pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/runs/[^/]+/ci_results_run_models_gpu/model_results\.json'
             match = re.search(pattern, file_path)
             if match:
                 amd_dates.add(match.group(1))
+            else:
+                # Log unmatched paths for debugging
+                logger.debug(f"AMD file path didn't match pattern: {file_path}")
+        # Log a few example AMD file paths for debugging
+        if files_amd:
+            logger.info(f"Example AMD file paths: {files_amd[:3]}")
         nvidia_dates = set()
         for file_path in files_nvidia:
+            # Pattern to match the date in the NVIDIA path: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
             pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
             match = re.search(pattern, file_path)
             if match:
 def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
     """Get data for a specific date."""
     try:
+        # For AMD, we need to find the specific run file for the date
+        # AMD structure: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
+        amd_src = f"hf://datasets/optimum-amd/transformers_daily_ci/{target_date}/runs/*/ci_results_run_models_gpu/model_results.json"
+        amd_files = fs.glob(amd_src, refresh=True)
+        if not amd_files:
+            raise FileNotFoundError(f"No AMD data found for date {target_date}")
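+        # Use the first run returned by glob for the date (NOTE: results are not sorted by recency here, so this may not be the most recent run - confirm)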
+        amd_file = amd_files[0]
+        # NVIDIA structure: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
         nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
+        # Read dataframes
+        df_amd, _ = read_one_dataframe(amd_file, "amd")
         df_nvidia, _ = read_one_dataframe(nvidia_src, "nvidia")
         # Join both dataframes