Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
data.py
CHANGED
|
@@ -102,12 +102,12 @@ def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame,
|
|
| 102 |
def get_available_dates() -> List[str]:
|
| 103 |
"""Get list of available dates from both AMD and NVIDIA datasets."""
|
| 104 |
try:
|
| 105 |
-
# Get AMD dates - the path structure is: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
|
| 106 |
-
amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
|
| 107 |
files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
|
| 108 |
logger.info(f"Found {len(files_amd)} AMD files")
|
| 109 |
|
| 110 |
-
# Get NVIDIA dates -
|
| 111 |
nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
|
| 112 |
files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
|
| 113 |
logger.info(f"Found {len(files_nvidia)} NVIDIA files")
|
|
@@ -115,15 +115,22 @@ def get_available_dates() -> List[str]:
|
|
| 115 |
# Extract dates from file paths
|
| 116 |
amd_dates = set()
|
| 117 |
for file_path in files_amd:
|
| 118 |
-
# Pattern to match the date in the path: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
|
| 119 |
-
pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
|
| 120 |
match = re.search(pattern, file_path)
|
| 121 |
if match:
|
| 122 |
amd_dates.add(match.group(1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
nvidia_dates = set()
|
| 125 |
for file_path in files_nvidia:
|
| 126 |
-
#
|
| 127 |
pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
|
| 128 |
match = re.search(pattern, file_path)
|
| 129 |
if match:
|
|
@@ -146,12 +153,22 @@ def get_available_dates() -> List[str]:
|
|
| 146 |
def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
|
| 147 |
"""Get data for a specific date."""
|
| 148 |
try:
|
| 149 |
-
#
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
|
| 152 |
|
| 153 |
-
# Read dataframes
|
| 154 |
-
df_amd, _ = read_one_dataframe(amd_src, "amd")
|
| 155 |
df_nvidia, _ = read_one_dataframe(nvidia_src, "nvidia")
|
| 156 |
|
| 157 |
# Join both dataframes
|
|
|
|
| 102 |
def get_available_dates() -> List[str]:
|
| 103 |
"""Get list of available dates from both AMD and NVIDIA datasets."""
|
| 104 |
try:
|
| 105 |
+
# Get AMD dates - the path structure is: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
|
| 106 |
+
amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
|
| 107 |
files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
|
| 108 |
logger.info(f"Found {len(files_amd)} AMD files")
|
| 109 |
|
| 110 |
+
# Get NVIDIA dates - structure is: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
|
| 111 |
nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
|
| 112 |
files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
|
| 113 |
logger.info(f"Found {len(files_nvidia)} NVIDIA files")
|
|
|
|
| 115 |
# Extract dates from file paths
|
| 116 |
amd_dates = set()
|
| 117 |
for file_path in files_amd:
|
| 118 |
+
# Pattern to match the date in the AMD path: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
|
| 119 |
+
pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/runs/[^/]+/ci_results_run_models_gpu/model_results\.json'
|
| 120 |
match = re.search(pattern, file_path)
|
| 121 |
if match:
|
| 122 |
amd_dates.add(match.group(1))
|
| 123 |
+
else:
|
| 124 |
+
# Log unmatched paths for debugging
|
| 125 |
+
logger.debug(f"AMD file path didn't match pattern: {file_path}")
|
| 126 |
+
|
| 127 |
+
# Log a few example AMD file paths for debugging
|
| 128 |
+
if files_amd:
|
| 129 |
+
logger.info(f"Example AMD file paths: {files_amd[:3]}")
|
| 130 |
|
| 131 |
nvidia_dates = set()
|
| 132 |
for file_path in files_nvidia:
|
| 133 |
+
# Pattern to match the date in the NVIDIA path: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
|
| 134 |
pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
|
| 135 |
match = re.search(pattern, file_path)
|
| 136 |
if match:
|
|
|
|
| 153 |
def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
|
| 154 |
"""Get data for a specific date."""
|
| 155 |
try:
|
| 156 |
+
# For AMD, we need to find the specific run file for the date
|
| 157 |
+
# AMD structure: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
|
| 158 |
+
amd_src = f"hf://datasets/optimum-amd/transformers_daily_ci/{target_date}/runs/*/ci_results_run_models_gpu/model_results.json"
|
| 159 |
+
amd_files = fs.glob(amd_src, refresh=True)
|
| 160 |
+
|
| 161 |
+
if not amd_files:
|
| 162 |
+
raise FileNotFoundError(f"No AMD data found for date {target_date}")
|
| 163 |
+
|
| 164 |
+
# Use the first (most recent) run for the date
|
| 165 |
+
amd_file = amd_files[0]
|
| 166 |
+
|
| 167 |
+
# NVIDIA structure: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
|
| 168 |
nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
|
| 169 |
|
| 170 |
+
# Read dataframes
|
| 171 |
+
df_amd, _ = read_one_dataframe(amd_file, "amd")
|
| 172 |
df_nvidia, _ = read_one_dataframe(nvidia_src, "nvidia")
|
| 173 |
|
| 174 |
# Join both dataframes
|