badaoui (HF Staff) committed
Commit be3bf7d · Parent(s): 657067c

improve code

Files changed (1): data.py (+146 -262)
data.py CHANGED
@@ -6,7 +6,8 @@ import threading
 import traceback
 import json
 import re
-from typing import List, Tuple, Optional
+import random
+from typing import List, Tuple, Optional, Dict
 
 # NOTE: if caching is an issue, try adding `use_listings_cache=False`
 fs = HfFileSystem()
@@ -56,6 +57,40 @@ KEYS_TO_KEEP = [
     "job_link_nvidia",
 ]
 
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+
+def generate_fake_dates(num_days: int = 7) -> List[str]:
+    """Generate fake dates for the last N days."""
+    today = datetime.now()
+    return [(today - timedelta(days=i)).strftime("%Y-%m-%d") for i in range(num_days)]
+
+def parse_json_field(value) -> dict:
+    """Safely parse a JSON field that might be a string or dict."""
+    if isinstance(value, str):
+        try:
+            return json.loads(value)
+        except:
+            return {}
+    return value if isinstance(value, dict) else {}
+
+def extract_date_from_path(path: str, pattern: str) -> Optional[str]:
+    """Extract date from file path using regex pattern."""
+    match = re.search(pattern, path)
+    return match.group(1) if match else None
+
+def get_test_names(tests: list) -> set:
+    """Extract test names from a list of test dictionaries."""
+    return {test.get('line', '') for test in tests}
+
+def safe_extract(row: pd.Series, key: str) -> int:
+    """Safely extract an integer value from a DataFrame row."""
+    return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
+
+# ============================================================================
+# DATA LOADING FUNCTIONS
+# ============================================================================
 
 def log_dataframe_link(link: str) -> str:
     """
@@ -109,69 +144,37 @@ def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame,
 def get_available_dates() -> List[str]:
     """Get list of available dates from both AMD and NVIDIA datasets."""
     try:
-        # Get AMD dates - the path structure is: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
+        # Get file lists
         amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
-        files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
-        logger.info(f"Found {len(files_amd)} AMD files")
-
-        # Get NVIDIA dates - structure is: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
         nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
+
+        files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
         files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
-        logger.info(f"Found {len(files_nvidia)} NVIDIA files")
-
-        # Extract dates from file paths
-        amd_dates = set()
-        for file_path in files_amd:
-            # Pattern to match the date in the AMD path: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
-            pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/runs/[^/]+/ci_results_run_models_gpu/model_results\.json'
-            match = re.search(pattern, file_path)
-            if match:
-                amd_dates.add(match.group(1))
-            else:
-                # Log unmatched paths for debugging
-                logger.debug(f"AMD file path didn't match pattern: {file_path}")
 
-        # Log a few example AMD file paths for debugging
-        if files_amd:
-            logger.info(f"Example AMD file paths: {files_amd[:3]}")
+        logger.info(f"Found {len(files_amd)} AMD files, {len(files_nvidia)} NVIDIA files")
+
+        # Extract dates using patterns
+        amd_pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/runs/[^/]+/ci_results_run_models_gpu/model_results\.json'
+        nvidia_pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
+
+        amd_dates = {extract_date_from_path(f, amd_pattern) for f in files_amd}
+        amd_dates.discard(None)  # Remove None values
 
-        nvidia_dates = set()
-        for file_path in files_nvidia:
-            # Pattern to match the date in the NVIDIA path: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
-            pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
-            match = re.search(pattern, file_path)
-            if match:
-                nvidia_dates.add(match.group(1))
+        nvidia_dates = {extract_date_from_path(f, nvidia_pattern) for f in files_nvidia}
+        nvidia_dates.discard(None)
 
-        logger.info(f"AMD dates: {sorted(amd_dates, reverse=True)[:5]}...")  # Show first 5
-        logger.info(f"NVIDIA dates: {sorted(nvidia_dates, reverse=True)[:5]}...")  # Show first 5
+        logger.info(f"AMD dates: {sorted(amd_dates, reverse=True)[:5]}...")
+        logger.info(f"NVIDIA dates: {sorted(nvidia_dates, reverse=True)[:5]}...")
 
-        # Return intersection of both datasets (dates where both have data)
+        # Return intersection of both datasets
         common_dates = sorted(amd_dates.intersection(nvidia_dates), reverse=True)
         logger.info(f"Common dates: {len(common_dates)} dates where both AMD and NVIDIA have data")
 
-        if common_dates:
-            return common_dates[:30]  # Limit to last 30 days for performance
-        else:
-            # If no real dates available, generate fake dates for the last 7 days
-            logger.warning("No real dates available, generating fake dates for demo purposes")
-            fake_dates = []
-            today = datetime.now()
-            for i in range(7):
-                date = today - timedelta(days=i)
-                fake_dates.append(date.strftime("%Y-%m-%d"))
-            return fake_dates
+        return common_dates[:30] if common_dates else generate_fake_dates()
 
     except Exception as e:
         logger.error(f"Error getting available dates: {e}")
-        # Generate fake dates when there's an error
-        logger.info("Generating fake dates due to error")
-        fake_dates = []
-        today = datetime.now()
-        for i in range(7):
-            date = today - timedelta(days=i)
-            fake_dates.append(date.strftime("%Y-%m-%d"))
-        return fake_dates
+        return generate_fake_dates()
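Note: both glob patterns feed extract_date_from_path, which returns None for a non-matching path so the set comprehensions can simply discard(None) afterwards. A quick sanity check of the AMD pattern against a hypothetical path (date and run id are made up):

    amd_pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/runs/[^/]+/ci_results_run_models_gpu/model_results\.json'
    path = 'datasets/optimum-amd/transformers_daily_ci/2025-01-15/runs/12345/ci_results_run_models_gpu/model_results.json'
    extract_date_from_path(path, amd_pattern)               # -> '2025-01-15'
    extract_date_from_path('other/file.json', amd_pattern)  # -> None, removed via discard(None)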
@@ -242,37 +245,30 @@ def get_historical_data(start_date: str, end_date: str, sample_data = False) ->
     """Get historical data for a date range."""
     if sample_data:
         return get_fake_historical_data(start_date, end_date)
+
     try:
         start_dt = datetime.strptime(start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(end_date, "%Y-%m-%d")
-
         historical_data = []
-        current_dt = start_dt
 
+        # Load data for each day in range
+        current_dt = start_dt
         while current_dt <= end_dt:
             date_str = current_dt.strftime("%Y-%m-%d")
             try:
                 df, _ = get_data_for_date(date_str)
-                # Only add non-empty dataframes
                 if not df.empty:
                     df['date'] = date_str
                     historical_data.append(df)
                     logger.info(f"Loaded data for {date_str}")
-                else:
-                    logger.warning(f"No data available for {date_str}")
             except Exception as e:
                 logger.warning(f"Could not load data for {date_str}: {e}")
-
             current_dt += timedelta(days=1)
 
-        # Combine all dataframes
-        combined_df = pd.concat(historical_data, ignore_index=False)
-        return combined_df
+        return pd.concat(historical_data, ignore_index=False) if historical_data else pd.DataFrame()
 
     except Exception as e:
         logger.error(f"Error getting historical data: {e}")
-        # Fall back to fake data when there's an error
-        logger.info("Falling back to fake historical data due to error")
         return get_fake_historical_data(start_date, end_date)
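Note: pd.concat(..., ignore_index=False) is deliberate here; each per-day frame is indexed by model name, and downstream lookups such as historical_df.index == model_name.lower() rely on that index surviving concatenation. A sketch with two hypothetical one-row frames:

    import pandas as pd
    a = pd.DataFrame({'date': ['2025-01-01']}, index=['bert'])
    b = pd.DataFrame({'date': ['2025-01-02']}, index=['bert'])
    pd.concat([a, b], ignore_index=False)
    # The index value 'bert' appears twice, once per day:
    #             date
    # bert  2025-01-01
    # bert  2025-01-02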
@@ -326,49 +322,36 @@ def get_fake_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
     try:
         start_dt = datetime.strptime(start_date, "%Y-%m-%d")
         end_dt = datetime.strptime(end_date, "%Y-%m-%d")
-
-        # Generate fake data for each date in the range
-        historical_data = []
-        current_dt = start_dt
-
-        # Get base sample data to use as template
         sample_df, _ = get_sample_data()
+        historical_data = []
 
+        # Generate data for each date
+        current_dt = start_dt
         while current_dt <= end_dt:
-            date_str = current_dt.strftime("%Y-%m-%d")
-
-            # Create a copy of sample data for this date with some random variations
             date_df = sample_df.copy()
-            date_df['date'] = date_str
+            date_df['date'] = current_dt.strftime("%Y-%m-%d")
 
-            # Add some random variation to make it look more realistic
-            import random
+            # Add random variations to make it realistic
             for idx in date_df.index:
-                # Vary the success/failure counts slightly (±20%)
+                # Vary success/skipped counts (±20%)
                 for col in ['success_amd', 'success_nvidia', 'skipped_amd', 'skipped_nvidia']:
-                    if col in date_df.columns:
-                        original_val = date_df.loc[idx, col]
-                        if pd.notna(original_val) and original_val > 0:
-                            variation = random.uniform(0.8, 1.2)
-                            date_df.loc[idx, col] = max(0, int(original_val * variation))
+                    if col in date_df.columns and pd.notna(date_df.loc[idx, col]):
+                        val = date_df.loc[idx, col]
+                        if val > 0:
+                            date_df.loc[idx, col] = max(0, int(val * random.uniform(0.8, 1.2)))
 
-                # Vary failure counts more dramatically to show trends
+                # Vary failure counts more dramatically (±50-100%)
                 for col in ['failed_multi_no_amd', 'failed_multi_no_nvidia', 'failed_single_no_amd', 'failed_single_no_nvidia']:
-                    if col in date_df.columns:
-                        original_val = date_df.loc[idx, col]
-                        if pd.notna(original_val):
-                            # Sometimes have more failures, sometimes fewer
-                            variation = random.uniform(0.5, 2.0)
-                            date_df.loc[idx, col] = max(0, int(original_val * variation))
+                    if col in date_df.columns and pd.notna(date_df.loc[idx, col]):
+                        val = date_df.loc[idx, col]
+                        date_df.loc[idx, col] = max(0, int(val * random.uniform(0.5, 2.0)))
 
             historical_data.append(date_df)
             current_dt += timedelta(days=1)
 
         if not historical_data:
-            logger.warning("No fake historical data generated")
             return pd.DataFrame()
 
-        # Combine all dataframes
         combined_df = pd.concat(historical_data, ignore_index=False)
         logger.info(f"Generated fake historical data: {len(combined_df)} records from {start_date} to {end_date}")
         return combined_df
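Note: the variation is multiplicative and clamped at zero; success/skipped counts move within roughly ±20% while failure counts can halve or double. In isolation (val is illustrative):

    import random
    val = 100
    max(0, int(val * random.uniform(0.8, 1.2)))  # success/skipped: an int in [80, 120]
    max(0, int(val * random.uniform(0.5, 2.0)))  # failures: an int in [50, 200]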
@@ -377,53 +360,23 @@ def get_fake_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
         logger.error(f"Error generating fake historical data: {e}")
         return pd.DataFrame()
 
-def safe_extract(row: pd.DataFrame, key: str) -> int:
-    return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
-
-
 def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_name: str, device: str, gpu_type: str) -> Optional[str]:
-    """
-    Find the first date when a specific test failure appeared in historical data.
-    """
+    """Find the first date when a specific test failure appeared in historical data."""
     if historical_df.empty:
         return None
 
     try:
-        # Normalize model name to match DataFrame index
-        model_name_lower = model_name.lower()
-
-        # Filter historical data for this model
-        model_data = historical_df[historical_df.index == model_name_lower].copy()
-
+        model_data = historical_df[historical_df.index == model_name.lower()].copy()
         if model_data.empty:
             return None
 
-        # Sort by date (oldest first)
-        model_data = model_data.sort_values('date')
-
-        # Check each date for this failure
-        for idx, row in model_data.iterrows():
-            failures = row.get(f'failures_{device}', None)
-
-            if failures is None or pd.isna(failures):
-                continue
-
-            # Handle case where failures might be a string (JSON)
-            if isinstance(failures, str):
-                try:
-                    import json
-                    failures = json.loads(failures)
-                except:
-                    continue
-
-            # Check if this test appears in the failures for this gpu_type
+        # Check each date (oldest first) for this failure
+        for _, row in model_data.sort_values('date').iterrows():
+            failures = parse_json_field(row.get(f'failures_{device}'))
             if gpu_type in failures:
                 for test in failures[gpu_type]:
-                    test_line = test.get('line', '')
-                    if test_line == test_name:
-                        # Found the first occurrence
-                        return row.get('date', None)
-
+                    if test.get('line', '') == test_name:
+                        return row.get('date')
 
         return None
 
     except Exception as e:
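Note: find_failure_first_seen assumes that each failures_{device} cell, once normalized by parse_json_field, maps a GPU configuration ('single' or 'multi') to a list of test records keyed by 'line'. A hypothetical cell for illustration (the test path is made up):

    failures = {
        'single': [{'line': 'tests/models/bert/test_modeling_bert.py::BertModelTest::test_forward'}],
        'multi': [],
    }
    # The function walks dates oldest-first and returns the date of the first
    # row whose failures[gpu_type] contains a record with 'line' == test_name.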
@@ -431,148 +384,89 @@ def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_n
         return None
 
 
+def _find_device_regressions(model_name: str, current_failures: dict, yesterday_failures: dict, device: str) -> list[dict]:
+    """Helper to find regressions for a specific device."""
+    regressions = []
+    for gpu_type in ['single', 'multi']:
+        current_tests = get_test_names(current_failures.get(gpu_type, []))
+        yesterday_tests = get_test_names(yesterday_failures.get(gpu_type, []))
+
+        # Find NEW failures: failing NOW but NOT yesterday
+        new_tests = current_tests - yesterday_tests
+        for test_name in new_tests:
+            if test_name:  # Skip empty names
+                regressions.append({
+                    'model': model_name,
+                    'test': test_name.split('::')[-1],  # Short name
+                    'test_full': test_name,  # Full name
+                    'device': device,
+                    'gpu_type': gpu_type
+                })
+    return regressions
+
 def find_new_regressions(current_df: pd.DataFrame, historical_df: pd.DataFrame) -> list[dict]:
-    """
-    Compare CURRENT failures against PREVIOUS day's failures to find NEW regressions.
-
-    A regression is a test that:
-    - Is failing in the CURRENT/LATEST run (current_df)
-    - Was NOT failing in the PREVIOUS run (yesterday in historical_df)
-    """
+    """Compare current failures against previous day's failures to find new regressions."""
     if current_df.empty or historical_df.empty:
         return []
 
-    new_regressions = []
-
-    # Get the most recent date from historical data (this is "yesterday")
+    # Get yesterday's data
     available_dates = sorted(historical_df['date'].unique(), reverse=True)
-    if len(available_dates) < 1:
-        # No history to compare against
+    if not available_dates:
         return []
 
-    yesterday_date = available_dates[0]
-    yesterday_data = historical_df[historical_df['date'] == yesterday_date]
+    yesterday_data = historical_df[historical_df['date'] == available_dates[0]]
+    new_regressions = []
 
-    # For each model in current data, compare against yesterday
+    # For each model, compare current vs yesterday
     for model_name in current_df.index:
-        model_name_lower = model_name.lower()
-
-        # Get CURRENT failures from current_df
         current_row = current_df.loc[model_name]
+        yesterday_row = yesterday_data[yesterday_data.index == model_name.lower()]
 
-        # Get YESTERDAY's failures from historical_df
-        yesterday_row = yesterday_data[yesterday_data.index == model_name_lower]
-        yesterday_failures_amd = {}
-        yesterday_failures_nvidia = {}
+        # Parse current failures
+        current_amd = parse_json_field(current_row.get('failures_amd', {}))
+        current_nvidia = parse_json_field(current_row.get('failures_nvidia', {}))
 
+        # Parse yesterday failures
+        yesterday_amd = {}
+        yesterday_nvidia = {}
         if not yesterday_row.empty:
             yesterday_row = yesterday_row.iloc[0]
-            yesterday_failures_amd = yesterday_row.get('failures_amd', {})
-            yesterday_failures_nvidia = yesterday_row.get('failures_nvidia', {})
-
-            # Handle string/dict conversion
-            if isinstance(yesterday_failures_amd, str):
-                try:
-                    yesterday_failures_amd = json.loads(yesterday_failures_amd)
-                except:
-                    yesterday_failures_amd = {}
-            if isinstance(yesterday_failures_nvidia, str):
-                try:
-                    yesterday_failures_nvidia = json.loads(yesterday_failures_nvidia)
-                except:
-                    yesterday_failures_nvidia = {}
-
-        # Get CURRENT failures
-        current_failures_amd = current_row.get('failures_amd', {})
-        current_failures_nvidia = current_row.get('failures_nvidia', {})
-
-        # Handle string/dict conversion
-        if isinstance(current_failures_amd, str):
-            try:
-                current_failures_amd = json.loads(current_failures_amd)
-            except:
-                current_failures_amd = {}
-        if isinstance(current_failures_nvidia, str):
-            try:
-                current_failures_nvidia = json.loads(current_failures_nvidia)
-            except:
-                current_failures_nvidia = {}
-
-        # Check AMD failures - find tests failing NOW but NOT yesterday
-        for gpu_type in ['single', 'multi']:
-            current_tests = current_failures_amd.get(gpu_type, [])
-            yesterday_tests = yesterday_failures_amd.get(gpu_type, [])
-
-            # Get test names
-            current_test_names = {test.get('line', '') for test in current_tests}
-            yesterday_test_names = {test.get('line', '') for test in yesterday_tests}
-
-            # Find NEW failures: failing NOW but NOT yesterday
-            new_tests = current_test_names - yesterday_test_names
-            for test_name in new_tests:
-                if test_name:  # Skip empty names
-                    new_regressions.append({
-                        'model': model_name,
-                        'test': test_name.split('::')[-1],  # Short name
-                        'test_full': test_name,  # Full name
-                        'device': 'amd',
-                        'gpu_type': gpu_type
-                    })
-
-        # Check NVIDIA failures - find tests failing NOW but NOT yesterday
-        for gpu_type in ['single', 'multi']:
-            current_tests = current_failures_nvidia.get(gpu_type, [])
-            yesterday_tests = yesterday_failures_nvidia.get(gpu_type, [])
-
-            # Get test names
-            current_test_names = {test.get('line', '') for test in current_tests}
-            yesterday_test_names = {test.get('line', '') for test in yesterday_tests}
-
-            # Find NEW failures: failing NOW but NOT yesterday
-            new_tests = current_test_names - yesterday_test_names
-            for test_name in new_tests:
-                if test_name:  # Skip empty names
-                    new_regressions.append({
-                        'model': model_name,
-                        'test': test_name.split('::')[-1],  # Short name
-                        'test_full': test_name,  # Full name
-                        'device': 'nvidia',
-                        'gpu_type': gpu_type
-                    })
+            yesterday_amd = parse_json_field(yesterday_row.get('failures_amd', {}))
+            yesterday_nvidia = parse_json_field(yesterday_row.get('failures_nvidia', {}))
+
+        # Find regressions for both devices
+        new_regressions.extend(_find_device_regressions(model_name, current_amd, yesterday_amd, 'amd'))
+        new_regressions.extend(_find_device_regressions(model_name, current_nvidia, yesterday_nvidia, 'nvidia'))
 
     return new_regressions
 
 def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
     """Extract and process model data from DataFrame row."""
-    # Handle missing values and get counts directly from dataframe
-    success_nvidia = safe_extract(row, "success_nvidia")
-    success_amd = safe_extract(row, "success_amd")
-
-    skipped_nvidia = safe_extract(row, "skipped_nvidia")
-    skipped_amd = safe_extract(row, "skipped_amd")
-
-    failed_multi_amd = safe_extract(row, 'failed_multi_no_amd')
-    failed_multi_nvidia = safe_extract(row, 'failed_multi_no_nvidia')
-    failed_single_amd = safe_extract(row, 'failed_single_no_amd')
-    failed_single_nvidia = safe_extract(row, 'failed_single_no_nvidia')
-    # Calculate total failures
-    total_failed_amd = failed_multi_amd + failed_single_amd
-    total_failed_nvidia = failed_multi_nvidia + failed_single_nvidia
-    # Create stats dictionaries directly from dataframe values
+    # Extract all counts
+    counts = {key: safe_extract(row, key) for key in [
+        'success_amd', 'success_nvidia', 'skipped_amd', 'skipped_nvidia',
+        'failed_multi_no_amd', 'failed_multi_no_nvidia',
+        'failed_single_no_amd', 'failed_single_no_nvidia'
+    ]}
+
+    # Create stats dictionaries
     amd_stats = {
-        'passed': success_amd,
-        'failed': total_failed_amd,
-        'skipped': skipped_amd,
-        'error': 0  # Not available in this dataset
+        'passed': counts['success_amd'],
+        'failed': counts['failed_multi_no_amd'] + counts['failed_single_no_amd'],
+        'skipped': counts['skipped_amd'],
+        'error': 0
     }
     nvidia_stats = {
-        'passed': success_nvidia,
-        'failed': total_failed_nvidia,
-        'skipped': skipped_nvidia,
-        'error': 0  # Not available in this dataset
+        'passed': counts['success_nvidia'],
+        'failed': counts['failed_multi_no_nvidia'] + counts['failed_single_no_nvidia'],
+        'skipped': counts['skipped_nvidia'],
+        'error': 0
     }
-    return amd_stats, nvidia_stats, failed_multi_amd, failed_single_amd, failed_multi_nvidia, failed_single_nvidia
+
+    return (amd_stats, nvidia_stats, counts['failed_multi_no_amd'],
+            counts['failed_single_no_amd'], counts['failed_multi_no_nvidia'],
+            counts['failed_single_no_nvidia'])
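Note: the two near-identical per-device loops were folded into _find_device_regressions, which reduces to a set difference over test names. With hypothetical inputs:

    current  = {'single': [{'line': 't.py::TestA::test_x'}, {'line': 't.py::TestA::test_y'}]}
    previous = {'single': [{'line': 't.py::TestA::test_x'}]}
    _find_device_regressions('bert', current, previous, 'amd')
    # -> [{'model': 'bert', 'test': 'test_y', 'test_full': 't.py::TestA::test_y',
    #      'device': 'amd', 'gpu_type': 'single'}]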
 
 
 
@@ -643,19 +537,11 @@ class CIResults:
         """Load all available historical data at startup."""
         try:
             if not self.available_dates:
-                # Generate fake dates when no real dates are available
-                fake_dates = []
-                today = datetime.now()
-                for i in range(7):
-                    date = today - timedelta(days=i)
-                    fake_dates.append(date.strftime("%Y-%m-%d"))
-                self.available_dates = fake_dates
+                self.available_dates = generate_fake_dates()
                 logger.info(f"No available dates found, generated {len(self.available_dates)} sample dates.")
-
-            logger.info(f"Loading all historical data for {len(self.available_dates)} dates...")
-            start_date = self.available_dates[-1]  # Oldest date
-            end_date = self.available_dates[0]  # Newest date
 
+            logger.info(f"Loading all historical data for {len(self.available_dates)} dates...")
+            start_date, end_date = self.available_dates[-1], self.available_dates[0]
             self.all_historical_data = get_historical_data(start_date, end_date, self.sample_data)
             logger.info(f"All historical data loaded: {len(self.all_historical_data)} records")
         except Exception as e:
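Note: generate_fake_dates returns dates newest-first, which is why available_dates[-1] is the start (oldest) of the range and available_dates[0] the end (newest). If today were 2025-01-15 (hypothetical):

    generate_fake_dates(3)  # -> ['2025-01-15', '2025-01-14', '2025-01-13']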
@@ -672,17 +558,15 @@ class CIResults:
             self.historical_df = pd.DataFrame()
             return
 
-        # Filter the pre-loaded data by date range
+        # Filter by date range
        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(end_date, "%Y-%m-%d")
 
-        # Filter data within the date range
-        filtered_data = []
-        for date_str in self.all_historical_data['date'].unique():
-            date_dt = datetime.strptime(date_str, "%Y-%m-%d")
-            if start_dt <= date_dt <= end_dt:
-                date_data = self.all_historical_data[self.all_historical_data['date'] == date_str]
-                filtered_data.append(date_data)
+        filtered_data = [
+            self.all_historical_data[self.all_historical_data['date'] == date_str]
+            for date_str in self.all_historical_data['date'].unique()
+            if start_dt <= datetime.strptime(date_str, "%Y-%m-%d") <= end_dt
+        ]
 
         if filtered_data:
             self.historical_df = pd.concat(filtered_data, ignore_index=False)
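Note: the rewritten filter is a plain list comprehension over the unique dates. In isolation (dates are hypothetical):

    from datetime import datetime
    dates = ['2025-01-01', '2025-01-05', '2025-02-01']
    start_dt = datetime.strptime('2025-01-01', "%Y-%m-%d")
    end_dt = datetime.strptime('2025-01-31', "%Y-%m-%d")
    [d for d in dates if start_dt <= datetime.strptime(d, "%Y-%m-%d") <= end_dt]
    # -> ['2025-01-01', '2025-01-05']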
 