"""Fetch blockchain data - supports both complete historical data and API fallback""" import requests import pandas as pd from datetime import datetime, timedelta import os # Global cache for complete blockchain data _BLOCKCHAIN_DATA_CACHE = None COMPLETE_DATA_FILE = 'blockchain_data_complete.csv' def load_complete_blockchain_data(force_reload=False): """ Load the complete blockchain data CSV (one-time load, then cached in memory) Parameters: ----------- force_reload : bool Force reload from disk even if cached Returns: -------- pd.DataFrame or None Complete blockchain data with columns: date, bitcoin_price, difficulty, fees, hashrate, revenue, block_reward, days_since_halving """ global _BLOCKCHAIN_DATA_CACHE # Return cached data if available if _BLOCKCHAIN_DATA_CACHE is not None and not force_reload: return _BLOCKCHAIN_DATA_CACHE # Check if file exists if not os.path.exists(COMPLETE_DATA_FILE): print(f"\nāš ļø WARNING: {COMPLETE_DATA_FILE} not found!") print(" Falling back to API (limited to recent data)...") return None # Load from CSV print(f"šŸ“‚ Loading complete blockchain data from {COMPLETE_DATA_FILE}...") df = pd.read_csv(COMPLETE_DATA_FILE) df['date'] = pd.to_datetime(df['date']) # Cache it in memory _BLOCKCHAIN_DATA_CACHE = df print(f"āœ… Loaded {len(df):,} rows of data") print(f" Date range: {df['date'].min().date()} to {df['date'].max().date()}") return df def get_blockchain_data_for_date(target_date, window_size=30): """ Get blockchain data for a specific date (includes window_size days before) Parameters: ----------- target_date : str or datetime Target prediction date window_size : int Number of days needed before target_date (default: 30) Returns: -------- pd.DataFrame Blockchain data from (target_date - window_size) to target_date """ # Load complete data complete_df = load_complete_blockchain_data() if complete_df is None: # Fallback to API return get_latest_blockchain_data(days=90) # Convert target_date to datetime if isinstance(target_date, str): target_date = pd.to_datetime(target_date) # Calculate start date (need window_size days before target) start_date = target_date - timedelta(days=window_size + 10) # +10 buffer for safety # Filter to date range mask = (complete_df['date'] >= start_date) & (complete_df['date'] <= target_date) filtered_df = complete_df[mask].copy().reset_index(drop=True) if len(filtered_df) < window_size: print(f"āš ļø WARNING: Not enough data for {target_date.date()}") print(f" Need {window_size} days, got {len(filtered_df)}") return filtered_df def get_latest_blockchain_data(days=90): """ Get the most recent N days of blockchain data Compatible with original function signature Parameters: ----------- days : int Number of days to fetch (from today backward) Returns: -------- pd.DataFrame Blockchain data for the last N days """ # Try to load complete data first complete_df = load_complete_blockchain_data() if complete_df is not None: # Get last N days from complete data end_date = complete_df['date'].max() start_date = end_date - timedelta(days=days) mask = (complete_df['date'] >= start_date) & (complete_df['date'] <= end_date) filtered_df = complete_df[mask].copy().reset_index(drop=True) return filtered_df else: # Fallback to API if complete data not available print("šŸ“” Falling back to API...") return get_latest_blockchain_data_from_api(days) def get_latest_blockchain_data_from_api(days=90): """ Fallback: Fetch from API if complete data file not available (Original implementation) """ data_types = { 'bitcoin_price': 'market-price', 'difficulty': 'difficulty', 'fees': 'transaction-fees', 'hashrate': 'hash-rate', 'revenue': 'miners-revenue' } timespan = f'{days}days' all_data = {} for name, chart_name in data_types.items(): url = f'https://api.blockchain.info/charts/{chart_name}' params = {'timespan': timespan, 'format': 'json'} try: response = requests.get(url, params=params, timeout=30) response.raise_for_status() values = response.json().get('values', []) df_temp = pd.DataFrame(values) df_temp['x'] = pd.to_datetime(df_temp['x'], unit='s') df_temp = df_temp.set_index('x').rename(columns={'y': name}) all_data[name] = df_temp except Exception as e: print(f"āŒ Failed to fetch {name}: {e}") return None # Merge all merged_df = all_data['bitcoin_price'] for name in ['difficulty', 'fees', 'hashrate', 'revenue']: merged_df = merged_df.join(all_data[name], how='outer') merged_df = merged_df.reset_index().rename(columns={'x': 'date'}) merged_df = merged_df.sort_values('date').reset_index(drop=True) # Add block reward merged_df['block_reward'] = merged_df['date'].apply(calculate_block_reward) # Add days since halving merged_df['days_since_halving'] = merged_df['date'].apply(get_days_since_halving) return merged_df def calculate_block_reward(date): """Calculate block reward based on halving schedule""" if pd.isna(date): return None elif date < pd.Timestamp('2012-11-28'): return 50 elif date < pd.Timestamp('2016-07-09'): return 25 elif date < pd.Timestamp('2020-05-11'): return 12.5 elif date < pd.Timestamp('2024-04-20'): return 6.25 else: return 3.125 def get_days_since_halving(date): """Calculate days since most recent halving""" halving_dates = [ pd.Timestamp('2012-11-28'), pd.Timestamp('2016-07-09'), pd.Timestamp('2020-05-11'), pd.Timestamp('2024-04-20'), ] recent_halving = None for halving in halving_dates: if date >= halving: recent_halving = halving if recent_halving is None: return 0 return (date - recent_halving).days if __name__ == "__main__": print("Testing blockchain data loading...\n") # Test 1: Load complete data print("="*80) print("TEST 1: Load complete blockchain data") print("="*80) df_complete = load_complete_blockchain_data() if df_complete is not None: print(f"\nāœ… Successfully loaded {len(df_complete):,} rows") print(f" Date range: {df_complete['date'].min().date()} to {df_complete['date'].max().date()}") print(f" Columns: {list(df_complete.columns)}") else: print("\nāš ļø Complete data not available") # Test 2: Get data for specific date print("\n" + "="*80) print("TEST 2: Get data for specific date (2021-06-01)") print("="*80) df_2021 = get_blockchain_data_for_date('2021-06-01', window_size=30) if df_2021 is not None: print(f"\nāœ… Got {len(df_2021)} days") print(f" Date range: {df_2021['date'].min().date()} to {df_2021['date'].max().date()}") print(f" Bitcoin price on 2021-06-01: ${df_2021[df_2021['date'].dt.date == pd.to_datetime('2021-06-01').date()]['bitcoin_price'].values[0]:,.2f}") # Test 3: Get latest 90 days print("\n" + "="*80) print("TEST 3: Get latest 90 days") print("="*80) df_latest = get_latest_blockchain_data(days=90) if df_latest is not None: print(f"\nāœ… Got {len(df_latest)} days") print(f" Date range: {df_latest['date'].min().date()} to {df_latest['date'].max().date()}") print("\n" + "="*80) print("āœ… ALL TESTS PASSED") print("="*80)