| """Fetch blockchain data - supports both complete historical data and API fallback""" | |
| import requests | |
| import pandas as pd | |
| from datetime import datetime, timedelta | |
| import os | |
| # Global cache for complete blockchain data | |
| _BLOCKCHAIN_DATA_CACHE = None | |
| COMPLETE_DATA_FILE = 'blockchain_data_complete.csv' | |
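# NOTE (assumption): the CSV is expected to be produced by a separate historical-export
# step (it is not created by this module) and to contain the columns documented below:
#   date, bitcoin_price, difficulty, fees, hashrate, revenue, block_reward, days_since_halving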

def load_complete_blockchain_data(force_reload=False):
    """
    Load the complete blockchain data CSV (one-time load, then cached in memory)

    Parameters:
    -----------
    force_reload : bool
        Force reload from disk even if cached

    Returns:
    --------
    pd.DataFrame or None
        Complete blockchain data with columns: date, bitcoin_price, difficulty,
        fees, hashrate, revenue, block_reward, days_since_halving
    """
    global _BLOCKCHAIN_DATA_CACHE

    # Return cached data if available
    if _BLOCKCHAIN_DATA_CACHE is not None and not force_reload:
        return _BLOCKCHAIN_DATA_CACHE

    # Check if file exists
    if not os.path.exists(COMPLETE_DATA_FILE):
        print(f"\n⚠️ WARNING: {COMPLETE_DATA_FILE} not found!")
        print("   Falling back to API (limited to recent data)...")
        return None

    # Load from CSV
    print(f"📂 Loading complete blockchain data from {COMPLETE_DATA_FILE}...")
    df = pd.read_csv(COMPLETE_DATA_FILE)
    df['date'] = pd.to_datetime(df['date'])

    # Cache it in memory
    _BLOCKCHAIN_DATA_CACHE = df

    print(f"✅ Loaded {len(df):,} rows of data")
    print(f"   Date range: {df['date'].min().date()} to {df['date'].max().date()}")

    return df

def get_blockchain_data_for_date(target_date, window_size=30):
    """
    Get blockchain data for a specific date (includes window_size days before)

    Parameters:
    -----------
    target_date : str or datetime
        Target prediction date
    window_size : int
        Number of days needed before target_date (default: 30)

    Returns:
    --------
    pd.DataFrame
        Blockchain data from (target_date - window_size) to target_date
    """
    # Load complete data
    complete_df = load_complete_blockchain_data()

    if complete_df is None:
        # Fallback to API
        return get_latest_blockchain_data(days=90)

    # Convert target_date to datetime
    if isinstance(target_date, str):
        target_date = pd.to_datetime(target_date)

    # Calculate start date (need window_size days before target)
    start_date = target_date - timedelta(days=window_size + 10)  # +10 buffer for safety

    # Filter to date range
    mask = (complete_df['date'] >= start_date) & (complete_df['date'] <= target_date)
    filtered_df = complete_df[mask].copy().reset_index(drop=True)

    if len(filtered_df) < window_size:
        print(f"⚠️ WARNING: Not enough data for {target_date.date()}")
        print(f"   Need {window_size} days, got {len(filtered_df)}")

    return filtered_df
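# Usage sketch (illustrative only, not part of this module's API): because of the +10-day
# buffer the returned frame may hold a few extra rows, so a consumer that needs exactly
# window_size days can take the tail, e.g.:
#   window_df = get_blockchain_data_for_date('2021-06-01', window_size=30)
#   features = window_df[['bitcoin_price', 'difficulty', 'fees', 'hashrate', 'revenue']].tail(30)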

def get_latest_blockchain_data(days=90):
    """
    Get the most recent N days of blockchain data
    Compatible with original function signature

    Parameters:
    -----------
    days : int
        Number of days to fetch (counted back from the most recent available date)

    Returns:
    --------
    pd.DataFrame
        Blockchain data for the last N days
    """
    # Try to load complete data first
    complete_df = load_complete_blockchain_data()

    if complete_df is not None:
        # Get last N days from complete data
        end_date = complete_df['date'].max()
        start_date = end_date - timedelta(days=days)

        mask = (complete_df['date'] >= start_date) & (complete_df['date'] <= end_date)
        filtered_df = complete_df[mask].copy().reset_index(drop=True)

        return filtered_df
    else:
        # Fallback to API if complete data not available
        print("💡 Falling back to API...")
        return get_latest_blockchain_data_from_api(days)

def get_latest_blockchain_data_from_api(days=90):
    """
    Fallback: Fetch from API if complete data file not available
    (Original implementation)
    """
    data_types = {
        'bitcoin_price': 'market-price',
        'difficulty': 'difficulty',
        'fees': 'transaction-fees',
        'hashrate': 'hash-rate',
        'revenue': 'miners-revenue'
    }

    timespan = f'{days}days'
    all_data = {}

    for name, chart_name in data_types.items():
        url = f'https://api.blockchain.info/charts/{chart_name}'
        params = {'timespan': timespan, 'format': 'json'}
        try:
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            values = response.json().get('values', [])
            df_temp = pd.DataFrame(values)
            df_temp['x'] = pd.to_datetime(df_temp['x'], unit='s')
            df_temp = df_temp.set_index('x').rename(columns={'y': name})
            all_data[name] = df_temp
        except Exception as e:
            print(f"❌ Failed to fetch {name}: {e}")
            return None

    # Merge all
    merged_df = all_data['bitcoin_price']
    for name in ['difficulty', 'fees', 'hashrate', 'revenue']:
        merged_df = merged_df.join(all_data[name], how='outer')

    merged_df = merged_df.reset_index().rename(columns={'x': 'date'})
    merged_df = merged_df.sort_values('date').reset_index(drop=True)

    # Add block reward
    merged_df['block_reward'] = merged_df['date'].apply(calculate_block_reward)

    # Add days since halving
    merged_df['days_since_halving'] = merged_df['date'].apply(get_days_since_halving)

    return merged_df

def calculate_block_reward(date):
    """Calculate block reward based on halving schedule"""
    if pd.isna(date):
        return None
    elif date < pd.Timestamp('2012-11-28'):
        return 50
    elif date < pd.Timestamp('2016-07-09'):
        return 25
    elif date < pd.Timestamp('2020-05-11'):
        return 12.5
    elif date < pd.Timestamp('2024-04-20'):
        return 6.25
    else:
        return 3.125

def get_days_since_halving(date):
    """Calculate days since most recent halving"""
    halving_dates = [
        pd.Timestamp('2012-11-28'),
        pd.Timestamp('2016-07-09'),
        pd.Timestamp('2020-05-11'),
        pd.Timestamp('2024-04-20'),
    ]

    recent_halving = None
    for halving in halving_dates:
        if date >= halving:
            recent_halving = halving

    if recent_halving is None:
        return 0

    return (date - recent_halving).days
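# Worked examples for the two halving helpers (values follow directly from the schedule above):
#   calculate_block_reward(pd.Timestamp('2019-01-01'))   -> 12.5  (between the 2016 and 2020 halvings)
#   get_days_since_halving(pd.Timestamp('2024-05-20'))   -> 30    (30 days after 2024-04-20)
#   get_days_since_halving(pd.Timestamp('2010-01-01'))   -> 0     (before the first listed halving)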

if __name__ == "__main__":
    print("Testing blockchain data loading...\n")

    # Test 1: Load complete data
    print("="*80)
    print("TEST 1: Load complete blockchain data")
    print("="*80)

    df_complete = load_complete_blockchain_data()
    if df_complete is not None:
        print(f"\n✅ Successfully loaded {len(df_complete):,} rows")
        print(f"   Date range: {df_complete['date'].min().date()} to {df_complete['date'].max().date()}")
        print(f"   Columns: {list(df_complete.columns)}")
    else:
        print("\n⚠️ Complete data not available")

    # Test 2: Get data for specific date
    print("\n" + "="*80)
    print("TEST 2: Get data for specific date (2021-06-01)")
    print("="*80)

    df_2021 = get_blockchain_data_for_date('2021-06-01', window_size=30)
    if df_2021 is not None:
        print(f"\n✅ Got {len(df_2021)} days")
        print(f"   Date range: {df_2021['date'].min().date()} to {df_2021['date'].max().date()}")
        price_on_date = df_2021[df_2021['date'].dt.date == pd.to_datetime('2021-06-01').date()]['bitcoin_price']
        if len(price_on_date) > 0:
            print(f"   Bitcoin price on 2021-06-01: ${price_on_date.values[0]:,.2f}")

    # Test 3: Get latest 90 days
    print("\n" + "="*80)
    print("TEST 3: Get latest 90 days")
    print("="*80)

    df_latest = get_latest_blockchain_data(days=90)
    if df_latest is not None:
        print(f"\n✅ Got {len(df_latest)} days")
        print(f"   Date range: {df_latest['date'].min().date()} to {df_latest['date'].max().date()}")

    print("\n" + "="*80)
    print("✅ ALL TESTS PASSED")
    print("="*80)