Spaces:

amaai-lab
/

MineROI-Net

Running

App Files Files Community

MineROI-Net / fetch_blockchain_data.py

sithuWiki

upload fetch_blockchain_data.py

b0a6f60 verified 2 days ago

raw

history blame

8.14 kB

	"""Fetch blockchain data - supports both complete historical data and API fallback"""

	import requests
	import pandas as pd
	from datetime import datetime, timedelta
	import os


	# Global cache for complete blockchain data.
	# Starts as None; load_complete_blockchain_data() stores the parsed
	# DataFrame here after the first successful read from disk.
	_BLOCKCHAIN_DATA_CACHE = None
	# Relative file name of the historical CSV; because it is relative, it is
	# resolved against the current working directory of the process.
	COMPLETE_DATA_FILE = 'blockchain_data_complete.csv'


	def load_complete_blockchain_data(force_reload=False):
	    """
	    Return the complete blockchain history, reading the CSV at most once.

	    The parsed frame is kept in the module-level ``_BLOCKCHAIN_DATA_CACHE``
	    and handed back on later calls. The cached object itself is returned
	    (not a copy), so callers that mutate it affect the cache.

	    Parameters:
	    -----------
	    force_reload : bool
	        When True, ignore any cached frame and read the file again.

	    Returns:
	    --------
	    pd.DataFrame or None
	        The contents of ``COMPLETE_DATA_FILE`` with the ``date`` column
	        converted to datetimes, or None when the file does not exist.
	    """
	    global _BLOCKCHAIN_DATA_CACHE

	    # Fast path: a previous call already parsed the file.
	    cache_is_usable = not force_reload and _BLOCKCHAIN_DATA_CACHE is not None
	    if cache_is_usable:
	        return _BLOCKCHAIN_DATA_CACHE

	    if os.path.exists(COMPLETE_DATA_FILE):
	        print(f"📂 Loading complete blockchain data from {COMPLETE_DATA_FILE}...")
	        frame = pd.read_csv(COMPLETE_DATA_FILE)
	        frame['date'] = pd.to_datetime(frame['date'])

	        # Remember the parsed frame before reporting on it.
	        _BLOCKCHAIN_DATA_CACHE = frame

	        print(f"✅ Loaded {len(frame):,} rows of data")
	        dates = frame['date']
	        print(f" Date range: {dates.min().date()} to {dates.max().date()}")
	        return frame

	    # No file on disk: tell the user and let the caller decide on a fallback.
	    print(f"\n⚠️ WARNING: {COMPLETE_DATA_FILE} not found!")
	    print(" Falling back to API (limited to recent data)...")
	    return None


	def get_blockchain_data_for_date(target_date, window_size=30):
	    """
	    Return the rows of blockchain history leading up to ``target_date``.

	    Parameters:
	    -----------
	    target_date : str or datetime
	        Target prediction date. Strings are parsed with ``pd.to_datetime``.
	    window_size : int
	        Number of days needed before target_date (default: 30)

	    Returns:
	    --------
	    pd.DataFrame
	        Rows whose ``date`` lies between
	        ``target_date - (window_size + 10) days`` and ``target_date``,
	        both ends inclusive, re-indexed from 0. If fewer than
	        ``window_size`` rows are found a warning is printed and the short
	        frame is still returned.

	        When the complete CSV is unavailable, the result of
	        ``get_latest_blockchain_data(days=90)`` is returned instead; that
	        fallback does not take ``target_date`` into account.
	    """
	    history = load_complete_blockchain_data()
	    if history is None:
	        # No local history: hand over to the "latest data" path.
	        return get_latest_blockchain_data(days=90)

	    if isinstance(target_date, str):
	        target_date = pd.to_datetime(target_date)

	    # Ten extra days of slack on top of the requested window.
	    lookback = timedelta(days=window_size + 10)
	    earliest = target_date - lookback

	    dates = history['date']
	    in_window = (dates >= earliest) & (dates <= target_date)
	    window_df = history[in_window].copy()
	    window_df = window_df.reset_index(drop=True)

	    rows_found = len(window_df)
	    if rows_found < window_size:
	        print(f"⚠️ WARNING: Not enough data for {target_date.date()}")
	        print(f" Need {window_size} days, got {rows_found}")

	    return window_df


	def get_latest_blockchain_data(days=90):
	    """
	    Return the most recent ``days`` days of blockchain data.

	    Parameters:
	    -----------
	    days : int
	        Length of the look-back period in days.

	    Returns:
	    --------
	    pd.DataFrame
	        When the complete CSV is available: its rows from
	        ``(latest date in the file) - days`` up to the latest date in the
	        file, inclusive, re-indexed from 0. The period is anchored to the
	        newest row of the file, not to today's date.
	        Otherwise: whatever ``get_latest_blockchain_data_from_api(days)``
	        returns.
	    """
	    history = load_complete_blockchain_data()

	    if history is None:
	        # Complete data file is missing, so use the remote source instead.
	        print("📡 Falling back to API...")
	        return get_latest_blockchain_data_from_api(days)

	    dates = history['date']
	    newest = dates.max()
	    oldest_wanted = newest - timedelta(days=days)

	    in_range = (dates >= oldest_wanted) & (dates <= newest)
	    recent = history[in_range].copy()
	    return recent.reset_index(drop=True)


	def get_latest_blockchain_data_from_api(days=90):
	    """
	    Fallback: download recent chart data from the blockchain.info charts API.

	    One request is made per metric (price, difficulty, fees, hashrate,
	    miner revenue). The per-metric series are outer-joined on their
	    timestamps, sorted by date, and extended with ``block_reward`` and
	    ``days_since_halving`` columns.

	    Parameters:
	    -----------
	    days : int
	        Sent to the API as the ``timespan`` query value (``"<days>days"``).

	    Returns:
	    --------
	    pd.DataFrame or None
	        The merged frame, or None as soon as any single metric fails to
	        download or parse (the error is printed).
	    """
	    # Output column name -> chart name used in the API URL.
	    chart_for_column = {
	        'bitcoin_price': 'market-price',
	        'difficulty': 'difficulty',
	        'fees': 'transaction-fees',
	        'hashrate': 'hash-rate',
	        'revenue': 'miners-revenue',
	    }
	    query = {'timespan': f'{days}days', 'format': 'json'}

	    frames = {}
	    for column, chart in chart_for_column.items():
	        endpoint = f'https://api.blockchain.info/charts/{chart}'
	        try:
	            reply = requests.get(endpoint, params=query, timeout=30)
	            reply.raise_for_status()
	            points = reply.json().get('values', [])

	            series = pd.DataFrame(points)
	            # 'x' is parsed as seconds since the epoch, 'y' is the value.
	            series['x'] = pd.to_datetime(series['x'], unit='s')
	            frames[column] = series.set_index('x').rename(columns={'y': column})
	        except Exception as e:
	            # Best effort: one missing metric aborts the whole fetch.
	            print(f"❌ Failed to fetch {column}: {e}")
	            return None

	    # Join in insertion order: price first, then the remaining metrics.
	    remaining = iter(frames.values())
	    merged_df = next(remaining)
	    for extra in remaining:
	        merged_df = merged_df.join(extra, how='outer')

	    merged_df = merged_df.reset_index().rename(columns={'x': 'date'})
	    merged_df = merged_df.sort_values('date').reset_index(drop=True)

	    # Derived columns computed row by row from the date.
	    merged_df['block_reward'] = merged_df['date'].apply(calculate_block_reward)
	    merged_df['days_since_halving'] = merged_df['date'].apply(get_days_since_halving)

	    return merged_df


	def calculate_block_reward(date):
	    """
	    Return the block reward in effect on ``date``.

	    Parameters:
	    -----------
	    date : pd.Timestamp or datetime
	        Date to look up. Missing values (as judged by ``pd.isna``) are
	        accepted.

	    Returns:
	    --------
	    int, float or None
	        50 before 2012-11-28, 25 before 2016-07-09, 12.5 before
	        2020-05-11, 6.25 before 2024-04-20, 3.125 from then on;
	        None when ``date`` is missing.
	    """
	    if pd.isna(date):
	        return None

	    # (first day the reward no longer applies, reward), oldest first.
	    schedule = (
	        (pd.Timestamp('2012-11-28'), 50),
	        (pd.Timestamp('2016-07-09'), 25),
	        (pd.Timestamp('2020-05-11'), 12.5),
	        (pd.Timestamp('2024-04-20'), 6.25),
	    )
	    for cutoff, reward in schedule:
	        if date < cutoff:
	            return reward

	    # On or after the last listed halving.
	    return 3.125


	def get_days_since_halving(date):
	    """
	    Return the number of whole days between ``date`` and the latest
	    halving on or before it.

	    Parameters:
	    -----------
	    date : pd.Timestamp or datetime
	        Date to look up. Missing values (as judged by ``pd.isna``) are
	        accepted.

	    Returns:
	    --------
	    int or None
	        Days elapsed since the most recent listed halving; 0 when ``date``
	        precedes the first listed halving (2012-11-28); None when ``date``
	        is missing, mirroring ``calculate_block_reward``.
	    """
	    # A missing date has no meaningful answer. Without this guard NaT would
	    # compare False against every halving and be reported as 0 days, and
	    # None would raise TypeError on the comparison.
	    if pd.isna(date):
	        return None

	    halving_dates = (
	        pd.Timestamp('2012-11-28'),
	        pd.Timestamp('2016-07-09'),
	        pd.Timestamp('2020-05-11'),
	        pd.Timestamp('2024-04-20'),
	    )

	    # Latest halving that is not after `date`, if any.
	    recent_halving = max(
	        (halving for halving in halving_dates if date >= halving),
	        default=None,
	    )
	    if recent_halving is None:
	        return 0

	    return (date - recent_halving).days


	if __name__ == "__main__":
	    # Manual smoke test: exercises the three public loaders and prints what
	    # they return. Results depend on the local CSV and/or network access.
	    print("Testing blockchain data loading...\n")

	    # Becomes False as soon as any check below cannot show real data, so the
	    # final banner does not claim success it did not observe.
	    all_passed = True

	    # Test 1: Load complete data
	    print("="*80)
	    print("TEST 1: Load complete blockchain data")
	    print("="*80)
	    df_complete = load_complete_blockchain_data()

	    if df_complete is not None:
	        print(f"\n✅ Successfully loaded {len(df_complete):,} rows")
	        print(f" Date range: {df_complete['date'].min().date()} to {df_complete['date'].max().date()}")
	        print(f" Columns: {list(df_complete.columns)}")
	    else:
	        print("\n⚠️ Complete data not available")
	        all_passed = False

	    # Test 2: Get data for specific date
	    print("\n" + "="*80)
	    print("TEST 2: Get data for specific date (2021-06-01)")
	    print("="*80)
	    df_2021 = get_blockchain_data_for_date('2021-06-01', window_size=30)

	    if df_2021 is not None and not df_2021.empty:
	        print(f"\n✅ Got {len(df_2021)} days")
	        print(f" Date range: {df_2021['date'].min().date()} to {df_2021['date'].max().date()}")

	        # The API fallback returns only recent data, so the requested day may
	        # be absent; indexing .values[0] blindly would raise IndexError.
	        wanted_day = pd.to_datetime('2021-06-01').date()
	        on_target = df_2021[df_2021['date'].dt.date == wanted_day]
	        if on_target.empty:
	            print(" ⚠️ No row for 2021-06-01 in the returned data")
	            all_passed = False
	        else:
	            print(f" Bitcoin price on 2021-06-01: ${on_target['bitcoin_price'].values[0]:,.2f}")
	    else:
	        print("\n⚠️ No data returned for 2021-06-01")
	        all_passed = False

	    # Test 3: Get latest 90 days
	    print("\n" + "="*80)
	    print("TEST 3: Get latest 90 days")
	    print("="*80)
	    df_latest = get_latest_blockchain_data(days=90)

	    if df_latest is not None and not df_latest.empty:
	        print(f"\n✅ Got {len(df_latest)} days")
	        print(f" Date range: {df_latest['date'].min().date()} to {df_latest['date'].max().date()}")
	    else:
	        print("\n⚠️ No recent data returned")
	        all_passed = False

	    print("\n" + "="*80)
	    if all_passed:
	        print("✅ ALL TESTS PASSED")
	    else:
	        print("⚠️ SOME TESTS DID NOT COMPLETE")
	    print("="*80)