sithuWiki committed on
Commit
b0a6f60
·
verified ·
1 Parent(s): 38ef0db

upload fetch_blockchain_data.py

Browse files
Files changed (1) hide show
  1. fetch_blockchain_data.py +256 -0
fetch_blockchain_data.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Fetch blockchain data - supports both complete historical data and API fallback"""
2
+
3
+ import requests
4
+ import pandas as pd
5
+ from datetime import datetime, timedelta
6
+ import os
7
+
8
+
9
+ # Global cache for complete blockchain data
10
+ _BLOCKCHAIN_DATA_CACHE = None
11
+ COMPLETE_DATA_FILE = 'blockchain_data_complete.csv'
12
+
13
+
14
def load_complete_blockchain_data(force_reload=False):
    """
    Return the complete blockchain dataset, reading the CSV from disk at most
    once per process unless a reload is requested.

    Parameters:
    -----------
    force_reload : bool
        When True, ignore any in-memory copy and read the CSV again

    Returns:
    --------
    pd.DataFrame or None
        The contents of COMPLETE_DATA_FILE with its 'date' column parsed to
        datetimes, or None when the file does not exist. The same DataFrame
        object is handed to every caller (it is the cached instance, not a
        copy). The file is expected to carry the columns date, bitcoin_price,
        difficulty, fees, hashrate, revenue, block_reward, days_since_halving;
        only 'date' is actually required by this function.
    """
    global _BLOCKCHAIN_DATA_CACHE

    # Serve from memory when something is cached and no reload was requested
    cache_usable = not force_reload and _BLOCKCHAIN_DATA_CACHE is not None
    if cache_usable:
        return _BLOCKCHAIN_DATA_CACHE

    # Without the file there is nothing to load; callers switch to the API
    if not os.path.exists(COMPLETE_DATA_FILE):
        print(f"\n⚠️ WARNING: {COMPLETE_DATA_FILE} not found!")
        print(" Falling back to API (limited to recent data)...")
        return None

    print(f"📂 Loading complete blockchain data from {COMPLETE_DATA_FILE}...")
    frame = pd.read_csv(COMPLETE_DATA_FILE)
    frame['date'] = pd.to_datetime(frame['date'])

    # Remember the parsed frame so later calls skip the disk read
    _BLOCKCHAIN_DATA_CACHE = frame

    print(f"✅ Loaded {len(frame):,} rows of data")
    first_day = frame['date'].min().date()
    last_day = frame['date'].max().date()
    print(f" Date range: {first_day} to {last_day}")

    return frame
52
+
53
+
54
def get_blockchain_data_for_date(target_date, window_size=30):
    """
    Get blockchain data leading up to (and including) a specific date

    Parameters:
    -----------
    target_date : str, datetime, date, numpy.datetime64 or pd.Timestamp
        Target prediction date; anything pd.to_datetime can parse
    window_size : int
        Number of days needed before target_date (default: 30)

    Returns:
    --------
    pd.DataFrame or None
        Rows whose 'date' lies between (target_date - window_size - 10 days)
        and target_date, inclusive, with a fresh 0-based index. The extra
        10 days are a safety buffer for gaps in the data.
        When the complete data file is unavailable, the result of
        get_latest_blockchain_data() is returned instead; that data is the
        most recent available and is NOT filtered to target_date, and it may
        be None if the API request fails.

    Raises:
    -------
    ValueError
        If target_date is missing (None / NaT) while the complete data file
        is available
    """

    # Load complete data
    complete_df = load_complete_blockchain_data()

    if complete_df is None:
        # Fallback to API. Request at least as many days as the window
        # (plus buffer) needs; 90 remains the minimum for default callers.
        return get_latest_blockchain_data(days=max(90, window_size + 10))

    # Normalize every supported input type to a pandas Timestamp so that the
    # comparisons below and the .date() call in the warning always work
    target_date = pd.to_datetime(target_date)
    if pd.isna(target_date):
        raise ValueError("target_date must be a valid date, got a missing value")

    # Calculate start date (need window_size days before target)
    start_date = target_date - timedelta(days=window_size + 10)  # +10 buffer for safety

    # Filter to date range (both ends inclusive)
    mask = (complete_df['date'] >= start_date) & (complete_df['date'] <= target_date)
    filtered_df = complete_df[mask].copy().reset_index(drop=True)

    # Warn (but still return what we have) when the window cannot be filled
    if len(filtered_df) < window_size:
        print(f"⚠️ WARNING: Not enough data for {target_date.date()}")
        print(f" Need {window_size} days, got {len(filtered_df)}")

    return filtered_df
94
+
95
+
96
def get_latest_blockchain_data(days=90):
    """
    Return the newest slice of blockchain data, preferring the local CSV
    and using the API only when the CSV is unavailable.

    Parameters:
    -----------
    days : int
        Length of the look-back window in days. With local data the window
        is anchored at the newest date present in the file; otherwise the
        value is passed on to the API helper.

    Returns:
    --------
    pd.DataFrame
        Rows dated within the look-back window (both ends inclusive) with a
        fresh 0-based index, or whatever the API helper returns when the
        local file is missing
    """
    history = load_complete_blockchain_data()

    # Guard clause: no local dataset means the API is the only source left
    if history is None:
        print("📡 Falling back to API...")
        return get_latest_blockchain_data_from_api(days)

    dates = history['date']
    newest = dates.max()
    oldest_wanted = newest - timedelta(days=days)

    # Keep only rows inside [oldest_wanted, newest]
    in_window = (dates >= oldest_wanted) & (dates <= newest)
    recent = history[in_window].copy()
    return recent.reset_index(drop=True)
128
+
129
+
130
def get_latest_blockchain_data_from_api(days=90):
    """
    Fallback: Fetch from the blockchain.info charts API if the complete data
    file is not available

    One request is made per chart (price, difficulty, fees, hash rate, miner
    revenue). The per-chart series are outer-joined on their timestamps, so a
    timestamp present in only some charts yields NaN in the others.

    Parameters:
    -----------
    days : int
        Look-back window sent to the API as the 'timespan' parameter

    Returns:
    --------
    pd.DataFrame or None
        Columns: date, bitcoin_price, difficulty, fees, hashrate, revenue,
        block_reward, days_since_halving, sorted by date.
        None if any single chart could not be fetched or came back empty
        (the reason is printed).
    """

    data_types = {
        'bitcoin_price': 'market-price',
        'difficulty': 'difficulty',
        'fees': 'transaction-fees',
        'hashrate': 'hash-rate',
        'revenue': 'miners-revenue'
    }

    timespan = f'{days}days'
    all_data = {}

    for name, chart_name in data_types.items():
        url = f'https://api.blockchain.info/charts/{chart_name}'
        params = {'timespan': timespan, 'format': 'json'}

        # Deliberately broad: this is a best-effort fallback, so any failure
        # (network, HTTP status, malformed payload) is reported and turned
        # into a None result instead of propagating to the caller.
        try:
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            values = response.json().get('values', [])

            # An empty payload would otherwise surface as a cryptic
            # KeyError: 'x' when the timestamp column is accessed below
            if not values:
                raise ValueError(f"API returned no data points for chart '{chart_name}'")

            df_temp = pd.DataFrame(values)
            # 'x' holds Unix timestamps in seconds, 'y' the chart value
            df_temp['x'] = pd.to_datetime(df_temp['x'], unit='s')
            df_temp = df_temp.set_index('x').rename(columns={'y': name})
            all_data[name] = df_temp
        except Exception as e:
            print(f"❌ Failed to fetch {name}: {e}")
            return None

    # Merge all series on their timestamp index, in the order declared in
    # data_types (driving this from the dict keeps the two in sync)
    merged_df = None
    for name in data_types:
        if merged_df is None:
            merged_df = all_data[name]
        else:
            merged_df = merged_df.join(all_data[name], how='outer')

    merged_df = merged_df.reset_index().rename(columns={'x': 'date'})
    merged_df = merged_df.sort_values('date').reset_index(drop=True)

    # Add block reward
    merged_df['block_reward'] = merged_df['date'].apply(calculate_block_reward)

    # Add days since halving
    merged_df['days_since_halving'] = merged_df['date'].apply(get_days_since_halving)

    return merged_df
179
+
180
+
181
def calculate_block_reward(date):
    """
    Calculate block reward based on halving schedule

    Parameters:
    -----------
    date : str, datetime, date, numpy.datetime64, pd.Timestamp or None
        Day to look up. Timezone-aware values are converted to UTC first;
        naive values are compared as-is.

    Returns:
    --------
    int, float or None
        Reward in BTC per block in effect on that day (50, 25, 12.5, 6.25 or
        3.125), or None when date is missing (None / NaN / NaT). A halving
        day itself already counts towards the new, lower reward.
    """
    if pd.isna(date):
        return None

    # Coerce to a Timestamp so strings and plain date objects compare cleanly
    moment = pd.Timestamp(date)
    if moment.tzinfo is not None:
        # Drop the timezone (keeping UTC wall time) to match the naive
        # halving dates below
        moment = moment.tz_convert(None)

    # (first day the reward no longer applies, reward valid before that day)
    schedule = (
        (pd.Timestamp('2012-11-28'), 50),
        (pd.Timestamp('2016-07-09'), 25),
        (pd.Timestamp('2020-05-11'), 12.5),
        (pd.Timestamp('2024-04-20'), 6.25),
    )
    for halving_day, reward in schedule:
        if moment < halving_day:
            return reward

    # On or after the most recent halving listed above
    return 3.125
195
+
196
+
197
def get_days_since_halving(date):
    """
    Calculate days since most recent halving

    Parameters:
    -----------
    date : str, datetime, date, numpy.datetime64, pd.Timestamp or None
        Day to measure from. Timezone-aware values are converted to UTC
        first; naive values are compared as-is.

    Returns:
    --------
    int or None
        Whole days elapsed since the latest halving on or before date.
        0 on a halving day itself and also for any date before the first
        listed halving (2012-11-28). None when date is missing
        (None / NaN / NaT), matching calculate_block_reward, rather than a
        misleading 0.
    """
    if pd.isna(date):
        return None

    # Coerce to a Timestamp so strings and plain date objects compare cleanly
    moment = pd.Timestamp(date)
    if moment.tzinfo is not None:
        # Drop the timezone (keeping UTC wall time) to match the naive
        # halving dates below
        moment = moment.tz_convert(None)

    halving_dates = (
        pd.Timestamp('2012-11-28'),
        pd.Timestamp('2016-07-09'),
        pd.Timestamp('2020-05-11'),
        pd.Timestamp('2024-04-20'),
    )

    # Halvings that have already happened as of the given day
    past_halvings = [halving for halving in halving_dates if halving <= moment]
    if not past_halvings:
        return 0

    return (moment - max(past_halvings)).days
215
+
216
+
217
if __name__ == "__main__":
    # Manual smoke test: exercises the three public entry points and reports
    # honestly whether each produced usable data.
    print("Testing blockchain data loading...\n")

    failed_tests = []

    # Test 1: Load complete data
    print("="*80)
    print("TEST 1: Load complete blockchain data")
    print("="*80)
    df_complete = load_complete_blockchain_data()

    if df_complete is not None:
        print(f"\n✅ Successfully loaded {len(df_complete):,} rows")
        print(f" Date range: {df_complete['date'].min().date()} to {df_complete['date'].max().date()}")
        print(f" Columns: {list(df_complete.columns)}")
    else:
        # Not a failure by itself: the remaining tests then use the API fallback
        print("\n⚠️ Complete data not available")

    # Test 2: Get data for specific date
    print("\n" + "="*80)
    print("TEST 2: Get data for specific date (2021-06-01)")
    print("="*80)
    df_2021 = get_blockchain_data_for_date('2021-06-01', window_size=30)

    if df_2021 is not None and not df_2021.empty:
        print(f"\n✅ Got {len(df_2021)} days")
        print(f" Date range: {df_2021['date'].min().date()} to {df_2021['date'].max().date()}")

        # The target day can be absent (e.g. API fallback only returns recent
        # data), so check before indexing instead of risking an IndexError
        target_day = pd.to_datetime('2021-06-01').date()
        target_rows = df_2021[df_2021['date'].dt.date == target_day]
        if target_rows.empty:
            print(" ⚠️ No row for 2021-06-01 in the returned data")
            failed_tests.append("TEST 2")
        else:
            print(f" Bitcoin price on 2021-06-01: ${target_rows['bitcoin_price'].values[0]:,.2f}")
    else:
        print("\n❌ No data returned")
        failed_tests.append("TEST 2")

    # Test 3: Get latest 90 days
    print("\n" + "="*80)
    print("TEST 3: Get latest 90 days")
    print("="*80)
    df_latest = get_latest_blockchain_data(days=90)

    if df_latest is not None and not df_latest.empty:
        print(f"\n✅ Got {len(df_latest)} days")
        print(f" Date range: {df_latest['date'].min().date()} to {df_latest['date'].max().date()}")
    else:
        print("\n❌ No data returned")
        failed_tests.append("TEST 3")

    print("\n" + "="*80)
    if failed_tests:
        print(f"❌ TESTS FAILED: {', '.join(failed_tests)}")
        print("="*80)
        # Non-zero exit status so shells/CI can detect the failure
        raise SystemExit(1)
    print("✅ ALL TESTS PASSED")
    print("="*80)