sithuWiki commited on
Commit
f481275
Β·
verified Β·
1 Parent(s): b0a6f60

upload 7 .py files

Browse files
Files changed (6) hide show
  1. electricity_prices.py +129 -0
  2. fetch_asic_prices.py +291 -0
  3. miner_specs.py +34 -0
  4. model.py +96 -0
  5. predictor.py +91 -0
  6. preprocessing.py +206 -0
electricity_prices.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # electricity_prices.py
2
+
3
+ import os
4
+ from datetime import date as Date
5
+ from typing import Dict, Optional
6
+
7
+ import pandas as pd
8
+ from huggingface_hub import hf_hub_download
9
+
10
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Private dataset repo on Hugging Face containing the CSV files
HF_DATASET_REPO = "sithuWiki/electricity"
HF_DATASET_TOKEN_ENV = "HF_DATASET_TOKEN"  # set this in your Space secrets

# Fallback / base rates (USD/kWh) used when a date is outside the CSV range
# or when a region's CSV failed to load.
BASE_ELECTRICITY_RATES: Dict[str, float] = {
    "texas": 0.1549,
    "china": 0.08,
    "ethiopia": 0.01,
}

# Mapping from region name -> CSV filename in the private dataset
REGION_FILES: Dict[str, str] = {
    "texas": "texas_residential_daily_df.csv",
    "china": "china_electricity_prices_daily.csv",
    "ethiopia": "ethiopia_electricity_prices_daily.csv",
}

# In-memory cache: region -> pandas.Series indexed by python date with float
# prices. A value of None means loading failed; populated at import time below.
_ELECTRICITY_SERIES: Dict[str, Optional[pd.Series]] = {}
34
+
35
+
36
def _get_token() -> str:
    """Return the HF read token from the environment, or fail loudly.

    Raises RuntimeError when the variable is unset or empty, since the
    private dataset cannot be downloaded without it.
    """
    token = os.environ.get(HF_DATASET_TOKEN_ENV)
    if token:
        return token
    raise RuntimeError(
        f"Environment variable {HF_DATASET_TOKEN_ENV} is not set. "
        "Add a read token for the private dataset to your Space secrets."
    )
44
+
45
+
46
def _load_region_series(region: str, filename: str) -> Optional[pd.Series]:
    """
    Load a single region's CSV from the private HF dataset as a Series.

    Expected columns in CSV:
        - 'date'  (any format parsable by pandas.to_datetime, e.g. '10/1/15')
        - 'price' (electricity price per kWh)

    Returns the prices as a float Series indexed by python ``date`` objects,
    sorted ascending, or None (after logging a warning) on any failure so
    callers can fall back to the base rates.
    """
    try:
        token = _get_token()
        file_path = hf_hub_download(
            repo_id=HF_DATASET_REPO,
            filename=filename,
            repo_type="dataset",
            token=token,
        )
        df = pd.read_csv(file_path)

        if "date" not in df.columns or "price" not in df.columns:
            # Fix: the message previously contained a dead "(unknown)"
            # placeholder; interpolate the actual offending filename.
            raise ValueError(f"{filename} must contain 'date' and 'price' columns.")

        # Normalize date to python date objects
        df["date"] = pd.to_datetime(df["date"]).dt.date
        df = df[["date", "price"]].copy()
        df = df.sort_values("date")
        series = df.set_index("date")["price"].astype(float)
        return series
    except Exception as e:
        # Broad catch is deliberate: a missing file/token must not break import.
        print(f"⚠️ Could not load electricity data for {region} from {filename}: {e}")
        return None
76
+
77
+
78
# Load all regions at import time (one-time cost).
# NOTE(review): this performs network I/O (hf_hub_download) on import;
# failures are logged inside _load_region_series and the region falls
# back to its base rate.
for _region, _fname in REGION_FILES.items():
    _ELECTRICITY_SERIES[_region] = _load_region_series(_region, _fname)
81
+
82
+
83
def get_electricity_rate(region: str, d) -> float:
    """Return the electricity rate (USD/kWh) for *region* on day *d*.

    Uses that day's CSV price when available, otherwise the last available
    price before *d* (to bridge gaps). Dates outside the CSV range — or a
    region whose CSV did not load — fall back to BASE_ELECTRICITY_RATES.

    Raises ValueError for an unknown region and TypeError when *d* is not
    a datetime.date, pandas.Timestamp, or str.
    """
    if region not in BASE_ELECTRICITY_RATES:
        raise ValueError(
            f"Unknown region '{region}'. Expected one of {list(BASE_ELECTRICITY_RATES.keys())}"
        )

    # Coerce the input to a plain datetime.date
    if isinstance(d, pd.Timestamp):
        d = d.date()
    elif isinstance(d, str):
        d = pd.to_datetime(d).date()
    elif not isinstance(d, Date):
        raise TypeError(
            f"Unsupported date type {type(d)}; expected datetime.date, pandas.Timestamp, or str"
        )

    fallback = BASE_ELECTRICITY_RATES[region]
    series = _ELECTRICITY_SERIES.get(region)
    if series is None or series.empty:
        return fallback

    # Outside the known range β†’ base constant rate
    first_day, last_day = series.index[0], series.index[-1]
    if not (first_day <= d <= last_day):
        return fallback

    # Exact hit for this day
    if d in series.index:
        return float(series.loc[d])

    # No row for this exact day: use the most recent earlier price
    earlier = series.loc[:d]
    return fallback if earlier.empty else float(earlier.iloc[-1])
fetch_asic_prices.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Fetch ASIC miner prices from Hashrate Index API
3
+ Downloads price indices for efficiency categories and calculates individual miner prices
4
+ """
5
+
6
+ import requests
7
+ import pandas as pd
8
+ import os
9
+ from datetime import datetime
10
+ from miner_specs import MINER_SPECS
11
+
12
+
13
# Efficiency category mapping for each miner: miner key -> Hashrate Index
# price-index bucket (W/TH bands: under19 / 19to25 / 25to38). The miner's
# own W/TH figure is noted inline.
EFFICIENCY_MAPPING = {
    's19pro': '25to38',   # 30 W/TH
    's19jpro': '25to38',  # 30 W/TH
    's19kpro': '19to25',  # 23 W/TH
    's21': 'under19',     # 18 W/TH
    'ka3': 'under19',     # 19 W/TH
    't19': '25to38',      # 38 W/TH
    's19xp': '19to25',    # 21 W/TH
    's19apro': '25to38',  # 31 W/TH
    'm50s': '19to25',     # 24 W/TH
    'm53': '25to38',      # 29 W/TH
    'm30s': '25to38'      # 31 W/TH
}
27
+
28
+
29
def fetch_asic_price_for_date(target_date, api_key=None, currency='USD'):
    """
    Fetch ASIC price for a specific historical date

    Parameters:
    -----------
    target_date : str, datetime.date, or datetime-like
        Target date, e.g. 'YYYY-MM-DD'
    api_key : str, optional
        Hashrate Index API key (falls back to HASHRATE_API_KEY env var)
    currency : str
        Currency (default: USD)

    Returns:
    --------
    tuple: (dict of miner_name -> price, bool indicating if data exists)
    """

    # Get API key from environment if not provided
    if not api_key:
        api_key = os.environ.get('HASHRATE_API_KEY')

    if not api_key:
        print("⚠️ No API key found, using fallback prices")
        return get_fallback_prices(), False

    # Robustness fix: normalize every supported input type. Previously only
    # str was converted, so a plain datetime.date crashed later on .date()
    # and silently fell into the broad except -> fallback.
    target_date = pd.to_datetime(target_date)

    url = "https://api.hashrateindex.com/v1/hashrateindex/asic/price-index"

    headers = {
        "Accept": "application/json",
        "X-Hi-Api-Key": api_key
    }

    params = {
        "currency": currency,
        "span": "ALL"  # Get all historical data
    }

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()

        data = response.json().get("data", [])

        if not data:
            print("⚠️ No data returned from API")
            return get_fallback_prices(), False

        # Create DataFrame
        df = pd.DataFrame(data)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['date'] = df['timestamp'].dt.date

        # Distance in whole days from each row to the requested date
        target_date_only = target_date.date()
        df['date_diff'] = abs((df['timestamp'].dt.date - target_date_only).apply(lambda x: x.days))

        # Get row with closest date (within 7 days tolerance)
        closest_row = df.loc[df['date_diff'].idxmin()]

        if closest_row['date_diff'] > 7:
            print(f"⚠️ No price data within 7 days of {target_date_only}")
            return get_fallback_prices(), False

        # $/TH for each efficiency bucket on that day
        efficiency_prices = {
            'under19': closest_row.get('under19', None),
            '19to25': closest_row.get('19to25', None),
            '25to38': closest_row.get('25to38', None)
        }

        print(f"βœ… Found price data for {closest_row['date']} (requested: {target_date_only})")
        print(f" under19: ${efficiency_prices['under19']}/TH")
        print(f" 19to25: ${efficiency_prices['19to25']}/TH")
        print(f" 25to38: ${efficiency_prices['25to38']}/TH")

        # Miner price = hashrate (TH/s) * $/TH for its efficiency bucket
        miner_prices = {}

        for miner_name, efficiency_cat in EFFICIENCY_MAPPING.items():
            if miner_name in MINER_SPECS:
                hashrate = MINER_SPECS[miner_name]['hashrate']
                price_per_th = efficiency_prices.get(efficiency_cat)

                # NaN > 0 evaluates False, so missing/NaN buckets fall through
                if price_per_th and price_per_th > 0:
                    miner_prices[miner_name] = hashrate * price_per_th
                else:
                    miner_prices[miner_name] = get_fallback_price_for_miner(miner_name)

        return miner_prices, True

    except requests.exceptions.RequestException as e:
        print(f"⚠️ API request failed: {e}")
        return get_fallback_prices(), False
    except Exception as e:
        print(f"⚠️ Error: {e}")
        return get_fallback_prices(), False
130
+
131
+
132
def fetch_asic_price_index(api_key=None, currency='USD'):
    """
    Fetch the latest ASIC price index from Hashrate Index API

    Parameters:
    -----------
    api_key : str, optional
        Hashrate Index API key (falls back to HASHRATE_API_KEY env var;
        if unavailable, fallback prices are returned)
    currency : str
        Currency (default: USD)

    Returns:
    --------
    dict
        Dictionary of miner_name -> price (USD)
    """

    if not api_key:
        api_key = os.environ.get('HASHRATE_API_KEY')

    if not api_key:
        print("⚠️ No API key provided, using fallback prices")
        return get_fallback_prices()

    url = "https://api.hashrateindex.com/v1/hashrateindex/asic/price-index"

    headers = {
        "Accept": "application/json",
        "X-Hi-Api-Key": api_key
    }

    params = {
        "currency": currency,
        "span": "1Y"  # Last year of data
    }

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()

        data = response.json().get("data", [])

        if not data:
            print("⚠️ No data returned from API, using fallback prices")
            return get_fallback_prices()

        # Get most recent data
        df = pd.DataFrame(data)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df = df.sort_values('timestamp', ascending=False)

        # Get latest row
        latest = df.iloc[0]

        # Extract efficiency category prices ($/TH)
        efficiency_prices = {
            'under19': latest.get('under19', None),
            '19to25': latest.get('19to25', None),
            '25to38': latest.get('25to38', None)
        }

        print(f"βœ… Fetched price index (date: {latest['timestamp'].date()})")
        print(f" under19: ${efficiency_prices['under19']}/TH")
        print(f" 19to25: ${efficiency_prices['19to25']}/TH")
        print(f" 25to38: ${efficiency_prices['25to38']}/TH")

        # Miner price = hashrate (TH/s) * $/TH for its efficiency bucket
        miner_prices = {}

        for miner_name, efficiency_cat in EFFICIENCY_MAPPING.items():
            if miner_name in MINER_SPECS:
                hashrate = MINER_SPECS[miner_name]['hashrate']
                price_per_th = efficiency_prices.get(efficiency_cat)

                # Bug fix: the old bare truthiness test let NaN through
                # (NaN is truthy), yielding NaN prices. Mirror the guard in
                # fetch_asic_price_for_date and require a positive number.
                if pd.notna(price_per_th) and price_per_th > 0:
                    miner_prices[miner_name] = hashrate * price_per_th
                else:
                    # Fallback if category not available
                    miner_prices[miner_name] = get_fallback_price_for_miner(miner_name)

        return miner_prices

    except requests.exceptions.RequestException as e:
        print(f"⚠️ API request failed: {e}")
        print(" Using fallback prices")
        return get_fallback_prices()
    except Exception as e:
        print(f"⚠️ Error processing API data: {e}")
        print(" Using fallback prices")
        return get_fallback_prices()
222
+
223
+
224
def get_fallback_prices():
    """
    Fallback prices when API is unavailable
    Based on approximate market prices as of Dec 2024

    Returns:
    --------
    dict
        Dictionary of miner_name -> price (USD)
    """

    return {
        's19pro': 2500,
        's19jpro': 2200,
        's19kpro': 3500,
        's21': 5500,
        'ka3': 5000,
        't19': 2000,
        's19xp': 4000,
        's19apro': 2800,
        'm50s': 3200,
        'm53': 8000,
        # Fix: 'm30s' is in EFFICIENCY_MAPPING/MINER_SPECS but was missing
        # here; 2500 matches the default get_fallback_price_for_miner used,
        # so per-miner fallback behavior is unchanged.
        'm30s': 2500
    }
247
+
248
+
249
def get_fallback_price_for_miner(miner_name):
    """Get the fallback price for one miner (2500 USD when unknown)."""
    return get_fallback_prices().get(miner_name, 2500)
253
+
254
+
255
# For backward compatibility with existing code: a snapshot of the fallback
# price table, frozen at import time.
FALLBACK_PRICES = get_fallback_prices()
257
+
258
+
259
if __name__ == "__main__":
    # Manual smoke test: fetch the latest index (or fallbacks when no API
    # key / no network) and print the derived per-miner prices.
    print("\n" + "="*60)
    print("ASIC Price Fetcher Test")
    print("="*60 + "\n")

    # Try to get API key from environment variable
    api_key = os.environ.get('HASHRATE_API_KEY')

    if api_key:
        print(f"Using API key: {api_key[:8]}...")
    else:
        print("No API key found in HASHRATE_API_KEY environment variable")
        print("Set it with: export HASHRATE_API_KEY='your-key-here'")

    # Fetch prices
    prices = fetch_asic_price_index(api_key)

    print("\n" + "="*60)
    print("Current Miner Prices")
    print("="*60)

    for miner_name, price in sorted(prices.items()):
        # specs may legitimately be empty for miners without MINER_SPECS entries
        specs = MINER_SPECS.get(miner_name, {})
        full_name = specs.get('full_name', miner_name)
        hashrate = specs.get('hashrate', 0)
        efficiency = specs.get('efficiency', 0)
        efficiency_cat = EFFICIENCY_MAPPING.get(miner_name, 'unknown')

        print(f"{full_name:25s} ({hashrate:3.0f} TH/s, {efficiency:4.1f} W/TH)")
        print(f" Category: {efficiency_cat:15s} β†’ Price: ${price:,.2f}")
        print()

    print("="*60)
miner_specs.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Miner specifications and electricity rates"""
2
+
3
+ # 's9': {'hashrate': 13.5, 'power': 1323, 'efficiency': 98, 'release_date': '2016-07-01', 'full_name': 'Antminer S9'},
4
+ # 's15': {'hashrate': 28, 'power': 1596, 'efficiency': 57, 'release_date': '2018-12-01', 'full_name': 'Antminer S15'},
5
+ # 's17pro': {'hashrate': 50, 'power': 1975, 'efficiency': 40, 'release_date': '2019-04-09', 'full_name': 'Antminer S17 Pro'},
6
+ # 'M32': {'hashrate': 62, 'power': 3348, 'efficiency': 54, 'release_date': '2019-09-01', 'full_name': 'WhatsMiner M32'},
7
+ # 's7': {'hashrate': 4.73, 'power': 1293, 'efficiency': 273, 'release_date': '2015-09-01', 'full_name': 'Antminer S7'},
8
+ # 't17': {'hashrate': 40, 'power': 2200, 'efficiency': 55, 'release_date': '2019-05-01', 'full_name': 'Antminer T17'},
9
+ # 'm21s': {'hashrate': 56, 'power': 3360, 'efficiency': 60, 'release_date': '2019-04-01', 'full_name': 'WhatsMiner M21S'},
10
+ # 'm10s': {'hashrate': 55, 'power': 3500, 'efficiency': 64, 'release_date': '2019-11-01', 'full_name': 'WhatsMiner M10S'},
11
+ # 'r4': {'hashrate': 8.7, 'power': 845, 'efficiency': 97, 'release_date': '2017-02-01', 'full_name': 'Antminer R4'},
12
+
13
+ MINER_SPECS = {
14
+ 's19pro': {'hashrate': 110, 'power': 3250, 'efficiency': 30, 'release_date': '2020-05-01', 'full_name': 'Antminer S19 Pro'},
15
+ 's19jpro': {'hashrate': 100, 'power': 2950, 'efficiency': 30, 'release_date': '2021-06-01', 'full_name': 'Antminer S19j Pro'},
16
+ 's19kpro': {'hashrate': 120, 'power': 2760, 'efficiency': 23, 'release_date': '2023-04-02', 'full_name': 'Antminer S19k Pro'},
17
+ 's21': {'hashrate': 200, 'power': 3500, 'efficiency': 18, 'release_date': '2023-08-14', 'full_name': 'Antminer S21'},
18
+ 'ka3': {'hashrate': 166, 'power': 3154, 'efficiency': 19, 'release_date': '2022-09-01', 'full_name': 'AvalonMiner KA3'},
19
+ 't19': {'hashrate': 84, 'power': 3344, 'efficiency': 38, 'release_date': '2020-08-01', 'full_name': 'Antminer T19'},
20
+ 's19xp': {'hashrate': 141, 'power': 3031, 'efficiency': 21, 'release_date': '2021-11-11', 'full_name': 'Antminer S19 XP'},
21
+ 's19apro': {'hashrate': 104, 'power': 3250, 'efficiency': 31, 'release_date': '2021-11-01', 'full_name': 'Antminer S19a Pro'},
22
+ 'm50s': {'hashrate': 136, 'power': 3264, 'efficiency': 24, 'release_date': '2022-12-01', 'full_name': 'WhatsMiner M50S'},
23
+ 'm53': {'hashrate': 226, 'power': 6554, 'efficiency': 29, 'release_date': '2021-04-02', 'full_name': 'WhatsMiner M53'},
24
+ 'm30s': {'hashrate': 112, 'power': 3472, 'efficiency': 31, 'release_date': '2020-10-01', 'full_name': 'WhatsMiner M30S'}
25
+ }
26
+
27
+ ELECTRICITY_RATES = {
28
+ 'texas': 0.1549,
29
+ 'china': 0.08,
30
+ 'ethiopia': 0.01
31
+ }
32
+
33
+ def get_miner_list():
34
+ return list(MINER_SPECS.keys())
model.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import numpy as np
4
+
5
+
6
class SpectralFeatureExtractor(nn.Module):
    """Learned per-channel filtering in the frequency domain.

    Each channel owns one learnable complex gain (stored as a real
    (num_features, 2) tensor). The sequence is rFFT'd along time, scaled
    by the gains, and inverse-transformed, so output shape equals input.
    """

    def __init__(self, num_features):
        super().__init__()
        # Real/imag pairs; small init keeps the filter near zero at start.
        self.complex_weight = nn.Parameter(
            torch.randn(num_features, 2, dtype=torch.float32) * 0.02
        )

    def forward(self, x):
        # x: (batch, seq_len, channels) -> channel-major for the FFT axis
        seq_len = x.shape[1]
        channel_major = x.transpose(1, 2)
        spectrum = torch.fft.rfft(channel_major, dim=2, norm="ortho")
        gain = torch.view_as_complex(self.complex_weight)  # (channels,)
        filtered = spectrum * gain.unsqueeze(0).unsqueeze(-1)
        restored = torch.fft.irfft(filtered, n=seq_len, dim=2, norm="ortho")
        return restored.transpose(1, 2)
19
+
20
+
21
class ChannelMixing(nn.Module):
    """Squeeze-and-excitation style channel reweighting.

    Mean-pools over time, runs the pooled vector through a small
    bottleneck MLP, and multiplies the input by the resulting per-channel
    weights (no sigmoid: the gate is unbounded).
    """

    def __init__(self, num_features, reduction=4):
        super().__init__()
        self.fc1 = nn.Linear(num_features, num_features // reduction)
        self.fc2 = nn.Linear(num_features // reduction, num_features)
        self.act = nn.GELU()

    def forward(self, x):
        # x: (batch, seq_len, channels)
        pooled = x.mean(dim=1)                       # (batch, channels)
        gate = self.fc2(self.act(self.fc1(pooled)))  # (batch, channels)
        return x * gate.unsqueeze(1)                 # broadcast over time
34
+
35
+
36
class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding followed by dropout.

    The sin/cos table is precomputed for max_len positions and added to
    the first seq_len rows of each input batch.
    """

    def __init__(self, d_model, max_len=5000, dropout=0.2):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        positions = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
        freqs = torch.exp(
            torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model)
        )
        table = torch.zeros(max_len, d_model)
        table[:, 0::2] = torch.sin(positions * freqs)
        table[:, 1::2] = torch.cos(positions * freqs)

        # Buffer, not a Parameter: follows .to()/.cuda() but is never trained.
        self.register_buffer("pe", table.unsqueeze(0))

    def forward(self, x):
        # x: (batch, seq_len, d_model); add the matching slice of the table.
        return self.dropout(x + self.pe[:, : x.size(1), :])
54
+
55
+
56
class MineROINet(nn.Module):
    """Transformer classifier over daily mining-feature sequences.

    Pipeline: spectral filtering -> channel mixing -> projection to
    d_model -> sinusoidal positional encoding -> Transformer encoder ->
    mean-pool over time -> MLP head emitting `num_classes` logits.
    """

    def __init__(self, input_dim, d_model=64, nhead=2, num_layers=2, dim_feedforward=256, dropout=0.2, num_classes=3, seq_len=30):
        super().__init__()

        self.spectral = SpectralFeatureExtractor(input_dim)
        self.channel_mix = ChannelMixing(input_dim)
        # Skip the projection entirely when widths already agree.
        if input_dim != d_model:
            self.input_projection = nn.Linear(input_dim, d_model)
        else:
            self.input_projection = nn.Identity()
        self.pos_encoder = PositionalEncoding(d_model, max_len=seq_len, dropout=dropout)

        layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation="gelu",
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(layer, num_layers=num_layers)

        self.classifier = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Dropout(dropout),
            nn.Linear(d_model, d_model // 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_model // 2, num_classes),
        )

    def forward(self, seq):
        # seq: (batch, seq_len, input_dim) -> logits: (batch, num_classes)
        hidden = self.pos_encoder(
            self.input_projection(self.channel_mix(self.spectral(seq)))
        )
        encoded = self.transformer_encoder(hidden)
        return self.classifier(encoded.mean(dim=1))
87
+
88
+
89
def create_model_30day(input_dim, num_classes=3):
    """Factory for the 30-day-window model with the training hyperparameters."""
    hyperparams = dict(
        d_model=64,
        nhead=2,
        num_layers=2,
        dim_feedforward=256,
        dropout=0.2,
        seq_len=30,
    )
    return MineROINet(input_dim=input_dim, num_classes=num_classes, **hyperparams)
92
+
93
+
94
+ # def create_model_60day(input_dim, num_classes=3):
95
+ # return MineROINet(input_dim=input_dim, d_model=64, nhead=4, num_layers=2,
96
+ # dim_feedforward=256, dropout=0.2, num_classes=num_classes, seq_len=60)
predictor.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # predictor.py
2
+ import os
3
+ import torch
4
+ import numpy as np
5
+ import joblib
6
+ from model import create_model_30day
7
+
8
+
9
# Per-region scaler files (fitted during preprocessing). The predictor
# loads all of them at construction and fails fast if any is missing.
SCALER_PATHS = {
    "texas": "scaler_texas.joblib",
    "china": "scaler_china.joblib",
    "ethiopia": "scaler_ethiopia.joblib",
}
14
+
15
+
16
class MineROIPredictor:
    """Inference wrapper around a trained MineROINet checkpoint.

    Loads the per-region feature scalers and the model weights once, then
    serves 3-class ROI predictions for raw 30-day feature sequences.
    """

    def __init__(self, model_path, device=None):
        self.window_size = 30
        # Prefer an explicit device, otherwise pick CUDA when available.
        self.device = device if device else torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.class_names = [
            'Unprofitable (ROI ≀ 0)',
            'Marginal (0 < ROI < 1)',
            'Profitable (ROI β‰₯ 1)'
        ]

        # Load every scaler fitted during preprocessing; refuse to start
        # without them, since raw inputs must be scaled exactly as in training.
        self.scalers = {}
        for region_name, scaler_file in SCALER_PATHS.items():
            if not os.path.exists(scaler_file):
                raise FileNotFoundError(f"Scaler not found for {region_name}: {scaler_file}")
            self.scalers[region_name] = joblib.load(scaler_file)

        # Restore trained weights.
        state_dict = torch.load(model_path, map_location=self.device)

        # The spectral layer holds one complex weight per input feature, so
        # its leading dimension reveals the feature count used in training.
        self.input_dim = state_dict['spectral.complex_weight'].shape[0]

        # Rebuild the architecture with the training hyperparameters.
        self.model = create_model_30day(self.input_dim)
        self.model.load_state_dict(state_dict)
        self.model.to(self.device)
        self.model.eval()

    def normalize_sequence(self, sequence: np.ndarray, region: str) -> np.ndarray:
        """Scale a raw (L, C) sequence with *region*'s fitted scaler.

        Transform-only: fitting happened during preprocessing.
        """
        if region not in self.scalers:
            raise ValueError(f"Unknown region '{region}'. Expected one of {list(self.scalers.keys())}")

        scaler = self.scalers[region]
        flattened = sequence.reshape(-1, sequence.shape[-1])
        return scaler.transform(flattened).reshape(sequence.shape)

    def predict(self, sequence: np.ndarray, region: str):
        """Classify a raw (L, C) feature sequence.

        Returns a dict with the class index, its label, all three class
        probabilities, and the winning probability as 'confidence'.
        """
        # Scale using the correct country's scaler, then batch: (1, L, C).
        scaled = self.normalize_sequence(sequence, region)
        batch = torch.from_numpy(scaled).float().unsqueeze(0).to(self.device)

        with torch.no_grad():
            probabilities = torch.softmax(self.model(batch), dim=1)

        probs = probabilities.cpu().numpy()[0]
        winner = int(probs.argmax())

        return {
            "predicted_class": winner,
            "predicted_label": self.class_names[winner],
            "probabilities": {
                "unprofitable": float(probs[0]),
                "marginal": float(probs[1]),
                "profitable": float(probs[2]),
            },
            "confidence": float(probs[winner]),
        }
preprocessing.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Data preprocessing and feature engineering"""
2
+
3
+ import pandas as pd
4
+ import numpy as np
5
+ from miner_specs import MINER_SPECS, ELECTRICITY_RATES
6
+ from fetch_blockchain_data import get_days_since_halving
7
+ from electricity_prices import get_electricity_rate
8
+
9
+
10
def engineer_features(blockchain_df):
    """Return a chronologically sorted copy with a parsed 'date' column.

    Note: rows are sorted on the raw 'date' values first, then the column
    is parsed to datetime. Feature selection happens later.
    """
    ordered = (
        blockchain_df.copy()
        .sort_values('date')
        .reset_index(drop=True)
    )
    ordered['date'] = pd.to_datetime(ordered['date'])
    return ordered
18
+
19
+
20
def prepare_miner_features(blockchain_df, miner_name, miner_price, region='texas'):
    """Add miner-specific features - EXACTLY 14 features.

    Takes the engineered blockchain frame plus one miner's identity/price
    and returns a frame with the 14 model inputs (plus 'date').
    """

    df = blockchain_df.copy()
    specs = MINER_SPECS[miner_name]

    # Keep only these columns from blockchain data
    df = df[['date', 'bitcoin_price', 'difficulty', 'fees', 'hashrate', 'revenue', 'block_reward']].copy()
    df['date'] = pd.to_datetime(df['date'])

    # Add miner features (constant for a given machine)
    df['machine_price'] = miner_price
    df['machine_hashrate'] = specs['hashrate']  # TH/s
    df['power'] = specs['power']                # W
    df['efficiency'] = specs['efficiency']      # W/TH

    # Calculate age_days (days since miner was released; can be negative
    # for rows that predate the release)
    release_date = pd.to_datetime(specs['release_date'])
    df['age_days'] = (df['date'] - release_date).dt.days

    # Days since halving
    df['days_since_halving'] = df['date'].apply(get_days_since_halving)

    # Revenue potential: expected BTC/day for this machine's share of work,
    # i.e. hashes/day / (difficulty * 2^32) expected blocks, times
    # (block subsidy + fees/144). NOTE(review): this assumes 'fees' is a
    # daily total split over ~144 blocks/day β€” confirm units with the data
    # source.
    hashrate_hs = df['machine_hashrate'] * 1e12  # TH/s -> H/s
    btc_per_day = (hashrate_hs * 86400) / (df['difficulty'] * (2**32)) * (df['block_reward'] + (df['fees']/144))
    df['Revenue_Potential'] = btc_per_day * df['bitcoin_price']

    # Electricity rate: per-day historical rate with base-rate fallback
    # (the flat-rate lookup below was the old behavior)
    # df['electricity_rate'] = ELECTRICITY_RATES.get(region, 0.10)
    df['electricity_rate'] = df['date'].dt.date.apply(
        lambda day: get_electricity_rate(region, day)
    )

    return df
55
+
56
+
57
def get_latest_sequence(blockchain_df, miner_name, miner_price, region='texas', window_size=30):
    """Build the most recent `window_size`-day feature matrix for prediction.

    Returns (sequence, feature_cols, latest_date) where `sequence` has
    shape (window_size, len(feature_cols)) with columns in training order.

    Raises ValueError when fewer than `window_size` complete rows remain
    after dropping NaNs, or when the feature width is wrong.
    """

    df_features = engineer_features(blockchain_df)
    df_miner = prepare_miner_features(df_features, miner_name, miner_price, region)

    # CRITICAL: This order MUST match the training data CSV exactly!
    # (bitcoin_price, difficulty, fees, hashrate, revenue, machine_price,
    #  machine_hashrate, power, efficiency, block_reward, age_days,
    #  days_since_halving, Revenue_Potential, electricity_rate)
    feature_cols = [
        'bitcoin_price',
        'difficulty',
        'fees',
        'hashrate',
        'revenue',
        'machine_price',
        'machine_hashrate',
        'power',
        'efficiency',
        'block_reward',
        'age_days',
        'days_since_halving',
        'Revenue_Potential',
        'electricity_rate',
    ]

    df_miner = df_miner.dropna().reset_index(drop=True)

    if len(df_miner) < window_size:
        raise ValueError(f"Not enough data: need {window_size} days, have {len(df_miner)}")

    # Last window_size days, columns in training order
    sequence = df_miner[feature_cols].values[-window_size:]
    latest_date = df_miner['date'].iloc[-1]

    # Derive the expected width from the list itself instead of hard-coding
    # 14, so adding/removing a feature cannot desynchronize this check.
    expected = len(feature_cols)
    if sequence.shape[1] != expected:
        raise ValueError(f"Expected {expected} features, got {sequence.shape[1]}")

    return sequence, feature_cols, latest_date
96
+
97
+
98
+
99
if __name__ == "__main__":
    # Manual end-to-end smoke test of the preprocessing pipeline.
    # Requires network access (blockchain data + electricity rates).
    from fetch_blockchain_data import get_latest_blockchain_data

    print("\n" + "="*80)
    print("TESTING PREPROCESSING PIPELINE")
    print("="*80 + "\n")

    # Fetch blockchain data
    print("πŸ“‘ Fetching blockchain data...")
    blockchain_df = get_latest_blockchain_data(days=90)

    if blockchain_df is None:
        print("❌ Failed to fetch blockchain data")
        exit(1)

    print(f"βœ… Fetched {len(blockchain_df)} days of data\n")

    # Test configuration
    miner_name = 's19pro'
    miner_price = 2500
    region = 'texas'
    window_size = 30

    print("βš™οΈ Test Configuration:")
    print(f" Miner: {MINER_SPECS[miner_name]['full_name']}")
    print(f" Price: ${miner_price:,}")
    print(f" Region: {region.title()}")
    print(f" Window: {window_size} days")
    print(f" Electricity: ${ELECTRICITY_RATES[region]}/kWh\n")

    # Step 1: Engineer features (sort + parse dates)
    print("="*80)
    print("STEP 1: ENGINEER FEATURES")
    print("="*80)
    df_engineered = engineer_features(blockchain_df)
    print(f"βœ… Engineered features")
    print(f" Shape: {df_engineered.shape}")
    print(f" Columns: {list(df_engineered.columns)}\n")
    print("First 3 rows:")
    print(df_engineered.head(3))
    print("\nLast 3 rows:")
    print(df_engineered.tail(3))

    # Step 2: Prepare miner features (adds the 14 model inputs)
    print("\n" + "="*80)
    print("STEP 2: PREPARE MINER FEATURES")
    print("="*80)
    df_miner = prepare_miner_features(df_engineered, miner_name, miner_price, region)
    print(f"βœ… Added miner-specific features")
    print(f" Shape: {df_miner.shape}")
    print(f" Columns: {list(df_miner.columns)}\n")

    print("Miner-specific values (constant across all days):")
    print(f" machine_hashrate: {df_miner['machine_hashrate'].iloc[0]} TH/s")
    print(f" power: {df_miner['power'].iloc[0]} W")
    print(f" efficiency: {df_miner['efficiency'].iloc[0]} W/TH")
    print(f" machine_price: ${df_miner['machine_price'].iloc[0]:,.2f}")
    print(f" electricity_rate: ${df_miner['electricity_rate'].iloc[0]:.4f}/kWh")

    print("\nDynamic values (change over time):")
    print(f" age_days: {df_miner['age_days'].iloc[0]} β†’ {df_miner['age_days'].iloc[-1]} days")
    print(f" days_since_halving: {df_miner['days_since_halving'].iloc[0]} β†’ {df_miner['days_since_halving'].iloc[-1]} days")
    print(f" Revenue_Potential: ${df_miner['Revenue_Potential'].iloc[0]:.2f} β†’ ${df_miner['Revenue_Potential'].iloc[-1]:.2f}/day")

    print("\nFirst 3 rows:")
    print(df_miner.head(3))
    print("\nLast 3 rows:")
    print(df_miner.tail(3))

    # Step 3: Get latest sequence (the (window_size, 14) model input)
    print("\n" + "="*80)
    print("STEP 3: GET LATEST SEQUENCE")
    print("="*80)
    sequence, feature_cols, latest_date = get_latest_sequence(blockchain_df, miner_name, miner_price, region, window_size)

    print(f"βœ… Created sequence for model")
    print(f" Shape: {sequence.shape}")
    print(f" Expected: ({window_size}, 14)")
    print(f" Latest date: {latest_date.strftime('%Y-%m-%d')}\n")

    print("14 Features (in order):")
    for i, col in enumerate(feature_cols, 1):
        print(f" {i:2d}. {col:25s} β†’ First: {sequence[0, i-1]:>15.2f} Last: {sequence[-1, i-1]:>15.2f}")

    print("\n" + "="*80)
    print("SEQUENCE STATISTICS")
    print("="*80)
    print("\nFirst day in sequence:")
    for i, col in enumerate(feature_cols):
        print(f" {col:25s} = {sequence[0, i]:>15.2f}")

    print(f"\nLast day in sequence (for prediction on {latest_date.strftime('%Y-%m-%d')}):")
    for i, col in enumerate(feature_cols):
        print(f" {col:25s} = {sequence[-1, i]:>15.2f}")

    # Show some statistics
    print("\n" + "="*80)
    print("FEATURE RANGES")
    print("="*80)
    for i, col in enumerate(feature_cols):
        min_val = sequence[:, i].min()
        max_val = sequence[:, i].max()
        mean_val = sequence[:, i].mean()
        print(f"{col:25s} β†’ Min: {min_val:>12.2f} Max: {max_val:>12.2f} Mean: {mean_val:>12.2f}")

    print("\n" + "="*80)
    print("βœ… PREPROCESSING PIPELINE TEST COMPLETE")
    print("="*80 + "\n")