Spaces:
Running
Running
upload 7 .py files
Browse files- electricity_prices.py +129 -0
- fetch_asic_prices.py +291 -0
- miner_specs.py +34 -0
- model.py +96 -0
- predictor.py +91 -0
- preprocessing.py +206 -0
electricity_prices.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# electricity_prices.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from datetime import date as Date
|
| 5 |
+
from typing import Dict, Optional
|
| 6 |
+
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from huggingface_hub import hf_hub_download
|
| 9 |
+
|
| 10 |
+
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Private dataset repo on Hugging Face containing the CSV files
HF_DATASET_REPO = "sithuWiki/electricity"
HF_DATASET_TOKEN_ENV = "HF_DATASET_TOKEN"  # set this in your Space secrets

# Fallback / base rates used when a date is outside the CSV range.
# Units: USD per kWh (consistent with get_electricity_rate's return value).
BASE_ELECTRICITY_RATES: Dict[str, float] = {
    "texas": 0.1549,
    "china": 0.08,
    "ethiopia": 0.01,
}

# Mapping from region name -> CSV filename in the private dataset.
# Keys must match BASE_ELECTRICITY_RATES so every region has a fallback.
REGION_FILES: Dict[str, str] = {
    "texas": "texas_residential_daily_df.csv",
    "china": "china_electricity_prices_daily.csv",
    "ethiopia": "ethiopia_electricity_prices_daily.csv",
}

# In-memory cache: region -> pandas.Series indexed by python date with float prices.
# A value of None means the region's CSV could not be loaded (see the
# import-time loop below); lookups then fall back to BASE_ELECTRICITY_RATES.
_ELECTRICITY_SERIES: Dict[str, Optional[pd.Series]] = {}
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def _get_token() -> str:
    """Return the HF read token from the environment.

    Raises:
        RuntimeError: when the token environment variable is unset or empty.
    """
    token = os.environ.get(HF_DATASET_TOKEN_ENV)
    if token:
        return token
    raise RuntimeError(
        f"Environment variable {HF_DATASET_TOKEN_ENV} is not set. "
        "Add a read token for the private dataset to your Space secrets."
    )
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _load_region_series(region: str, filename: str) -> Optional[pd.Series]:
    """
    Load a single region's CSV from the private HF dataset as a Series.

    Expected columns in CSV:
      - 'date'  (any format parsable by pandas.to_datetime, e.g. '10/1/15')
      - 'price' (electricity price per kWh)

    Returns a float Series indexed by python ``datetime.date``, sorted
    ascending, or None when download/parsing fails (callers fall back to
    the base rate in that case).
    """
    try:
        token = _get_token()
        file_path = hf_hub_download(
            repo_id=HF_DATASET_REPO,
            filename=filename,
            repo_type="dataset",
            token=token,
        )
        df = pd.read_csv(file_path)

        if "date" not in df.columns or "price" not in df.columns:
            # Include the offending filename so the warning is actionable.
            raise ValueError(f"{filename} must contain 'date' and 'price' columns.")

        # Normalize date to python date objects
        df["date"] = pd.to_datetime(df["date"]).dt.date
        df = df[["date", "price"]].copy()
        # Sorting here is what makes get_electricity_rate's .loc[:d] slice valid.
        df = df.sort_values("date")
        series = df.set_index("date")["price"].astype(float)
        return series
    except Exception as e:
        # Best-effort loader: log and degrade to None rather than crash at import.
        print(f"⚠️ Could not load electricity data for {region} from {filename}: {e}")
        return None
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# Load all regions at import time (one-time cost).
# NOTE(review): this performs network downloads on module import; a failed
# download leaves a None entry so get_electricity_rate() degrades to the
# constant base rate instead of raising.
for _region, _fname in REGION_FILES.items():
    _ELECTRICITY_SERIES[_region] = _load_region_series(_region, _fname)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def get_electricity_rate(region: str, d) -> float:
    """
    Return the electricity rate (USD/kWh) for a given region and date.

    - If d is inside the CSV range, we use that day's price (or last available
      before d, to handle gaps).
    - If d is outside the CSV range or data is missing, we fall back to
      BASE_ELECTRICITY_RATES[region].

    Raises:
        ValueError: unknown region.
        TypeError: d is not a date, Timestamp, or parsable string.
    """
    if region not in BASE_ELECTRICITY_RATES:
        raise ValueError(
            f"Unknown region '{region}'. Expected one of {list(BASE_ELECTRICITY_RATES.keys())}"
        )

    # Normalise input date to datetime.date.
    # NOTE: pd.Timestamp is a subclass of datetime.date, so the Timestamp
    # branch must come before the Date branch — do not reorder.
    if isinstance(d, pd.Timestamp):
        d = d.date()
    elif isinstance(d, str):
        d = pd.to_datetime(d).date()
    elif isinstance(d, Date):
        pass  # already ok
    else:
        raise TypeError(
            f"Unsupported date type {type(d)}; expected datetime.date, pandas.Timestamp, or str"
        )

    base_rate = BASE_ELECTRICITY_RATES[region]
    series = _ELECTRICITY_SERIES.get(region)

    if series is None or series.empty:
        return base_rate

    idx = series.index

    # Outside known range -> use base constant rate
    if d < idx[0] or d > idx[-1]:
        return base_rate

    # Exact match
    if d in series.index:
        return float(series.loc[d])

    # Otherwise, use the last available price before this date.
    # The label slice is valid because the index was sorted at load time.
    prev = series.loc[:d]
    if prev.empty:
        return base_rate
    return float(prev.iloc[-1])
|
fetch_asic_prices.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Fetch ASIC miner prices from Hashrate Index API
|
| 3 |
+
Downloads price indices for efficiency categories and calculates individual miner prices
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import requests
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import os
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
from miner_specs import MINER_SPECS
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Efficiency category mapping for each miner.
# Values are the Hashrate Index price-index buckets (W/TH bands); the
# per-miner W/TH figure from MINER_SPECS is noted alongside each entry.
EFFICIENCY_MAPPING = {
    's19pro': '25to38',   # 30 W/TH
    's19jpro': '25to38',  # 30 W/TH
    's19kpro': '19to25',  # 23 W/TH
    's21': 'under19',     # 18 W/TH
    'ka3': 'under19',     # 19 W/TH
    't19': '25to38',      # 38 W/TH
    's19xp': '19to25',    # 21 W/TH
    's19apro': '25to38',  # 31 W/TH
    'm50s': '19to25',     # 24 W/TH
    'm53': '25to38',      # 29 W/TH
    'm30s': '25to38'      # 31 W/TH
}
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def fetch_asic_price_for_date(target_date, api_key=None, currency='USD'):
    """
    Fetch ASIC price for a specific historical date.

    Parameters:
    -----------
    target_date : str or datetime
        Target date in format 'YYYY-MM-DD' or datetime object
    api_key : str, optional
        Hashrate Index API key (falls back to the HASHRATE_API_KEY env var)
    currency : str
        Currency (default: USD)

    Returns:
    --------
    tuple: (dict of miner_name -> price, bool indicating if real API data
    was used; False means fallback prices were returned)
    """

    # Get API key from environment if not provided
    if not api_key:
        api_key = os.environ.get('HASHRATE_API_KEY')

    if not api_key:
        print("⚠️ No API key found, using fallback prices")
        return get_fallback_prices(), False

    # Convert to datetime if string
    if isinstance(target_date, str):
        target_date = pd.to_datetime(target_date)

    url = "https://api.hashrateindex.com/v1/hashrateindex/asic/price-index"

    headers = {
        "Accept": "application/json",
        "X-Hi-Api-Key": api_key
    }

    params = {
        "currency": currency,
        "span": "ALL"  # Get all historical data
    }

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()

        data = response.json().get("data", [])

        if not data:
            print("⚠️ No data returned from API")
            return get_fallback_prices(), False

        # Create DataFrame from the API payload
        # (assumes rows carry 'timestamp' plus per-bucket $/TH columns — TODO confirm)
        df = pd.DataFrame(data)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['date'] = df['timestamp'].dt.date

        # Find closest date: absolute day-distance from the requested date
        target_date_only = target_date.date()
        df['date_diff'] = abs((df['timestamp'].dt.date - target_date_only).apply(lambda x: x.days))

        # Get row with closest date (within 7 days tolerance)
        closest_row = df.loc[df['date_diff'].idxmin()]

        if closest_row['date_diff'] > 7:
            print(f"⚠️ No price data within 7 days of {target_date_only}")
            return get_fallback_prices(), False

        # Extract efficiency-bucket prices ($/TH); missing columns become None
        efficiency_prices = {
            'under19': closest_row.get('under19', None),
            '19to25': closest_row.get('19to25', None),
            '25to38': closest_row.get('25to38', None)
        }

        print(f"✅ Found price data for {closest_row['date']} (requested: {target_date_only})")
        print(f"   under19: ${efficiency_prices['under19']}/TH")
        print(f"   19to25: ${efficiency_prices['19to25']}/TH")
        print(f"   25to38: ${efficiency_prices['25to38']}/TH")

        # Calculate miner prices: unit price = hashrate (TH/s) * bucket $/TH
        miner_prices = {}

        for miner_name, efficiency_cat in EFFICIENCY_MAPPING.items():
            if miner_name in MINER_SPECS:
                hashrate = MINER_SPECS[miner_name]['hashrate']
                price_per_th = efficiency_prices.get(efficiency_cat)

                if price_per_th and price_per_th > 0:
                    miner_prices[miner_name] = hashrate * price_per_th
                else:
                    # Bucket missing or non-positive: use the static fallback
                    miner_prices[miner_name] = get_fallback_price_for_miner(miner_name)

        return miner_prices, True

    except requests.exceptions.RequestException as e:
        print(f"⚠️ API request failed: {e}")
        return get_fallback_prices(), False
    except Exception as e:
        print(f"⚠️ Error: {e}")
        return get_fallback_prices(), False
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def fetch_asic_price_index(api_key=None, currency='USD'):
    """
    Fetch the latest ASIC price index from the Hashrate Index API.

    Parameters:
    -----------
    api_key : str, optional
        Hashrate Index API key (if None, tries the HASHRATE_API_KEY env var;
        without a key, fallback prices are returned)
    currency : str
        Currency (default: USD)

    Returns:
    --------
    dict
        Dictionary of miner_name -> price (USD); fallback values on any error
    """

    if not api_key:
        api_key = os.environ.get('HASHRATE_API_KEY')

    if not api_key:
        print("⚠️ No API key provided, using fallback prices")
        return get_fallback_prices()

    url = "https://api.hashrateindex.com/v1/hashrateindex/asic/price-index"

    headers = {
        "Accept": "application/json",
        "X-Hi-Api-Key": api_key
    }

    params = {
        "currency": currency,
        "span": "1Y"  # Last year of data
    }

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()

        data = response.json().get("data", [])

        if not data:
            print("⚠️ No data returned from API, using fallback prices")
            return get_fallback_prices()

        # Get most recent data (sort descending by timestamp, take the head)
        df = pd.DataFrame(data)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df = df.sort_values('timestamp', ascending=False)

        # Get latest row
        latest = df.iloc[0]

        # Extract efficiency category prices ($/TH); missing columns become None
        efficiency_prices = {
            'under19': latest.get('under19', None),
            '19to25': latest.get('19to25', None),
            '25to38': latest.get('25to38', None)
        }

        print(f"✅ Fetched price index (date: {latest['timestamp'].date()})")
        print(f"   under19: ${efficiency_prices['under19']}/TH")
        print(f"   19to25: ${efficiency_prices['19to25']}/TH")
        print(f"   25to38: ${efficiency_prices['25to38']}/TH")

        # Calculate individual miner prices: hashrate (TH/s) * bucket $/TH
        miner_prices = {}

        for miner_name, efficiency_cat in EFFICIENCY_MAPPING.items():
            if miner_name in MINER_SPECS:
                hashrate = MINER_SPECS[miner_name]['hashrate']
                price_per_th = efficiency_prices.get(efficiency_cat)

                if price_per_th:
                    miner_prices[miner_name] = hashrate * price_per_th
                else:
                    # Fallback if category not available
                    miner_prices[miner_name] = get_fallback_price_for_miner(miner_name)

        return miner_prices

    except requests.exceptions.RequestException as e:
        print(f"⚠️ API request failed: {e}")
        print("   Using fallback prices")
        return get_fallback_prices()
    except Exception as e:
        print(f"⚠️ Error processing API data: {e}")
        print("   Using fallback prices")
        return get_fallback_prices()
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def get_fallback_prices():
    """
    Fallback prices when API is unavailable.
    Based on approximate market prices as of Dec 2024.

    Returns:
    --------
    dict
        Dictionary of miner_name -> price (USD)
    """

    # Every key in EFFICIENCY_MAPPING / MINER_SPECS should have an entry
    # here, otherwise API-outage fallbacks silently omit that miner.
    # 'm30s' was previously missing and is added with the same $2,500
    # generic default that get_fallback_price_for_miner() already used.
    return {
        's19pro': 2500,
        's19jpro': 2200,
        's19kpro': 3500,
        's21': 5500,
        'ka3': 5000,
        't19': 2000,
        's19xp': 4000,
        's19apro': 2800,
        'm50s': 3200,
        'm53': 8000,
        'm30s': 2500,
    }
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
def get_fallback_price_for_miner(miner_name):
    """Look up a single miner's fallback price, defaulting to $2,500."""
    return get_fallback_prices().get(miner_name, 2500)
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
# For backward compatibility with your existing code:
# a module-level snapshot of the fallback table, computed once at import time.
FALLBACK_PRICES = get_fallback_prices()
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
if __name__ == "__main__":
    # Smoke test: fetch (or fall back to) current miner prices and print a report.
    print("\n" + "="*60)
    print("ASIC Price Fetcher Test")
    print("="*60 + "\n")

    # Try to get API key from environment variable
    api_key = os.environ.get('HASHRATE_API_KEY')

    if api_key:
        print(f"Using API key: {api_key[:8]}...")
    else:
        print("No API key found in HASHRATE_API_KEY environment variable")
        print("Set it with: export HASHRATE_API_KEY='your-key-here'")

    # Fetch prices (returns fallback values when no key / API failure)
    prices = fetch_asic_price_index(api_key)

    print("\n" + "="*60)
    print("Current Miner Prices")
    print("="*60)

    for miner_name, price in sorted(prices.items()):
        # Enrich each price with the miner's static specs for display;
        # .get defaults keep the report working for unknown keys.
        specs = MINER_SPECS.get(miner_name, {})
        full_name = specs.get('full_name', miner_name)
        hashrate = specs.get('hashrate', 0)
        efficiency = specs.get('efficiency', 0)
        efficiency_cat = EFFICIENCY_MAPPING.get(miner_name, 'unknown')

        print(f"{full_name:25s} ({hashrate:3.0f} TH/s, {efficiency:4.1f} W/TH)")
        print(f"  Category: {efficiency_cat:15s} → Price: ${price:,.2f}")
        print()

    print("="*60)
|
miner_specs.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Miner specifications and electricity rates"""
|
| 2 |
+
|
| 3 |
+
# 's9': {'hashrate': 13.5, 'power': 1323, 'efficiency': 98, 'release_date': '2016-07-01', 'full_name': 'Antminer S9'},
|
| 4 |
+
# 's15': {'hashrate': 28, 'power': 1596, 'efficiency': 57, 'release_date': '2018-12-01', 'full_name': 'Antminer S15'},
|
| 5 |
+
# 's17pro': {'hashrate': 50, 'power': 1975, 'efficiency': 40, 'release_date': '2019-04-09', 'full_name': 'Antminer S17 Pro'},
|
| 6 |
+
# 'M32': {'hashrate': 62, 'power': 3348, 'efficiency': 54, 'release_date': '2019-09-01', 'full_name': 'WhatsMiner M32'},
|
| 7 |
+
# 's7': {'hashrate': 4.73, 'power': 1293, 'efficiency': 273, 'release_date': '2015-09-01', 'full_name': 'Antminer S7'},
|
| 8 |
+
# 't17': {'hashrate': 40, 'power': 2200, 'efficiency': 55, 'release_date': '2019-05-01', 'full_name': 'Antminer T17'},
|
| 9 |
+
# 'm21s': {'hashrate': 56, 'power': 3360, 'efficiency': 60, 'release_date': '2019-04-01', 'full_name': 'WhatsMiner M21S'},
|
| 10 |
+
# 'm10s': {'hashrate': 55, 'power': 3500, 'efficiency': 64, 'release_date': '2019-11-01', 'full_name': 'WhatsMiner M10S'},
|
| 11 |
+
# 'r4': {'hashrate': 8.7, 'power': 845, 'efficiency': 97, 'release_date': '2017-02-01', 'full_name': 'Antminer R4'},
|
| 12 |
+
|
| 13 |
+
MINER_SPECS = {
|
| 14 |
+
's19pro': {'hashrate': 110, 'power': 3250, 'efficiency': 30, 'release_date': '2020-05-01', 'full_name': 'Antminer S19 Pro'},
|
| 15 |
+
's19jpro': {'hashrate': 100, 'power': 2950, 'efficiency': 30, 'release_date': '2021-06-01', 'full_name': 'Antminer S19j Pro'},
|
| 16 |
+
's19kpro': {'hashrate': 120, 'power': 2760, 'efficiency': 23, 'release_date': '2023-04-02', 'full_name': 'Antminer S19k Pro'},
|
| 17 |
+
's21': {'hashrate': 200, 'power': 3500, 'efficiency': 18, 'release_date': '2023-08-14', 'full_name': 'Antminer S21'},
|
| 18 |
+
'ka3': {'hashrate': 166, 'power': 3154, 'efficiency': 19, 'release_date': '2022-09-01', 'full_name': 'AvalonMiner KA3'},
|
| 19 |
+
't19': {'hashrate': 84, 'power': 3344, 'efficiency': 38, 'release_date': '2020-08-01', 'full_name': 'Antminer T19'},
|
| 20 |
+
's19xp': {'hashrate': 141, 'power': 3031, 'efficiency': 21, 'release_date': '2021-11-11', 'full_name': 'Antminer S19 XP'},
|
| 21 |
+
's19apro': {'hashrate': 104, 'power': 3250, 'efficiency': 31, 'release_date': '2021-11-01', 'full_name': 'Antminer S19a Pro'},
|
| 22 |
+
'm50s': {'hashrate': 136, 'power': 3264, 'efficiency': 24, 'release_date': '2022-12-01', 'full_name': 'WhatsMiner M50S'},
|
| 23 |
+
'm53': {'hashrate': 226, 'power': 6554, 'efficiency': 29, 'release_date': '2021-04-02', 'full_name': 'WhatsMiner M53'},
|
| 24 |
+
'm30s': {'hashrate': 112, 'power': 3472, 'efficiency': 31, 'release_date': '2020-10-01', 'full_name': 'WhatsMiner M30S'}
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
ELECTRICITY_RATES = {
|
| 28 |
+
'texas': 0.1549,
|
| 29 |
+
'china': 0.08,
|
| 30 |
+
'ethiopia': 0.01
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
def get_miner_list():
    """Return the keys of all supported miner models."""
    return [name for name in MINER_SPECS]
|
model.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class SpectralFeatureExtractor(nn.Module):
    """Learnable frequency-domain filtering applied per input channel.

    Each channel's time series is taken to the frequency domain with an
    rFFT, scaled by one learnable complex weight per channel, and brought
    back with an inverse rFFT. Input and output are both (B, L, C).
    """

    def __init__(self, num_features):
        super().__init__()
        # Real (num_features, 2) storage; viewed as complex in forward().
        self.complex_weight = nn.Parameter(
            torch.randn(num_features, 2, dtype=torch.float32) * 0.02
        )

    def forward(self, x):
        _, seq_len, _ = x.shape
        # Channels-first so the FFT runs along the time axis (dim=2).
        spec = torch.fft.rfft(x.transpose(1, 2), dim=2, norm="ortho")
        filt = torch.view_as_complex(self.complex_weight)
        # Broadcast the (C,) complex filter over batch and frequency bins.
        filtered = spec * filt.unsqueeze(0).unsqueeze(-1)
        recon = torch.fft.irfft(filtered, n=seq_len, dim=2, norm="ortho")
        return recon.transpose(1, 2)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class ChannelMixing(nn.Module):
    """Squeeze-and-excitation style per-channel reweighting.

    Mean-pools over the time axis, pushes the pooled vector through a
    bottleneck MLP, and multiplies the input by the resulting per-channel
    gates. Shape-preserving: (B, L, C) -> (B, L, C).
    """

    def __init__(self, num_features, reduction=4):
        super().__init__()
        self.fc1 = nn.Linear(num_features, num_features // reduction)
        self.fc2 = nn.Linear(num_features // reduction, num_features)
        self.act = nn.GELU()

    def forward(self, x):
        pooled = x.mean(dim=1)                       # (B, C) summary per channel
        gates = self.fc2(self.act(self.fc1(pooled))) # (B, C) channel weights
        return x * gates.unsqueeze(1)                # rescale original input
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding with dropout.

    Precomputes the standard sin/cos table once and adds the first
    x.size(1) rows to the input on every forward pass.
    Assumes d_model is even (the sin/cos halves must match in width).
    """

    def __init__(self, d_model, max_len=5000, dropout=0.2):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        positions = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
        # Geometric frequency ladder: 10000^(-2i/d_model) for even indices i.
        freqs = torch.exp(
            torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model)
        )

        table = torch.zeros(max_len, d_model)
        table[:, 0::2] = torch.sin(positions * freqs)
        table[:, 1::2] = torch.cos(positions * freqs)

        # Registered as a buffer: moves with .to(device), not trained.
        self.register_buffer("pe", table.unsqueeze(0))

    def forward(self, x):
        return self.dropout(x + self.pe[:, : x.size(1), :])
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class MineROINet(nn.Module):
    """Transformer classifier over fixed-length feature windows.

    Pipeline: spectral filtering -> channel mixing -> linear projection to
    d_model -> sinusoidal positional encoding -> Transformer encoder ->
    mean-pool over time -> MLP classification head producing logits.
    """

    def __init__(self, input_dim, d_model=64, nhead=2, num_layers=2, dim_feedforward=256, dropout=0.2, num_classes=3, seq_len=30):
        super().__init__()

        self.spectral = SpectralFeatureExtractor(input_dim)
        self.channel_mix = ChannelMixing(input_dim)
        # Skip the projection entirely when the feature width already equals d_model.
        self.input_projection = nn.Linear(input_dim, d_model) if input_dim != d_model else nn.Identity()
        self.pos_encoder = PositionalEncoding(d_model, max_len=seq_len, dropout=dropout)

        # batch_first=True keeps everything in (B, L, d_model) layout.
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward,
                                                   dropout=dropout, activation="gelu", batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.classifier = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Dropout(dropout),
            nn.Linear(d_model, d_model // 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_model // 2, num_classes),
        )

    def forward(self, seq):
        """seq: (B, L, input_dim) feature windows -> (B, num_classes) logits."""
        seq = self.spectral(seq)
        seq = self.channel_mix(seq)
        seq = self.input_projection(seq)
        seq = self.pos_encoder(seq)
        z = self.transformer_encoder(seq)
        pooled = z.mean(dim=1)  # mean-pool across the time dimension
        out = self.classifier(pooled)
        return out
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def create_model_30day(input_dim, num_classes=3):
    """Factory for the 30-day-window MineROINet with training hyperparameters.

    These values must match the configuration used when the saved
    state_dict was trained (see MineROIPredictor.__init__).
    """
    return MineROINet(input_dim=input_dim, d_model=64, nhead=2, num_layers=2,
                      dim_feedforward=256, dropout=0.2, num_classes=num_classes, seq_len=30)


# 60-day variant kept for reference (unused):
# def create_model_60day(input_dim, num_classes=3):
#     return MineROINet(input_dim=input_dim, d_model=64, nhead=4, num_layers=2,
#                       dim_feedforward=256, dropout=0.2, num_classes=num_classes, seq_len=60)
|
predictor.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# predictor.py
|
| 2 |
+
import os
|
| 3 |
+
import torch
|
| 4 |
+
import numpy as np
|
| 5 |
+
import joblib
|
| 6 |
+
from model import create_model_30day
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# region -> path of the joblib-serialized feature scaler fitted during
# preprocessing for that region; all three must exist at startup.
SCALER_PATHS = {
    "texas": "scaler_texas.joblib",
    "china": "scaler_china.joblib",
    "ethiopia": "scaler_ethiopia.joblib",
}
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class MineROIPredictor:
    """Loads the trained 30-day model and per-region scalers, serves predictions.

    Construction performs file I/O (joblib scaler files and the torch
    state_dict) and raises if any artifact is missing.
    """

    def __init__(self, model_path, device=None):
        self.window_size = 30  # expected sequence length L
        self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.class_names = [
            'Unprofitable (ROI ≤ 0)',
            'Marginal (0 < ROI < 1)',
            'Profitable (ROI ≥ 1)'
        ]

        # Load all scalers that were used in preprocessing
        self.scalers = {}
        for region, path in SCALER_PATHS.items():
            if not os.path.exists(path):
                raise FileNotFoundError(f"Scaler not found for {region}: {path}")
            self.scalers[region] = joblib.load(path)

        # Load model weights
        state_dict = torch.load(model_path, map_location=self.device)

        # Infer input_dim from the spectral layer's (num_features, 2) weight
        # (works because the full state_dict from training was saved)
        self.input_dim = state_dict['spectral.complex_weight'].shape[0]

        # Build model with same hyperparams used in training
        self.model = create_model_30day(self.input_dim)
        self.model.load_state_dict(state_dict)
        self.model.to(self.device)
        self.model.eval()

    def normalize_sequence(self, sequence: np.ndarray, region: str) -> np.ndarray:
        """
        Scale a raw feature window with the region's fitted scaler.

        sequence: shape (L, C)
        region: 'texas', 'china', or 'ethiopia'

        Raises ValueError for regions without a loaded scaler.
        """
        if region not in self.scalers:
            raise ValueError(f"Unknown region '{region}'. Expected one of {list(self.scalers.keys())}")

        scaler = self.scalers[region]

        original_shape = sequence.shape  # (L, C)
        # Flatten to 2-D rows because the scaler operates feature-wise.
        seq_2d = sequence.reshape(-1, original_shape[-1])

        # Only transform, never fit here (fitting happened in preprocessing)
        seq_scaled = scaler.transform(seq_2d)

        return seq_scaled.reshape(original_shape)

    def predict(self, sequence: np.ndarray, region: str):
        """
        Classify one feature window.

        sequence: np.ndarray of shape (L, C) with *raw* features (same as training CSV)
        region: which country's scaler to use

        Returns a dict with predicted_class (int), predicted_label (str),
        per-class probabilities, and the winning-class confidence.
        """
        # 1) scale using the correct country's scaler
        sequence = self.normalize_sequence(sequence, region)

        # 2) to torch with a leading batch axis: (1, L, C)
        seq_tensor = torch.from_numpy(sequence).float().unsqueeze(0).to(self.device)

        with torch.no_grad():
            logits = self.model(seq_tensor)
            probabilities = torch.softmax(logits, dim=1)
            predicted_class = torch.argmax(probabilities, dim=1).item()

        probs = probabilities.cpu().numpy()[0]

        return {
            "predicted_class": predicted_class,
            "predicted_label": self.class_names[predicted_class],
            "probabilities": {
                "unprofitable": float(probs[0]),
                "marginal": float(probs[1]),
                "profitable": float(probs[2]),
            },
            "confidence": float(probs[predicted_class]),
        }
|
preprocessing.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Data preprocessing and feature engineering"""
|
| 2 |
+
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
from miner_specs import MINER_SPECS, ELECTRICITY_RATES
|
| 6 |
+
from fetch_blockchain_data import get_days_since_halving
|
| 7 |
+
from electricity_prices import get_electricity_rate
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def engineer_features(blockchain_df):
    """Normalize raw blockchain data for downstream feature preparation.

    Parameters
    ----------
    blockchain_df : pd.DataFrame
        Raw blockchain data; must contain a 'date' column (string or
        datetime-like).

    Returns
    -------
    pd.DataFrame
        A copy of the input with 'date' parsed to datetime, rows sorted
        chronologically, and the index reset. The input frame is not
        mutated.
    """
    df = blockchain_df.copy()
    # Parse dates BEFORE sorting: sorting raw strings would order
    # lexicographically, which is wrong for non-ISO date formats.
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date').reset_index(drop=True)

    # Feature selection/derivation happens later (prepare_miner_features);
    # return the normalized frame as-is.
    return df
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def prepare_miner_features(blockchain_df, miner_name, miner_price, region='texas'):
    """Attach the 14 miner-specific model features to the blockchain frame.

    Combines market/network columns from ``blockchain_df`` with hardware
    specs looked up in ``MINER_SPECS[miner_name]``, plus derived columns:
    rig age, position in the halving cycle, daily revenue potential, and
    the per-day regional electricity rate.

    Returns a new DataFrame; the input is not mutated.
    """
    spec = MINER_SPECS[miner_name]

    # Market/network columns carried over from the blockchain data.
    base_cols = ['date', 'bitcoin_price', 'difficulty', 'fees',
                 'hashrate', 'revenue', 'block_reward']
    out = blockchain_df[base_cols].copy()
    out['date'] = pd.to_datetime(out['date'])

    # Hardware characteristics (constant across the whole window).
    out['machine_price'] = miner_price
    out['machine_hashrate'] = spec['hashrate']
    out['power'] = spec['power']
    out['efficiency'] = spec['efficiency']

    # Days elapsed since the rig's release date.
    released = pd.to_datetime(spec['release_date'])
    out['age_days'] = (out['date'] - released).dt.days

    # Position within the current Bitcoin halving cycle.
    out['days_since_halving'] = out['date'].apply(get_days_since_halving)

    # Expected BTC mined per day (hashrate in H/s, seconds per day,
    # fees pro-rated over ~144 blocks/day), converted to USD.
    hs = out['machine_hashrate'] * 1e12
    daily_btc = (hs * 86400) / (out['difficulty'] * (2**32)) * (out['block_reward'] + (out['fees'] / 144))
    out['Revenue_Potential'] = daily_btc * out['bitcoin_price']

    # Date-dependent regional electricity price.
    out['electricity_rate'] = out['date'].dt.date.apply(
        lambda day: get_electricity_rate(region, day)
    )

    return out
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def get_latest_sequence(blockchain_df, miner_name, miner_price, region='texas', window_size=30):
    """Build the most recent feature sequence for model inference.

    Parameters
    ----------
    blockchain_df : pd.DataFrame
        Raw blockchain data (passed through ``engineer_features``).
    miner_name : str
        Key into ``MINER_SPECS``.
    miner_price : float
        Current hardware price in USD.
    region : str
        Electricity-rate region key (default ``'texas'``).
    window_size : int
        Number of trailing days in the sequence (default 30).

    Returns
    -------
    tuple
        ``(sequence, feature_cols, latest_date)`` where ``sequence`` is a
        ``(window_size, len(feature_cols))`` ndarray.

    Raises
    ------
    ValueError
        If fewer than ``window_size`` rows survive ``dropna()``, or the
        selected feature matrix has an unexpected width.
    """
    df_features = engineer_features(blockchain_df)
    df_miner = prepare_miner_features(df_features, miner_name, miner_price, region)

    # CRITICAL: this order MUST match the training-data CSV column order
    # exactly: bitcoin_price,difficulty,fees,hashrate,revenue,machine_price,
    # machine_hashrate,power,efficiency,block_reward,age_days,
    # days_since_halving,Revenue_Potential,electricity_rate
    feature_cols = [
        'bitcoin_price',
        'difficulty',
        'fees',
        'hashrate',
        'revenue',
        'machine_price',
        'machine_hashrate',
        'power',
        'efficiency',
        'block_reward',
        'age_days',
        'days_since_halving',
        'Revenue_Potential',
        'electricity_rate',
    ]

    df_miner = df_miner.dropna().reset_index(drop=True)

    if len(df_miner) < window_size:
        raise ValueError(f"Not enough data: need {window_size} days, have {len(df_miner)}")

    # Last window_size days, one column per feature.
    sequence = df_miner[feature_cols].values[-window_size:]
    latest_date = df_miner['date'].iloc[-1]

    # Guard against schema drift: derive the expected width from the list
    # itself rather than a hard-coded 14, so the check can never disagree
    # with feature_cols.
    if sequence.shape[1] != len(feature_cols):
        raise ValueError(f"Expected {len(feature_cols)} features, got {sequence.shape[1]}")

    return sequence, feature_cols, latest_date
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
if __name__ == "__main__":
    # Smoke-test harness: exercises the full preprocessing pipeline end to
    # end against live blockchain data. Performs a network fetch, so it only
    # runs when the module is executed directly, never on import.
    # NOTE(review): the status glyphs below ("β", "π‘", "βοΈ") appear to be
    # mojibake'd emoji (likely "✅", "📡", "⚙️") from an encoding round-trip —
    # confirm the file's original encoding before changing them.
    from fetch_blockchain_data import get_latest_blockchain_data

    print("\n" + "="*80)
    print("TESTING PREPROCESSING PIPELINE")
    print("="*80 + "\n")

    # Fetch blockchain data (network call; returns None on failure).
    print("π‘ Fetching blockchain data...")
    blockchain_df = get_latest_blockchain_data(days=90)

    if blockchain_df is None:
        print("β Failed to fetch blockchain data")
        exit(1)

    print(f"β Fetched {len(blockchain_df)} days of data\n")

    # Test configuration — one concrete miner/region/window combination.
    miner_name = 's19pro'
    miner_price = 2500
    region = 'texas'
    window_size = 30

    print("βοΈ Test Configuration:")
    print(f"  Miner: {MINER_SPECS[miner_name]['full_name']}")
    print(f"  Price: ${miner_price:,}")
    print(f"  Region: {region.title()}")
    print(f"  Window: {window_size} days")
    print(f"  Electricity: ${ELECTRICITY_RATES[region]}/kWh\n")

    # Step 1: Engineer features (date parsing / chronological sort).
    print("="*80)
    print("STEP 1: ENGINEER FEATURES")
    print("="*80)
    df_engineered = engineer_features(blockchain_df)
    print(f"β Engineered features")
    print(f"  Shape: {df_engineered.shape}")
    print(f"  Columns: {list(df_engineered.columns)}\n")
    print("First 3 rows:")
    print(df_engineered.head(3))
    print("\nLast 3 rows:")
    print(df_engineered.tail(3))

    # Step 2: Prepare miner features (hardware specs + derived columns).
    print("\n" + "="*80)
    print("STEP 2: PREPARE MINER FEATURES")
    print("="*80)
    df_miner = prepare_miner_features(df_engineered, miner_name, miner_price, region)
    print(f"β Added miner-specific features")
    print(f"  Shape: {df_miner.shape}")
    print(f"  Columns: {list(df_miner.columns)}\n")

    print("Miner-specific values (constant across all days):")
    print(f"  machine_hashrate: {df_miner['machine_hashrate'].iloc[0]} TH/s")
    print(f"  power: {df_miner['power'].iloc[0]} W")
    print(f"  efficiency: {df_miner['efficiency'].iloc[0]} W/TH")
    print(f"  machine_price: ${df_miner['machine_price'].iloc[0]:,.2f}")
    print(f"  electricity_rate: ${df_miner['electricity_rate'].iloc[0]:.4f}/kWh")

    print("\nDynamic values (change over time):")
    print(f"  age_days: {df_miner['age_days'].iloc[0]} β {df_miner['age_days'].iloc[-1]} days")
    print(f"  days_since_halving: {df_miner['days_since_halving'].iloc[0]} β {df_miner['days_since_halving'].iloc[-1]} days")
    print(f"  Revenue_Potential: ${df_miner['Revenue_Potential'].iloc[0]:.2f} β ${df_miner['Revenue_Potential'].iloc[-1]:.2f}/day")

    print("\nFirst 3 rows:")
    print(df_miner.head(3))
    print("\nLast 3 rows:")
    print(df_miner.tail(3))

    # Step 3: Get the trailing window the model will consume.
    print("\n" + "="*80)
    print("STEP 3: GET LATEST SEQUENCE")
    print("="*80)
    sequence, feature_cols, latest_date = get_latest_sequence(blockchain_df, miner_name, miner_price, region, window_size)

    print(f"β Created sequence for model")
    print(f"  Shape: {sequence.shape}")
    print(f"  Expected: ({window_size}, 14)")
    print(f"  Latest date: {latest_date.strftime('%Y-%m-%d')}\n")

    # Per-feature first/last values across the window, in model order.
    print("14 Features (in order):")
    for i, col in enumerate(feature_cols, 1):
        print(f"  {i:2d}. {col:25s} β First: {sequence[0, i-1]:>15.2f} Last: {sequence[-1, i-1]:>15.2f}")

    print("\n" + "="*80)
    print("SEQUENCE STATISTICS")
    print("="*80)
    print("\nFirst day in sequence:")
    for i, col in enumerate(feature_cols):
        print(f"  {col:25s} = {sequence[0, i]:>15.2f}")

    print(f"\nLast day in sequence (for prediction on {latest_date.strftime('%Y-%m-%d')}):")
    for i, col in enumerate(feature_cols):
        print(f"  {col:25s} = {sequence[-1, i]:>15.2f}")

    # Show some statistics (min/max/mean over the window per feature).
    print("\n" + "="*80)
    print("FEATURE RANGES")
    print("="*80)
    for i, col in enumerate(feature_cols):
        min_val = sequence[:, i].min()
        max_val = sequence[:, i].max()
        mean_val = sequence[:, i].mean()
        print(f"{col:25s} β Min: {min_val:>12.2f} Max: {max_val:>12.2f} Mean: {mean_val:>12.2f}")

    print("\n" + "="*80)
    print("β PREPROCESSING PIPELINE TEST COMPLETE")
    print("="*80 + "\n")