sithuWiki commited on
Commit
f481275
Β·
verified Β·
1 Parent(s): b0a6f60

upload 7 .py files

Browse files
Files changed (6) hide show
  1. electricity_prices.py +129 -0
  2. fetch_asic_prices.py +291 -0
  3. miner_specs.py +34 -0
  4. model.py +96 -0
  5. predictor.py +91 -0
  6. preprocessing.py +206 -0
electricity_prices.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # electricity_prices.py
2
+
3
+ import os
4
+ from datetime import date as Date
5
+ from typing import Dict, Optional
6
+
7
+ import pandas as pd
8
+ from huggingface_hub import hf_hub_download
9
+
10
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Private dataset repo on Hugging Face containing the CSV files
HF_DATASET_REPO = "sithuWiki/electricity"
HF_DATASET_TOKEN_ENV = "HF_DATASET_TOKEN"  # set this in your Space secrets

# Fallback / base rates (USD/kWh) used when a date is outside the CSV range
# or when a region's CSV failed to load.
BASE_ELECTRICITY_RATES: Dict[str, float] = {
    "texas": 0.1549,
    "china": 0.08,
    "ethiopia": 0.01,
}

# Mapping from region name -> CSV filename in the private dataset
REGION_FILES: Dict[str, str] = {
    "texas": "texas_residential_daily_df.csv",
    "china": "china_electricity_prices_daily.csv",
    "ethiopia": "ethiopia_electricity_prices_daily.csv",
}

# In-memory cache: region -> pandas.Series indexed by python date with float
# prices. A value of None means loading failed; populated at import time below.
_ELECTRICITY_SERIES: Dict[str, Optional[pd.Series]] = {}
34
+
35
+
36
def _get_token() -> str:
    """Return the HF read token from the environment, or fail loudly.

    Raises RuntimeError when the variable is unset or empty, since the
    private dataset cannot be downloaded without it.
    """
    token = os.environ.get(HF_DATASET_TOKEN_ENV)
    if token:
        return token
    raise RuntimeError(
        f"Environment variable {HF_DATASET_TOKEN_ENV} is not set. "
        "Add a read token for the private dataset to your Space secrets."
    )
44
+
45
+
46
def _load_region_series(region: str, filename: str) -> Optional[pd.Series]:
    """
    Load a single region's CSV from the private HF dataset as a Series.

    Expected columns in CSV:
        - 'date'  (any format parsable by pandas.to_datetime, e.g. '10/1/15')
        - 'price' (electricity price per kWh)

    Returns the prices as a float Series indexed by python ``date`` objects,
    sorted ascending, or None (after logging a warning) on any failure so
    callers can fall back to the base rates.
    """
    try:
        token = _get_token()
        file_path = hf_hub_download(
            repo_id=HF_DATASET_REPO,
            filename=filename,
            repo_type="dataset",
            token=token,
        )
        df = pd.read_csv(file_path)

        if "date" not in df.columns or "price" not in df.columns:
            # Fix: the message previously contained a dead "(unknown)"
            # placeholder; interpolate the actual offending filename.
            raise ValueError(f"{filename} must contain 'date' and 'price' columns.")

        # Normalize date to python date objects
        df["date"] = pd.to_datetime(df["date"]).dt.date
        df = df[["date", "price"]].copy()
        df = df.sort_values("date")
        series = df.set_index("date")["price"].astype(float)
        return series
    except Exception as e:
        # Broad catch is deliberate: a missing file/token must not break import.
        print(f"⚠️ Could not load electricity data for {region} from {filename}: {e}")
        return None
76
+
77
+
78
# Load all regions at import time (one-time cost).
# NOTE(review): this performs network I/O (hf_hub_download) on import;
# failures are logged inside _load_region_series and the region falls
# back to its base rate.
for _region, _fname in REGION_FILES.items():
    _ELECTRICITY_SERIES[_region] = _load_region_series(_region, _fname)
81
+
82
+
83
def get_electricity_rate(region: str, d) -> float:
    """Return the electricity rate (USD/kWh) for *region* on day *d*.

    Uses that day's CSV price when available, otherwise the last available
    price before *d* (to bridge gaps). Dates outside the CSV range — or a
    region whose CSV did not load — fall back to BASE_ELECTRICITY_RATES.

    Raises ValueError for an unknown region and TypeError when *d* is not
    a datetime.date, pandas.Timestamp, or str.
    """
    if region not in BASE_ELECTRICITY_RATES:
        raise ValueError(
            f"Unknown region '{region}'. Expected one of {list(BASE_ELECTRICITY_RATES.keys())}"
        )

    # Coerce the input to a plain datetime.date
    if isinstance(d, pd.Timestamp):
        d = d.date()
    elif isinstance(d, str):
        d = pd.to_datetime(d).date()
    elif not isinstance(d, Date):
        raise TypeError(
            f"Unsupported date type {type(d)}; expected datetime.date, pandas.Timestamp, or str"
        )

    fallback = BASE_ELECTRICITY_RATES[region]
    series = _ELECTRICITY_SERIES.get(region)
    if series is None or series.empty:
        return fallback

    # Outside the known range β†’ base constant rate
    first_day, last_day = series.index[0], series.index[-1]
    if not (first_day <= d <= last_day):
        return fallback

    # Exact hit for this day
    if d in series.index:
        return float(series.loc[d])

    # No row for this exact day: use the most recent earlier price
    earlier = series.loc[:d]
    return fallback if earlier.empty else float(earlier.iloc[-1])
fetch_asic_prices.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Fetch ASIC miner prices from Hashrate Index API
3
+ Downloads price indices for efficiency categories and calculates individual miner prices
4
+ """
5
+
6
+ import requests
7
+ import pandas as pd
8
+ import os
9
+ from datetime import datetime
10
+ from miner_specs import MINER_SPECS
11
+
12
+
13
# Efficiency category mapping for each miner: miner key -> Hashrate Index
# price-index bucket (W/TH bands: under19 / 19to25 / 25to38). The miner's
# own W/TH figure is noted inline.
EFFICIENCY_MAPPING = {
    's19pro': '25to38',   # 30 W/TH
    's19jpro': '25to38',  # 30 W/TH
    's19kpro': '19to25',  # 23 W/TH
    's21': 'under19',     # 18 W/TH
    'ka3': 'under19',     # 19 W/TH
    't19': '25to38',      # 38 W/TH
    's19xp': '19to25',    # 21 W/TH
    's19apro': '25to38',  # 31 W/TH
    'm50s': '19to25',     # 24 W/TH
    'm53': '25to38',      # 29 W/TH
    'm30s': '25to38'      # 31 W/TH
}
27
+
28
+
29
def fetch_asic_price_for_date(target_date, api_key=None, currency='USD'):
    """
    Fetch ASIC price for a specific historical date

    Parameters:
    -----------
    target_date : str, datetime.date, or datetime-like
        Target date, e.g. 'YYYY-MM-DD'
    api_key : str, optional
        Hashrate Index API key (falls back to HASHRATE_API_KEY env var)
    currency : str
        Currency (default: USD)

    Returns:
    --------
    tuple: (dict of miner_name -> price, bool indicating if data exists)
    """

    # Get API key from environment if not provided
    if not api_key:
        api_key = os.environ.get('HASHRATE_API_KEY')

    if not api_key:
        print("⚠️ No API key found, using fallback prices")
        return get_fallback_prices(), False

    # Robustness fix: normalize every supported input type. Previously only
    # str was converted, so a plain datetime.date crashed later on .date()
    # and silently fell into the broad except -> fallback.
    target_date = pd.to_datetime(target_date)

    url = "https://api.hashrateindex.com/v1/hashrateindex/asic/price-index"

    headers = {
        "Accept": "application/json",
        "X-Hi-Api-Key": api_key
    }

    params = {
        "currency": currency,
        "span": "ALL"  # Get all historical data
    }

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()

        data = response.json().get("data", [])

        if not data:
            print("⚠️ No data returned from API")
            return get_fallback_prices(), False

        # Create DataFrame
        df = pd.DataFrame(data)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['date'] = df['timestamp'].dt.date

        # Distance in whole days from each row to the requested date
        target_date_only = target_date.date()
        df['date_diff'] = abs((df['timestamp'].dt.date - target_date_only).apply(lambda x: x.days))

        # Get row with closest date (within 7 days tolerance)
        closest_row = df.loc[df['date_diff'].idxmin()]

        if closest_row['date_diff'] > 7:
            print(f"⚠️ No price data within 7 days of {target_date_only}")
            return get_fallback_prices(), False

        # $/TH for each efficiency bucket on that day
        efficiency_prices = {
            'under19': closest_row.get('under19', None),
            '19to25': closest_row.get('19to25', None),
            '25to38': closest_row.get('25to38', None)
        }

        print(f"βœ… Found price data for {closest_row['date']} (requested: {target_date_only})")
        print(f" under19: ${efficiency_prices['under19']}/TH")
        print(f" 19to25: ${efficiency_prices['19to25']}/TH")
        print(f" 25to38: ${efficiency_prices['25to38']}/TH")

        # Miner price = hashrate (TH/s) * $/TH for its efficiency bucket
        miner_prices = {}

        for miner_name, efficiency_cat in EFFICIENCY_MAPPING.items():
            if miner_name in MINER_SPECS:
                hashrate = MINER_SPECS[miner_name]['hashrate']
                price_per_th = efficiency_prices.get(efficiency_cat)

                # NaN > 0 evaluates False, so missing/NaN buckets fall through
                if price_per_th and price_per_th > 0:
                    miner_prices[miner_name] = hashrate * price_per_th
                else:
                    miner_prices[miner_name] = get_fallback_price_for_miner(miner_name)

        return miner_prices, True

    except requests.exceptions.RequestException as e:
        print(f"⚠️ API request failed: {e}")
        return get_fallback_prices(), False
    except Exception as e:
        print(f"⚠️ Error: {e}")
        return get_fallback_prices(), False
130
+
131
+
132
def fetch_asic_price_index(api_key=None, currency='USD'):
    """
    Fetch the latest ASIC price index from Hashrate Index API

    Parameters:
    -----------
    api_key : str, optional
        Hashrate Index API key (falls back to HASHRATE_API_KEY env var;
        if unavailable, fallback prices are returned)
    currency : str
        Currency (default: USD)

    Returns:
    --------
    dict
        Dictionary of miner_name -> price (USD)
    """

    if not api_key:
        api_key = os.environ.get('HASHRATE_API_KEY')

    if not api_key:
        print("⚠️ No API key provided, using fallback prices")
        return get_fallback_prices()

    url = "https://api.hashrateindex.com/v1/hashrateindex/asic/price-index"

    headers = {
        "Accept": "application/json",
        "X-Hi-Api-Key": api_key
    }

    params = {
        "currency": currency,
        "span": "1Y"  # Last year of data
    }

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()

        data = response.json().get("data", [])

        if not data:
            print("⚠️ No data returned from API, using fallback prices")
            return get_fallback_prices()

        # Get most recent data
        df = pd.DataFrame(data)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df = df.sort_values('timestamp', ascending=False)

        # Get latest row
        latest = df.iloc[0]

        # Extract efficiency category prices ($/TH)
        efficiency_prices = {
            'under19': latest.get('under19', None),
            '19to25': latest.get('19to25', None),
            '25to38': latest.get('25to38', None)
        }

        print(f"βœ… Fetched price index (date: {latest['timestamp'].date()})")
        print(f" under19: ${efficiency_prices['under19']}/TH")
        print(f" 19to25: ${efficiency_prices['19to25']}/TH")
        print(f" 25to38: ${efficiency_prices['25to38']}/TH")

        # Miner price = hashrate (TH/s) * $/TH for its efficiency bucket
        miner_prices = {}

        for miner_name, efficiency_cat in EFFICIENCY_MAPPING.items():
            if miner_name in MINER_SPECS:
                hashrate = MINER_SPECS[miner_name]['hashrate']
                price_per_th = efficiency_prices.get(efficiency_cat)

                # Bug fix: the old bare truthiness test let NaN through
                # (NaN is truthy), yielding NaN prices. Mirror the guard in
                # fetch_asic_price_for_date and require a positive number.
                if pd.notna(price_per_th) and price_per_th > 0:
                    miner_prices[miner_name] = hashrate * price_per_th
                else:
                    # Fallback if category not available
                    miner_prices[miner_name] = get_fallback_price_for_miner(miner_name)

        return miner_prices

    except requests.exceptions.RequestException as e:
        print(f"⚠️ API request failed: {e}")
        print(" Using fallback prices")
        return get_fallback_prices()
    except Exception as e:
        print(f"⚠️ Error processing API data: {e}")
        print(" Using fallback prices")
        return get_fallback_prices()
222
+
223
+
224
def get_fallback_prices():
    """
    Fallback prices when API is unavailable
    Based on approximate market prices as of Dec 2024

    Returns:
    --------
    dict
        Dictionary of miner_name -> price (USD)
    """

    return {
        's19pro': 2500,
        's19jpro': 2200,
        's19kpro': 3500,
        's21': 5500,
        'ka3': 5000,
        't19': 2000,
        's19xp': 4000,
        's19apro': 2800,
        'm50s': 3200,
        'm53': 8000,
        # Fix: 'm30s' is in EFFICIENCY_MAPPING/MINER_SPECS but was missing
        # here; 2500 matches the default get_fallback_price_for_miner used,
        # so per-miner fallback behavior is unchanged.
        'm30s': 2500
    }
247
+
248
+
249
def get_fallback_price_for_miner(miner_name):
    """Get the fallback price for one miner (2500 USD when unknown)."""
    return get_fallback_prices().get(miner_name, 2500)
253
+
254
+
255
# For backward compatibility with existing code: a snapshot of the fallback
# price table, frozen at import time.
FALLBACK_PRICES = get_fallback_prices()
257
+
258
+
259
if __name__ == "__main__":
    # Manual smoke test: fetch the latest index (or fallbacks when no API
    # key / no network) and print the derived per-miner prices.
    print("\n" + "="*60)
    print("ASIC Price Fetcher Test")
    print("="*60 + "\n")

    # Try to get API key from environment variable
    api_key = os.environ.get('HASHRATE_API_KEY')

    if api_key:
        print(f"Using API key: {api_key[:8]}...")
    else:
        print("No API key found in HASHRATE_API_KEY environment variable")
        print("Set it with: export HASHRATE_API_KEY='your-key-here'")

    # Fetch prices
    prices = fetch_asic_price_index(api_key)

    print("\n" + "="*60)
    print("Current Miner Prices")
    print("="*60)

    for miner_name, price in sorted(prices.items()):
        # specs may legitimately be empty for miners without MINER_SPECS entries
        specs = MINER_SPECS.get(miner_name, {})
        full_name = specs.get('full_name', miner_name)
        hashrate = specs.get('hashrate', 0)
        efficiency = specs.get('efficiency', 0)
        efficiency_cat = EFFICIENCY_MAPPING.get(miner_name, 'unknown')

        print(f"{full_name:25s} ({hashrate:3.0f} TH/s, {efficiency:4.1f} W/TH)")
        print(f" Category: {efficiency_cat:15s} β†’ Price: ${price:,.2f}")
        print()

    print("="*60)
miner_specs.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Miner specifications and electricity rates"""
2
+
3
+ # 's9': {'hashrate': 13.5, 'power': 1323, 'efficiency': 98, 'release_date': '2016-07-01', 'full_name': 'Antminer S9'},
4
+ # 's15': {'hashrate': 28, 'power': 1596, 'efficiency': 57, 'release_date': '2018-12-01', 'full_name': 'Antminer S15'},
5
+ # 's17pro': {'hashrate': 50, 'power': 1975, 'efficiency': 40, 'release_date': '2019-04-09', 'full_name': 'Antminer S17 Pro'},
6
+ # 'M32': {'hashrate': 62, 'power': 3348, 'efficiency': 54, 'release_date': '2019-09-01', 'full_name': 'WhatsMiner M32'},
7
+ # 's7': {'hashrate': 4.73, 'power': 1293, 'efficiency': 273, 'release_date': '2015-09-01', 'full_name': 'Antminer S7'},
8
+ # 't17': {'hashrate': 40, 'power': 2200, 'efficiency': 55, 'release_date': '2019-05-01', 'full_name': 'Antminer T17'},
9
+ # 'm21s': {'hashrate': 56, 'power': 3360, 'efficiency': 60, 'release_date': '2019-04-01', 'full_name': 'WhatsMiner M21S'},
10
+ # 'm10s': {'hashrate': 55, 'power': 3500, 'efficiency': 64, 'release_date': '2019-11-01', 'full_name': 'WhatsMiner M10S'},
11
+ # 'r4': {'hashrate': 8.7, 'power': 845, 'efficiency': 97, 'release_date': '2017-02-01', 'full_name': 'Antminer R4'},
12
+
13
+ MINER_SPECS = {
14
+ 's19pro': {'hashrate': 110, 'power': 3250, 'efficiency': 30, 'release_date': '2020-05-01', 'full_name': 'Antminer S19 Pro'},
15
+ 's19jpro': {'hashrate': 100, 'power': 2950, 'efficiency': 30, 'release_date': '2021-06-01', 'full_name': 'Antminer S19j Pro'},
16
+ 's19kpro': {'hashrate': 120, 'power': 2760, 'efficiency': 23, 'release_date': '2023-04-02', 'full_name': 'Antminer S19k Pro'},
17
+ 's21': {'hashrate': 200, 'power': 3500, 'efficiency': 18, 'release_date': '2023-08-14', 'full_name': 'Antminer S21'},
18
+ 'ka3': {'hashrate': 166, 'power': 3154, 'efficiency': 19, 'release_date': '2022-09-01', 'full_name': 'AvalonMiner KA3'},
19
+ 't19': {'hashrate': 84, 'power': 3344, 'efficiency': 38, 'release_date': '2020-08-01', 'full_name': 'Antminer T19'},
20
+ 's19xp': {'hashrate': 141, 'power': 3031, 'efficiency': 21, 'release_date': '2021-11-11', 'full_name': 'Antminer S19 XP'},
21
+ 's19apro': {'hashrate': 104, 'power': 3250, 'efficiency': 31, 'release_date': '2021-11-01', 'full_name': 'Antminer S19a Pro'},
22
+ 'm50s': {'hashrate': 136, 'power': 3264, 'efficiency': 24, 'release_date': '2022-12-01', 'full_name': 'WhatsMiner M50S'},
23
+ 'm53': {'hashrate': 226, 'power': 6554, 'efficiency': 29, 'release_date': '2021-04-02', 'full_name': 'WhatsMiner M53'},
24
+ 'm30s': {'hashrate': 112, 'power': 3472, 'efficiency': 31, 'release_date': '2020-10-01', 'full_name': 'WhatsMiner M30S'}
25
+ }
26
+
27
+ ELECTRICITY_RATES = {
28
+ 'texas': 0.1549,
29
+ 'china': 0.08,
30
+ 'ethiopia': 0.01
31
+ }
32
+
33
+ def get_miner_list():
34
+ return list(MINER_SPECS.keys())
model.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import numpy as np
4
+
5
+
6
class SpectralFeatureExtractor(nn.Module):
    """Learned per-channel filtering in the frequency domain.

    Each channel owns one learnable complex gain (stored as a real
    (num_features, 2) tensor). The sequence is rFFT'd along time, scaled
    by the gains, and inverse-transformed, so output shape equals input.
    """

    def __init__(self, num_features):
        super().__init__()
        # Real/imag pairs; small init keeps the filter near zero at start.
        self.complex_weight = nn.Parameter(
            torch.randn(num_features, 2, dtype=torch.float32) * 0.02
        )

    def forward(self, x):
        # x: (batch, seq_len, channels) -> channel-major for the FFT axis
        seq_len = x.shape[1]
        channel_major = x.transpose(1, 2)
        spectrum = torch.fft.rfft(channel_major, dim=2, norm="ortho")
        gain = torch.view_as_complex(self.complex_weight)  # (channels,)
        filtered = spectrum * gain.unsqueeze(0).unsqueeze(-1)
        restored = torch.fft.irfft(filtered, n=seq_len, dim=2, norm="ortho")
        return restored.transpose(1, 2)
19
+
20
+
21
class ChannelMixing(nn.Module):
    """Squeeze-and-excitation style channel reweighting.

    Mean-pools over time, runs the pooled vector through a small
    bottleneck MLP, and multiplies the input by the resulting per-channel
    weights (no sigmoid: the gate is unbounded).
    """

    def __init__(self, num_features, reduction=4):
        super().__init__()
        self.fc1 = nn.Linear(num_features, num_features // reduction)
        self.fc2 = nn.Linear(num_features // reduction, num_features)
        self.act = nn.GELU()

    def forward(self, x):
        # x: (batch, seq_len, channels)
        pooled = x.mean(dim=1)                       # (batch, channels)
        gate = self.fc2(self.act(self.fc1(pooled)))  # (batch, channels)
        return x * gate.unsqueeze(1)                 # broadcast over time
34
+
35
+
36
class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding followed by dropout.

    The sin/cos table is precomputed for max_len positions and added to
    the first seq_len rows of each input batch.
    """

    def __init__(self, d_model, max_len=5000, dropout=0.2):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        positions = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
        freqs = torch.exp(
            torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model)
        )
        table = torch.zeros(max_len, d_model)
        table[:, 0::2] = torch.sin(positions * freqs)
        table[:, 1::2] = torch.cos(positions * freqs)

        # Buffer, not a Parameter: follows .to()/.cuda() but is never trained.
        self.register_buffer("pe", table.unsqueeze(0))

    def forward(self, x):
        # x: (batch, seq_len, d_model); add the matching slice of the table.
        return self.dropout(x + self.pe[:, : x.size(1), :])
54
+
55
+
56
class MineROINet(nn.Module):
    """Transformer classifier over daily mining-feature sequences.

    Pipeline: spectral filtering -> channel mixing -> projection to
    d_model -> sinusoidal positional encoding -> Transformer encoder ->
    mean-pool over time -> MLP head emitting `num_classes` logits.
    """

    def __init__(self, input_dim, d_model=64, nhead=2, num_layers=2, dim_feedforward=256, dropout=0.2, num_classes=3, seq_len=30):
        super().__init__()

        self.spectral = SpectralFeatureExtractor(input_dim)
        self.channel_mix = ChannelMixing(input_dim)
        # Skip the projection entirely when widths already agree.
        if input_dim != d_model:
            self.input_projection = nn.Linear(input_dim, d_model)
        else:
            self.input_projection = nn.Identity()
        self.pos_encoder = PositionalEncoding(d_model, max_len=seq_len, dropout=dropout)

        layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation="gelu",
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(layer, num_layers=num_layers)

        self.classifier = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Dropout(dropout),
            nn.Linear(d_model, d_model // 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_model // 2, num_classes),
        )

    def forward(self, seq):
        # seq: (batch, seq_len, input_dim) -> logits: (batch, num_classes)
        hidden = self.pos_encoder(
            self.input_projection(self.channel_mix(self.spectral(seq)))
        )
        encoded = self.transformer_encoder(hidden)
        return self.classifier(encoded.mean(dim=1))
87
+
88
+
89
def create_model_30day(input_dim, num_classes=3):
    """Factory for the 30-day-window model with the training hyperparameters."""
    hyperparams = dict(
        d_model=64,
        nhead=2,
        num_layers=2,
        dim_feedforward=256,
        dropout=0.2,
        seq_len=30,
    )
    return MineROINet(input_dim=input_dim, num_classes=num_classes, **hyperparams)
92
+
93
+
94
+ # def create_model_60day(input_dim, num_classes=3):
95
+ # return MineROINet(input_dim=input_dim, d_model=64, nhead=4, num_layers=2,
96
+ # dim_feedforward=256, dropout=0.2, num_classes=num_classes, seq_len=60)
predictor.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # predictor.py
2
+ import os
3
+ import torch
4
+ import numpy as np
5
+ import joblib
6
+ from model import create_model_30day
7
+
8
+
9
# Per-region scaler files (fitted during preprocessing). The predictor
# loads all of them at construction and fails fast if any is missing.
SCALER_PATHS = {
    "texas": "scaler_texas.joblib",
    "china": "scaler_china.joblib",
    "ethiopia": "scaler_ethiopia.joblib",
}
14
+
15
+
16
class MineROIPredictor:
    """Inference wrapper around a trained MineROINet checkpoint.

    Loads the per-region feature scalers and the model weights once, then
    serves 3-class ROI predictions for raw 30-day feature sequences.
    """

    def __init__(self, model_path, device=None):
        self.window_size = 30
        # Prefer an explicit device, otherwise pick CUDA when available.
        self.device = device if device else torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.class_names = [
            'Unprofitable (ROI ≀ 0)',
            'Marginal (0 < ROI < 1)',
            'Profitable (ROI β‰₯ 1)'
        ]

        # Load every scaler fitted during preprocessing; refuse to start
        # without them, since raw inputs must be scaled exactly as in training.
        self.scalers = {}
        for region_name, scaler_file in SCALER_PATHS.items():
            if not os.path.exists(scaler_file):
                raise FileNotFoundError(f"Scaler not found for {region_name}: {scaler_file}")
            self.scalers[region_name] = joblib.load(scaler_file)

        # Restore trained weights.
        state_dict = torch.load(model_path, map_location=self.device)

        # The spectral layer holds one complex weight per input feature, so
        # its leading dimension reveals the feature count used in training.
        self.input_dim = state_dict['spectral.complex_weight'].shape[0]

        # Rebuild the architecture with the training hyperparameters.
        self.model = create_model_30day(self.input_dim)
        self.model.load_state_dict(state_dict)
        self.model.to(self.device)
        self.model.eval()

    def normalize_sequence(self, sequence: np.ndarray, region: str) -> np.ndarray:
        """Scale a raw (L, C) sequence with *region*'s fitted scaler.

        Transform-only: fitting happened during preprocessing.
        """
        if region not in self.scalers:
            raise ValueError(f"Unknown region '{region}'. Expected one of {list(self.scalers.keys())}")

        scaler = self.scalers[region]
        flattened = sequence.reshape(-1, sequence.shape[-1])
        return scaler.transform(flattened).reshape(sequence.shape)

    def predict(self, sequence: np.ndarray, region: str):
        """Classify a raw (L, C) feature sequence.

        Returns a dict with the class index, its label, all three class
        probabilities, and the winning probability as 'confidence'.
        """
        # Scale using the correct country's scaler, then batch: (1, L, C).
        scaled = self.normalize_sequence(sequence, region)
        batch = torch.from_numpy(scaled).float().unsqueeze(0).to(self.device)

        with torch.no_grad():
            probabilities = torch.softmax(self.model(batch), dim=1)

        probs = probabilities.cpu().numpy()[0]
        winner = int(probs.argmax())

        return {
            "predicted_class": winner,
            "predicted_label": self.class_names[winner],
            "probabilities": {
                "unprofitable": float(probs[0]),
                "marginal": float(probs[1]),
                "profitable": float(probs[2]),
            },
            "confidence": float(probs[winner]),
        }
preprocessing.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Data preprocessing and feature engineering"""
2
+
3
+ import pandas as pd
4
+ import numpy as np
5
+ from miner_specs import MINER_SPECS, ELECTRICITY_RATES
6
+ from fetch_blockchain_data import get_days_since_halving
7
+ from electricity_prices import get_electricity_rate
8
+
9
+
10
def engineer_features(blockchain_df):
    """Return a chronologically sorted copy with a parsed 'date' column.

    Note: rows are sorted on the raw 'date' values first, then the column
    is parsed to datetime. Feature selection happens later.
    """
    ordered = (
        blockchain_df.copy()
        .sort_values('date')
        .reset_index(drop=True)
    )
    ordered['date'] = pd.to_datetime(ordered['date'])
    return ordered
18
+
19
+
20
def prepare_miner_features(blockchain_df, miner_name, miner_price, region='texas'):
    """Add miner-specific features - EXACTLY 14 features.

    Takes the engineered blockchain frame plus one miner's identity/price
    and returns a frame with the 14 model inputs (plus 'date').
    """

    df = blockchain_df.copy()
    specs = MINER_SPECS[miner_name]

    # Keep only these columns from blockchain data
    df = df[['date', 'bitcoin_price', 'difficulty', 'fees', 'hashrate', 'revenue', 'block_reward']].copy()
    df['date'] = pd.to_datetime(df['date'])

    # Add miner features (constant for a given machine)
    df['machine_price'] = miner_price
    df['machine_hashrate'] = specs['hashrate']  # TH/s
    df['power'] = specs['power']                # W
    df['efficiency'] = specs['efficiency']      # W/TH

    # Calculate age_days (days since miner was released; can be negative
    # for rows that predate the release)
    release_date = pd.to_datetime(specs['release_date'])
    df['age_days'] = (df['date'] - release_date).dt.days

    # Days since halving
    df['days_since_halving'] = df['date'].apply(get_days_since_halving)

    # Revenue potential: expected BTC/day for this machine's share of work,
    # i.e. hashes/day / (difficulty * 2^32) expected blocks, times
    # (block subsidy + fees/144). NOTE(review): this assumes 'fees' is a
    # daily total split over ~144 blocks/day β€” confirm units with the data
    # source.
    hashrate_hs = df['machine_hashrate'] * 1e12  # TH/s -> H/s
    btc_per_day = (hashrate_hs * 86400) / (df['difficulty'] * (2**32)) * (df['block_reward'] + (df['fees']/144))
    df['Revenue_Potential'] = btc_per_day * df['bitcoin_price']

    # Electricity rate: per-day historical rate with base-rate fallback
    # (the flat-rate lookup below was the old behavior)
    # df['electricity_rate'] = ELECTRICITY_RATES.get(region, 0.10)
    df['electricity_rate'] = df['date'].dt.date.apply(
        lambda day: get_electricity_rate(region, day)
    )

    return df
55
+
56
+
57
def get_latest_sequence(blockchain_df, miner_name, miner_price, region='texas', window_size=30):
    """Build the most recent `window_size`-day feature matrix for prediction.

    Returns (sequence, feature_cols, latest_date) where `sequence` has
    shape (window_size, len(feature_cols)) with columns in training order.

    Raises ValueError when fewer than `window_size` complete rows remain
    after dropping NaNs, or when the feature width is wrong.
    """

    df_features = engineer_features(blockchain_df)
    df_miner = prepare_miner_features(df_features, miner_name, miner_price, region)

    # CRITICAL: This order MUST match the training data CSV exactly!
    # (bitcoin_price, difficulty, fees, hashrate, revenue, machine_price,
    #  machine_hashrate, power, efficiency, block_reward, age_days,
    #  days_since_halving, Revenue_Potential, electricity_rate)
    feature_cols = [
        'bitcoin_price',
        'difficulty',
        'fees',
        'hashrate',
        'revenue',
        'machine_price',
        'machine_hashrate',
        'power',
        'efficiency',
        'block_reward',
        'age_days',
        'days_since_halving',
        'Revenue_Potential',
        'electricity_rate',
    ]

    df_miner = df_miner.dropna().reset_index(drop=True)

    if len(df_miner) < window_size:
        raise ValueError(f"Not enough data: need {window_size} days, have {len(df_miner)}")

    # Last window_size days, columns in training order
    sequence = df_miner[feature_cols].values[-window_size:]
    latest_date = df_miner['date'].iloc[-1]

    # Derive the expected width from the list itself instead of hard-coding
    # 14, so adding/removing a feature cannot desynchronize this check.
    expected = len(feature_cols)
    if sequence.shape[1] != expected:
        raise ValueError(f"Expected {expected} features, got {sequence.shape[1]}")

    return sequence, feature_cols, latest_date
96
+
97
+
98
+
99
if __name__ == "__main__":
    # Manual end-to-end smoke test of the preprocessing pipeline.
    # Requires network access (blockchain data + electricity rates).
    from fetch_blockchain_data import get_latest_blockchain_data

    print("\n" + "="*80)
    print("TESTING PREPROCESSING PIPELINE")
    print("="*80 + "\n")

    # Fetch blockchain data
    print("πŸ“‘ Fetching blockchain data...")
    blockchain_df = get_latest_blockchain_data(days=90)

    if blockchain_df is None:
        print("❌ Failed to fetch blockchain data")
        exit(1)

    print(f"βœ… Fetched {len(blockchain_df)} days of data\n")

    # Test configuration
    miner_name = 's19pro'
    miner_price = 2500
    region = 'texas'
    window_size = 30

    print("βš™οΈ Test Configuration:")
    print(f" Miner: {MINER_SPECS[miner_name]['full_name']}")
    print(f" Price: ${miner_price:,}")
    print(f" Region: {region.title()}")
    print(f" Window: {window_size} days")
    print(f" Electricity: ${ELECTRICITY_RATES[region]}/kWh\n")

    # Step 1: Engineer features (sort + parse dates)
    print("="*80)
    print("STEP 1: ENGINEER FEATURES")
    print("="*80)
    df_engineered = engineer_features(blockchain_df)
    print(f"βœ… Engineered features")
    print(f" Shape: {df_engineered.shape}")
    print(f" Columns: {list(df_engineered.columns)}\n")
    print("First 3 rows:")
    print(df_engineered.head(3))
    print("\nLast 3 rows:")
    print(df_engineered.tail(3))

    # Step 2: Prepare miner features (adds the 14 model inputs)
    print("\n" + "="*80)
    print("STEP 2: PREPARE MINER FEATURES")
    print("="*80)
    df_miner = prepare_miner_features(df_engineered, miner_name, miner_price, region)
    print(f"βœ… Added miner-specific features")
    print(f" Shape: {df_miner.shape}")
    print(f" Columns: {list(df_miner.columns)}\n")

    print("Miner-specific values (constant across all days):")
    print(f" machine_hashrate: {df_miner['machine_hashrate'].iloc[0]} TH/s")
    print(f" power: {df_miner['power'].iloc[0]} W")
    print(f" efficiency: {df_miner['efficiency'].iloc[0]} W/TH")
    print(f" machine_price: ${df_miner['machine_price'].iloc[0]:,.2f}")
    print(f" electricity_rate: ${df_miner['electricity_rate'].iloc[0]:.4f}/kWh")

    print("\nDynamic values (change over time):")
    print(f" age_days: {df_miner['age_days'].iloc[0]} β†’ {df_miner['age_days'].iloc[-1]} days")
    print(f" days_since_halving: {df_miner['days_since_halving'].iloc[0]} β†’ {df_miner['days_since_halving'].iloc[-1]} days")
    print(f" Revenue_Potential: ${df_miner['Revenue_Potential'].iloc[0]:.2f} β†’ ${df_miner['Revenue_Potential'].iloc[-1]:.2f}/day")

    print("\nFirst 3 rows:")
    print(df_miner.head(3))
    print("\nLast 3 rows:")
    print(df_miner.tail(3))

    # Step 3: Get latest sequence (the (window_size, 14) model input)
    print("\n" + "="*80)
    print("STEP 3: GET LATEST SEQUENCE")
    print("="*80)
    sequence, feature_cols, latest_date = get_latest_sequence(blockchain_df, miner_name, miner_price, region, window_size)

    print(f"βœ… Created sequence for model")
    print(f" Shape: {sequence.shape}")
    print(f" Expected: ({window_size}, 14)")
    print(f" Latest date: {latest_date.strftime('%Y-%m-%d')}\n")

    print("14 Features (in order):")
    for i, col in enumerate(feature_cols, 1):
        print(f" {i:2d}. {col:25s} β†’ First: {sequence[0, i-1]:>15.2f} Last: {sequence[-1, i-1]:>15.2f}")

    print("\n" + "="*80)
    print("SEQUENCE STATISTICS")
    print("="*80)
    print("\nFirst day in sequence:")
    for i, col in enumerate(feature_cols):
        print(f" {col:25s} = {sequence[0, i]:>15.2f}")

    print(f"\nLast day in sequence (for prediction on {latest_date.strftime('%Y-%m-%d')}):")
    for i, col in enumerate(feature_cols):
        print(f" {col:25s} = {sequence[-1, i]:>15.2f}")

    # Show some statistics
    print("\n" + "="*80)
    print("FEATURE RANGES")
    print("="*80)
    for i, col in enumerate(feature_cols):
        min_val = sequence[:, i].min()
        max_val = sequence[:, i].max()
        mean_val = sequence[:, i].mean()
        print(f"{col:25s} β†’ Min: {min_val:>12.2f} Max: {max_val:>12.2f} Mean: {mean_val:>12.2f}")

    print("\n" + "="*80)
    print("βœ… PREPROCESSING PIPELINE TEST COMPLETE")
    print("="*80 + "\n")