Spaces:
Running
Running
| # electricity_prices.py | |
| import os | |
| from datetime import date as Date | |
| from typing import Dict, Optional | |
| import pandas as pd | |
| from huggingface_hub import hf_hub_download | |
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Hugging Face dataset repository (private) that hosts the per-region CSV files.
HF_DATASET_REPO = "sithuWiki/electricity"

# Name of the environment variable holding the read token for that dataset;
# set it in your Space secrets.
HF_DATASET_TOKEN_ENV = "HF_DATASET_TOKEN"

# Constant per-region rates used as a fallback whenever a requested date is
# outside the range covered by the region's CSV (or the CSV failed to load).
BASE_ELECTRICITY_RATES: Dict[str, float] = dict(
    texas=0.1549,
    china=0.08,
    ethiopia=0.01,
)

# Region name -> CSV filename inside the private dataset repository.
REGION_FILES: Dict[str, str] = dict(
    texas="texas_residential_daily_df.csv",
    china="china_electricity_prices_daily.csv",
    ethiopia="ethiopia_electricity_prices_daily.csv",
)

# In-memory cache: region -> price Series indexed by python ``date`` objects
# (float prices), or None when that region's data could not be loaded.
_ELECTRICITY_SERIES: Dict[str, Optional[pd.Series]] = dict()
def _get_token() -> str:
    """
    Return the Hugging Face read token taken from the environment.

    The token is looked up under the variable named by HF_DATASET_TOKEN_ENV.

    Raises:
        RuntimeError: if the variable is unset or holds an empty string.
    """
    secret = os.environ.get(HF_DATASET_TOKEN_ENV)
    if secret:
        return secret
    # Unset and empty are treated the same way: there is no usable token.
    raise RuntimeError(
        f"Environment variable {HF_DATASET_TOKEN_ENV} is not set. "
        "Add a read token for the private dataset to your Space secrets."
    )
def _load_region_series(region: str, filename: str) -> Optional[pd.Series]:
    """
    Load a single region's CSV from the private HF dataset as a Series.

    Expected columns in CSV:
    - 'date' (any format parsable by pandas.to_datetime, e.g. '10/1/15')
    - 'price' (electricity price per kWh)

    Rows with a missing date or price are discarded, and when a date occurs
    more than once only its last row (in file order) is kept, so the result
    has a unique, ascending index of python ``date`` objects with no NaN
    prices.

    Args:
        region: Region name; used only in the warning message on failure.
        filename: Name of the CSV file inside the dataset repository.

    Returns:
        A float Series of prices indexed by date, or None if anything went
        wrong (missing token, download failure, bad schema, unparsable data).
        Loading is best-effort: failures are reported on stdout, not raised.
    """
    try:
        token = _get_token()
        file_path = hf_hub_download(
            repo_id=HF_DATASET_REPO,
            filename=filename,
            repo_type="dataset",
            token=token,
        )
        df = pd.read_csv(file_path)
        if "date" not in df.columns or "price" not in df.columns:
            raise ValueError(f"{filename} must contain 'date' and 'price' columns.")

        df = df[["date", "price"]].copy()
        df["date"] = pd.to_datetime(df["date"])
        df["price"] = df["price"].astype(float)

        # Incomplete rows would otherwise surface later as NaN rates or as
        # NaT entries that break the first/last-date range check.
        df = df.dropna(subset=["date", "price"])

        # Normalize date to python date objects, then collapse duplicates:
        # a repeated index label would make an exact-date lookup return a
        # Series instead of a scalar.
        df = df.assign(date=df["date"].dt.date)
        df = df.drop_duplicates(subset="date", keep="last")

        df = df.sort_values("date")
        return df.set_index("date")["price"]
    except Exception as e:
        # Deliberate catch-all: this runs at import time and callers fall
        # back to the constant base rate when a region has no data.
        print(f"⚠️ Could not load electricity data for {region} from {filename}: {e}")
        return None
# Populate the cache once, when the module is first imported. Regions whose
# data cannot be loaded are stored as None.
_ELECTRICITY_SERIES.update(
    (_region, _load_region_series(_region, _fname))
    for _region, _fname in REGION_FILES.items()
)
def get_electricity_rate(region: str, d) -> float:
    """
    Return the electricity rate (USD/kWh) for a given region and date.

    - If d is inside the CSV range, we use that day's price (or last available
      before d, to handle gaps).
    - If d is outside the CSV range or data is missing, we fall back to
      BASE_ELECTRICITY_RATES[region].

    Args:
        region: One of the keys of BASE_ELECTRICITY_RATES.
        d: The day to look up, given as a ``datetime.date``,
            ``datetime.datetime``, ``pandas.Timestamp`` or a string parsable
            by ``pandas.to_datetime``. Any time-of-day component is ignored.

    Returns:
        The rate as a float.

    Raises:
        ValueError: if ``region`` is not a known region.
        TypeError: if ``d`` is none of the supported date types.
    """
    # Function-scope import: the module only imports ``date`` at the top.
    from datetime import datetime as _DateTime

    if region not in BASE_ELECTRICITY_RATES:
        raise ValueError(
            f"Unknown region '{region}'. Expected one of {list(BASE_ELECTRICITY_RATES.keys())}"
        )

    # Normalise input date to a plain ``date``.
    if isinstance(d, str):
        d = pd.to_datetime(d).date()
    elif isinstance(d, _DateTime):
        # Covers pandas.Timestamp (a datetime subclass) and plain datetimes.
        # datetime is itself a subclass of date, so without this branch it
        # would slip through and fail when compared with the date index.
        d = d.date()
    elif not isinstance(d, Date):
        raise TypeError(
            f"Unsupported date type {type(d)}; expected datetime.date, pandas.Timestamp, or str"
        )

    base_rate = BASE_ELECTRICITY_RATES[region]
    series = _ELECTRICITY_SERIES.get(region)
    if series is None or series.empty:
        return base_rate

    idx = series.index
    # Outside known range → use base constant rate
    if d < idx[0] or d > idx[-1]:
        return base_rate

    # Label slicing is inclusive, so the last element of ``series.loc[:d]`` is
    # the price on d itself when present, otherwise the most recent earlier
    # price. Unlike ``series.loc[d]`` this stays a scalar lookup even if the
    # index contains repeated dates.
    prev = series.loc[:d]
    if prev.empty:
        return base_rate
    return float(prev.iloc[-1])