# MineROI-Net / electricity_prices.py
# (Hugging Face file-viewer residue: uploaded by sithuWiki, "upload 7 .py files",
# commit f481275 verified, 4.12 kB)
# electricity_prices.py
import os
from datetime import date as Date
from typing import Dict, Optional
import pandas as pd
from huggingface_hub import hf_hub_download
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Hugging Face dataset repository (private) that stores the per-region CSVs.
HF_DATASET_REPO = "sithuWiki/electricity"

# Name of the environment variable holding the read token for that repo
# (configure it as a secret of the Space).
HF_DATASET_TOKEN_ENV = "HF_DATASET_TOKEN"

# Constant per-region rates (USD/kWh) used whenever the CSV data cannot
# answer a query: date outside the covered range, or data not loaded.
BASE_ELECTRICITY_RATES: Dict[str, float] = {
    "texas": 0.1549,
    "china": 0.08,
    "ethiopia": 0.01,
}

# Region name -> name of the CSV file inside the dataset repository.
REGION_FILES: Dict[str, str] = {
    "texas": "texas_residential_daily_df.csv",
    "china": "china_electricity_prices_daily.csv",
    "ethiopia": "ethiopia_electricity_prices_daily.csv",
}

# Process-wide cache: region -> Series of float prices indexed by
# datetime.date, or None when loading that region failed.
_ELECTRICITY_SERIES: Dict[str, Optional[pd.Series]] = {}
def _get_token() -> str:
    """
    Return the Hugging Face read token taken from the environment.

    The token is looked up under the variable named by HF_DATASET_TOKEN_ENV.

    Raises:
        RuntimeError: if the variable is unset or holds an empty string.
    """
    value = os.getenv(HF_DATASET_TOKEN_ENV)
    if value:
        return value
    raise RuntimeError(
        f"Environment variable {HF_DATASET_TOKEN_ENV} is not set. "
        "Add a read token for the private dataset to your Space secrets."
    )
def _load_region_series(region: str, filename: str) -> Optional[pd.Series]:
    """
    Load a single region's CSV from the private HF dataset as a Series.

    Expected columns in CSV:
    - 'date'  (any format parsable by pandas.to_datetime, e.g. '10/1/15')
    - 'price' (electricity price per kWh)

    Rows with a missing date or price are discarded, and when a date occurs
    more than once only its last row in the file is kept. The result is a
    float Series with a unique, ascending index of datetime.date objects and
    no NaN values, which is what the lookup code relies on.

    Args:
        region: Region name; only used in the warning message.
        filename: Name of the CSV file inside HF_DATASET_REPO.

    Returns:
        The price Series, or None if anything went wrong (missing token,
        download failure, missing columns, unparsable values). In that case
        a warning is printed instead of raising, so importing the module
        never fails because of one bad region.
    """
    try:
        token = _get_token()
        file_path = hf_hub_download(
            repo_id=HF_DATASET_REPO,
            filename=filename,
            repo_type="dataset",
            token=token,
        )
        df = pd.read_csv(file_path)
        if "date" not in df.columns or "price" not in df.columns:
            raise ValueError(f"{filename} must contain 'date' and 'price' columns.")

        df = df[["date", "price"]].copy()
        # Normalize date to python date objects and price to float
        df["date"] = pd.to_datetime(df["date"]).dt.date
        df["price"] = df["price"].astype(float)

        # Blank cells would otherwise put NaT into the index or hand NaN
        # rates to callers.
        df = df.dropna(subset=["date", "price"])
        # A repeated date would make `series.loc[date]` return a Series
        # instead of a scalar; keep the row that appears last in the file.
        df = df.drop_duplicates(subset="date", keep="last")

        df = df.sort_values("date")
        return df.set_index("date")["price"]
    except Exception as e:
        # Deliberate best-effort: report and let callers fall back to the
        # constant base rate.
        print(f"⚠️ Could not load electricity data for {region} from {filename}: {e}")
        return None
# Fill the cache once, when the module is first imported. Each region gets
# exactly one load attempt; whatever _load_region_series returns (a Series,
# or None on failure) is stored as-is.
for _region in REGION_FILES:
    _fname = REGION_FILES[_region]
    _ELECTRICITY_SERIES[_region] = _load_region_series(_region, _fname)
def get_electricity_rate(region: str, d) -> float:
    """
    Return the electricity rate (USD/kWh) for a given region and date.

    - If d is inside the CSV range, we use the most recent non-missing price
      on or before d (that day's price when present, otherwise the last
      available one, to handle gaps).
    - If d is outside the CSV range or data is missing, we fall back to
      BASE_ELECTRICITY_RATES[region].

    Args:
        region: One of the keys of BASE_ELECTRICITY_RATES.
        d: The day to look up, given as datetime.date, datetime.datetime,
            pandas.Timestamp, or a string parsable by pandas.to_datetime.
            Any time-of-day component is discarded.

    Returns:
        The rate as a float.

    Raises:
        ValueError: if region is not a key of BASE_ELECTRICITY_RATES.
        TypeError: if d is none of the supported types.
    """
    # Local import: the module itself only imports `date` (as `Date`).
    from datetime import datetime

    if region not in BASE_ELECTRICITY_RATES:
        raise ValueError(
            f"Unknown region '{region}'. Expected one of {list(BASE_ELECTRICITY_RATES.keys())}"
        )

    # Normalise input date to a plain datetime.date.
    if isinstance(d, str):
        d = pd.to_datetime(d).date()
    elif isinstance(d, datetime):
        # Covers pandas.Timestamp too (a datetime subclass). datetime is
        # itself a date subclass, but comparing a datetime with the plain
        # dates in the index raises TypeError, so strip the time part.
        d = d.date()
    elif not isinstance(d, Date):
        raise TypeError(
            f"Unsupported date type {type(d)}; expected datetime.date, pandas.Timestamp, or str"
        )

    base_rate = BASE_ELECTRICITY_RATES[region]
    series = _ELECTRICITY_SERIES.get(region)
    if series is None or series.empty:
        return base_rate

    # Outside known range → use base constant rate
    first_day, last_day = series.index[0], series.index[-1]
    if d < first_day or d > last_day:
        return base_rate

    # The label slice includes d itself, so its last element is either the
    # exact match or the closest earlier day. Taking it by position also
    # works when a date is repeated in the index; dropping NaN first makes
    # blank prices behave like gaps.
    known = series.loc[:d].dropna()
    if known.empty:
        return base_rate
    return float(known.iloc[-1])