Spaces:

amaai-lab
/

MineROI-Net

Running

App Files Files Community

MineROI-Net / electricity_prices.py

sithuWiki

upload 7 .py files

f481275 verified 3 days ago

raw

history blame

4.12 kB

	# electricity_prices.py

	import os
	from datetime import date as Date
	from typing import Dict, Optional

	import pandas as pd
	from huggingface_hub import hf_hub_download

	# ---------------------------------------------------------------------------
	# Configuration
	# ---------------------------------------------------------------------------

	# Hugging Face dataset repository (private) that hosts the price CSV files.
	HF_DATASET_REPO = "sithuWiki/electricity"
	# Name of the environment variable carrying the read token for that
	# repository; configure it as a secret on the Space.
	HF_DATASET_TOKEN_ENV = "HF_DATASET_TOKEN"

	# Constant per-region rates, returned whenever a requested date lies outside
	# the range covered by the region's CSV (or the CSV could not be loaded).
	BASE_ELECTRICITY_RATES: Dict[str, float] = {
	    "texas": 0.1549,
	    "china": 0.08,
	    "ethiopia": 0.01,
	}

	# Region name -> name of that region's CSV file inside the dataset repo.
	REGION_FILES: Dict[str, str] = {
	    "texas": "texas_residential_daily_df.csv",
	    "china": "china_electricity_prices_daily.csv",
	    "ethiopia": "ethiopia_electricity_prices_daily.csv",
	}

	# Process-wide cache: region -> pandas.Series of float prices indexed by
	# python date objects, or None when loading that region failed.
	_ELECTRICITY_SERIES: Dict[str, Optional[pd.Series]] = {}


	def _get_token() -> str:
	    """
	    Read the Hugging Face access token from the environment.

	    Returns the value of the environment variable named by
	    HF_DATASET_TOKEN_ENV. Raises RuntimeError when that variable is unset
	    or empty.
	    """
	    value = os.environ.get(HF_DATASET_TOKEN_ENV)
	    if value:
	        return value
	    raise RuntimeError(
	        f"Environment variable {HF_DATASET_TOKEN_ENV} is not set. "
	        "Add a read token for the private dataset to your Space secrets."
	    )


	def _load_region_series(region: str, filename: str) -> Optional[pd.Series]:
	    """
	    Load a single region's CSV from the private HF dataset as a Series.

	    Expected columns in CSV:
	    - 'date' (any format parsable by pandas.to_datetime, e.g. '10/1/15')
	    - 'price' (electricity price per kWh)

	    The returned Series holds float prices indexed by python date objects,
	    sorted ascending. Rows without a date or a price are discarded, and when
	    a date occurs more than once the last row for that date in the file is
	    kept, so the index is unique.

	    Loading is best-effort: on any failure (missing token, download error,
	    missing columns, unparsable values) a warning is printed and None is
	    returned instead of raising.
	    """
	    try:
	        # The token lookup stays inside the try so that a missing secret
	        # degrades to "no data for this region" rather than an import error.
	        csv_path = hf_hub_download(
	            repo_id=HF_DATASET_REPO,
	            filename=filename,
	            repo_type="dataset",
	            token=_get_token(),
	        )
	        df = pd.read_csv(csv_path)

	        if "date" not in df.columns or "price" not in df.columns:
	            raise ValueError(f"{filename} must contain 'date' and 'price' columns.")

	        df = df[["date", "price"]].copy()
	        # Normalize date to python date objects
	        df["date"] = pd.to_datetime(df["date"]).dt.date
	        df["price"] = df["price"].astype(float)

	        # Incomplete rows carry no usable price; dropping them lets lookups
	        # fall back to the previous day's value instead of returning NaN.
	        df = df.dropna(subset=["date", "price"])
	        # A repeated date would make the index non-unique, so a lookup by
	        # date would return several values instead of one. Deduplicate
	        # before sorting so that "last" means last in file order.
	        df = df.drop_duplicates(subset="date", keep="last")
	        df = df.sort_values("date")

	        return df.set_index("date")["price"]
	    except Exception as e:
	        print(f"⚠️ Could not load electricity data for {region} from {filename}: {e}")
	        return None


	# Fill the in-memory cache eagerly, once, when this module is first imported.
	# A region whose data cannot be loaded is cached as None.
	for _region, _fname in REGION_FILES.items():
	    _ELECTRICITY_SERIES[_region] = _load_region_series(_region, _fname)


	def get_electricity_rate(region: str, d) -> float:
	    """
	    Return the electricity rate (USD/kWh) for a given region and date.

	    - If d is inside the CSV range, we use that day's price (or the last
	      available price before d, to handle gaps and missing values).
	    - If d is outside the CSV range or data is missing, we fall back to
	      BASE_ELECTRICITY_RATES[region].

	    Parameters:
	    - region: one of the keys of BASE_ELECTRICITY_RATES.
	    - d: a datetime.date, datetime.datetime, pandas.Timestamp, or a string
	      parsable by pandas.to_datetime. Any time-of-day part is ignored.

	    Raises:
	    - ValueError if region is unknown.
	    - TypeError if d is not one of the supported types.
	    """
	    from datetime import datetime as DateTime

	    if region not in BASE_ELECTRICITY_RATES:
	        raise ValueError(
	            f"Unknown region '{region}'. Expected one of {list(BASE_ELECTRICITY_RATES.keys())}"
	        )

	    # Normalise input date
	    if isinstance(d, str):
	        d = pd.to_datetime(d).date()
	    elif isinstance(d, DateTime):
	        # Covers pandas.Timestamp and plain datetime.datetime. Both subclass
	        # date, but a datetime cannot be ordered against the plain dates in
	        # the index, so reduce it to its calendar date first.
	        d = d.date()
	    elif not isinstance(d, Date):
	        raise TypeError(
	            f"Unsupported date type {type(d)}; expected datetime.date, pandas.Timestamp, or str"
	        )

	    base_rate = BASE_ELECTRICITY_RATES[region]
	    series = _ELECTRICITY_SERIES.get(region)

	    if series is None or series.empty:
	        return base_rate

	    idx = series.index

	    # Outside known range → use base constant rate
	    if d < idx[0] or d > idx[-1]:
	        return base_rate

	    # Label slicing is inclusive of d, so the last element of the slice is
	    # the price for d itself when present and otherwise the most recent
	    # earlier price. It also copes with a date that appears more than once.
	    history = series.loc[:d]
	    if history.empty:
	        return base_rate

	    rate = float(history.iloc[-1])
	    if pd.isna(rate):
	        # The latest entry is a recorded gap; walk back to the last real price.
	        known = history.dropna()
	        if known.empty:
	            return base_rate
	        return float(known.iloc[-1])
	    return rate