Spaces:
Running
Running
Update preprocessing.py
Browse files- preprocessing.py +13 -2
preprocessing.py
CHANGED
|
@@ -161,14 +161,25 @@ def get_latest_sequence(
|
|
| 161 |
"electricity_rate", # 14
|
| 162 |
]
|
| 163 |
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
if len(df_miner) < window_size:
|
| 167 |
raise ValueError(
|
| 168 |
-
f"Not enough data to build a {window_size}-day window
|
|
|
|
| 169 |
)
|
| 170 |
|
|
|
|
| 171 |
df_window = df_miner.tail(window_size).reset_index(drop=True)
|
|
|
|
| 172 |
sequence = df_window[feature_cols].values.astype(float)
|
| 173 |
pred_date = df_window["date"].iloc[-1]
|
| 174 |
|
|
|
|
| 161 |
"electricity_rate", # 14
|
| 162 |
]
|
| 163 |
|
| 164 |
+
# ---------------------------------------------------------
|
| 165 |
+
# Ensure we only use rows with complete core blockchain data.
|
| 166 |
+
# This avoids including a "today" row where, e.g., difficulty or
|
| 167 |
+
# hashrate is still NaN while price is already updated.
|
| 168 |
+
# ---------------------------------------------------------
|
| 169 |
+
df_miner = df_miner.sort_values("date").reset_index(drop=True)
|
| 170 |
+
|
| 171 |
+
core_cols = ["bitcoin_price", "difficulty", "fees", "hashrate", "revenue"]
|
| 172 |
+
df_miner = df_miner.dropna(subset=core_cols)
|
| 173 |
|
| 174 |
if len(df_miner) < window_size:
|
| 175 |
raise ValueError(
|
| 176 |
+
f"Not enough data to build a {window_size}-day window after dropping NaNs. "
|
| 177 |
+
f"Have {len(df_miner)} rows, need at least {window_size}."
|
| 178 |
)
|
| 179 |
|
| 180 |
+
# Take the last `window_size` rows whose core columns are all non-NaN
# (dates may be non-consecutive if interior rows were dropped above)
|
| 181 |
df_window = df_miner.tail(window_size).reset_index(drop=True)
|
| 182 |
+
|
| 183 |
sequence = df_window[feature_cols].values.astype(float)
|
| 184 |
pred_date = df_window["date"].iloc[-1]
|
| 185 |
|