sithuWiki committed on
Commit
430a480
·
verified ·
1 Parent(s): ae87734

Update preprocessing.py

Browse files
Files changed (1) hide show
  1. preprocessing.py +13 -2
preprocessing.py CHANGED
@@ -161,14 +161,25 @@ def get_latest_sequence(
161
  "electricity_rate", # 14
162
  ]
163
 
164
- df_miner = df_miner.dropna().reset_index(drop=True)
 
 
 
 
 
 
 
 
165
 
166
  if len(df_miner) < window_size:
167
  raise ValueError(
168
- f"Not enough data to build a {window_size}-day window, got {len(df_miner)} rows."
 
169
  )
170
 
 
171
  df_window = df_miner.tail(window_size).reset_index(drop=True)
 
172
  sequence = df_window[feature_cols].values.astype(float)
173
  pred_date = df_window["date"].iloc[-1]
174
 
 
161
  "electricity_rate", # 14
162
  ]
163
 
164
+ # ---------------------------------------------------------
165
+ # Ensure we only use rows with complete core blockchain data.
166
+ # This avoids including a "today" row where, e.g., difficulty or
167
+ # hashrate are still NaN while price is already updated.
168
+ # ---------------------------------------------------------
169
+ df_miner = df_miner.sort_values("date").reset_index(drop=True)
170
+
171
+ core_cols = ["bitcoin_price", "difficulty", "fees", "hashrate", "revenue"]
172
+ df_miner = df_miner.dropna(subset=core_cols)
173
 
174
  if len(df_miner) < window_size:
175
  raise ValueError(
176
+ f"Not enough data to build a {window_size}-day window after dropping NaNs. "
177
+ f"Have {len(df_miner)} rows, need at least {window_size}."
178
  )
179
 
180
+ # Take the last `window_size` fully-populated days
181
  df_window = df_miner.tail(window_size).reset_index(drop=True)
182
+
183
  sequence = df_window[feature_cols].values.astype(float)
184
  pred_date = df_window["date"].iloc[-1]
185