Upload 2 files

- app1 - claude-4.1-opus.py +11 -0
- app2 - gpt-5-high.py +2 -2
app1 - claude-4.1-opus.py (CHANGED)

@@ -143,6 +143,17 @@ def _get_llama_instance() -> Llama:
         return _LLAMA_INSTANCE
     if Llama is None:
         raise RuntimeError("llama_cpp is not installed. This app variant requires llama-cpp-python.")
+    # Basic version guard: Fin-o1-14B GGUFs require a recent llama.cpp
+    try:
+        ver = getattr(llama_cpp_mod, "__version__", "0.0.0") or "0.0.0"
+        def _ver_tuple(s: str):
+            parts = [p for p in s.split(".") if p.isdigit()]
+            return tuple(int(p) for p in (parts + ["0", "0"])[:3])
+        if _ver_tuple(ver) < _ver_tuple("0.3.0"):
+            print(f"⚠️ Detected llama-cpp-python {ver}. Recommend >= 0.3.0 for these GGUFs.")
+            print("   Try: pip install -U llama-cpp-python")
+    except Exception:
+        pass
     model_path = _resolve_fin_o1_gguf_path()

     def try_make_llama(ctx_size: int) -> Llama | None:
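The guard above reads llama_cpp_mod.__version__, so app1 must bind llama_cpp_mod (and Llama) earlier in the file; that import block is outside this hunk. A minimal sketch of the pattern it presumably relies on, with all names treated as assumptions:

# Hypothetical import block assumed by the version guard above; app1's real
# imports are not shown in this hunk, so treat these bindings as illustrative.
try:
    import llama_cpp as llama_cpp_mod  # module handle, used only for __version__
    from llama_cpp import Llama        # class that loads the GGUF model
except ImportError:
    # Keep the module importable without llama-cpp-python; the runtime check
    # "if Llama is None" in the hunk then raises a clear RuntimeError.
    llama_cpp_mod = None
    Llama = None

Wrapping the whole check in "try: ... except Exception: pass" keeps the guard purely advisory: a missing __version__ attribute or an unparseable version string never blocks model loading, it only skips the warning.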
app2 - gpt-5-high.py (CHANGED)

@@ -28,9 +28,9 @@ warnings.filterwarnings('ignore', category=FutureWarning)

 # Local GGUF model config (CPU-only HF Spaces ~16GB RAM)
 # Default to a non-Qwen LLaMA-arch model to ensure compatibility
-GGUF_REPO = os.getenv("GGUF_REPO", "
+GGUF_REPO = os.getenv("GGUF_REPO", "QuantFactory/Meta-Llama-3.1-8B-Instruct-GGUF")
 # Default to lighter quant to reduce RAM (can override via env)
-GGUF_FILENAME = os.getenv("GGUF_FILENAME", "Llama-3.1-8B-Instruct.Q4_K_S.gguf")
+GGUF_FILENAME = os.getenv("GGUF_FILENAME", "Meta-Llama-3.1-8B-Instruct.Q4_K_S.gguf")
 N_CTX = int(os.getenv("LLAMA_N_CTX", "2048"))
 N_THREADS = int(os.getenv("LLAMA_N_THREADS", str(os.cpu_count() or 4)))
 N_BATCH = int(os.getenv("LLAMA_N_BATCH", "128"))
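For context, the variables in this hunk typically feed a download-and-load step elsewhere in app2 that is not part of the diff. A minimal sketch of that step, assuming the standard huggingface_hub and llama-cpp-python APIs (the variable names mirror the hunk; the loading code itself is an assumption, not app2's actual implementation):

# Sketch only: shows how the env-driven config above is usually consumed.
import os
from huggingface_hub import hf_hub_download  # downloads and caches the GGUF file
from llama_cpp import Llama

GGUF_REPO = os.getenv("GGUF_REPO", "QuantFactory/Meta-Llama-3.1-8B-Instruct-GGUF")
GGUF_FILENAME = os.getenv("GGUF_FILENAME", "Meta-Llama-3.1-8B-Instruct.Q4_K_S.gguf")
N_CTX = int(os.getenv("LLAMA_N_CTX", "2048"))
N_THREADS = int(os.getenv("LLAMA_N_THREADS", str(os.cpu_count() or 4)))
N_BATCH = int(os.getenv("LLAMA_N_BATCH", "128"))

model_path = hf_hub_download(repo_id=GGUF_REPO, filename=GGUF_FILENAME)
llm = Llama(model_path=model_path, n_ctx=N_CTX, n_threads=N_THREADS, n_batch=N_BATCH)

As the in-hunk comments note, the lighter Q4_K_S quant is the default to keep the 8B model's RAM use within a CPU-only Space's ~16 GB, and both the repo and filename can be overridden via the GGUF_REPO and GGUF_FILENAME environment variables.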