Upload 2 files

- app1 - claude-4.1-opus.py +11 -0
- app2 - gpt-5-high.py +2 -2
app1 - claude-4.1-opus.py (CHANGED)

@@ -143,6 +143,17 @@ def _get_llama_instance() -> Llama:
         return _LLAMA_INSTANCE
     if Llama is None:
         raise RuntimeError("llama_cpp is not installed. This app variant requires llama-cpp-python.")
+    # Basic version guard: Fin-o1-14B GGUFs require a recent llama.cpp
+    try:
+        ver = getattr(llama_cpp_mod, "__version__", "0.0.0") or "0.0.0"
+        def _ver_tuple(s: str):
+            parts = [p for p in s.split(".") if p.isdigit()]
+            return tuple(int(p) for p in (parts + ["0", "0"])[:3])
+        if _ver_tuple(ver) < _ver_tuple("0.3.0"):
+            print(f"⚠️ Detected llama-cpp-python {ver}. Recommend >= 0.3.0 for these GGUFs.")
+            print("   Try: pip install -U llama-cpp-python")
+    except Exception:
+        pass
     model_path = _resolve_fin_o1_gguf_path()

     def try_make_llama(ctx_size: int) -> Llama | None:
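The guard above reads llama_cpp_mod.__version__, so app1 must bind llama_cpp_mod (and Llama) earlier in the file; that import block is outside this hunk. A minimal sketch of the pattern it presumably relies on, with all names treated as assumptions:

# Hypothetical import block assumed by the version guard above; app1's real
# imports are not shown in this hunk, so treat these bindings as illustrative.
try:
    import llama_cpp as llama_cpp_mod  # module handle, used only for __version__
    from llama_cpp import Llama        # class that loads the GGUF model
except ImportError:
    # Keep the module importable without llama-cpp-python; the runtime check
    # "if Llama is None" in the hunk then raises a clear RuntimeError.
    llama_cpp_mod = None
    Llama = None

Wrapping the whole check in "try: ... except Exception: pass" keeps the guard purely advisory: a missing __version__ attribute or an unparseable version string never blocks model loading, it only skips the warning.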
app2 - gpt-5-high.py (CHANGED)

@@ -28,9 +28,9 @@ warnings.filterwarnings('ignore', category=FutureWarning)

 # Local GGUF model config (CPU-only HF Spaces ~16GB RAM)
 # Default to a non-Qwen LLaMA-arch model to ensure compatibility
-GGUF_REPO = os.getenv("GGUF_REPO", "
+GGUF_REPO = os.getenv("GGUF_REPO", "QuantFactory/Meta-Llama-3.1-8B-Instruct-GGUF")
 # Default to lighter quant to reduce RAM (can override via env)
-GGUF_FILENAME = os.getenv("GGUF_FILENAME", "Llama-3.1-8B-Instruct.Q4_K_S.gguf")
+GGUF_FILENAME = os.getenv("GGUF_FILENAME", "Meta-Llama-3.1-8B-Instruct.Q4_K_S.gguf")
 N_CTX = int(os.getenv("LLAMA_N_CTX", "2048"))
 N_THREADS = int(os.getenv("LLAMA_N_THREADS", str(os.cpu_count() or 4)))
 N_BATCH = int(os.getenv("LLAMA_N_BATCH", "128"))
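For context, the variables in this hunk typically feed a download-and-load step elsewhere in app2 that is not part of the diff. A minimal sketch of that step, assuming the standard huggingface_hub and llama-cpp-python APIs (the variable names mirror the hunk; the loading code itself is an assumption, not app2's actual implementation):

# Sketch only: shows how the env-driven config above is usually consumed.
import os
from huggingface_hub import hf_hub_download  # downloads and caches the GGUF file
from llama_cpp import Llama

GGUF_REPO = os.getenv("GGUF_REPO", "QuantFactory/Meta-Llama-3.1-8B-Instruct-GGUF")
GGUF_FILENAME = os.getenv("GGUF_FILENAME", "Meta-Llama-3.1-8B-Instruct.Q4_K_S.gguf")
N_CTX = int(os.getenv("LLAMA_N_CTX", "2048"))
N_THREADS = int(os.getenv("LLAMA_N_THREADS", str(os.cpu_count() or 4)))
N_BATCH = int(os.getenv("LLAMA_N_BATCH", "128"))

model_path = hf_hub_download(repo_id=GGUF_REPO, filename=GGUF_FILENAME)
llm = Llama(model_path=model_path, n_ctx=N_CTX, n_threads=N_THREADS, n_batch=N_BATCH)

As the in-hunk comments note, the lighter Q4_K_S quant is the default to keep the 8B model's RAM use within a CPU-only Space's ~16 GB, and both the repo and filename can be overridden via the GGUF_REPO and GGUF_FILENAME environment variables.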