# Source: Hugging Face Space "codemichaeld/new01" (status: Running).
# File size: 8,091 bytes.

import gradio as gr
import os
import tempfile
import shutil
import subprocess
import re
import json
import datetime
from pathlib import Path
from huggingface_hub import HfApi, hf_hub_download
from safetensors.torch import save_file
import torch

# --- Utility: GGUF to FP8 Safetensors using gguf-connector CLI ---
def convert_gguf_to_fp8_safetensors(gguf_path, output_dir, progress=gr.Progress()):
    """Convert a GGUF file into ``model.safetensors`` by shelling out to ``ggc``.

    Two ``ggc`` invocations are chained: ``ggc t3a`` writes an intermediate
    safetensors file into a scratch directory, then ``ggc q8`` writes
    ``model.safetensors`` into *output_dir*.  A ``config.json`` and a
    ``tokenizer.json`` with fixed contents are written next to it.

    NOTE(review): the ``t3a`` / ``q8`` subcommand names and their positional
    ``<input> <output>`` form are kept exactly as found; confirm them against
    the installed gguf-connector version.

    Args:
        gguf_path: Path of the GGUF file to convert.
        output_dir: Directory that receives ``model.safetensors``,
            ``config.json`` and ``tokenizer.json`` (created if missing).
        progress: Gradio progress callback, called as
            ``progress(fraction, desc=...)``.

    Returns:
        A ``(success, message)`` tuple.  Failures are reported through the
        tuple (``False`` plus the error text) rather than raised.
    """
    progress(0.1, desc="Starting GGUF to FP8 conversion...")

    # Bound before the try block so the finally clause can tell whether the
    # scratch directory was ever created.
    temp_safetensors_dir = None
    try:
        # Probe for the CLI first so a missing install yields a readable
        # message instead of a bare "[Errno 2] No such file or directory".
        try:
            subprocess.run(["ggc", "--version"], check=True, capture_output=True)
        except FileNotFoundError as exc:
            raise RuntimeError(
                "The 'ggc' command was not found. Install 'gguf-connector' first."
            ) from exc

        temp_safetensors_dir = tempfile.mkdtemp()
        safetensors_path = os.path.join(temp_safetensors_dir, "intermediate.safetensors")
        os.makedirs(output_dir, exist_ok=True)
        fp8_safetensors_path = os.path.join(output_dir, "model.safetensors")

        # (progress fraction, progress text, command, failure label, expected output)
        steps = (
            (
                0.3,
                "Converting GGUF to Safetensors...",
                ["ggc", "t3a", gguf_path, safetensors_path],
                "GGUF to Safetensors failed",
                safetensors_path,
            ),
            (
                0.6,
                "Quantizing Safetensors to FP8...",
                ["ggc", "q8", safetensors_path, fp8_safetensors_path],
                "Safetensors to FP8 failed",
                fp8_safetensors_path,
            ),
        )
        for fraction, description, command, failure_label, produced_path in steps:
            progress(fraction, desc=description)
            result = subprocess.run(command, capture_output=True, text=True)
            if result.returncode != 0:
                # Fall back to stdout / the exit code when stderr is empty so
                # the user never sees a blank failure reason.
                detail = (
                    result.stderr.strip()
                    or result.stdout.strip()
                    or f"exit code {result.returncode}"
                )
                raise RuntimeError(f"{failure_label}: {detail}")
            # A zero exit status alone does not prove the file was written;
            # without this check an empty model could be reported as success.
            if not os.path.isfile(produced_path):
                raise RuntimeError(
                    f"{failure_label}: 'ggc' exited successfully but did not create "
                    f"'{os.path.basename(produced_path)}'."
                )

        # NOTE(review): both files below contain fixed placeholder values for
        # a Qwen model; they are not derived from the GGUF metadata, and
        # tokenizer.json only holds these two keys.  Verify that downstream
        # consumers can work with them.
        config_path = os.path.join(output_dir, "config.json")
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump({
                "model_type": "qwen",
                "quantization": "fp8",
                "architectures": ["QwenForCausalLM"]
            }, f)

        tokenizer_path = os.path.join(output_dir, "tokenizer.json")
        with open(tokenizer_path, "w", encoding="utf-8") as f:
            json.dump({"model_type": "qwen", "vocab_size": 152064}, f)

        progress(1.0, desc="Conversion to FP8 Safetensors complete!")
        return True, "Conversion successful."

    except Exception as e:
        # Boundary for the UI: callers expect a (False, message) tuple, not
        # an exception.
        return False, str(e)
    finally:
        if temp_safetensors_dir is not None:
            shutil.rmtree(temp_safetensors_dir, ignore_errors=True)

# --- Main Processing Function ---
def process_and_upload(gguf_url, hf_token, new_repo_id, private_repo, progress=gr.Progress()):
    """Download a GGUF file from the Hub, convert it, and upload the result.

    Steps: validate the inputs, authenticate with *hf_token*, download the
    file named by *gguf_url*, run ``convert_gguf_to_fp8_safetensors``, create
    (or reuse) *new_repo_id*, write a README model card and upload the output
    folder.  Both temporary directories are always removed afterwards.

    Args:
        gguf_url: Hugging Face URL of a ``.gguf`` file.
        hf_token: Hugging Face access token used for every Hub call.
        new_repo_id: Target repository in ``username/model-name`` form.
        private_repo: Whether the target repository is created as private.
        progress: Gradio progress callback.

    Returns:
        A 3-tuple ``(result_html, status_message, "")``.  ``result_html`` is
        an HTML string on success and ``None`` on any failure; the third item
        is always an empty string.  Errors are reported through
        ``status_message`` rather than raised.
    """
    # Pasted values frequently carry stray whitespace or a trailing newline.
    gguf_url = (gguf_url or "").strip()
    hf_token = (hf_token or "").strip()
    new_repo_id = (new_repo_id or "").strip()

    if not all([gguf_url, hf_token, new_repo_id]):
        return None, "❌ Error: Please fill in all fields.", ""

    if not re.fullmatch(r"[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+", new_repo_id):
        return None, "❌ Error: Invalid repository ID format. Use 'username/model-name'.", ""

    # Validate the URL before any network call or temp-dir creation.
    try:
        repo_id, revision, filename = _parse_hf_gguf_url(gguf_url)
    except ValueError as exc:
        return None, f"❌ Error: {exc}", ""

    temp_download_dir = tempfile.mkdtemp()
    final_output_dir = tempfile.mkdtemp()

    try:
        # Authenticate
        progress(0.05, desc="Logging into Hugging Face...")
        api = HfApi(token=hf_token)
        user_info = api.whoami()
        user_name = user_info['name']
        progress(0.1, desc=f"Logged in as {user_name}.")

        # Download.  ``filename`` keeps any sub-folder part and ``revision``
        # is taken from the URL, so files outside the repo root or on a
        # non-default branch resolve correctly.  Interrupted downloads are
        # resumed by huggingface_hub on its own; the deprecated
        # ``resume_download`` flag is no longer passed.
        progress(0.15, desc="Downloading GGUF file...")
        gguf_path = hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            revision=revision,
            cache_dir=temp_download_dir,
            token=hf_token
        )
        progress(0.3, desc=f"Downloaded '{filename}'.")

        # Convert
        success, msg = convert_gguf_to_fp8_safetensors(gguf_path, final_output_dir, progress)
        if not success:
            return None, f"❌ Conversion failed: {msg}", ""

        progress(0.8, desc="Preparing upload...")

        # Create repo (no-op when it already exists)
        api.create_repo(
            repo_id=new_repo_id,
            private=private_repo,
            repo_type="model",
            exist_ok=True
        )

        # Generate README.  The timestamp is explicit UTC so it is
        # unambiguous regardless of the host's time zone.
        converted_at = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
        readme_content = f"""---
license: other
library_name: transformers
tags:
- gguf
- fp8
- safetensors
- converted-by-gradio
- gguf-to-fp8
model-index:
- name: {new_repo_id.split('/')[-1]}
  results: []
---

# Model Card for {new_repo_id}

Converted from GGUF:
- **Source:** `{gguf_url}`
- **Filename:** `{filename}`

## Conversion
Dequantized from GGUF and requantized to **FP8** using `gguf-connector`.
- **Converted by:** {user_name}
- **Date:** {converted_at}
"""
        with open(os.path.join(final_output_dir, "README.md"), "w", encoding="utf-8") as f:
            f.write(readme_content)

        # Upload
        progress(0.9, desc="Uploading to Hugging Face Hub...")
        api.upload_folder(
            repo_id=new_repo_id,
            folder_path=final_output_dir,
            repo_type="model",
            token=hf_token,
            commit_message="Upload FP8 Safetensors model converted via gguf-connector"
        )

        progress(1.0, desc="✅ Upload complete!")
        # This value is rendered by an HTML component, so it must be real
        # HTML: Markdown syntax would be displayed literally.  new_repo_id
        # has already been restricted to [a-zA-Z0-9._-/] above, so it is
        # safe to interpolate without escaping.
        visibility = 'Private' if private_repo else 'Public'
        result_html = (
            "<p>✅ Success!<br>Your FP8 Safetensors model is ready.</p>"
            f'<p><strong>Repository:</strong> '
            f'<a href="https://huggingface.co/{new_repo_id}" target="_blank" rel="noopener">'
            f"{new_repo_id}</a><br>"
            f"<strong>Visibility:</strong> {visibility}</p>"
        )
        return result_html, "✅ Conversion and upload completed!", ""

    except Exception as e:
        # UI boundary: surface the failure in the status area instead of
        # letting the event handler crash.
        return None, f"❌ Unexpected error: {str(e)}", ""
    finally:
        shutil.rmtree(temp_download_dir, ignore_errors=True)
        shutil.rmtree(final_output_dir, ignore_errors=True)


def _parse_hf_gguf_url(url):
    """Split a Hugging Face ``.gguf`` file URL into ``(repo_id, revision, filename)``.

    Accepts ``https://huggingface.co/<repo>/resolve/<revision>/<path>`` and
    the ``/blob/`` variant, with or without a scheme, query string or
    fragment.  ``filename`` keeps any sub-folder components.  For a URL
    without a ``resolve``/``blob`` segment, the first two path segments are
    taken as the repo, the rest as the file path, and ``revision`` is
    ``None``.

    Raises:
        ValueError: If the host is not huggingface.co or the path does not
            name a ``.gguf`` file inside a repository.
    """
    # Imported locally so this helper does not depend on the module's
    # top-level import block.
    from urllib.parse import unquote, urlparse

    candidate = url.strip()
    if "://" not in candidate:
        # Without a scheme urlparse would put the host into the path.
        candidate = f"https://{candidate}"
    parsed = urlparse(candidate)

    host = (parsed.hostname or "").lower()
    if host != "huggingface.co" and not host.endswith(".huggingface.co"):
        raise ValueError("URL must be from Hugging Face.")

    # Split on raw "/" first, then decode each segment, so an encoded slash
    # inside a revision (e.g. "refs%2Fpr%2F1") stays within one segment.
    segments = [unquote(part) for part in parsed.path.split("/") if part]
    markers = ("resolve", "blob")

    if len(segments) > 2 and segments[2] in markers:
        marker = 2  # namespaced repo: <user>/<name>/resolve/...
    elif len(segments) > 1 and segments[1] in markers:
        marker = 1  # repo without a namespace: <name>/resolve/...
    else:
        marker = None

    if marker is None:
        repo_parts, revision, file_parts = segments[:2], None, segments[2:]
        well_formed = len(repo_parts) == 2 and bool(file_parts)
    else:
        repo_parts = segments[:marker]
        revision = segments[marker + 1] if len(segments) > marker + 1 else None
        file_parts = segments[marker + 2:]
        well_formed = revision is not None and bool(file_parts)

    if not well_formed or not file_parts[-1].lower().endswith(".gguf"):
        raise ValueError("Invalid GGUF URL format.")

    return "/".join(repo_parts), revision, "/".join(file_parts)

# --- Gradio Interface ---
def _convert_and_upload(gguf_url_value, hf_token_value, new_repo_id_value, private_repo_value,
                        progress=gr.Progress()):
    """Adapt ``process_and_upload`` to the two output components of the UI.

    ``process_and_upload`` returns three values, while the click handler
    below is wired to exactly two outputs (result HTML, status Markdown).
    Only the first two values are forwarded so the number of returned values
    matches the number of output components.
    """
    outcome = process_and_upload(
        gguf_url_value, hf_token_value, new_repo_id_value, private_repo_value, progress
    )
    return outcome[:2]


# Page layout: inputs in two columns, one action button, then a row holding
# the status text and the link to the created repository.
with gr.Blocks(title="GGUF → FP8 Safetensors Converter") as demo:
    gr.Markdown("# 🔄 GGUF to FP8 Safetensors Converter")
    gr.Markdown("Uses `gguf-connector` to dequantize GGUF → Safetensors → FP8, then uploads to your Hugging Face account.")

    with gr.Row():
        with gr.Column():
            gguf_url = gr.Textbox(
                label="GGUF File URL",
                placeholder="https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/qwen3-4b.Q5_K_M.gguf",
                info="Must be a direct .gguf file URL from Hugging Face."
            )
            hf_token = gr.Textbox(
                label="Hugging Face Token",
                type="password",
                info="Token with write access. Get it at https://huggingface.co/settings/tokens"
            )
        with gr.Column():
            new_repo_id = gr.Textbox(
                label="New Repository ID",
                placeholder="your-username/qwen3-4b-fp8",
                info="Format: username/model-name"
            )
            private_repo = gr.Checkbox(label="Make Repository Private", value=False)

    convert_btn = gr.Button("🚀 Convert & Upload", variant="primary")

    with gr.Row():
        status_output = gr.Markdown()
        repo_link_output = gr.HTML()

    # Output order matches the handler's return order: HTML first, status second.
    convert_btn.click(
        fn=_convert_and_upload,
        inputs=[gguf_url, hf_token, new_repo_id, private_repo],
        outputs=[repo_link_output, status_output],
        show_progress=True
    )

    gr.Examples(
        examples=[
            ["https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/qwen3-4b.Q5_K_M.gguf"]
        ],
        inputs=[gguf_url]
    )

# Launch only when executed as a script, so importing this module (for
# example to reuse `demo` or the functions above) does not start a server.
if __name__ == "__main__":
    demo.launch()