Spaces:

sakaltcommunity
/

Pythiajapneser

Sleeping

App Files Files Community

Sakalti committed on Oct 10, 2024

Commit

1cd0800

verified ·

1 Parent(s): 48fa0bf

Create app.py

Browse files

Files changed (1) hide show

app.py +70 -0

app.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
+from datasets import load_dataset
+import os
+def train_and_deploy(write_token, repo_name, license_text):
+    # トークンを環境変数に設定
+    os.environ['HF_WRITE_TOKEN'] = write_token
+    # ライセンスファイルを作成
+    with open("LICENSE", "w") as f:
+        f.write(license_text)
+    # モデルとトークナイザーの読み込み
+    model_name = "HuggingfaceH4/zephyr-7b-beta"  # トレーニング対象のモデル
+    model = AutoModelForCausalLM.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    # 日本語データセットの読み込み
+    dataset = load_dataset("Sakalti/hachiwari")
+    # データセットのトークン化
+    def tokenize_function(examples):
+        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)
+    tokenized_datasets = dataset.map(tokenize_function, batched=True)
+    # トレーニング設定
+    training_args = TrainingArguments(
+        output_dir="./results",
+        per_device_train_batch_size=8,
+        per_device_eval_batch_size=8,
+        evaluation_strategy="epoch",
+        save_strategy="epoch",
+        logging_dir="./logs",
+        logging_steps=10,
+        num_train_epochs=3,  # トレーニングエポック数
+        push_to_hub=True,  # Hugging Face Hubにプッシュ
+        hub_token=write_token,
+        hub_model_id=repo_name  # ユーザーが入力したリポジトリ名
+    )
+    # Trainerの設定
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=tokenized_datasets["train"],
+        eval_dataset=tokenized_datasets["test"],
+    )
+    # トレーニング実行
+    trainer.train()
+    # モデルをHugging Face Hubにプッシュ
+    trainer.push_to_hub()
+    return f"モデルが'{repo_name}'リポジトリにデプロイされました！"
+# Gradio UI
+with gr.Blocks() as demo:
+    gr.Markdown("### Zephyr-7B モデルの日本語特化トレーニングとデプロイ")
+    token_input = gr.Textbox(label="Hugging Face Write Token", placeholder="トークンを入力してください...")
+    repo_input = gr.Textbox(label="リポジトリ名", placeholder="デプロイするリポジトリ名を入力してください...")
+    license_input = gr.Textbox(label="ライセンス", placeholder="ライセンス情報を入力してください...")
+    output = gr.Textbox(label="出力")
+    train_button = gr.Button("デプロイ")
+    train_button.click(fn=train_and_deploy, inputs=[token_input, repo_input, license_input], outputs=output)
+demo.launch()