Spaces:

TideDra
/

HDBenchAnnotator

Runtime error

App Files Files Community

TideDra committed on Feb 26, 2024

Commit

38601f1

1 Parent(s): f967ea9

init repo

Browse files

Files changed (5) hide show

.gitignore +2 -0
app.py +111 -0
assets/example.png +0 -0
assets/example2.png +0 -0
requirements.txt +1 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ data_dir/
2	+ raw_annotations

app.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import json
+from pathlib import Path
+import os
+from uuid import uuid4
+from threading import Lock
+import gradio as gr
+import jsonlines
+from huggingface_hub import CommitScheduler, snapshot_download
+snapshot_download(repo_id="TideDra/HDBench", local_dir="./",repo_type="dataset",allow_patterns=["data_dir/*"])
+JSON_DATASET_DIR = Path("raw_annotations")
+JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
+JSON_DATASET_PATH = JSON_DATASET_DIR / f"{uuid4()}.json"
+scheduler = CommitScheduler(
+    repo_id="HDBench",
+    repo_type="dataset",
+    folder_path=JSON_DATASET_DIR,
+    path_in_repo="data_dir/raw_annotations",
+    token=os.environ["HF_TOKEN"],
+)
+global dataset
+with open("./data_dir/qwenvl_test2017.json") as f:
+    dataset = json.load(f)
+dataset = {d['image']:d for d in dataset}
+if os.path.exists("./data_dir/raw_annotations"):
+    exist_annotations = os.listdir("./data_dir/raw_annotations")
+    for anno in exist_annotations:
+        anno = os.path.join("./data_dir/raw_annotations",anno)
+        with jsonlines.open(anno) as reader:
+            for obj in reader:
+                if obj['image'] in dataset:
+                    dataset.pop(obj['image'])
+dataset = list(dataset.values())
+dispatcher_lock = Lock()
+def submit(name: str,answer:str,img:str) -> str:
+    global dataset
+    global JSON_DATASET_DIR
+    global JSON_DATASET_PATH
+    with scheduler.lock:
+        # file size should less than 5 mb
+        with JSON_DATASET_PATH.open("a") as f:
+            json.dump({"annotator": name, "image": img, "caption": answer}, f)
+            f.write("\n")
+        if JSON_DATASET_PATH.stat().st_size > 1024 * 1024 * 4:
+            JSON_DATASET_PATH = JSON_DATASET_DIR / f"{uuid4()}.json"
+    return None,None,f"Number of samples to be annotated: {len(dataset)}"
+def disable_and_enable_buttons():
+    return gr.update(interactive=False),gr.update(interactive=True)
+def get_new_data():
+    global dataset
+    with dispatcher_lock:
+        if len(dataset) == 0:
+            data = (None,None,None)
+        else:
+            data = dataset[-1]
+            dataset = dataset[:-1]
+            data = (data["image"],data["caption"],data["image"])
+    return data
+instruction = """
+# 任务说明:
+给定一张图片和该图片的描述，描述中可能存在错误，你需要用<f>和</f>标签将错误部分标出，并紧接着在其后用<t>和</t>标签给出你的纠正。如果描述完全正确则不需要做任何修改。请在Your Name中填写你的昵称，以方便我们统计与审核你的贡献。点击Next按钮获取下一个样本，修改完描述后，点击Submit按钮提交你的标注。标注结果可在[此仓库](https://huggingface.co/datasets/TideDra/HDBench/tree/main/data_dir/raw_annotations)查看（每十分钟更新一次），请确保你的标注成功存储到了仓库中，我们最终根据仓库中的标注结果进行统计你的贡献。
+标注时需满足一些要求，以下要求按优先级从高到低依次列出，当要求冲突时，满足高优先级要求：
+1. 将错误部分替换为正确答案后，整体语句通顺，没有语法错误。反例: "It is a cat eating a \<f>mouse\</f>\<t>rice\</t>" 正例: "It is a cat eating \<f>a mouse\</f>\<t>rice\</t>"。解释: rice不可数，所以要把"a"替换掉。
+2. 改动应尽量少,尽量不改变原来的句式。反例: "There are \<f>two people in the image\</f>\<t>three people in the image\</t>" 正例: "There are \<f>two\</f>\<t>three\</t> people in the image"。解释：只把two改成three，用最少的改动实现了纠错。
+3. 标签内部不要有冗余空格。反例: "There are\<f> two \</f>\<t> three \</t> people in the image" 正例: "There are \<f>two\</f>\<t>three\</t> people in the image"
+4. 当描述中提及图片中完全不存在或不相干的事物，应直接删除该部分.\<t>\</t>内部不加任何文字表示删除。例："In the image, there is a dog. \<f>There are also some cats.\</f>\<t>\</t>"
+"""
+# Worked examples passed to gr.Examples: each is [image path, caption marked up with <f>...</f><t>...</t> tags].
+example1 = ["./assets/example.png","The image features a misty canal with two wooden benches placed alongside it. One of the benches is positioned closer to the water, while the other is a bit further back. The foggy atmosphere creates a sense of serenity and calmness, as if the benches are the only beings in the scene.\n\nIn the distance, there <f>are two cars</f><t>is one car</t> parked near a bridge, adding to the serene ambiance. <f>A person can be seen in the far end of the scene, likely enjoying the peaceful environment.</f><t></t>  The bench placement and the misty canal make this scene an ideal spot for relaxation or reflection."]
+example2 = ["./assets/example2.png","The image features a smiling stuffed lion sitting on a wooden picnic table. The picnic table is located in a park-like setting, with green grass surrounding the bench on which the stuffed lion is placed. \nThere <f>are two</f><t>is one</t> bench visible in the scene, with the main bench featuring the stuffed lion on it. <f>The other bench is situated a little to the right and is empty.</f><t></t>The arrangement creates a playful atmosphere, as if the lion is waiting for someone or enjoying the company of the empty bench."]
+with gr.Blocks() as demo:
+    gr.Markdown(instruction)
+    with gr.Row():
+        image = gr.Image()
+        with gr.Column():
+            image_path = gr.State()
+            rest_num = gr.Markdown(label="rest_num")
+            name = gr.Textbox(label="Your Name",placeholder="Set your name here to mark your annotations")
+            text = gr.Textbox(label="Caption",lines=20)
+            with gr.Row():
+                next_button = gr.Button("Next")
+                submit_button = gr.Button("Submit",interactive=False)
+    gr.Markdown("# 示例:")
+    gr.Examples(
+        examples=[example1,example2],
+        inputs=[image,text]
+    )
+    submit_button.click(fn=submit,inputs=[name,text,image_path],outputs=[image,text,rest_num]).success(
+        fn=disable_and_enable_buttons,
+        outputs=[submit_button,next_button]
+    )
+    next_button.click(fn=get_new_data,outputs=[image_path,text,image]).success(
+        fn=disable_and_enable_buttons,
+        outputs=[next_button,submit_button]
+    )
+demo.launch(share=True)

assets/example.png ADDED Viewed

assets/example2.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ jsonlines