Spaces:
Runtime error
Runtime error
init repo
Browse files- .gitignore +2 -0
- app.py +111 -0
- assets/example.png +0 -0
- assets/example2.png +0 -0
- requirements.txt +1 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
data_dir/
|
| 2 |
+
raw_annotations
|
app.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
import os
|
| 4 |
+
from uuid import uuid4
|
| 5 |
+
from threading import Lock
|
| 6 |
+
import gradio as gr
|
| 7 |
+
import jsonlines
|
| 8 |
+
from huggingface_hub import CommitScheduler, snapshot_download
|
| 9 |
+
|
| 10 |
+
# Pull the benchmark files (everything under data_dir/, which includes any
# annotations already uploaded to data_dir/raw_annotations) next to this script.
snapshot_download(repo_id="TideDra/HDBench", local_dir="./", repo_type="dataset", allow_patterns=["data_dir/*"])

# Local folder holding the annotation files produced by this process; it is the
# folder handed to the CommitScheduler below.
JSON_DATASET_DIR = Path("raw_annotations")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)

# A fresh uuid-named file per process start; submit() rotates to a new one when
# the current file grows too large.
JSON_DATASET_PATH = JSON_DATASET_DIR / f"{uuid4()}.json"

# Background uploader for JSON_DATASET_DIR -> data_dir/raw_annotations in the
# dataset repo. Raises KeyError at import time if HF_TOKEN is not set.
# NOTE(review): repo_id has no namespace here while snapshot_download uses
# "TideDra/HDBench" - presumably it resolves against the token owner; confirm
# both point at the same repository.
scheduler = CommitScheduler(
    repo_id="HDBench",
    repo_type="dataset",
    folder_path=JSON_DATASET_DIR,
    path_in_repo="data_dir/raw_annotations",
    token=os.environ["HF_TOKEN"],
)

# Explicit encoding so the captions load the same way regardless of locale.
with open("./data_dir/qwenvl_test2017.json", encoding="utf-8") as f:
    dataset = json.load(f)

# Index the samples by image so already-annotated ones can be dropped by key.
dataset = {d['image']: d for d in dataset}

# Remove every sample that already has an annotation in the downloaded files.
if os.path.exists("./data_dir/raw_annotations"):
    exist_annotations = os.listdir("./data_dir/raw_annotations")
    for anno in exist_annotations:
        anno = os.path.join("./data_dir/raw_annotations", anno)
        with jsonlines.open(anno) as reader:
            for obj in reader:
                # The same image may appear in several records; ignore repeats.
                dataset.pop(obj['image'], None)

# Back to a list: get_new_data() hands samples out from its end.
dataset = list(dataset.values())

# Serialises access to `dataset` in get_new_data().
dispatcher_lock = Lock()
|
| 42 |
+
|
| 43 |
+
def submit(name: str, answer: str, img: str) -> tuple:
    """Append one annotation record to the current annotation file.

    Each record is written as a single JSON line with the keys
    ``annotator``, ``image`` and ``caption``.

    Args:
        name: Annotator nickname.
        answer: The edited caption.
        img: Image identifier of the sample being annotated. When it is
            ``None`` no sample is loaded and nothing is written.

    Returns:
        ``(None, None, status)``. The caller wires this to the image, the
        caption box and the remaining-count text, so the two ``None`` values
        clear the image and caption.
    """
    global JSON_DATASET_PATH
    # "Next" on an exhausted dataset yields no image; recording that would
    # only add a useless {"image": null} line to the annotation file.
    if img is not None:
        # Hold the scheduler's lock while touching files in the uploaded folder.
        with scheduler.lock:
            with JSON_DATASET_PATH.open("a", encoding="utf-8") as f:
                json.dump({"annotator": name, "image": img, "caption": answer}, f)
                f.write("\n")
            # Each file should stay below 5 MB, so switch to a new
            # uuid-named file once the current one exceeds 4 MiB.
            if JSON_DATASET_PATH.stat().st_size > 1024 * 1024 * 4:
                JSON_DATASET_PATH = JSON_DATASET_DIR / f"{uuid4()}.json"

    return None, None, f"Number of samples to be annotated: {len(dataset)}"
|
| 56 |
+
|
| 57 |
+
def disable_and_enable_buttons():
    """Return a pair of component updates: (make non-interactive, make interactive).

    The first update goes to the first output component and the second to the
    second one, so the caller decides which button is switched off and which
    is switched on by the order of its ``outputs`` list.
    """
    turn_off = gr.update(interactive=False)
    turn_on = gr.update(interactive=True)
    return turn_off, turn_on
|
| 59 |
+
|
| 60 |
+
def get_new_data():
    """Hand out the next sample that still needs an annotation.

    The sample is removed from the end of the module-level ``dataset`` list
    while ``dispatcher_lock`` is held, so each sample is handed out once.

    Returns:
        ``(image, caption, image)`` for the removed sample, or
        ``(None, None, None)`` when no samples are left.
    """
    with dispatcher_lock:
        if not dataset:
            return None, None, None
        # pop() removes the last item in place in O(1); slicing with [:-1]
        # copied the whole list on every request.
        sample = dataset.pop()
    return sample["image"], sample["caption"], sample["image"]
|
| 70 |
+
|
| 71 |
+
# Markdown shown at the top of the page (task description and annotation rules).
# Raw string: the text uses "\<" so that Markdown prints the tag names literally
# instead of treating them as HTML, and "\<" is not a valid Python escape
# sequence in a normal string literal. Every tag mention is escaped, including
# the ones in the first paragraph, so none of them is swallowed as HTML.
instruction = r"""
# 任务说明:

给定一张图片和该图片的描述,描述中可能存在错误,你需要用\<f>和\</f>标签将错误部分标出,并紧接着在其后用\<t>和\</t>标签给出你的纠正。如果描述完全正确则不需要做任何修改。请在Your Name中填写你的昵称,以方便我们统计与审核你的贡献。点击Next按钮获取下一个样本,修改完描述后,点击Submit按钮提交你的标注。标注结果可在[此仓库](https://huggingface.co/datasets/TideDra/HDBench/tree/main/data_dir/raw_annotations)查看(每十分钟更新一次),请确保你的标注成功存储到了仓库中,我们最终根据仓库中的标注结果进行统计你的贡献。

标注时需满足一些要求,以下要求按优先级从高到低依次列出,当要求冲突时,满足高优先级要求:
1. 将错误部分替换为正确答案后,整体语句通顺,没有语法错误。反例: "It is a cat eating a \<f>mouse\</f>\<t>rice\</t>" 正例: "It is a cat eating \<f>a mouse\</f>\<t>rice\</t>"。解释: rice不可数,所以要把"a"替换掉。
2. 改动应尽量少,尽量不改变原来的句式。反例: "There are \<f>two people in the image\</f>\<t>three people in the image\</t>" 正例: "There are \<f>two\</f>\<t>three\</t> people in the image"。解释:只把two改成three,用最少的改动实现了纠错。
3. 标签内部不要有冗余空格。反例: "There are\<f> two \</f>\<t> three \</t> people in the image" 正例: "There are \<f>two\</f>\<t>three\</t> people in the image"
4. 当描述中提及图片中完全不存在或不相干的事物,应直接删除该部分.\<t>\</t>内部不加任何文字表示删除。例:"In the image, there is a dog. \<f>There are also some cats.\</f>\<t>\</t>"
"""
|
| 82 |
+
# Worked examples offered below the editor: [image path, correctly tagged caption].
example1 = [
    "./assets/example.png",
    "The image features a misty canal with two wooden benches placed alongside it. One of the benches is positioned closer to the water, while the other is a bit further back. The foggy atmosphere creates a sense of serenity and calmness, as if the benches are the only beings in the scene.\n\nIn the distance, there <f>are two cars</f><t>is one car</t> parked near a bridge, adding to the serene ambiance. <f>A person can be seen in the far end of the scene, likely enjoying the peaceful environment.</f><t></t> The bench placement and the misty canal make this scene an ideal spot for relaxation or reflection.",
]
example2 = [
    "./assets/example2.png",
    "The image features a smiling stuffed lion sitting on a wooden picnic table. The picnic table is located in a park-like setting, with green grass surrounding the bench on which the stuffed lion is placed. \nThere <f>are two</f><t>is one</t> bench visible in the scene, with the main bench featuring the stuffed lion on it. <f>The other bench is situated a little to the right and is empty.</f><t></t>The arrangement creates a playful atmosphere, as if the lion is waiting for someone or enjoying the company of the empty bench.",
]

# NOTE(review): the layout nesting below is reconstructed from the statement
# order; confirm it matches the intended page layout.
with gr.Blocks() as demo:
    gr.Markdown(instruction)
    with gr.Row():
        # Left: the image being annotated. Right: the annotation form.
        image = gr.Image()
        with gr.Column():
            # Carries the image identifier of the current sample to submit().
            image_path = gr.State()
            rest_num = gr.Markdown(label="rest_num")
            name = gr.Textbox(
                label="Your Name",
                placeholder="Set your name here to mark your annotations",
            )
            text = gr.Textbox(label="Caption", lines=20)
            with gr.Row():
                next_button = gr.Button("Next")
                # Stays disabled until a sample has been fetched with "Next".
                submit_button = gr.Button("Submit", interactive=False)
    gr.Markdown("# 示例:")
    gr.Examples(
        examples=[example1, example2],
        inputs=[image, text],
    )

    # Submit: store the annotation, then disable Submit and enable Next.
    submit_event = submit_button.click(
        fn=submit,
        inputs=[name, text, image_path],
        outputs=[image, text, rest_num],
    )
    submit_event.success(
        fn=disable_and_enable_buttons,
        outputs=[submit_button, next_button],
    )

    # Next: load a new sample, then disable Next and enable Submit.
    next_event = next_button.click(
        fn=get_new_data,
        outputs=[image_path, text, image],
    )
    next_event.success(
        fn=disable_and_enable_buttons,
        outputs=[next_button, submit_button],
    )


demo.launch(share=True)
|
assets/example.png
ADDED
|
assets/example2.png
ADDED
|
requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
jsonlines
|