import json
import os

# Convert the MSVD per-line caption annotations into COCO-style JSON files,
# one file per split (train/val/test).
subsets = ["train", "val", "test"]
save_path = 'msvd_dataset/new_annotations'
os.makedirs(save_path, exist_ok=True)  # make sure the output directory exists

# youtube_mapping.txt maps original YouTube clip names to "vidN" indices.
with open("msvd_dataset/txt_labels/youtube_mapping.txt", 'r') as f:
    videoindex = f.readlines()

name2idx = dict()
idx2name = dict()
for v in videoindex:
    name, idx = v.split()[0], v.split()[1]
    name2idx[name] = idx
    idx2name[idx] = name

sentence_count = 1
for subset in subsets:
    images_field = []
    annotations_field = []
    visited_images = set()

    # Each line of sents_<subset>_lc_nopunc.txt is "<vid_index>\t<caption>".
    txtfile = "msvd_dataset/txt_labels/sents_{}_lc_nopunc.txt".format(subset)
    with open(txtfile, 'r') as f:
        capinfos = f.readlines()

    for caption in capinfos:
        vidindex = caption.split('\t')[0]
        # Register each video in the "images" list only once.
        if vidindex not in visited_images:
            visited_images.add(vidindex)
            images_field.append(
                {
                    "id": int(vidindex.replace('vid', '')),
                    "file_name": idx2name[vidindex]
                }
            )
        # Every caption line becomes one annotation entry with a unique id.
        annotations_field.append(
            {
                "image_id": int(vidindex.replace('vid', '')),
                "id": sentence_count,
                "caption": caption.split('\t')[1].strip()
            }
        )
        sentence_count += 1

    data = {
        "images": images_field,
        "annotations": annotations_field
    }
    out_path = os.path.join(save_path, "caption_msvd_{}_cocostyle.json".format(subset))
    with open(out_path, "w") as f:
        json.dump(data, f)
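# Optional sanity check (a minimal sketch, assuming the files above were written
# successfully and that the "val" split is present): reload one of the generated
# COCO-style files and print a few basic statistics.
check_path = os.path.join(save_path, "caption_msvd_val_cocostyle.json")
with open(check_path) as f:
    check = json.load(f)
print("{} videos, {} captions".format(len(check["images"]), len(check["annotations"])))
print(check["annotations"][0])  # e.g. {"image_id": ..., "id": 1, "caption": "..."}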