"""Build an image download list from the GCC training TSV.

Reads ``Train_GCC-training.tsv`` (tab-separated; first column is the
caption, second column is the image URL), then writes
``train4download.txt`` with one line per URL in the form
``../train_image/<8-digit index>.jpg<TAB>"<url>"``.  Finally makes sure the
``../train_image`` directory exists.
"""
import os

# Index-aligned per-row data: captions[i] is the caption of row i split on
# single spaces, urls[i] is that row's image URL.
captions = []
urls = []

with open('Train_GCC-training.tsv') as fp:
    for line in fp:
        # Strip only the line terminator before splitting.  Slicing off the
        # last character unconditionally would truncate the URL when the
        # final line has no trailing newline (or when extra columns follow),
        # and would leave a stray '\r' on CRLF input.
        fields = line.rstrip('\r\n').split('\t')
        captions.append(fields[0].split(' '))
        urls.append(fields[1])

with open('train4download.txt', 'w') as fp:
    # The zero-padded position of the URL in the list becomes the image
    # file name.
    for cnt, url in enumerate(urls):
        fp.write("../train_image/{:08d}.jpg\t\"{}\"\n".format(cnt, url))

# Create the directory that the paths written above point into.
if not os.path.exists('../train_image'):
    os.makedirs('../train_image')