Spaces:

Derendering
/

Model-Output-Playground

Running

App Files Files Community

Charlie Li committed on Feb 26, 2024

Commit

988d509

1 Parent(s): 015a301

pregenerate samples

Browse files

Files changed (3) hide show

app.py +36 -65
requirements.txt +1 -0
utils.py +30 -0

app.py CHANGED Viewed

@@ -4,6 +4,9 @@ import random
 import datetime
 from utils import *
 from pathlib import Path
 file_url = "https://storage.googleapis.com/derendering_model/derendering_supp.zip"
 filename = "derendering_supp.zip"
@@ -14,7 +17,7 @@ video_cache_dir.mkdir(exist_ok=True)
 download_file(file_url, filename)
 unzip_file(filename)
-print("Downloaded and unzipped the file.")
 diagram = get_svg_content("derendering_supp/derender_diagram.svg")
 org = get_svg_content("org/cor.svg")
@@ -51,43 +54,23 @@ sketches_base64_strings = {
     name: get_base64_encoded_gif(f"sketches/{name}") for name in sketches
 }
-datasets = ["IAM", "IMGUR5K", "HierText"]
-models = ["Small-i", "Large-i", "Small-p"]
-query_modes = ["d+t", "r+d", "vanilla"]
-def pregenerate_videos():
-    for Dataset in datasets:
-        for Model in models:
-            inkml_path_base = f"./derendering_supp/{Model.lower()}_{Dataset}_inkml"
-            for mode in query_modes:
-                path = f"./derendering_supp/{Dataset}/images_sample"
-                if not os.path.exists(path):
-                    continue
-                samples = os.listdir(path)
-                selected_samples = random.sample(samples, len(samples) // 3)
-                for name in tqdm(
-                    selected_samples, desc=f"Generating {Model}-{Dataset}-{mode} videos"
-                ):
-                    example_id = name.strip(".png")
-                    inkml_file = os.path.join(
-                        inkml_path_base, mode, f"{example_id}.inkml"
-                    )
-                    if not os.path.exists(inkml_file):
-                        continue
-                    video_filename = f"{Model}_{Dataset}_{mode}_{example_id}.mp4"
-                    video_filepath = video_cache_dir / video_filename
-                    if not video_filepath.exists():
-                        img_path = os.path.join(path, name)
-                        img = load_and_pad_img_dir(img_path)
-                        ink = inkml_to_ink(inkml_file)
-                        plot_ink_to_video(ink, str(video_filepath), input_image=img)
-pregenerate_videos()
-def demo(Dataset, Model, Output_Format):
     if Model == "Small-i":
         inkml_path = f"./derendering_supp/small-i_{Dataset}_inkml"
     elif Model == "Small-p":
@@ -104,8 +87,6 @@ def demo(Dataset, Model, Output_Format):
         Dataset,
         "and model:",
         Model,
-        "with output format:",
-        Output_Format,
     )
     path = f"./derendering_supp/{Dataset}/images_sample"
     samples = os.listdir(path)
@@ -132,13 +113,10 @@ def demo(Dataset, Model, Output_Format):
             video_filename = f"{Model}_{Dataset}_{mode}_{example_id}.mp4"
             video_filepath = video_cache_dir / video_filename
-            if Output_Format == "Image+Video":
-                if not video_filepath.exists():
-                    plot_ink_to_video(ink, str(video_filepath), input_image=img)
-                    print("Cached video at:", video_filepath)
-                video_outputs.append("./" + str(video_filepath))
-            else:
-                video_outputs.append(None)
             fig, ax = plt.subplots()
             ax.axis("off")
@@ -152,13 +130,13 @@ def demo(Dataset, Model, Output_Format):
     return (
         img,
         text_outputs[0],
-        img_outputs[0],
         video_outputs[0],
         text_outputs[1],
-        img_outputs[1],
         video_outputs[1],
         text_outputs[2],
-        img_outputs[2],
         video_outputs[2],
     )
@@ -182,7 +160,6 @@ with gr.Blocks() as app:
         """
         🚀 This demo highlights the capabilities of Small-i, Small-p, and Large-i across three public datasets (word-level, with 100 random samples each).<br>
         🎲 Select a model variant and dataset (IAM, IMGUR5K, HierText), then hit 'Sample' to view a randomly selected input alongside its corresponding outputs for all three types of inference.<br>
-        🖼️ Output options: Image or Image+Video. Opting for images yields quicker results, adding videos offers a dynamic view of the digital ink writing process.<br>
         """
     )
     with gr.Row():
@@ -194,15 +171,12 @@ with gr.Blocks() as app:
             label="InkSight Model Variant",
             value="Small-i",
         )
-        output_format = gr.Dropdown(
-            ["Image", "Image+Video"], label="Output Format", value="Image"
-        )
         im = gr.Image(label="Input Image")
-    with gr.Row():
-        d_t_img = gr.Image(label="Derender with Text")
-        r_d_img = gr.Image(label="Recognize and Derender")
-        vanilla_img = gr.Image(label="Vanilla")
     with gr.Row():
         d_t_text = gr.Textbox(
@@ -210,9 +184,6 @@ with gr.Blocks() as app:
         )
         r_d_text = gr.Textbox(label="Recognition from the model", interactive=False)
         vanilla_text = gr.Textbox(label="Vanilla", interactive=False)
-    gr.Markdown(
-        "To visualize the writing process in video, select *Output format* as **Image+Video**."
-    )
     with gr.Row():
         d_t_vid = gr.Video(
             label="Derender with Text (Click to stop/play)", autoplay=True
@@ -227,17 +198,17 @@ with gr.Blocks() as app:
     btn_sub.click(
         fn=demo,
-        inputs=[dataset, model, output_format],
         outputs=[
             im,
             d_t_text,
-            d_t_img,
             d_t_vid,
             r_d_text,
-            r_d_img,
             r_d_vid,
             vanilla_text,
-            vanilla_img,
             vanilla_vid,
         ],
     )

 import datetime
 from utils import *
 from pathlib import Path
+import gdown
+pre_generate = False
 file_url = "https://storage.googleapis.com/derendering_model/derendering_supp.zip"
 filename = "derendering_supp.zip"
 download_file(file_url, filename)
 unzip_file(filename)
+print("Downloaded and unzipped the inks.")
 diagram = get_svg_content("derendering_supp/derender_diagram.svg")
 org = get_svg_content("org/cor.svg")
     name: get_base64_encoded_gif(f"sketches/{name}") for name in sketches
 }
+if not pre_generate:
+    print("Downloading pre-generated videos from google drive.")
+    # Download from gdown 1oT6zw1EbWg3lavBMXsL28piULGNmqJzA
+    gdown.download(
+        "https://drive.google.com/uc?id=1oT6zw1EbWg3lavBMXsL28piULGNmqJzA",
+        str(video_cache_dir / "gdrive_file.zip"),
+        quiet=False,
+    )
+    # Unzip the file to video_cache_dir
+    unzip_file(str(video_cache_dir / "gdrive_file.zip"))
+else:
+    pregenerate_videos(video_cache_dir=video_cache_dir)
+    print("Videos cached.")
+def demo(Dataset, Model):
     if Model == "Small-i":
         inkml_path = f"./derendering_supp/small-i_{Dataset}_inkml"
     elif Model == "Small-p":
         Dataset,
         "and model:",
         Model,
     )
     path = f"./derendering_supp/{Dataset}/images_sample"
     samples = os.listdir(path)
             video_filename = f"{Model}_{Dataset}_{mode}_{example_id}.mp4"
             video_filepath = video_cache_dir / video_filename
+            if not video_filepath.exists():
+                plot_ink_to_video(ink, str(video_filepath), input_image=img)
+                print("Cached video at:", video_filepath)
+            video_outputs.append("./" + str(video_filepath))
             fig, ax = plt.subplots()
             ax.axis("off")
     return (
         img,
         text_outputs[0],
+        # img_outputs[0],
         video_outputs[0],
         text_outputs[1],
+        # img_outputs[1],
         video_outputs[1],
         text_outputs[2],
+        # img_outputs[2],
         video_outputs[2],
     )
         """
         🚀 This demo highlights the capabilities of Small-i, Small-p, and Large-i across three public datasets (word-level, with 100 random samples each).<br>
         🎲 Select a model variant and dataset (IAM, IMGUR5K, HierText), then hit 'Sample' to view a randomly selected input alongside its corresponding outputs for all three types of inference.<br>
         """
     )
     with gr.Row():
             label="InkSight Model Variant",
             value="Small-i",
         )
         im = gr.Image(label="Input Image")
+    # with gr.Row():
+    #     d_t_img = gr.Image(label="Derender with Text")
+    #     r_d_img = gr.Image(label="Recognize and Derender")
+    #     vanilla_img = gr.Image(label="Vanilla")
     with gr.Row():
         d_t_text = gr.Textbox(
         )
         r_d_text = gr.Textbox(label="Recognition from the model", interactive=False)
         vanilla_text = gr.Textbox(label="Vanilla", interactive=False)
     with gr.Row():
         d_t_vid = gr.Video(
             label="Derender with Text (Click to stop/play)", autoplay=True
     btn_sub.click(
         fn=demo,
+        inputs=[dataset, model],
         outputs=[
             im,
             d_t_text,
+            # d_t_img,
             d_t_vid,
             r_d_text,
+            # r_d_img,
             r_d_vid,
             vanilla_text,
+            # vanilla_img,
             vanilla_vid,
         ],
     )

requirements.txt CHANGED Viewed

@@ -3,3 +3,4 @@ numpy
 matplotlib
 Pillow
 numpy

 matplotlib
 Pillow
 numpy
+gdown

utils.py CHANGED Viewed

@@ -240,3 +240,33 @@ def parse_inkml_annotations(inkml_file):
         annotation_dict[annotation_type] = annotation_text
     return annotation_dict

         annotation_dict[annotation_type] = annotation_text
     return annotation_dict
+def pregenerate_videos(video_cache_dir):
+    datasets = ["IAM", "IMGUR5K", "HierText"]
+    models = ["Small-i", "Large-i", "Small-p"]
+    query_modes = ["d+t", "r+d", "vanilla"]
+    for Dataset in datasets:
+        for Model in models:
+            inkml_path_base = f"./derendering_supp/{Model.lower()}_{Dataset}_inkml"
+            for mode in query_modes:
+                path = f"./derendering_supp/{Dataset}/images_sample"
+                if not os.path.exists(path):
+                    continue
+                samples = os.listdir(path)
+                for name in tqdm(
+                    samples, desc=f"Generating {Model}-{Dataset}-{mode} videos"
+                ):
+                    example_id = name.strip(".png")
+                    inkml_file = os.path.join(
+                        inkml_path_base, mode, f"{example_id}.inkml"
+                    )
+                    if not os.path.exists(inkml_file):
+                        continue
+                    video_filename = f"{Model}_{Dataset}_{mode}_{example_id}.mp4"
+                    video_filepath = video_cache_dir / video_filename
+                    if not video_filepath.exists():
+                        img_path = os.path.join(path, name)
+                        img = load_and_pad_img_dir(img_path)
+                        ink = inkml_to_ink(inkml_file)
+                        plot_ink_to_video(ink, str(video_filepath), input_image=img)