Spaces:

waleko
/

TikZ-Assistant

Configuration error

App Files Files Community

waleko committed on May 24, 2024

Commit

755b6ea

1 Parent(s): 6786a25

change branding

Browse files

Files changed (4) hide show

pix2tikz/config.yaml +67 -0
pix2tikz/mixed_e362_step201.pth +3 -0
requirements.txt +2 -0
webui.py +38 -2

pix2tikz/config.yaml ADDED Viewed

	@@ -0,0 +1,67 @@

+backbone_layers:
+- 2
+- 3
+- 7
+batchsize: 12
+betas:
+- 0.9
+- 0.999
+bos_token: 1
+channels: 1
+config: colab.yaml
+data: dataset/data/simple_train.pkl
+debug: false
+decoder_args:
+  attn_on_attn: true
+  cross_attend: true
+  ff_glu: true
+  rel_pos_bias: false
+  use_scalenorm: false
+device: cuda:0
+dim: 256
+encoder_depth: 4
+encoder_structure: hybrid
+eos_token: 2
+epoch: 429
+epochs: 500
+gamma: 0.9995
+gpu_devices:
+- 0
+heads: 8
+id: v9h46w6a
+load_chkpt: /home/coder/project/LaTeX-OCR/weights.pth
+lr: 0.001
+lr_step: 30
+max_dimensions:
+- 336
+- 336
+max_height: 336
+max_seq_len: 2048
+max_width: 336
+min_dimensions:
+- 32
+- 32
+min_height: 32
+min_width: 32
+model_path: simple_checkpoints
+name: mixed
+no_cuda: false
+num_layers: 4
+num_tokens: 8000
+optimizer: Adam
+output_path: simple_outputs
+pad: false
+pad_token: 0
+patch_size: 16
+resume: false
+sample_freq: 201
+save_freq: 1
+scheduler: StepLR
+seed: 42
+temperature: 0.2
+test_samples: 5
+testbatchsize: 8
+tokenizer: dataset/tokenizer.json
+valbatches: 1
+valdata: dataset/data/simple_val.pkl
+wandb: true

pix2tikz/mixed_e362_step201.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2eaae4b58f528da4eb5090f6addc717f1b135509f7e763100b90468ad9bfe1a8
+size 103619970

requirements.txt CHANGED Viewed

@@ -7,3 +7,5 @@ transformers
 gradio
 accelerate
 bitsandbytes

 gradio
 accelerate
 bitsandbytes
+altair<5
+pix2tex[api]

webui.py CHANGED Viewed

@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 from argparse import ArgumentParser
 from functools import lru_cache
@@ -15,10 +16,16 @@ import fitz
 import gradio as gr
 from transformers import TextIteratorStreamer, pipeline, ImageToTextPipeline, AutoModelForPreTraining, AutoProcessor
 from infer import TikzDocument, TikzGenerator
 # assets = files(__package__) / "assets" if __package__ else files("assets") / "."
 models = {
     "llava-1.5-7b-hf": "waleko/TikZ-llava-1.5-7b"
 }
@@ -43,6 +50,24 @@ def convert_to_svg(pdf):
     return doc[0].get_svg_image()
 def inference(
     model_name: str,
     image_dict: dict,
@@ -52,6 +77,11 @@ def inference(
     expand_to_square: bool,
 ):
     try:
         generate = TikzGenerator(
             cached_load(model_name, device_map="auto"),
             temperature=temperature,
@@ -66,7 +96,7 @@ def inference(
         )
         thread = ThreadPool(processes=1)
-        async_result = thread.apply_async(generate, kwds=dict(image=image_dict['composite'], streamer=streamer))
         generated_text = ""
         for new_text in streamer:
             generated_text += new_text
@@ -171,7 +201,13 @@ def build_ui(model=list(models)[0], lock=False, rasterize=False, force_light=Fal
                     with gr.TabItem(label:="Compiled Image", id=1):
                         result_image = gr.Image(label=label, show_label=False, show_share_button=rasterize)
                     clear_btn.add([tikz_code, result_image])
-        gr.Examples(examples=[["https://waleko.github.io/data/image.jpg"]], inputs=[image])
         events = list()
         finished = gr.Textbox(visible=False) # hack to cancel compile on canceled inference

 #!/usr/bin/env python
+import re
 from argparse import ArgumentParser
 from functools import lru_cache
 import gradio as gr
 from transformers import TextIteratorStreamer, pipeline, ImageToTextPipeline, AutoModelForPreTraining, AutoProcessor
+import os
+from pix2tex.cli import LatexOCR
+from munch import Munch
 from infer import TikzDocument, TikzGenerator
 # assets = files(__package__) / "assets" if __package__ else files("assets") / "."
 models = {
+    "pix2tikz": "pix2tikz/mixed_e362_step201.pth",
     "llava-1.5-7b-hf": "waleko/TikZ-llava-1.5-7b"
 }
     return doc[0].get_svg_image()
+def pix2tikz(
+    checkpoint: str,
+    image: Image.Image,
+    temperature: float,
+    _: float,
+    __: int,
+    ___: bool,
+):
+    args = Munch({'config': os.path.realpath(os.path.join(os.path.dirname(__file__), 'pix2tikz/config.yaml')),
+                  'checkpoint': os.path.realpath(os.path.join(os.path.dirname(__file__), checkpoint)),
+                  'no_resize': False,
+                  'temperature': temperature})
+    model = LatexOCR(args)
+    res = model(image)
+    text = re.sub(r'\\n(?=\W)', '\n', res)
+    return text, None, True
 def inference(
     model_name: str,
     image_dict: dict,
     expand_to_square: bool,
 ):
     try:
+        image = image_dict['composite']
+        if model_name == "pix2tikz":
+            yield pix2tikz(model_name, image, temperature, top_p, top_k, expand_to_square)
+            return
         generate = TikzGenerator(
             cached_load(model_name, device_map="auto"),
             temperature=temperature,
         )
         thread = ThreadPool(processes=1)
+        async_result = thread.apply_async(generate, kwds=dict(image=image, streamer=streamer))
         generated_text = ""
         for new_text in streamer:
             generated_text += new_text
                     with gr.TabItem(label:="Compiled Image", id=1):
                         result_image = gr.Image(label=label, show_label=False, show_share_button=rasterize)
                     clear_btn.add([tikz_code, result_image])
+        gr.Examples(examples=[
+            ["https://waleko.github.io/data/image.jpg",
+             "https://waleko.github.io/data/image2.jpg",
+             "https://waleko.github.io/data/image3.jpg"
+             "https://waleko.github.io/data/image4.jpg",
+             ]
+        ], inputs=[image])
         events = list()
         finished = gr.Textbox(visible=False) # hack to cancel compile on canceled inference