Spaces:

mrm8488
/

idefics-9b-ft-describe-diffusion-mj

Runtime error

App Files Community

Suggestion to simplify

by multimodalart HF Staff - opened Sep 25, 2023

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+23

-19

Files changed (7) hide show

.gitattributes +2 -0
0_6as5rHi0sgG4W2Tq.png +3 -0
1_sTXgMwDUW0pk-1yK4iHYFw.png +3 -0
app.py +15 -19
cute-photos-of-cats-in-grass-1593184777.jpg +0 -0
inZdRVn7eafZNvaVre2iW1a538.webp +0 -0
zoomout_2-1440x807.jpg +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+0_6as5rHi0sgG4W2Tq.png filter=lfs diff=lfs merge=lfs -text
+1_sTXgMwDUW0pk-1yK4iHYFw.png filter=lfs diff=lfs merge=lfs -text

0_6as5rHi0sgG4W2Tq.png ADDED Viewed

Git LFS Details

SHA256: 7429f34bb2a061c58f983697c8a06318e0d9a772e301ae0fee11b2fbd1b67de4
Pointer size: 132 Bytes
Size of remote file: 1.64 MB

1_sTXgMwDUW0pk-1yK4iHYFw.png ADDED Viewed

Git LFS Details

SHA256: e652ca3bead8f10de24efe201d3c60dc4d3a7a0d5196eee7091000e4818a9053
Pointer size: 132 Bytes
Size of remote file: 1.16 MB

app.py CHANGED Viewed

@@ -14,39 +14,35 @@ processor = AutoProcessor.from_pretrained(config.base_model_name_or_path)
 model = model.to(device)
 model.eval()
-def predict(prompt, image_url, image_pil=None, max_length=64):
-    if image_pil is not None:
-        image = image_pil
-    else:
-        image = processor.image_processor.fetch_images(image_url)
     prompts = [[image, prompt]]
     inputs = processor(prompts[0], return_tensors="pt").to(device)
     generated_ids = model.generate(**inputs, max_length=max_length)
     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return image, generated_text
 title = "Midjourney-like Image Captioning with IDEFICS"
 description = "Gradio Demo for generating *Midjourney* like captions (describe functionality) with **IDEFICS**"
 examples = [
-    ["Describe the following image:", "https://miro.medium.com/v2/resize:fit:0/1*sTXgMwDUW0pk-1yK4iHYFw.png", None, 64],
-    ["Describe the following image:", "https://miro.medium.com/v2/resize:fit:1400/0*6as5rHi0sgG4W2Tq.png", None, 64],
-    ["Describe the following image:", "https://cdn.arstechnica.net/wp-content/uploads/2023/06/zoomout_2-1440x807.jpg", None, 64],
-    ["Describe the following image:", "https://framerusercontent.com/images/inZdRVn7eafZNvaVre2iW1a538.png", None, 64],
-    ["Describe the following image:", "https://hips.hearstapps.com/hmg-prod/images/cute-photos-of-cats-in-grass-1593184777.jpg", None, 64]
 ]
 io = gr.Interface(fn=predict,
                   inputs=[
-                      gr.Textbox(label="Prompt", value="Describe the following image:", interactive=False),
-                      gr.Textbox(label="image URL", placeholder="Insert the URL of the image to be described"),
-                      gr.Image(label="or upload an image", type="pil"),
-                      gr.Slider(label="Max tokens", value=64, max=128, min=16, step=8)
                   ],
                   outputs=[
-                      gr.Image(type='pil', label="Image"),
                       gr.Textbox(label="IDEFICS Description")
                   ],
                   title=title, description=description, examples=examples,

 model = model.to(device)
 model.eval()
+#Pre-determined best prompt for this fine-tune
+prompt="Describe the following image:"
+#Max total sequence length (prompt tokens + generated tokens) passed to generate()
+max_length=64
+def predict(image):
     prompts = [[image, prompt]]
     inputs = processor(prompts[0], return_tensors="pt").to(device)
     generated_ids = model.generate(**inputs, max_length=max_length)
     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    generated_text = generated_text.replace(prompt,"")
+    return generated_text
 title = "Midjourney-like Image Captioning with IDEFICS"
 description = "Gradio Demo for generating *Midjourney* like captions (describe functionality) with **IDEFICS**"
 examples = [
+    ["1_sTXgMwDUW0pk-1yK4iHYFw.png"],
+    ["0_6as5rHi0sgG4W2Tq.png"],
+    ["zoomout_2-1440x807.jpg"],
+    ["inZdRVn7eafZNvaVre2iW1a538.webp"],
+    ["cute-photos-of-cats-in-grass-1593184777.jpg"]
 ]
 io = gr.Interface(fn=predict,
                   inputs=[
+                      gr.Image(label="Upload an image", type="pil"),
                   ],
                   outputs=[
                       gr.Textbox(label="IDEFICS Description")
                   ],
                   title=title, description=description, examples=examples,

cute-photos-of-cats-in-grass-1593184777.jpg ADDED Viewed

inZdRVn7eafZNvaVre2iW1a538.webp ADDED Viewed

zoomout_2-1440x807.jpg ADDED Viewed