momergul committed · Commit d1a5104 · 1 Parent(s): 72c2e5e
Pushed all model initialization to the main app
app.py CHANGED
@@ -8,7 +8,45 @@ from typing import List, Tuple
 
 from config_generator import generate_complete_game
 from dataset import get_processor, joint_speaker_input, joint_listener_input, get_index_to_token
-
+
+import torch
+import transformers
+from transformers import Idefics2ForConditionalGeneration
+from peft import LoraConfig, get_peft_model
+from joint_inference import IdeficsJointInferenceModel
+
+# Initialize the model globally
+repo = 'lil-lab/cogen'
+checkpoint = "HuggingFaceM4/idefics2-8b"
+model = Idefics2ForConditionalGeneration.from_pretrained(checkpoint, torch_dtype=torch.bfloat16)
+
+target_modules=r'(.*(vision_model|modality_projection|perceiver_resampler).*(out_proj|fc1|fc2|down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$)|(.*(k_proj|q_proj|v_proj).*$)'
+lora_config = LoraConfig(
+    r=16, lora_alpha=8,
+    lora_dropout=0.1,
+    target_modules=target_modules,
+    init_lora_weights="gaussian"
+)
+model = get_peft_model(model, lora_config, adapter_name="initial")
+model.load_adapter(repo, "initial", revision="r0_full")
+
+# Add other adapter
+new_targets = set()
+for n, p in model.named_parameters():
+    if 'lora' in n:
+        new_targets.add(n[17:n.find('lora')-1])
+new_targets = list(new_targets)
+
+lora_config = LoraConfig(
+    r=16, lora_alpha=8,
+    lora_dropout=0.1,
+    target_modules=new_targets,
+    init_lora_weights="gaussian"
+)
+model.add_adapter('final', lora_config)
+model.load_adapter(repo, "final", revision="r3_full")
+model = IdeficsJointInferenceModel(0.5, 0, model=model).cuda()
+model.eval()
 
 css="""
 .radio-group .wrap {
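The adapter-target derivation in the hunk above is terse, so a note on what it does: PEFT prefixes every injected parameter with `base_model.model.` (17 characters) and appends `.lora_A.<adapter>.weight` or `.lora_B.<adapter>.weight`, so slicing both off recovers the module path, and the `final` adapter is attached to exactly the modules the `initial` adapter already wrapped. A minimal sketch of the slicing, using an illustrative parameter name rather than one dumped from this model:

```python
# Illustrative PEFT parameter name; the slice bounds mirror the loop above.
n = "base_model.model.model.layers.0.self_attn.q_proj.lora_A.initial.weight"

prefix = "base_model.model."           # len(prefix) == 17, hence n[17:]
target = n[17:n.find("lora") - 1]      # drop prefix and trailing ".lora_A..."
assert target == "model.layers.0.self_attn.q_proj"
```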
@@ -70,9 +108,8 @@ def get_model_response(
 
 @spaces.GPU(duration=20)
 def get_speaker_response(model, images, input_tokens, attn_mask, image_attn_mask, label, image_paths, processor, img_dir, index_to_token, adapter_name):
-    model.model.set_adapter(adapter_name)
-
-    model = model.cuda()
+    if model.model.active_adapter != adapter_name:
+        model.model.set_adapter(adapter_name)
     with torch.no_grad():
         captions, _, _, _, _ = model.generate(
             images.cuda(), input_tokens.cuda(), attn_mask.cuda(), image_attn_mask.cuda(), label.cuda(),
@@ -85,9 +122,8 @@ def get_speaker_response(model, images, input_tokens, attn_mask, image_attn_mask
 @spaces.GPU(duration=20)
 def get_listener_response(model, images, l_input_tokens, l_attn_mask, l_image_attn_mask, index_to_token,
                           s_input_tokens, s_attn_mask, s_image_attn_mask, s_target_mask, s_target_label, image_paths, adapter_name):
-    model.model.set_adapter(adapter_name)
-
-    model = model.cuda()
+    if model.model.active_adapter != adapter_name:
+        model.model.set_adapter(adapter_name)
     with torch.no_grad():
         _, _, joint_log_probs = model.comprehension_side([
             images.cuda(), l_input_tokens.cuda(), l_attn_mask.cuda(), l_image_attn_mask.cuda(), index_to_token,
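Both GPU handlers now use the same guard: switch adapters only when the requested one is not already active, since `set_adapter` re-flags every LoRA layer and is wasted work when the adapter has not changed. A self-contained sketch of the pattern on a toy module (the module, config, and adapter names are illustrative, not taken from the Space):

```python
import torch.nn as nn
from peft import LoraConfig, get_peft_model

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(8, 8)

    def forward(self, x):
        return self.proj(x)

config = LoraConfig(r=4, lora_alpha=4, target_modules=["proj"])
model = get_peft_model(Toy(), config, adapter_name="initial")
model.add_adapter("final", config)

def ensure_adapter(peft_model, adapter_name):
    # set_adapter walks every LoRA layer, so skip it when already active.
    if peft_model.active_adapter != adapter_name:
        peft_model.set_adapter(adapter_name)

ensure_adapter(model, "final")  # switches from "initial" to "final"
ensure_adapter(model, "final")  # no-op on the second call
```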
@@ -119,7 +155,7 @@ def initialize_interaction(model_iteration):
 
     return new_history
 
-def progress_game(user_message, model, processor, index_to_token, current_state):
+def progress_game(user_message, processor, index_to_token, current_state):
     # First get the game state
     turn = current_state['turn']
     image_role_pairs = current_state['image_role_pairs']
@@ -257,7 +293,6 @@ def create_app():
         )
 
         send_btn = gr.Button("Send", interactive=False)
-        model = get_model()
         processor = get_processor()
         index_to_token = get_index_to_token()
 
@@ -281,7 +316,6 @@ def create_app():
             gr.update(interactive=not human_listener), gr.update(interactive=human_listener), gr.update(interactive=True), gr.update(interactive=False), current_history
 
         def send_message(message, radio_choice, current_state):
-            nonlocal model
             nonlocal processor
             nonlocal index_to_token
 
@@ -292,7 +326,7 @@ def create_app():
 
             # Regular game progress
             user_output = message if radio_choice is None else radio_choice
-            images, conversation, role, turn, acc_message, current_state = progress_game(user_output, model, processor, index_to_token, current_state)
+            images, conversation, role, turn, acc_message, current_state = progress_game(user_output, processor, index_to_token, current_state)
             human_listener = role == "Listener"
             return [(f"tangram_pngs/{img}", f"Image {i+1}") for i, img in enumerate(images)], "\n".join(conversation), role, turn, \
                 acc_message, gr.update(interactive=not human_listener, value=""), gr.update(interactive=human_listener, value=None), \
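Taken together, the hunks implement the commit message: model construction, LoRA wiring, and the `.cuda()` move now happen once at module import, leaving the `@spaces.GPU(duration=20)` handlers with pure inference so each call fits its short GPU window. A minimal sketch of that layout on a ZeroGPU-style Space (the model and handler below are illustrative assumptions, not the Space's code):

```python
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Heavy work happens once, at import time, outside any GPU window.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2").cuda()
model.eval()

@spaces.GPU(duration=20)
def respond(prompt: str) -> str:
    # Only inference runs inside the borrowed GPU window.
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=20)
    return tokenizer.decode(out[0], skip_special_tokens=True)
```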