yonishafir committed
Commit d929725 · 1 Parent(s): e834bcc
Files changed (3):
  1. .gitignore +2 -0
  2. app.py +23 -91
  3. requirements.txt +8 -7
.gitignore ADDED
@@ -0,0 +1,2 @@
+ */__pycache__/
+ *.pyc
app.py CHANGED
@@ -3,20 +3,16 @@ import os
  import random
  import gradio as gr
 
- 
  import cv2
  import torch
  import numpy as np
  from PIL import Image
 
  from transformers import CLIPVisionModelWithProjection
- from diffusers.utils import load_image
  from diffusers.models import ControlNetModel
- # from diffusers.image_processor import IPAdapterMaskProcessor
+ 
  from insightface.app import FaceAnalysis
- # import sys
- # import glob
- # import os
+ 
  import io
  import spaces
 
@@ -25,8 +21,8 @@ from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPip
  import pandas as pd
  import json
  import requests
- from PIL import Image
  from io import BytesIO
+ from huggingface_hub import hf_hub_download
 
 
  def resize_img(input_image, max_side=1280, min_side=1024, size=None,
@@ -128,25 +124,6 @@ def calc_emb_cropped(image, app):
 
      return cropped_face_image
 
- def process_benchmark_csv(banchmark_csv_path):
-     # Reading the first CSV file into a DataFrame
-     df = pd.read_csv(banchmark_csv_path)
-
-     # Drop any unnamed columns
-     df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
-
-     # Drop columns with all NaN values
-     df.dropna(axis=1, how='all', inplace=True)
-
-     # Drop rows with all NaN values
-     df.dropna(axis=0, how='all', inplace=True)
-
-     df = df.loc[df['High resolution'] == 1]
-
-     df.reset_index(drop=True, inplace=True)
-
-     return df
-
  def make_canny_condition(image, min_val=100, max_val=200, w_bilateral=True):
      if w_bilateral:
          image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
@@ -167,50 +144,45 @@ default_negative_prompt = "Logo,Watermark,Text,Ugly,Morbid,Extra fingers,Poorly
  CURRENT_LORA_NAME = None
 
  # Load face detection and recognition package
- app = FaceAnalysis(name='antelopev2', root='./', providers=['CPUExecutionProvider'])
+ app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
  app.prepare(ctx_id=0, det_size=(640, 640))
 
 
  # download checkpoints
- from huggingface_hub import hf_hub_download
 
  hf_hub_download(repo_id="briaai/ID_preservation_2.3", filename="controlnet/config.json", local_dir="./checkpoints")
  hf_hub_download(repo_id="briaai/ID_preservation_2.3", filename="controlnet/diffusion_pytorch_model.safetensors", local_dir="./checkpoints")
  hf_hub_download(repo_id="briaai/ID_preservation_2.3", filename="ip-adapter.bin", local_dir="./checkpoints")
  hf_hub_download(repo_id="briaai/ID_preservation_2.3", filename="image_encoder/pytorch_model.bin", local_dir="./checkpoints")
  hf_hub_download(repo_id="briaai/ID_preservation_2.3", filename="image_encoder/config.json", local_dir="./checkpoints")
-
+ # Download Lora weights
  hf_hub_download(repo_id="briaai/ID_preservation_2.3", filename="LoRAs/3D_illustration/pytorch_lora_weights.safetensors", local_dir="./checkpoints")
  hf_hub_download(repo_id="briaai/ID_preservation_2.3", filename="LoRAs/Avatar_internlm/pytorch_lora_weights.safetensors", local_dir="./checkpoints")
  hf_hub_download(repo_id="briaai/ID_preservation_2.3", filename="LoRAs/Characters/pytorch_lora_weights.safetensors", local_dir="./checkpoints")
  hf_hub_download(repo_id="briaai/ID_preservation_2.3", filename="LoRAs/Storyboards/pytorch_lora_weights.safetensors", local_dir="./checkpoints")
  hf_hub_download(repo_id="briaai/ID_preservation_2.3", filename="LoRAs/Vangogh_Vanilla/pytorch_lora_weights.safetensors", local_dir="./checkpoints")
 
+ device = "cuda" if torch.cuda.is_available() else "cpu"
 
- # base_dir = "./instantID_ckpt/checkpoint_174000"
- # face_adapter = f'{base_dir}/pytorch_model.bin'
- # controlnet_path = f'{base_dir}/controlnet'
+ # ckpts paths
  face_adapter = f"./checkpoints/ip-adapter.bin"
  controlnet_path = f"./checkpoints/controlnet"
-
-
+ lora_base_path = "./checkpoints/LoRAs"
  base_model_path = f'briaai/BRIA-2.3'
  resolution = 1024
 
+ # Load ControlNet models
  controlnet_lnmks = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)
-
  controlnet_canny = ControlNetModel.from_pretrained("briaai/BRIA-2.3-ControlNet-Canny",
-                                                    torch_dtype=torch.float16)
-
+                                                    torch_dtype=torch.float16)
+
  controlnet = [controlnet_lnmks, controlnet_canny]
 
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
  image_encoder = CLIPVisionModelWithProjection.from_pretrained(
      f"./checkpoints/image_encoder",
      torch_dtype=torch.float16,
  )
-
  pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
      base_model_path,
      controlnet=controlnet,
@@ -220,14 +192,13 @@ pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
 
  pipe = pipe.to(device)
 
- use_native_ip_adapter = True
- pipe.use_native_ip_adapter=use_native_ip_adapter
+ # use_native_ip_adapter = True
+ pipe.use_native_ip_adapter=True
 
  pipe.load_ip_adapter_instantid(face_adapter)
 
  clip_embeds=None
 
-
  Loras_dict = {
      "":"",
      "Vangogh_Vanilla": "bold, dramatic brush strokes, vibrant colors, swirling patterns, intense, emotionally charged paintings of",
@@ -239,8 +210,6 @@ Loras_dict = {
 
  lora_names = Loras_dict.keys()
 
- lora_base_path = "./checkpoints/LoRAs"
-
  def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
      if randomize_seed:
          seed = random.randint(0, 99999999)
@@ -254,13 +223,11 @@ def generate_image(image_path, prompt, num_steps, guidance_scale, seed, num_imag
      if image_path is None:
          raise gr.Error(f"Cannot find any input face image! Please upload a face image.")
 
-     # img = np.array(Image.open(image_path))[:,:,::-1]
      img = Image.open(image_path)
 
-     face_image_orig = img #Image.open(BytesIO(response.content))
+     face_image_orig = img
      face_image_cropped = calc_emb_cropped(face_image_orig, app)
      face_image = resize_img(face_image_cropped, max_side=resolution, min_side=resolution)
-     # face_image_padded = resize_img(face_image_cropped, max_side=resolution, min_side=resolution, pad_to_max_side=True)
      face_info = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
      face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1] # only use the maximum face
      face_emb = face_info['embedding']
@@ -305,19 +272,6 @@ def generate_image(image_path, prompt, num_steps, guidance_scale, seed, num_imag
 
      generator = torch.Generator(device=device).manual_seed(seed)
 
-     # if lora_name != "":
-     #     lora_path = os.path.join(lora_base_path, lora_name, "pytorch_lora_weights.safetensors")
-     #     pipe.load_lora_weights(lora_path)
-     #     pipe.fuse_lora(lora_scale)
-     #     pipe.enable_lora()
-
-     #     lora_prefix = Loras_dict[lora_name]
-
-     #     prompt = f"{lora_prefix} {prompt}"
-
-     #     print("Using LoRA: ", lora_name)
-
-
      if lora_name != CURRENT_LORA_NAME: # Check if LoRA needs to be changed
          if CURRENT_LORA_NAME is not None: # If a LoRA is already loaded, unload it
              pipe.disable_lora()
@@ -326,12 +280,13 @@ def generate_image(image_path, prompt, num_steps, guidance_scale, seed, num_imag
              print(f"Unloaded LoRA: {CURRENT_LORA_NAME}")
 
          if lora_name != "": # Load the new LoRA if specified
+             # pipe.enable_model_cpu_offload()
              lora_path = os.path.join(lora_base_path, lora_name, "pytorch_lora_weights.safetensors")
              pipe.load_lora_weights(lora_path)
              pipe.fuse_lora(lora_scale)
              pipe.enable_lora()
 
-             lora_prefix = Loras_dict[lora_name]
+             # lora_prefix = Loras_dict[lora_name]
 
              print(f"Loaded new LoRA: {lora_name}")
 
@@ -339,7 +294,7 @@ def generate_image(image_path, prompt, num_steps, guidance_scale, seed, num_imag
 
          CURRENT_LORA_NAME = lora_name
 
      if lora_name != "":
-         full_prompt = f"{lora_prefix} {prompt}"
+         full_prompt = f"{Loras_dict[lora_name]} + " " + {prompt}"
      else:
          full_prompt = prompt
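The change above makes generate_image hot-swap LoRAs between calls: the previously fused LoRA is unloaded, then the requested one is loaded, fused, and enabled, with the module-level CURRENT_LORA_NAME tracking what is active. Below is a minimal sketch of that pattern as a hypothetical helper (switch_lora is not part of app.py); it uses only the pipeline calls that appear in this diff, and assumes pipe, lora_base_path, and lora_scale are defined as in app.py.

# Sketch of the LoRA hot-swap pattern used by generate_image (assumes `pipe` and
# `lora_base_path` exist as in app.py; `switch_lora` itself is hypothetical).
import os

CURRENT_LORA_NAME = None

def switch_lora(pipe, lora_name: str, lora_scale: float, lora_base_path: str) -> None:
    global CURRENT_LORA_NAME
    if lora_name == CURRENT_LORA_NAME:
        return  # requested LoRA is already active; nothing to do

    if CURRENT_LORA_NAME is not None:
        # Unload whatever LoRA is currently fused into the pipeline.
        pipe.disable_lora()
        pipe.unfuse_lora()
        pipe.unload_lora_weights()

    if lora_name != "":
        # Load, fuse, and enable the newly requested LoRA.
        lora_path = os.path.join(lora_base_path, lora_name, "pytorch_lora_weights.safetensors")
        pipe.load_lora_weights(lora_path)
        pipe.fuse_lora(lora_scale)
        pipe.enable_lora()

    CURRENT_LORA_NAME = lora_name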
 
@@ -348,9 +303,9 @@ def generate_image(image_path, prompt, num_steps, guidance_scale, seed, num_imag
          prompt = full_prompt,
          negative_prompt = default_negative_prompt,
          image_embeds = face_emb,
-         image = [face_kps, canny_img] if canny_scale>0.0 else face_kps,
+         image = [face_kps, canny_img] if canny_scale > 0.0 else face_kps,
          controlnet_conditioning_scale = [kps_scale, canny_scale] if canny_scale>0.0 else kps_scale,
-         control_guidance_end = [1.0, 1.0] if canny_scale>0.0 else 1.0,
+         # control_guidance_end = [1.0, 1.0] if canny_scale>0.0 else 1.0,
          ip_adapter_scale = ip_adapter_scale,
          num_inference_steps = num_steps,
          guidance_scale = guidance_scale,
@@ -358,12 +313,8 @@ def generate_image(image_path, prompt, num_steps, guidance_scale, seed, num_imag
          visual_prompt_embds = clip_embeds,
          cross_attention_kwargs = None,
          num_images_per_prompt=num_images,
-     ).images #[0]
+     ).images
 
-     # if lora_name != "":
-     #     pipe.disable_lora()
-     #     pipe.unfuse_lora()
-     #     pipe.unload_lora_weights()
 
      gc.collect()
      torch.cuda.empty_cache()
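For readers of the hunk above: with both ControlNets attached (facial landmarks plus canny edges), the pipeline call passes list-valued image and conditioning-scale arguments, and falls back to scalars when canny_scale is 0. A short, abbreviated sketch of that selection logic follows; variable names are taken from generate_image, and the keyword arguments shown are a subset of the full call in the diff.

# Abbreviated sketch of the conditional ControlNet arguments in generate_image.
# `face_kps`, `canny_img`, `kps_scale`, `canny_scale`, and the other names are
# assumed to exist as in app.py.
if canny_scale > 0.0:
    control_images = [face_kps, canny_img]          # one control image per ControlNet
    conditioning_scales = [kps_scale, canny_scale]  # matching per-ControlNet scales
else:
    control_images = face_kps                       # landmarks ControlNet only
    conditioning_scales = kps_scale

images = pipe(
    prompt=full_prompt,
    negative_prompt=default_negative_prompt,
    image_embeds=face_emb,
    image=control_images,
    controlnet_conditioning_scale=conditioning_scales,
    ip_adapter_scale=ip_adapter_scale,
    num_inference_steps=num_steps,
    guidance_scale=guidance_scale,
    generator=generator,
    num_images_per_prompt=num_images,
).images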
@@ -412,12 +363,7 @@ with gr.Blocks(css=css) as demo:
          lora_name = gr.Dropdown(choices=lora_names, label="LoRA", value="", info="Select a LoRA name from the list, not selecting any will disable LoRA.")
 
          submit = gr.Button("Submit", variant="primary")
-
-         # use_lcm = gr.Checkbox(
-         #     label="Use LCM-LoRA to accelerate sampling", value=False,
-         #     info="Reduces sampling steps significantly, but may decrease quality.",
-         # )
-
+
          with gr.Accordion(open=False, label="Advanced Options"):
              num_steps = gr.Slider(
                  label="Number of sample steps",
@@ -436,7 +382,7 @@
              num_images = gr.Slider(
                  label="Number of output images",
                  minimum=1,
-                 maximum=3,
+                 maximum=2,
                  step=1,
                  value=1,
              )
@@ -491,22 +437,8 @@ with gr.Blocks(css=css) as demo:
          inputs=[img_file, prompt, num_steps, guidance_scale, seed, num_images, ip_adapter_scale, kps_scale, canny_scale, lora_name, lora_scale],
          outputs=[gallery]
      )
-
-     # use_lcm.input(
-     #     fn=toggle_lcm_ui,
-     #     inputs=[use_lcm],
-     #     outputs=[num_steps, guidance_scale],
-     #     queue=False,
-     # )
-
-     # gr.Examples(
-     #     examples=get_example(),
-     #     inputs=[img_file],
-     #     run_on_click=True,
-     #     fn=run_example,
-     #     outputs=[gallery],
-     # )
 
      gr.Markdown(Footer)
 
+ # demo.launch(server_port=7865)
  demo.launch()
 
requirements.txt CHANGED
@@ -1,15 +1,16 @@
  --extra-index-url https://download.pytorch.org/whl/cu121
- torch
- torchvision
- transformers
- accelerate
+ torch==2.4.0
+ torchvision==0.19.0
+ transformers==4.43.4
+ accelerate==0.33.0
  ftfy
  numpy
  matplotlib
  uuid
  opencv-python
- diffusers==0.26.0
+ diffusers==0.29.2
  spaces
- insightface==0.7.3
+ insightface==0.7.3
- onnxruntime
+ onnx==1.16.2
+ onnxruntime==1.18.1
  peft==0.12.0
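Because the commit pins exact versions, a quick sanity check of the installed environment can compare what is installed against the pins above. The snippet below simply mirrors that list; package names and versions are taken from requirements.txt as updated here, and the check itself uses only the Python standard library.

# Sketch: verify that installed package versions match the pins in requirements.txt.
from importlib.metadata import PackageNotFoundError, version

PINNED = {
    "torch": "2.4.0",
    "torchvision": "0.19.0",
    "transformers": "4.43.4",
    "accelerate": "0.33.0",
    "diffusers": "0.29.2",
    "insightface": "0.7.3",
    "onnx": "1.16.2",
    "onnxruntime": "1.18.1",
    "peft": "0.12.0",
}

for pkg, pinned in PINNED.items():
    try:
        installed = version(pkg)
    except PackageNotFoundError:
        print(f"{pkg}: not installed (pinned {pinned})")
        continue
    status = "OK" if installed == pinned else f"MISMATCH (pinned {pinned})"
    print(f"{pkg}: {installed} {status}")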