Upload 29 files
- .gitattributes +7 -0
- app.py +130 -0
- apps/__init__.py +0 -0
- apps/__pycache__/__init__.cpython-310.pyc +0 -0
- apps/__pycache__/classical_morpher.cpython-310.pyc +0 -0
- apps/__pycache__/gan_morpher.cpython-310.pyc +0 -0
- apps/__pycache__/utils.cpython-310.pyc +0 -0
- apps/classical_morpher.py +107 -0
- apps/gan_morpher.py +58 -0
- apps/utils.py +61 -0
- assets/1144_r_1.png +3 -0
- assets/1147_r_1.png +3 -0
- assets/1162_r_9.png +3 -0
- assets/1163_r_17.png +3 -0
- assets/1172_l_1.png +3 -0
- assets/1177_l_1.png +3 -0
- assets/2517_r_9.png +3 -0
- assets/3243_l_1.png +0 -0
- config/__init__.py +0 -0
- config/config.yaml +62 -0
- models/ResNet_Model.py +51 -0
- models/__init__.py +22 -0
- models/__pycache__/ResNet_Model.cpython-310.pyc +0 -0
- models/__pycache__/__init__.cpython-310.pyc +0 -0
- models/__pycache__/landmark_predictor.cpython-310.pyc +0 -0
- models/__pycache__/models.cpython-310.pyc +0 -0
- models/__pycache__/test_models.cpython-310.pyc +0 -0
- models/landmark_predictor.py +24 -0
- models/models.py +132 -0
- requirements.txt +10 -0
.gitattributes
CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/1144_r_1.png filter=lfs diff=lfs merge=lfs -text
+assets/1147_r_1.png filter=lfs diff=lfs merge=lfs -text
+assets/1162_r_9.png filter=lfs diff=lfs merge=lfs -text
+assets/1163_r_17.png filter=lfs diff=lfs merge=lfs -text
+assets/1172_l_1.png filter=lfs diff=lfs merge=lfs -text
+assets/1177_l_1.png filter=lfs diff=lfs merge=lfs -text
+assets/2517_r_9.png filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,130 @@
import gradio as gr
import torch
import yaml
from huggingface_hub import hf_hub_download
from collections import OrderedDict

# --- Local project imports ---
from apps.gan_morpher import morph_images_with_gan
from models.models import Generator, Encoder, LandmarkEncoder
from models.landmark_predictor import OcularLMGenerator

# --- 1. Define Constants and Configuration ---
MODEL_REPO_ID = "BharathK333/DOOMGAN"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
EPOCH = 450  # The checkpoint epoch to load

print("--- Initializing Gradio App: Downloading and Loading Models ---")

# --- 2. Function to Load Models from Hugging Face Hub ---
# Note: the original decorated this with @gr.cache_resource(), but Gradio has no such
# decorator (cache_resource is a Streamlit API and this would raise AttributeError).
# Calling the function once at module level, as done below, already loads the models only once.
def load_models_from_hub():
    """Downloads all necessary files from the Hugging Face Hub and loads the models."""
    # --- Download Model Files from the Model Repo ---
    g_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=f"G_{EPOCH}.pth")
    e_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=f"E_{EPOCH}.pth")
    le_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=f"LE_{EPOCH}.pth")
    lp_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="landmark_predictor.pth")

    with open('config/config.yaml', 'r') as f:
        config = yaml.safe_load(f)

    model_cfg = config['model']
    data_cfg = config['data']

    # Helper to strip the 'module.' prefix that nn.DataParallel adds to state dict keys
    def remove_module_prefix(state_dict):
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:] if k.startswith('module.') else k
            new_state_dict[name] = v
        return new_state_dict

    # --- Initialize and Load the specific GAN Models (G, E, LE) ---
    gan_models_init = {
        'netG': Generator(nz=model_cfg['nz'], ngf=model_cfg['ngf'], nc=data_cfg['nc'], landmark_feature_size=model_cfg['landmark_feature_size']),
        'netE': Encoder(nc=data_cfg['nc'], ndf=model_cfg['ndf'], nz=model_cfg['nz'], num_landmarks=model_cfg['num_landmarks']),
        'landmark_encoder': LandmarkEncoder(input_dim=model_cfg['num_landmarks'] * 2, output_dim=model_cfg['landmark_feature_size'])
    }

    model_paths = {'netG': g_path, 'netE': e_path, 'landmark_encoder': le_path}
    loaded_models = {}

    for name, model in gan_models_init.items():
        print(f"Loading {name} model...")
        state_dict = torch.load(model_paths[name], map_location=DEVICE)
        state_dict = remove_module_prefix(state_dict)
        model.load_state_dict(state_dict)
        model.to(DEVICE).eval()
        loaded_models[name] = model

    # --- Initialize and Load Landmark Predictor ---
    print("Loading Landmark Predictor model...")
    landmark_predictor = OcularLMGenerator().to(DEVICE)
    state_dict_lp = torch.load(lp_path, map_location=DEVICE)
    state_dict_lp = remove_module_prefix(state_dict_lp)
    landmark_predictor.load_state_dict(state_dict_lp)
    landmark_predictor.eval()
    loaded_models['landmark_predictor'] = landmark_predictor

    print(f"--- All models loaded successfully on {DEVICE} ---")
    return config, loaded_models

# Load everything once when the app starts
config, models = load_models_from_hub()

# --- 3. Define the core processing function for Gradio ---
def run_gan_morph(image1, image2, alpha):
    if image1 is None or image2 is None:
        raise gr.Error("Please upload both source images to generate a morph.")

    print(f"Performing GAN morph with alpha={alpha}...")
    morphed_image_numpy = morph_images_with_gan(image1, image2, config, DEVICE, models, alpha)
    print("GAN morph complete.")
    return morphed_image_numpy

# --- 4. Build the Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft(), title="DOOMGAN Morphing") as demo:
    gr.Markdown(
        """
        # DOOMGAN: High-Fidelity Ocular Image Morphing
        An interactive demonstration of the IJCB-accepted **DOOMGAN** project.
        Upload two ocular images, or use the examples below, and use the slider to morph between them.
        """
    )
    with gr.Row():
        img1 = gr.Image(type="pil", label="Source Image 1")
        img2 = gr.Image(type="pil", label="Source Image 2")

    alpha_slider = gr.Slider(
        minimum=0.0, maximum=1.0, value=0.5, step=0.05,
        label="Interpolation Factor (Image 1 <-> Image 2)",
        info="Slide towards 0 to resemble Image 1, or towards 1 to resemble Image 2."
    )

    output_img = gr.Image(type="pil", label="Morphed Result")
    run_button = gr.Button("Generate Morph", variant="primary")

    gr.Examples(
        examples=[
            ["assets/1144_r_1.png", "assets/1147_r_1.png", 0.5],
            ["assets/1162_r_9.png", "assets/1163_r_17.png", 0.3],
            ["assets/1172_l_1.png", "assets/1177_l_1.png", 0.7],
            ["assets/2517_r_9.png", "assets/3243_l_1.png", 0.5],
        ],
        inputs=[img1, img2, alpha_slider],
        outputs=output_img,
        fn=run_gan_morph,
        cache_examples=True  # Pre-computes the example results for instant loading
    )

    # --- 5. Connect the UI components to the function ---
    run_button.click(
        fn=run_gan_morph,
        inputs=[img1, img2, alpha_slider],
        outputs=[output_img],
        api_name="morph"
    )

# Serve the interface; the original file stopped at the click binding, but without
# launch() the script builds the UI and exits when run as `python app.py`.
demo.launch()
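Because the click handler is registered with api_name="morph", the Space also exposes a programmatic endpoint. A minimal client-side sketch, assuming a recent gradio_client; the Space id below is a placeholder, not confirmed by this upload:

from gradio_client import Client, handle_file

client = Client("BharathK333/DOOMGAN")  # placeholder Space id; substitute the real one
result = client.predict(
    handle_file("assets/1144_r_1.png"),  # Source Image 1
    handle_file("assets/1147_r_1.png"),  # Source Image 2
    0.5,                                 # Interpolation factor
    api_name="/morph",
)
print(result)  # local path to the morphed image returned by the server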
apps/__init__.py
ADDED
File without changes
apps/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (136 Bytes).
apps/__pycache__/classical_morpher.cpython-310.pyc
ADDED
Binary file (4.07 kB).
apps/__pycache__/gan_morpher.cpython-310.pyc
ADDED
Binary file (2.25 kB).
apps/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (2.17 kB).
apps/classical_morpher.py
ADDED
@@ -0,0 +1,107 @@
import cv2
import numpy as np
import torch
from torchvision.transforms import ToTensor, Resize, Compose, Normalize

def predict_landmarks_for_classical(image, landmark_predictor_model, device):
    """Predicts landmarks and returns them as an unnormalized tensor for OpenCV."""
    transform = Compose([
        Resize((256, 256)),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    image_transformed = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        landmarks = landmark_predictor_model(image_transformed).squeeze(0).cpu()
        # Use the first 38 values (19 (x, y) pairs) to match the original LM-1 behavior
        landmarks = landmarks[:38]
        landmarks = landmarks.view(-1, 2)
    return landmarks

def _extract_index_nparray(nparray):
    """Helper function to extract an index from a numpy where() result."""
    return nparray[0][0] if len(nparray[0]) > 0 else None

def _tensor_to_int_array(tensor):
    """Converts a landmark tensor to a list of integer tuples."""
    return [(int(x[0]), int(x[1])) for x in tensor.numpy()]

def ocular_morph_classical(img1_pil, img2_pil, landmarks1_tensor, landmarks2_tensor):
    """Performs landmark-based morphing using Delaunay triangulation and seamless cloning."""
    img1 = cv2.cvtColor(np.array(img1_pil), cv2.COLOR_RGB2BGR)
    img2 = cv2.cvtColor(np.array(img2_pil), cv2.COLOR_RGB2BGR)

    points1 = _tensor_to_int_array(landmarks1_tensor)
    points2 = _tensor_to_int_array(landmarks2_tensor)

    img1_gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    # --- FIX: Define img2_gray, which was previously missing. ---
    img2_gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)

    mask = np.zeros_like(img1_gray)

    points_np = np.array(points1, np.int32)
    convexhull = cv2.convexHull(points_np)
    cv2.fillConvexPoly(mask, convexhull, 255)

    # Delaunay triangulation over the landmarks of image 1
    rect = cv2.boundingRect(convexhull)
    subdiv = cv2.Subdiv2D(rect)
    subdiv.insert(points1)
    triangles = subdiv.getTriangleList()
    triangles = np.array(triangles, dtype=np.int32)

    # Map each triangle's vertices back to landmark indices so the same
    # triangulation can be applied to image 2's landmarks
    indexes_triangles = []
    for t in triangles:
        pt1, pt2, pt3 = (t[0], t[1]), (t[2], t[3]), (t[4], t[5])
        index_pt1 = _extract_index_nparray(np.where((points_np == pt1).all(axis=1)))
        index_pt2 = _extract_index_nparray(np.where((points_np == pt2).all(axis=1)))
        index_pt3 = _extract_index_nparray(np.where((points_np == pt3).all(axis=1)))

        if all(idx is not None for idx in [index_pt1, index_pt2, index_pt3]):
            indexes_triangles.append([index_pt1, index_pt2, index_pt3])

    img2_new_face = np.zeros_like(img2)

    # Warp each triangle of image 1 onto the corresponding triangle of image 2
    for triangle_index in indexes_triangles:
        tr1_pt1, tr1_pt2, tr1_pt3 = points1[triangle_index[0]], points1[triangle_index[1]], points1[triangle_index[2]]
        tr2_pt1, tr2_pt2, tr2_pt3 = points2[triangle_index[0]], points2[triangle_index[1]], points2[triangle_index[2]]

        triangle1 = np.array([tr1_pt1, tr1_pt2, tr1_pt3], np.int32)
        triangle2 = np.array([tr2_pt1, tr2_pt2, tr2_pt3], np.int32)

        rect1 = cv2.boundingRect(triangle1)
        (x1, y1, w1, h1) = rect1
        cropped_triangle = img1[y1: y1 + h1, x1: x1 + w1]
        points_rel1 = np.array([[tr1_pt1[0] - x1, tr1_pt1[1] - y1], [tr1_pt2[0] - x1, tr1_pt2[1] - y1], [tr1_pt3[0] - x1, tr1_pt3[1] - y1]], np.float32)

        rect2 = cv2.boundingRect(triangle2)
        (x2, y2, w2, h2) = rect2
        points_rel2 = np.array([[tr2_pt1[0] - x2, tr2_pt1[1] - y2], [tr2_pt2[0] - x2, tr2_pt2[1] - y2], [tr2_pt3[0] - x2, tr2_pt3[1] - y2]], np.float32)

        M = cv2.getAffineTransform(points_rel1, points_rel2)
        warped_triangle = cv2.warpAffine(cropped_triangle, M, (w2, h2))

        cropped_tr2_mask = np.zeros((h2, w2), np.uint8)
        cv2.fillConvexPoly(cropped_tr2_mask, np.int32(points_rel2), 255)

        warped_triangle = cv2.bitwise_and(warped_triangle, warped_triangle, mask=cropped_tr2_mask)

        # Avoid double-drawing over pixels already filled by a neighbouring triangle
        img2_new_face_rect_area = img2_new_face[y2: y2 + h2, x2: x2 + w2]
        img2_new_face_rect_area_gray = cv2.cvtColor(img2_new_face_rect_area, cv2.COLOR_BGR2GRAY)
        _, mask_triangles_designed = cv2.threshold(img2_new_face_rect_area_gray, 1, 255, cv2.THRESH_BINARY_INV)
        warped_triangle = cv2.bitwise_and(warped_triangle, warped_triangle, mask=mask_triangles_designed)

        img2_new_face_rect_area = cv2.add(img2_new_face_rect_area, warped_triangle)
        img2_new_face[y2: y2 + h2, x2: x2 + w2] = img2_new_face_rect_area

    # Blend the assembled region into image 2 with seamless cloning
    img2_face_mask = np.zeros_like(img2_gray)
    convexhull2 = cv2.convexHull(np.array(points2, np.int32))
    img2_head_mask = cv2.fillConvexPoly(img2_face_mask, convexhull2, 255)

    (x, y, w, h) = cv2.boundingRect(convexhull2)
    center_face2 = (int(x + w / 2), int(y + h / 2))

    seamlessclone = cv2.seamlessClone(img2_new_face, img2, img2_head_mask, center_face2, cv2.NORMAL_CLONE)

    return cv2.cvtColor(seamlessclone, cv2.COLOR_BGR2RGB)
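A minimal usage sketch for this module. It assumes the models dict and DEVICE defined in app.py; the 256x256 resize keeps the images in the same coordinate space as the predicted landmarks:

from PIL import Image
from apps.classical_morpher import predict_landmarks_for_classical, ocular_morph_classical

img1 = Image.open("assets/1144_r_1.png").convert("RGB").resize((256, 256))
img2 = Image.open("assets/1147_r_1.png").convert("RGB").resize((256, 256))

lm1 = predict_landmarks_for_classical(img1, models['landmark_predictor'], DEVICE)
lm2 = predict_landmarks_for_classical(img2, models['landmark_predictor'], DEVICE)
morphed = ocular_morph_classical(img1, img2, lm1, lm2)  # RGB numpy array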
apps/gan_morpher.py
ADDED
@@ -0,0 +1,58 @@
import torch
from torchvision.transforms import ToTensor, Resize, Compose, Normalize
# NOTE: this module is imported as part of the `apps` package (see app.py), so the
# original absolute `from utils import ...` would fail; a relative import is used instead.
# `create_landmark_heatmaps` is not included in this upload and is assumed to live in apps/utils.py.
from .utils import create_landmark_heatmaps

def predict_landmarks_for_gan(image, landmark_predictor_model, device):
    """Predicts landmarks and formats them specifically for the GAN pipeline."""
    transform = Compose([
        Resize((256, 256)),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    image_transformed = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        landmarks = landmark_predictor_model(image_transformed).squeeze(0).cpu()
        landmarks = landmarks[:38]  # Corresponds to 19 (x, y) landmark pairs
        landmarks = landmarks.view(-1, 2)
        # Normalize to the [0, 1] range for heatmap generation
        landmarks[:, 0] /= 256.0
        landmarks[:, 1] /= 256.0
        landmarks = landmarks.flatten()

    return landmarks.unsqueeze(0)  # Return with a batch dimension

def process_image_for_gan(image, config, device, models):
    """Processes a single image to get its latent vector (z) and landmark features (lf)."""
    image_tensor = Compose([
        Resize((config['data']['image_size'], config['data']['image_size'])),
        ToTensor(),
        Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])(image).unsqueeze(0).to(device)

    landmarks = predict_landmarks_for_gan(image, models['landmark_predictor'], device).to(device)
    heatmap = create_landmark_heatmaps(landmarks, image_size=config['data']['image_size']).to(device)

    with torch.no_grad():
        landmark_features = models['landmark_encoder'](landmarks)
        z = models['netE'](image_tensor, heatmap)

    return z, landmark_features

def morph_images_with_gan(image1, image2, config, device, models, alpha=0.5):
    """Generates a morphed image using the GAN with a given interpolation factor."""
    z1, lf1 = process_image_for_gan(image1, config, device, models)
    z2, lf2 = process_image_for_gan(image2, config, device, models)

    # Interpolate in both the latent and landmark feature spaces:
    # alpha = 0 reproduces image 1, alpha = 1 reproduces image 2
    z_morph = (1 - alpha) * z1 + alpha * z2
    lf_morph = (1 - alpha) * lf1 + alpha * lf2

    with torch.no_grad():
        morphed_image_tensor = models['netG'](z_morph, lf_morph)

    # Denormalize from [-1, 1] to [0, 1] for display
    morphed_image = (morphed_image_tensor * 0.5 + 0.5).clamp(0, 1)
    morphed_image_numpy = morphed_image.squeeze(0).permute(1, 2, 0).cpu().numpy()

    return morphed_image_numpy
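create_landmark_heatmaps itself is not part of this upload. A minimal sketch of what such a helper might look like, assuming one Gaussian channel per landmark (matching the Encoder's nc + num_landmarks input channels) and the flattened, [0, 1]-normalized layout produced by predict_landmarks_for_gan; the sigma value is an assumption:

import torch

def create_landmark_heatmaps(landmarks, image_size=256, sigma=4.0):
    """Renders one Gaussian heatmap channel per landmark.

    `landmarks`: (B, num_landmarks * 2) tensor of (x, y) pairs normalized to [0, 1].
    Returns a (B, num_landmarks, image_size, image_size) tensor.
    """
    batch, flat = landmarks.shape
    pts = landmarks.view(batch, flat // 2, 2) * image_size  # back to pixel coordinates
    ys = torch.arange(image_size, device=landmarks.device).view(1, 1, -1, 1)
    xs = torch.arange(image_size, device=landmarks.device).view(1, 1, 1, -1)
    x0 = pts[..., 0].view(batch, -1, 1, 1)
    y0 = pts[..., 1].view(batch, -1, 1, 1)
    return torch.exp(-((xs - x0) ** 2 + (ys - y0) ** 2) / (2 * sigma ** 2))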
apps/utils.py
ADDED
@@ -0,0 +1,61 @@
import os
import torch
from collections import OrderedDict
from PIL import Image

# Local project imports
from models import Generator, Encoder, LandmarkEncoder
from models.landmark_predictor import OcularLMGenerator

def remove_module_prefix(state_dict):
    """Removes the 'module.' prefix from state dict keys if it exists."""
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:] if k.startswith('module.') else k
        new_state_dict[name] = v
    return new_state_dict

def load_all_models(config, epoch, device):
    """Loads and initializes all models from local checkpoint paths (config['paths'])."""
    model_cfg = config['model']
    data_cfg = config['data']
    paths_cfg = config['paths']

    # --- 1. Load the main GAN models (G, E, LE) ---
    gan_models = {
        'G': Generator(nz=model_cfg['nz'], ngf=model_cfg['ngf'], nc=data_cfg['nc'], landmark_feature_size=model_cfg['landmark_feature_size']),
        'E': Encoder(nc=data_cfg['nc'], ndf=model_cfg['ndf'], nz=model_cfg['nz'], num_landmarks=model_cfg['num_landmarks']),
        'LE': LandmarkEncoder(input_dim=model_cfg['num_landmarks'] * 2, output_dim=model_cfg['landmark_feature_size'])
    }
    for name, model in gan_models.items():
        model_path = os.path.join(paths_cfg['outputs'][name], f'{name.lower()}_epoch_{epoch}.pth')
        print(f"Loading {name} model from: {model_path}")
        state_dict = torch.load(model_path, map_location=device)
        state_dict = remove_module_prefix(state_dict)
        model.load_state_dict(state_dict)
        model.to(device).eval()

    # --- 2. Load the separate Landmark Predictor model ---
    lp_path = paths_cfg['inputs']['landmark_predictor_model']
    print(f"Loading Landmark Predictor from: {lp_path}")
    landmark_predictor = OcularLMGenerator().to(device)
    state_dict_lp = torch.load(lp_path, map_location=device)
    state_dict_lp = remove_module_prefix(state_dict_lp)
    landmark_predictor.load_state_dict(state_dict_lp)
    landmark_predictor.eval()

    # Return all models in a dictionary for easy access
    return {
        'netG': gan_models['G'],
        'netE': gan_models['E'],
        'landmark_encoder': gan_models['LE'],
        'landmark_predictor': landmark_predictor
    }

def load_image(file):
    """Safely loads an image file into a PIL Image object."""
    try:
        image = Image.open(file).convert('RGB')
        return image
    except Exception as e:
        raise ValueError(f"Error loading image: {str(e)}")
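For local use outside the Space (where checkpoints exist at the paths named in config/config.yaml rather than on the Hub), a sketch:

import torch
import yaml
from apps.utils import load_all_models, load_image

with open('config/config.yaml') as f:
    cfg = yaml.safe_load(f)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
models = load_all_models(cfg, epoch=450, device=device)  # e.g. generator_models/g_epoch_450.pth
image = load_image('assets/1144_r_1.png')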
assets/1144_r_1.png
ADDED
Git LFS Details
assets/1147_r_1.png
ADDED
Git LFS Details
assets/1162_r_9.png
ADDED
Git LFS Details
assets/1163_r_17.png
ADDED
Git LFS Details
assets/1172_l_1.png
ADDED
Git LFS Details
assets/1177_l_1.png
ADDED
Git LFS Details
assets/2517_r_9.png
ADDED
Git LFS Details
assets/3243_l_1.png
ADDED
config/__init__.py
ADDED
File without changes
config/config.yaml
ADDED
@@ -0,0 +1,62 @@
# config/config.yaml

# --- General Settings ---
project_name: "OcularMorph-DOOMGAN"
manual_seed: 42
ngpu: 1
use_deterministic_algorithms: true
device: "cuda:1"

# --- Data Settings ---
data:
  image_root: "data/filtered_output"
  landmark_json_path: "data/landmarks_GAN.json"
  image_size: 256
  nc: 3
  workers: 4

# --- Model Hyperparameters ---
model:
  nz: 200
  ngf: 64
  ndf: 64
  num_landmarks: 19
  landmark_feature_size: 128

# --- Training Hyperparameters ---
training:
  num_epochs: 501
  batch_size: 64
  optimizer:
    lr_g: 0.0002
    lr_d: 0.00001
    lr_e: 0.0002
    lr_le: 0.0001
    beta1: 0.5
    beta2: 0.999
    weight_decay: 0.00001
  scheduler:
    gamma_d: 0.9998
    gamma_g: 0.9998
    gamma_e: 0.9998
    gamma_le: 0.9998
  loss_weights:
    gp: 10.0
    initial_dynamic:
      base: 50.0
      ms_ssim: 30.0
      perceptual: 50.0
      reconstruction: 10.0
      identity: 50.0
      identity_diff: 40.0

# --- Paths ---
paths:
  inputs:
    arcface_model: "trained_models/resnet50_arcface.pth"
    landmark_predictor_model: "trained_models/Ocular_LM_Generator.pth"
  outputs:
    G: "generator_models"
    D: "discriminator_models"
    E: "encoder_models"
    LE: "landmark_encoder_models"
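The model keys map directly onto the constructor arguments used in app.py and apps/utils.py. A small illustrative snippet (not a file in this upload; nc is hardcoded here for brevity, the real code reads it from the data section):

import yaml
from models.models import Generator, LandmarkEncoder

with open('config/config.yaml') as f:
    m = yaml.safe_load(f)['model']

# 19 landmarks -> 38 flattened (x, y) values -> 128-d landmark feature vector
le = LandmarkEncoder(input_dim=m['num_landmarks'] * 2, output_dim=m['landmark_feature_size'])
g = Generator(nz=m['nz'], ngf=m['ngf'], nc=3, landmark_feature_size=m['landmark_feature_size'])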
models/ResNet_Model.py
ADDED
@@ -0,0 +1,51 @@
import torch
import torch.nn as nn
import torchvision.models as models

class ResNet50_ArcFace(nn.Module):
    """
    ResNet-50 model modified for ArcFace loss.
    Outputs embeddings that can be used for feature extraction.
    """
    def __init__(self, embedding_size=512, pretrained=True):
        super(ResNet50_ArcFace, self).__init__()
        self.embedding_size = embedding_size

        # Load a pre-trained ResNet-50 model
        # (the `pretrained` argument is deprecated in newer torchvision but still works with the pinned 0.17.1)
        self.backbone = models.resnet50(pretrained=pretrained)

        # Replace the final fully connected layer with a linear layer that outputs embeddings
        in_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(in_features, self.embedding_size)

        # Normalize the embedding vectors
        self.l2_norm = nn.functional.normalize

    def forward(self, x):
        x = self.backbone(x)
        # Normalize embeddings to unit length
        x = self.l2_norm(x, p=2, dim=1)
        return x

# Example usage
# Note: this block expects a standalone config.yml with `device`, `embedding_size`,
# `image_height` and `image_width` keys -- not the config/config.yaml shipped in this repo.
if __name__ == "__main__":
    import yaml
    with open('config.yml', 'r') as f:
        config = yaml.safe_load(f)

    device = torch.device(config['device'] if torch.cuda.is_available() else 'cpu')

    model = ResNet50_ArcFace(
        embedding_size=config['embedding_size'],
        pretrained=True
    ).to(device)

    # Print model architecture
    print(model)

    # Test with a random input
    dummy_input = torch.randn(1, 3, config['image_height'], config['image_width']).to(device)
    embeddings = model(dummy_input)
    print("Embeddings shape:", embeddings.shape)
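Since the embeddings are L2-normalized, identity comparison (e.g. for the identity and identity_diff loss weights in config/config.yaml) reduces to a dot product. A sketch:

import torch
from models.ResNet_Model import ResNet50_ArcFace

model = ResNet50_ArcFace(embedding_size=512, pretrained=False).eval()
with torch.no_grad():
    e1 = model(torch.randn(1, 3, 256, 256))
    e2 = model(torch.randn(1, 3, 256, 256))

cosine_similarity = (e1 * e2).sum(dim=1)  # unit vectors: dot product == cosine
print(cosine_similarity.item())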
models/__init__.py
ADDED
@@ -0,0 +1,22 @@
# All required model imports
from .models import (
    weights_init,
    SelfAttention,
    ResidualBlock,
    Encoder,
    Generator,
    Discriminator,
    LandmarkEncoder
)

# Import for the ArcFace model
try:
    from .ResNet_Model import ResNet50_ArcFace
except ImportError:
    ResNet50_ArcFace = None

# Import the LM Predictor model for the app
try:
    from .landmark_predictor import OcularLMGenerator
except ImportError:
    OcularLMGenerator = None
models/__pycache__/ResNet_Model.cpython-310.pyc
ADDED
Binary file (1.57 kB).
models/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (515 Bytes).
models/__pycache__/landmark_predictor.cpython-310.pyc
ADDED
Binary file (1.17 kB).
models/__pycache__/models.cpython-310.pyc
ADDED
Binary file (6.28 kB).
models/__pycache__/test_models.cpython-310.pyc
ADDED
Binary file (1.58 kB).
models/landmark_predictor.py
ADDED
@@ -0,0 +1,24 @@
import torch
from torch import nn
import torch.nn.functional as F

class OcularLMGenerator(nn.Module):
    def __init__(self):
        super(OcularLMGenerator, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        # Two 2x2 poolings take a 256x256 input down to 64x64 feature maps
        self.fc1 = nn.Linear(64 * 64 * 64, 500)
        self.fc2 = nn.Linear(500, 66)  # 66 outputs = up to 33 (x, y) pairs; callers use the first 19 pairs

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 64 * 64)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

if __name__ == "__main__":
    model = OcularLMGenerator()
    print(model)
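A quick shape check (a sketch; the 256x256 input size is what the transforms in apps/ produce):

import torch
from models.landmark_predictor import OcularLMGenerator

x = torch.randn(1, 3, 256, 256)
out = OcularLMGenerator()(x)
print(out.shape)  # torch.Size([1, 66])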
models/models.py
ADDED
@@ -0,0 +1,132 @@
import torch
import torch.nn as nn

def weights_init(m):
    """
    Applies custom weights initialization to a model's modules.
    - Conv layers: He (Kaiming) normal initialization.
    - InstanceNorm layers: normal distribution for weights, constant for biases.
    """
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        # Use a fan-in He initialization for Conv layers
        nn.init.kaiming_normal_(m.weight.data, a=0.2, mode='fan_in')
    elif classname.find('InstanceNorm') != -1:
        if m.weight is not None:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
        if m.bias is not None:
            nn.init.constant_(m.bias.data, 0)

class SelfAttention(nn.Module):
    def __init__(self, in_channels):
        super(SelfAttention, self).__init__()
        self.query = nn.Conv2d(in_channels, in_channels // 8, 1)
        self.key = nn.Conv2d(in_channels, in_channels // 8, 1)
        self.value = nn.Conv2d(in_channels, in_channels, 1)
        self.gamma = nn.Parameter(torch.zeros(1))
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        B, C, W, H = x.size()
        proj_query = self.query(x).view(B, -1, W * H).permute(0, 2, 1)
        proj_key = self.key(x).view(B, -1, W * H)
        attention = self.softmax(torch.bmm(proj_query, proj_key))
        proj_value = self.value(x).view(B, -1, W * H)
        out = torch.bmm(proj_value, attention.permute(0, 2, 1))
        out = out.view(B, C, W, H)
        # gamma starts at zero, so attention is blended in gradually during training
        return self.gamma * out + x

class ResidualBlock(nn.Module):
    def __init__(self, in_channels):
        super(ResidualBlock, self).__init__()
        self.block = nn.Sequential(
            nn.Conv2d(in_channels, in_channels, 3, padding=1, bias=False),
            nn.InstanceNorm2d(in_channels),
            nn.ReLU(),
            nn.Conv2d(in_channels, in_channels, 3, padding=1, bias=False),
            nn.InstanceNorm2d(in_channels)
        )

    def forward(self, x):
        return x + self.block(x)

class Encoder(nn.Module):
    def __init__(self, nc=3, ndf=64, nz=200, num_landmarks=19):
        super(Encoder, self).__init__()
        # The image is concatenated with one heatmap channel per landmark
        in_channels = nc + num_landmarks
        self.model = nn.Sequential(
            nn.Conv2d(in_channels, ndf, 4, 2, 1), nn.LeakyReLU(0.2),                                      # 128x128
            nn.Conv2d(ndf, ndf*2, 4, 2, 1, bias=False), nn.InstanceNorm2d(ndf*2), nn.LeakyReLU(0.2),      # 64x64
            nn.Conv2d(ndf*2, ndf*4, 4, 2, 1, bias=False), nn.InstanceNorm2d(ndf*4), nn.LeakyReLU(0.2),    # 32x32
            nn.Conv2d(ndf*4, ndf*8, 4, 2, 1, bias=False), nn.InstanceNorm2d(ndf*8), nn.LeakyReLU(0.2),    # 16x16
            ResidualBlock(ndf*8), SelfAttention(ndf*8),
            nn.Conv2d(ndf*8, ndf*16, 4, 2, 1, bias=False), nn.InstanceNorm2d(ndf*16), nn.LeakyReLU(0.2),  # 8x8
            nn.Conv2d(ndf*16, ndf*16, 4, 2, 1), nn.LeakyReLU(0.2),                                        # 4x4
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(ndf * 16, nz, 1)
        )

    def forward(self, img, heatmaps):
        return self.model(torch.cat([img, heatmaps], 1)).view(img.size(0), -1)

class Generator(nn.Module):
    def __init__(self, nz=200, ngf=64, nc=3, landmark_feature_size=128):
        super(Generator, self).__init__()
        self.ngf = ngf
        self.fc = nn.Sequential(
            nn.Linear(nz + landmark_feature_size, ngf * 32 * 4 * 4),
            nn.ReLU()  # Removed inplace=True
        )

        def block(in_c, out_c):
            return [nn.ConvTranspose2d(in_c, out_c, 4, 2, 1, bias=False), nn.InstanceNorm2d(out_c), nn.ReLU()]

        self.main = nn.Sequential(
            ResidualBlock(ngf * 32),    # 4x4
            *block(ngf*32, ngf*16),     # 8x8
            *block(ngf*16, ngf*8),      # 16x16
            SelfAttention(ngf * 8),
            *block(ngf*8, ngf*4),       # 32x32
            *block(ngf*4, ngf*2),       # 64x64
            *block(ngf*2, ngf),         # 128x128
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1), nn.Tanh()  # 256x256
        )

    def forward(self, z, landmark_features):
        x = self.fc(torch.cat([z, landmark_features], 1)).view(-1, self.ngf*32, 4, 4)
        return self.main(x)

class Discriminator(nn.Module):
    def __init__(self, nc=3, ndf=64, num_landmarks=19):
        super(Discriminator, self).__init__()
        in_channels = nc + num_landmarks

        def block(in_c, out_c, norm=True, dropout=True):
            layers = [nn.utils.spectral_norm(nn.Conv2d(in_c, out_c, 4, 2, 1)) if norm else nn.Conv2d(in_c, out_c, 4, 2, 1)]
            layers.append(nn.LeakyReLU(0.2))
            if dropout: layers.append(nn.Dropout(0.5))
            layers.append(ResidualBlock(out_c))
            return layers

        self.model = nn.ModuleList([
            nn.Sequential(*block(in_channels, ndf, norm=False, dropout=False)),  # 128
            nn.Sequential(*block(ndf, ndf * 2)),                                 # 64
            nn.Sequential(*block(ndf*2, ndf * 4)),                               # 32
            nn.Sequential(SelfAttention(ndf*4), *block(ndf*4, ndf * 8)),         # 16
            nn.Sequential(*block(ndf*8, ndf * 16)),                              # 8
        ])
        # One patch-level output head per intermediate scale (multi-scale critic)
        self.out_layers = nn.ModuleList([
            nn.utils.spectral_norm(nn.Conv2d(ndf*2, 1, 3, 1, 1)),
            nn.utils.spectral_norm(nn.Conv2d(ndf*4, 1, 3, 1, 1)),
            nn.utils.spectral_norm(nn.Conv2d(ndf*8, 1, 3, 1, 1)),
            nn.utils.spectral_norm(nn.Conv2d(ndf*16, 1, 3, 1, 1)),
        ])

    def forward(self, img, heatmaps):
        x = torch.cat([img, heatmaps], 1)
        outputs = []
        for i, layer in enumerate(self.model):
            x = layer(x)
            if i > 0: outputs.append(self.out_layers[i-1](x))
        return outputs

class LandmarkEncoder(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LandmarkEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128), nn.LeakyReLU(0.2),
            nn.Linear(128, output_dim), nn.LeakyReLU(0.2)
        )

    def forward(self, landmarks):
        return self.encoder(landmarks)
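A shape sanity check for the generator/encoder round trip (a sketch using the class defaults, which match config/config.yaml):

import torch
from models.models import Generator, Encoder, LandmarkEncoder

le = LandmarkEncoder(input_dim=38, output_dim=128)  # 19 landmarks * 2 coords
lf = le(torch.rand(2, 38))
z = torch.randn(2, 200)

img = Generator()(z, lf)
print(img.shape)  # torch.Size([2, 3, 256, 256])

heatmaps = torch.rand(2, 19, 256, 256)
z_back = Encoder()(img, heatmaps)
print(z_back.shape)  # torch.Size([2, 200])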
requirements.txt
ADDED
@@ -0,0 +1,10 @@
--find-links https://download.pytorch.org/whl/torch_stable.html
torch==2.2.1+cu118
torchvision==0.17.1+cu118
gradio
huggingface_hub
pyyaml
numpy
Pillow
scipy
opencv-python-headless