# Pierre Fernandez
# added encoding and decoding (commit 9e6cbab)
import gradio as gr
import gradio.inputs as grinputs
import gradio.outputs as groutputs
import numpy as np
import json
import torch
from torchvision import transforms
import utils
import utils_img
# Select GPU when available; fix seeds so the random carrier is reproducible.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(0)
np.random.seed(0)

print('Building backbone and normalization layer...')
# NOTE(review): build_backbone / load_normalization_layer come from the local
# `utils` module; assumed to place the weights on `device` — confirm.
backbone = utils.build_backbone(path='dino_r50.pth')
normlayer = utils.load_normalization_layer(path='out2048.pth')
model = utils.NormLayerWrapper(backbone, normlayer)

print('Building the hypercone...')
FPR = 1e-6
angle = 1.462771101178447 # value for FPR=1e-6 and D=2048
rho = 1 + np.tan(angle)**2
# angle = utils.pvalue_angle(2048, 1, proba=FPR)

# Secret watermark direction: a random unit-norm carrier in feature space.
carrier = torch.randn(1, 2048)
carrier /= torch.norm(carrier, dim=1, keepdim=True)
# Bug fix: move the carrier to `device` — encode()/decode() compute
# `ft @ carrier.T` with `ft` on `device`, which raised a CPU/CUDA device
# mismatch whenever a GPU was available. No-op on CPU-only machines.
carrier = carrier.to(device)

# ImageNet-style normalization applied to every incoming PIL image.
default_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
def encode(image, epochs=10, psnr=44, lambda_w=1, lambda_i=1):
    """Embed a zero-bit watermark in `image` via pixel-space optimization.

    Gradient descent pushes the model's feature vector into the hypercone
    around `carrier` (watermark loss) while SSIM attenuation and a PSNR clip
    keep the result perceptually close to the input.

    Args:
        image: input PIL image.
        epochs: number of optimization steps.
        psnr: PSNR budget (dB) used by the distortion clip.
        lambda_w: weight of the watermark (hypercone) loss.
        lambda_i: weight of the image fidelity (L2) loss.

    Returns:
        The watermarked PIL image.
    """
    reference = default_transform(image).to(device, non_blocking=True).unsqueeze(0)
    img = reference.clone().to(device, non_blocking=True)
    img.requires_grad = True
    optimizer = torch.optim.Adam([img], lr=1e-2)

    for step in range(epochs):
        # Re-project the iterate into the perceptual budget before feature
        # extraction; gradients flow through both ops back into `img`.
        candidate = utils_img.ssim_attenuation(img, reference)
        candidate = utils_img.psnr_clip(candidate, reference, psnr)
        features = model(candidate)        # BxCxWxH -> BxD
        projection = features @ carrier.T  # BxD @ Dx1 -> Bx1
        feat_norm = torch.norm(features, dim=-1, keepdim=True)  # Bx1
        cosine = (projection / feat_norm).abs()
        # .item() detaches, so the p-value is computed for logging only.
        log10_pvalue = np.log10(utils.cosine_pvalue(cosine.item(), features.shape[-1]))
        # Negative hypercone margin: minimized when rho*<ft,c>^2 > ||ft||^2.
        loss_R = -(rho * projection**2 - feat_norm**2)
        loss_l2_img = torch.norm(candidate - reference)**2  # fidelity term
        loss = lambda_w*loss_R + lambda_i*loss_l2_img

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print("__log__:%s" % json.dumps({
            "keyword": "img_optim",
            "iteration": step,
            "loss": loss.item(),
            "loss_R": loss_R.item(),
            "loss_l2_img": loss_l2_img.item(),
            "log10_pvalue": log10_pvalue.item(),
        }))

    # Final projection into the budget, then quantize to valid pixel values.
    img = utils_img.ssim_attenuation(img, reference)
    img = utils_img.psnr_clip(img, reference, psnr)
    img = utils_img.round_pixel(img)
    img = img.squeeze(0).detach().cpu()
    img = transforms.ToPILImage()(utils_img.unnormalize_img(img).squeeze(0))
    return img
def decode(image):
    """Decide whether `image` carries the watermark.

    Extracts features and tests whether they lie inside the hypercone around
    `carrier` (loss_R < 0 means marked), reporting the associated p-value.

    Args:
        image: input PIL image.

    Returns:
        Human-readable detection message including the p-value.
    """
    # Inference only: disable autograd — the original built an unused
    # gradient graph through the model here, wasting memory and compute.
    with torch.no_grad():
        img = default_transform(image).to(device, non_blocking=True).unsqueeze(0)
        ft = model(img) # BxCxWxH -> BxD
        dot_product = (ft @ carrier.T) # BxD @ Dx1 -> Bx1
        norm = torch.norm(ft, dim=-1, keepdim=True) # Bx1
        cosines = torch.abs(dot_product/norm)
        log10_pvalue = np.log10(utils.cosine_pvalue(cosines.item(), ft.shape[-1]))
        loss_R = -(rho * dot_product**2 - norm**2) # hypercone test statistic
    text_marked = "marked" if loss_R < 0 else "unmarked"
    return 'Image is {s}, with p-value={p}'.format(s=text_marked, p=10**log10_pvalue)
def on_submit(image, mode):
    """Route the submitted image to encode() or decode() based on `mode`.

    Returns a (image, message) pair for the two gradio outputs.
    """
    print('{} mode'.format(mode))
    if mode == 'Encode':
        watermarked = encode(image)
        return watermarked, 'Successfully encoded'
    # Decode mode: image is returned unchanged, message carries the verdict.
    return image, decode(image)
# Gradio UI: one image input plus an Encode/Decode selector; outputs the
# (possibly watermarked) image and a status/detection message from on_submit.
# NOTE(review): gradio.inputs / gradio.outputs is the legacy (pre-3.x)
# component API, and allow_screenshot was removed in later releases —
# confirm the pinned gradio version still supports these.
iface = gr.Interface(
fn=on_submit,
inputs=[
grinputs.Image(),
grinputs.Radio(['Encode', 'Decode'], label="Encode or Decode mode")],
outputs=[
groutputs.Image(label='Watermarked image'),
groutputs.Textbox(label='Information')],
allow_screenshot=False,
allow_flagging="auto",
)
# Start the local web server (blocking call).
iface.launch()