I implement the following:
I present results for the following:
We use VGG-19 without normalization layers, with style loss applied at conv_1 through conv_5 inclusive and content loss applied at conv_4. We use the L-BFGS optimizer with lr=1.
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.models as models
import copy
import sys
from utils import load_image, Normalization, device, imshow, get_image_optimizer
from style_and_content import ContentLoss, StyleLoss
import torchvision.transforms as transforms
from time import time
# Default layer names at which content / style losses are attached.
# "conv_N" refers to the N-th Conv2d encountered while walking the network.
content_layers_default = ['conv_4']
style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']
def get_model_and_losses(cnn, style_img, content_img,
                         content_layers=content_layers_default,
                         style_layers=style_layers_default):
    """Build a truncated copy of ``cnn`` with loss modules spliced in.

    The children of ``cnn`` are walked in order and re-registered under the
    names ``conv_N`` / ``relu_N`` / ``pool_N`` (``N`` counts the Conv2d layers
    seen so far).  Directly after every layer whose name appears in
    ``content_layers`` a ``ContentLoss`` is inserted, and after every layer
    named in ``style_layers`` a ``StyleLoss``.  Each loss target is the output
    of the model built so far on ``content_img`` / ``style_img`` respectively.

    Args:
        cnn: Feature network whose direct children are Conv2d, ReLU and
            MaxPool2d modules.  It is deep-copied, never modified.
        style_img: Image tensor used to compute the style targets.
        content_img: Image tensor used to compute the content targets.
        content_layers: Layer names that receive a content loss.  ``None``
            falls back to the module-level ``content_layers_default``.
        style_layers: Layer names that receive a style loss.  ``None`` falls
            back to the module-level ``style_layers_default``.

    Returns:
        ``(model, style_losses, content_losses)`` where ``model`` is an
        ``nn.Sequential`` and the two lists hold the inserted loss modules in
        network order.

    Raises:
        ValueError: If ``cnn`` contains a child that is not a Conv2d, ReLU or
            MaxPool2d.
    """
    # Callers such as run_optimization forward None when no list was given;
    # iterating None would otherwise fail with a TypeError.
    if content_layers is None:
        content_layers = content_layers_default
    if style_layers is None:
        style_layers = style_layers_default

    cnn = copy.deepcopy(cnn)
    content_losses = []
    style_losses = []
    model = nn.Sequential(Normalization())

    # Highest layer index requested by any loss; everything from the next
    # conv onwards is never needed.  default=0 keeps an empty request from
    # crashing: the result is then just the normalization layer, no losses.
    deepest = max(
        (int(name.split("_")[-1]) for name in (*content_layers, *style_layers)),
        default=0,
    )

    conv_index = 0
    for layer in cnn.children():
        if isinstance(layer, nn.Conv2d):
            conv_index += 1
            # Trim the later layers by terminating early.
            if conv_index > deepest:
                break
            name = "conv_" + str(conv_index)
        elif isinstance(layer, nn.ReLU):
            name = "relu_" + str(conv_index)
            # Swap in an out-of-place ReLU (presumably so activations seen by
            # the loss modules are not overwritten in place).
            layer = nn.ReLU(inplace=False)
        elif isinstance(layer, nn.MaxPool2d):
            name = "pool_" + str(conv_index)
        else:
            raise ValueError(
                "Unsupported layer type in cnn: " + layer.__class__.__name__
            )

        model.add_module(name, layer)

        if name in content_layers:
            # Detached like the style target below: the target is a constant.
            target = model(content_img).detach()
            content_loss = ContentLoss(target)
            model.add_module("content_loss_" + str(conv_index), content_loss)
            content_losses.append(content_loss)

        if name in style_layers:
            target = model(style_img).detach()
            style_loss = StyleLoss(target)
            model.add_module("style_loss_" + str(conv_index), style_loss)
            style_losses.append(style_loss)

    return model, style_losses, content_losses
def run_optimization(cnn, content_img, style_img, input_img, use_content=True, use_style=True, num_steps=300,
                     style_weight=1000000, content_weight=1, cont_layers=None, style_layers=None):
    """Run the image reconstruction, texture synthesis, or style transfer.

    Args:
        cnn: Feature network handed to ``get_model_and_losses``.
        content_img: Image tensor providing the content targets.
        style_img: Image tensor providing the style targets.
        input_img: Image tensor that is optimised in place.
        use_content: If False, no content loss is attached.
        use_style: If False, no style loss is attached.
        num_steps: Optimiser steps are taken while the number of closure
            evaluations is ``<= num_steps``.
        style_weight: Multiplier applied to the summed style losses.
        content_weight: Multiplier applied to the summed content losses.
        cont_layers: Layer names for content losses; ``None`` uses the
            default of ``get_model_and_losses``.
        style_layers: Layer names for style losses; ``None`` uses the
            default of ``get_model_and_losses``.

    Returns:
        A detached copy of ``input_img`` clamped to ``[0, 1]``.

    Raises:
        ValueError: If no loss module ends up attached, since there would be
            nothing to optimise.
    """
    # Forward only layer lists that are actually specified, so that None
    # means "use the builder's default" instead of being passed through.
    layer_kwargs = {}
    if not use_content:
        layer_kwargs["content_layers"] = []
    elif cont_layers is not None:
        layer_kwargs["content_layers"] = cont_layers
    if not use_style:
        layer_kwargs["style_layers"] = []
    elif style_layers is not None:
        layer_kwargs["style_layers"] = style_layers

    print('Building the style transfer model..')
    model, style_losses, content_losses = get_model_and_losses(
        cnn, style_img, content_img, **layer_kwargs)
    if not style_losses and not content_losses:
        raise ValueError("No content or style loss is attached; nothing to optimize.")

    optimizer = get_image_optimizer(input_img)
    print('Optimizing..')

    # Counts closure evaluations; the optimizer may call the closure several
    # times per step, so this is not the same as the number of step() calls.
    calls = 0

    def closure():
        nonlocal calls
        # Keep the image inside the valid pixel range before each evaluation.
        with torch.no_grad():
            input_img.clamp_(0, 1)
        optimizer.zero_grad()
        # Run for its side effect: the loss modules' .loss values are read below.
        model(input_img)
        style_term = style_weight * sum(s.loss for s in style_losses)
        content_term = content_weight * sum(c.loss for c in content_losses)
        total_loss = style_term + content_term
        total_loss.backward()
        calls += 1
        return total_loss

    while calls <= num_steps:
        optimizer.step(closure)

    return input_img.detach().clamp(0.0, 1.0)
# Load the style / content pair used by the first set of experiments.
style_img_path = "./images/style/starry_night.jpeg"
content_img_path = "./images/content/wally.jpg"
style_img = load_image(style_img_path)
content_img = load_image(content_img_path)
# Broadcast images that do not have 3 channels up to 3 channels.
if style_img.shape[1] != 3:
    style_img = style_img.expand(-1, 3, -1, -1)
if content_img.shape[1] != 3:
    content_img = content_img.expand(-1, 3, -1, -1)
# Resize the content image to 512 and the style image to the same spatial size.
# align_corners=False is what the default resolves to; spelling it out
# avoids the UserWarning PyTorch emits for bicubic mode.
content_img = torch.nn.functional.interpolate(content_img, size=512, mode="bicubic", align_corners=False)
style_img = torch.nn.functional.interpolate(style_img, size=content_img.shape[2:], mode="bicubic", align_corners=False)
/home/aluo/anaconda3/envs/torch170/lib/python3.8/site-packages/torch/nn/functional.py:3060: UserWarning: Default upsampling behavior when mode=bicubic is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details. warnings.warn("Default upsampling behavior when mode={} is changed "
# Shared tensor -> PIL image converter used by imshow2 and to_img.
unloader = transforms.ToPILImage()
def imshow2(tensor, title=None):
    """Display an image tensor with matplotlib, optionally with a title.

    The tensor is copied to the CPU first so the caller's tensor is left
    untouched, and its leading batch dimension is squeezed away before the
    conversion to a PIL image.
    """
    pil_image = unloader(tensor.cpu().clone().squeeze(0))
    plt.imshow(pil_image)
    if title is not None:
        plt.title(title)
    plt.show()
def to_img(tensor):
    """Return a PIL image built from a CPU copy of ``tensor``.

    The input tensor itself is not modified; the leading batch dimension is
    squeezed away before conversion.
    """
    cpu_copy = tensor.cpu().clone()
    return unloader(cpu_copy.squeeze(0))
# VGG-19 feature layers with pretrained weights, on `device`, in eval mode.
cnn = models.vgg19(pretrained=True).features.to(device).eval()
# Bring both images back into [0, 1] (presumably because the bicubic resize
# can leave values slightly outside that range).
style_img = torch.clamp(style_img, 0.0, 1.0)
content_img = torch.clamp(content_img, 0.0, 1.0)

# Content-only reconstruction from uniform noise, one run per layer
# conv_1 .. conv_6.
style_layers_default = []
for conv_index in range(1, 7):
    content_layers_default = ['conv_' + str(conv_index)]
    input_img = torch.rand_like(content_img)
    input_img.requires_grad = True
    output = run_optimization(cnn, content_img, style_img, input_img, num_steps=300, cont_layers=content_layers_default, style_layers=style_layers_default)
    imshow2(output, "conv_" + str(conv_index) + " content")
Building the style transfer model.. Optimizing..
Building the style transfer model.. Optimizing..
Building the style transfer model.. Optimizing..
Building the style transfer model.. Optimizing..
Building the style transfer model.. Optimizing..
Building the style transfer model.. Optimizing..
We find that enforcing the content loss closer to the input results in a more faithful reconstruction of the input. The image suffers from discoloration as we apply the content loss closer to the output.
# Content reconstruction at conv_6 for the fallingwater image.
style_img_path = "./images/style/starry_night.jpeg"
content_img_path = "./images/content/fallingwater.png"
style_img = load_image(style_img_path)
content_img = load_image(content_img_path)
# Broadcast images that do not have 3 channels up to 3 channels.
if style_img.shape[1] != 3:
    style_img = style_img.expand(-1, 3, -1, -1)
if content_img.shape[1] != 3:
    content_img = content_img.expand(-1, 3, -1, -1)
# align_corners=False is what the default resolves to; spelling it out
# avoids the UserWarning PyTorch emits for bicubic mode.
content_img = torch.nn.functional.interpolate(content_img, size=512, mode="bicubic", align_corners=False)
style_img = torch.nn.functional.interpolate(style_img, size=content_img.shape[2:], mode="bicubic", align_corners=False)
# Content loss only; the optimisation starts from uniform noise.
content_layers_default = ['conv_6']
style_layers_default = []
input_img = torch.rand_like(content_img)
input_img.requires_grad = True
output = run_optimization(cnn, content_img, style_img, input_img, num_steps=300, cont_layers=content_layers_default, style_layers=style_layers_default)
imshow2(output, "conv_6 content")
Building the style transfer model.. Optimizing..
We apply content loss at conv_6 on the falling water image.
# Content reconstruction at conv_6 for the phipps image.
style_img_path = "./images/style/starry_night.jpeg"
content_img_path = "./images/content/phipps.jpeg"
style_img = load_image(style_img_path)
content_img = load_image(content_img_path)
# Broadcast images that do not have 3 channels up to 3 channels.
if style_img.shape[1] != 3:
    style_img = style_img.expand(-1, 3, -1, -1)
if content_img.shape[1] != 3:
    content_img = content_img.expand(-1, 3, -1, -1)
# align_corners=False is what the default resolves to; spelling it out
# avoids the UserWarning PyTorch emits for bicubic mode.
content_img = torch.nn.functional.interpolate(content_img, size=512, mode="bicubic", align_corners=False)
style_img = torch.nn.functional.interpolate(style_img, size=content_img.shape[2:], mode="bicubic", align_corners=False)
# Content loss only; the optimisation starts from uniform noise.
content_layers_default = ['conv_6']
style_layers_default = []
input_img = torch.rand_like(content_img)
input_img.requires_grad = True
output = run_optimization(cnn, content_img, style_img, input_img, num_steps=300, cont_layers=content_layers_default, style_layers=style_layers_default)
imshow2(output, "conv_6 content")
Building the style transfer model.. Optimizing..
We apply content loss at conv_6 on the phipps image.
# Reload the wally / starry night pair for the style-only experiments.
style_img_path = "./images/style/starry_night.jpeg"
content_img_path = "./images/content/wally.jpg"
style_img = load_image(style_img_path)
content_img = load_image(content_img_path)
# Broadcast images that do not have 3 channels up to 3 channels.
if style_img.shape[1] != 3:
    style_img = style_img.expand(-1, 3, -1, -1)
if content_img.shape[1] != 3:
    content_img = content_img.expand(-1, 3, -1, -1)
# align_corners=False is what the default resolves to; spelling it out
# avoids the UserWarning PyTorch emits for bicubic mode.
content_img = torch.nn.functional.interpolate(content_img, size=512, mode="bicubic", align_corners=False)
style_img = torch.nn.functional.interpolate(style_img, size=content_img.shape[2:], mode="bicubic", align_corners=False)

# Style loss only, starting from the content image, one run per layer
# conv_1 .. conv_6.
content_layers_default = []
for conv_index in range(1, 7):
    style_layers_default = ['conv_' + str(conv_index)]
    input_img = content_img.clone().detach()
    input_img.requires_grad = True
    output = run_optimization(cnn, content_img, style_img, input_img, num_steps=300, cont_layers=content_layers_default, style_layers=style_layers_default)
    imshow2(output, "conv_" + str(conv_index) + " style")
Building the style transfer model.. Optimizing..
Building the style transfer model.. Optimizing..
Building the style transfer model.. Optimizing..
Building the style transfer model.. Optimizing..
Building the style transfer model.. Optimizing..
Building the style transfer model.. Optimizing..
We find that enforcing the style loss closer to the input results in a more high-frequency transfer of style. The image has a better style if we apply the loss at a later layer.
# Texture synthesis: noise optimised against a style loss only, for two styles.
style_img_path = "./images/style/starry_night.jpeg"
style_img1 = load_image(style_img_path)
style_img_path = "./images/style/the_scream.jpeg"
style_img2 = load_image(style_img_path)
# Broadcast images that do not have 3 channels up to 3 channels (the same
# guard every other experiment applies before resizing).
if style_img1.shape[1] != 3:
    style_img1 = style_img1.expand(-1, 3, -1, -1)
if style_img2.shape[1] != 3:
    style_img2 = style_img2.expand(-1, 3, -1, -1)
# align_corners=False is what the default resolves to; spelling it out
# avoids the UserWarning PyTorch emits for bicubic mode.
style_img1 = torch.nn.functional.interpolate(style_img1, size=512, mode="bicubic", align_corners=False)
style_img2 = torch.nn.functional.interpolate(style_img2, size=512, mode="bicubic", align_corners=False)

# content_img is whatever the previous experiment left behind; with no
# content layers requested it is passed along but no content loss is built.
content_layers_default = []
style_layers_default = ['conv_5']
input_img = torch.rand_like(style_img1)
input_img.requires_grad = True
output = run_optimization(cnn, content_img, style_img1, input_img, num_steps=300, cont_layers=content_layers_default, style_layers=style_layers_default)
imshow2(output, "conv_5 style with starry night")

content_layers_default = []
style_layers_default = ['conv_5']
input_img = torch.rand_like(style_img2)
input_img.requires_grad = True
output = run_optimization(cnn, content_img, style_img2, input_img, num_steps=300, cont_layers=content_layers_default, style_layers=style_layers_default)
imshow2(output, "conv_5 style with the scream")
Building the style transfer model.. Optimizing..
Building the style transfer model.. Optimizing..
Above, we show noise transformed with only the style loss.
# Two content images and two style images for the 2x2 style-transfer grid.
content1_path = "./images/content/tubingen.jpeg"
content2_path = "./images/content/dancing.jpg"
content_img1 = load_image(content1_path)
content_img2 = load_image(content2_path)
style1_path = "./images/style/picasso.jpg"
style2_path = "./images/style/the_scream.jpeg"
style_img1 = load_image(style1_path)
style_img2 = load_image(style2_path)
# Broadcast images that do not have 3 channels up to 3 channels.
if content_img1.shape[1] != 3:
    content_img1 = content_img1.expand(-1, 3, -1, -1)
if content_img2.shape[1] != 3:
    content_img2 = content_img2.expand(-1, 3, -1, -1)
if style_img1.shape[1] != 3:
    style_img1 = style_img1.expand(-1, 3, -1, -1)
if style_img2.shape[1] != 3:
    style_img2 = style_img2.expand(-1, 3, -1, -1)
# align_corners=False is what the default resolves to; spelling it out
# avoids the UserWarning PyTorch emits for bicubic mode.
content_img1 = torch.nn.functional.interpolate(content_img1, size=512, mode="bicubic", align_corners=False)
content_img2 = torch.nn.functional.interpolate(content_img2, size=512, mode="bicubic", align_corners=False)
style_img1 = torch.nn.functional.interpolate(style_img1, size=512, mode="bicubic", align_corners=False)
style_img2 = torch.nn.functional.interpolate(style_img2, size=512, mode="bicubic", align_corners=False)

content_layers_default = ['conv_4']
style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']

# Every run starts from a fresh copy of its own content image.
# output1: content 1 + style 1
input_img = content_img1.clone().detach()
input_img.requires_grad = True
output1 = run_optimization(cnn, content_img1, style_img1, input_img, num_steps=300, cont_layers=content_layers_default, style_layers=style_layers_default)
# output2: content 2 + style 1
input_img = content_img2.clone().detach()
input_img.requires_grad = True
output2 = run_optimization(cnn, content_img2, style_img1, input_img, num_steps=300, cont_layers=content_layers_default, style_layers=style_layers_default)
# output3: content 1 + style 2
input_img = content_img1.clone().detach()
input_img.requires_grad = True
output3 = run_optimization(cnn, content_img1, style_img2, input_img, num_steps=300, cont_layers=content_layers_default, style_layers=style_layers_default)
# output4: content 2 + style 2
input_img = content_img2.clone().detach()
input_img.requires_grad = True
output4 = run_optimization(cnn, content_img2, style_img2, input_img, num_steps=300, cont_layers=content_layers_default, style_layers=style_layers_default)
Building the style transfer model.. Optimizing.. Building the style transfer model.. Optimizing.. Building the style transfer model.. Optimizing.. Building the style transfer model.. Optimizing..
We mix the content and style across a 2x2 grid of images. We initialize the input to be the content image.
# 3x3 figure: styles along the top row, contents down the left column and
# the four results in the remaining 2x2 cells; the top-left cell stays empty.
fig, axs = plt.subplots(3, 3, dpi=200)

titled_panels = [
    ((0, 1), style_img1, "style 1"),
    ((0, 2), style_img2, "style 2"),
    ((1, 0), content_img1, "content 1"),
    ((2, 0), content_img2, "content 2"),
]
for (row, col), panel_tensor, panel_title in titled_panels:
    axs[row, col].imshow(to_img(panel_tensor))
    axs[row, col].set_title(panel_title)

result_panels = [
    ((1, 1), output1),
    ((2, 1), output2),
    ((1, 2), output3),
    ((2, 2), output4),
]
for (row, col), panel_tensor in result_panels:
    axs[row, col].imshow(to_img(panel_tensor))

# Hide the axes on every panel, including the empty one.
plt.axis('off')
for ax in axs.flat:
    ax.axis('off')
fig.tight_layout()
# Timing run: style transfer initialised from uniform noise.
content1_path = "./images/content/wally.jpg"
content_img1 = load_image(content1_path)
style2_path = "./images/style/the_scream.jpeg"
style_img2 = load_image(style2_path)
# Broadcast images that do not have 3 channels up to 3 channels.
if content_img1.shape[1] != 3:
    content_img1 = content_img1.expand(-1, 3, -1, -1)
if style_img2.shape[1] != 3:
    style_img2 = style_img2.expand(-1, 3, -1, -1)
# align_corners=False is what the default resolves to; spelling it out
# avoids the UserWarning PyTorch emits for bicubic mode.
content_img1 = torch.nn.functional.interpolate(content_img1, size=512, mode="bicubic", align_corners=False)
style_img2 = torch.nn.functional.interpolate(style_img2, size=512, mode="bicubic", align_corners=False)
# Shape the noise after this cell's own content image rather than the
# `content_img` left over from an earlier experiment.
input_img = torch.rand_like(content_img1)
input_img.requires_grad = True
old_t = time()
output5 = run_optimization(cnn, content_img1, style_img2, input_img, num_steps=300, cont_layers=content_layers_default, style_layers=style_layers_default)
print(time()-old_t, " seconds")
imshow2(output5, "noise as input")
Building the style transfer model.. Optimizing.. 15.556963205337524 seconds
# Timing run: style transfer initialised from the content image.
content1_path = "./images/content/wally.jpg"
content_img1 = load_image(content1_path)
style2_path = "./images/style/the_scream.jpeg"
style_img2 = load_image(style2_path)
# Broadcast images that do not have 3 channels up to 3 channels.
if content_img1.shape[1] != 3:
    content_img1 = content_img1.expand(-1, 3, -1, -1)
if style_img2.shape[1] != 3:
    style_img2 = style_img2.expand(-1, 3, -1, -1)
# align_corners=False is what the default resolves to; spelling it out
# avoids the UserWarning PyTorch emits for bicubic mode.
content_img1 = torch.nn.functional.interpolate(content_img1, size=512, mode="bicubic", align_corners=False)
style_img2 = torch.nn.functional.interpolate(style_img2, size=512, mode="bicubic", align_corners=False)
input_img = content_img1.clone().detach()
# Enable gradients explicitly, as every other run in this file does.
input_img.requires_grad = True
old_t = time()
output5 = run_optimization(cnn, content_img1, style_img2, input_img, num_steps=300, cont_layers=content_layers_default, style_layers=style_layers_default)
print(time()-old_t, " seconds")
# This run starts from the content image, so the title must not say "noise".
imshow2(output5, "content as input")
Building the style transfer model.. Optimizing.. 15.552756786346436 seconds
We compare the two initializations while applying the same content and style losses in two different experiments. We find no difference in run time, but the content-based initialization gives much better results.
# Style transfer on the heeler (content) / thaneeya (style) pair.
content1_path = "./images/content/heeler.jpg"
content_img1 = load_image(content1_path)
style1_path = "./images/style/thaneeya.jpg"
style_img1 = load_image(style1_path)
# Broadcast images that do not have 3 channels up to 3 channels.
if content_img1.shape[1] != 3:
    content_img1 = content_img1.expand(-1, 3, -1, -1)
if style_img1.shape[1] != 3:
    style_img1 = style_img1.expand(-1, 3, -1, -1)
# align_corners=False is what the default resolves to; spelling it out
# avoids the UserWarning PyTorch emits for bicubic mode.
content_img1 = torch.nn.functional.interpolate(content_img1, size=512, mode="bicubic", align_corners=False)
style_img1 = torch.nn.functional.interpolate(style_img1, size=512, mode="bicubic", align_corners=False)
# Start from the content image; the layer lists are the ones still set by
# the earlier style-transfer experiment.
input_img = content_img1.clone().detach()
input_img.requires_grad = True
output4 = run_optimization(cnn, content_img1, style_img1, input_img, num_steps=300, cont_layers=content_layers_default, style_layers=style_layers_default)
imshow2(content_img1, "content")
imshow2(style_img1, "style")
imshow2(output4, "my result")
Building the style transfer model.. Optimizing..
I blend two of my own images: the content is a puppy and the style is abstract art.