import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import copy
import sys
from utils import load_image, device, imshow, imConvertForDisplay
import torch.nn.functional as F
"""Loss Functions
--------------
Content Loss
~~~~~~~~~~~~
The content loss is a function that represents a weighted version of the
content distance for an individual layer. The function takes the feature
maps $F_{XL}$ of a layer $L$ in a network processing input $X$ and returns the
weighted content distance $w_{CL} \cdot D_C^L(X,C)$ between the image $X$ and the
content image $C$. The feature maps of the content image ($F_{CL}$) must be
known by the function in order to calculate the content distance. We
implement this function as a torch module with a constructor that takes
$F_{CL}$ as an input. The distance $\|F_{XL} - F_{CL}\|^2$ is the mean square error
between the two sets of feature maps, and can be computed using ``nn.MSELoss``.
We will add this content loss module directly after the convolution
layer(s) that are being used to compute the content distance. This way
each time the network is fed an input image the content losses will be
computed at the desired layers, and because of autograd, all the
gradients will be computed. Now, in order to make the content loss layer
transparent we must define a ``forward`` method that computes the content
loss and then returns the layer’s input. The computed loss is saved as a
parameter of the module.
"""
class ContentLoss(nn.Module):
def __init__(self, target,):
super(ContentLoss, self).__init__()
# detach the target content from the graph that produced it, so that
# gradients are not tracked through the target during optimization
self.target = target.detach()
def forward(self, input):
# this needs to be a passthrough where you save the appropriate loss value
self.loss = torch.mean(torch.square(self.target-input))
return input
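# A minimal, optional sanity check (hypothetical random tensors, not part of the
# assignment pipeline): the module should pass its input through unchanged while
# stashing the MSE to the cached target, matching what nn.MSELoss would give.
_target = torch.randn(1, 8, 4, 4)
_content_loss = ContentLoss(_target)
_x = torch.randn(1, 8, 4, 4)
assert torch.equal(_content_loss(_x), _x)                           # transparent pass-through
assert torch.isclose(_content_loss.loss, F.mse_loss(_x, _target))   # same value as the MSE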
"""Additionally, VGG networks are trained on images with each channel
normalized by mean=[0.485, 0.456, 0.406] and std=[0.229, 0.224, 0.225].
We will use them to normalize the image before sending it into the network.
"""
cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(device)
cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device)
# create a module to normalize input image so we can easily put it in a
# nn.Sequential
class Normalization(nn.Module):
def __init__(self, mean=cnn_normalization_mean, std=cnn_normalization_std):
super(Normalization, self).__init__()
self.mean3Channel = mean.view([torch.numel(mean),1,1])
self.std3Channel = std.view([torch.numel(std),1,1])
# .view the mean and std to make them [C x 1 x 1] so that they can
# directly work with image Tensor of shape [B x C x H x W].
# B is batch size. C is number of channels. H is height and W is width.
def forward(self, img):
# normalize img
return torch.div(img - self.mean3Channel, self.std3Channel)
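# Quick broadcasting check (dummy input, illustration only): the [C x 1 x 1]
# mean/std broadcast against a [B x C x H x W] image without reshaping the image.
_dummy = torch.rand(1, 3, 8, 8, device=device)
assert Normalization()(_dummy).shape == _dummy.shape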
def get_image_optimizer(input_img):
# we recommend that you use the L-BFGS optimizer to fit the image target
# set up an optimizer for the input image pixel values
# make sure to specify that we need gradients for the input_image
viewWithGrad = input_img.requires_grad_()
optimizer = optim.LBFGS([viewWithGrad])
return optimizer
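# Sketch of the L-BFGS closure pattern on a toy scalar problem (illustration only,
# not part of the transfer pipeline): within a single step() the optimizer may
# evaluate the closure several times, which is why run_optimization below wraps
# the forward/backward pass in a closure instead of calling them directly.
_w = torch.zeros(1, requires_grad=True)
_toy_opt = optim.LBFGS([_w])
def _toy_closure():
    _toy_opt.zero_grad()
    _toy_loss = (_w - 3.0).pow(2).sum()   # minimize (w - 3)^2
    _toy_loss.backward()
    return _toy_loss
_toy_opt.step(_toy_closure)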
"""
Style Loss
~~~~~~~~~~
The style loss module is implemented similarly to the content loss
module. It will act as a transparent layer in a network that computes
the style loss of that layer. In order to calculate the style loss, we
need to compute the gram matrix $G_{XL}$. A gram matrix is the result of
multiplying a given matrix by its transposed matrix. In this application
the given matrix is a reshaped version of the feature maps $F_{XL}$ of a
layer $L$. $F_{XL}$ is reshaped to form $\hat{F}_{XL}$, a $K \times N$
matrix, where $K$ is the number of feature maps at layer $L$ and $N$ is
the length of any vectorized feature map $F_{XL}^k$. For example, the
first row of $\hat{F}_{XL}$ corresponds to the first vectorized feature
map $F_{XL}^1$.

Finally, the gram matrix must be normalized by dividing each element by
the total number of elements in the matrix. This normalization is to
counteract the fact that $\hat{F}_{XL}$ matrices with a large $N$
dimension yield larger values in the Gram matrix. These larger values
will cause the first layers (before pooling layers) to have a larger
impact during the gradient descent. Style features tend to be in the
deeper layers of the network, so this normalization step is crucial.
"""
def gram_matrix(activations):
a, b, c, d = activations.size() # a=batch size(=1)
# b=number of feature maps
# (c,d)=dimensions of a f. map (N=c*d)
viewUnwrapped = activations.view(a*b,c*d)
# efficiently get correlation as a matrix matrix product
unnormalized_gram = torch.mm(viewUnwrapped,viewUnwrapped.t())
# 'normalize' the values of the gram matrix
# by dividing by the total number of elements in the activation tensor
normalized_gram = unnormalized_gram / torch.numel(activations)
return normalized_gram
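# Optional sanity check (random activations): the gram matrix of K feature maps
# is K x K, symmetric, and scaled by the total number of activation elements.
_acts = torch.randn(1, 5, 7, 7)
_gram = gram_matrix(_acts)
assert _gram.shape == (5, 5)
assert torch.allclose(_gram, _gram.t())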
"""Now the style loss module looks almost exactly like the content loss
module. The style distance is also computed using the mean square
error between $G_{XL}$ and $G_{SL}$.
"""
class StyleLoss(nn.Module):
def __init__(self, target_feature):
super(StyleLoss, self).__init__()
# need to detach and cache the appropriate thing
self.target = gram_matrix(target_feature.detach())
def forward(self, input):
# need to cache the appropriate loss value in self.loss
inputGram = gram_matrix(input)
self.loss = torch.mean(torch.square(inputGram-self.target))
return input
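# Optional sanity check (random feature maps): StyleLoss is also a pass-through,
# and feeding it its own target features gives a zero style distance, since the
# comparison happens between gram matrices rather than raw activations.
_feat = torch.randn(1, 6, 5, 5)
_style_loss = StyleLoss(_feat)
assert torch.equal(_style_loss(_feat), _feat)   # transparent pass-through
assert _style_loss.loss.item() < 1e-8           # identical input -> zero style distance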
"""A Sequential
module contains an ordered list of child modules. For
instance, vgg19.features
contains a sequence (Conv2d, ReLU, MaxPool2d,
Conv2d, ReLU…) aligned in the right order of depth. We need to add our
content loss and style loss layers immediately after the convolution
layer they are detecting. To do this we must create a new Sequential
module that has content loss and style loss modules correctly inserted.
"""
# desired depth layers to compute style/content losses :
content_layers_default = ['conv_4']
style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']
def get_model_and_losses(cnn, style_img, content_img,
content_layers=content_layers_default,
style_layers=style_layers_default):
cnn = copy.deepcopy(cnn)
# build a sequential model consisting of a Normalization layer
# then all the layers of the VGG feature network along with ContentLoss and StyleLoss
# layers in the specified places
# just in order to have an iterable access to or list of content/style
# losses
content_losses = []
style_losses = []
# assuming that cnn is a nn.Sequential, so we make a new nn.Sequential
# to put in modules that are supposed to be activated sequentially
# here if you need a nn.ReLU layer, make sure to use inplace=False
# as the in place version interferes with the loss layers
# trim off the layers after the last content and style losses
# as they are vestigial
normalization = Normalization().to(device)
model = nn.Sequential(normalization)
i = 0
# loop over layers and add them to new model
for layer in cnn.children():
if isinstance(layer, nn.Conv2d):
i += 1
name = 'conv_{}'.format(i)
elif isinstance(layer, nn.ReLU):
name = 'relu_{}'.format(i)
layer = nn.ReLU(inplace=False) # replace without inplace
elif isinstance(layer, nn.MaxPool2d):
name = 'pool_{}'.format(i)
elif isinstance(layer, nn.BatchNorm2d):
name = 'bn_{}'.format(i)
else:
raise RuntimeError('Unrecognized layer: {}'.format(layer.__class__.__name__))
model.add_module(name, layer)
if name in content_layers:
# add content loss:
content_loss = ContentLoss(model(content_img).detach())
model.add_module("content_loss_{}".format(i), content_loss)
content_losses.append(content_loss)
if name in style_layers:
# add style loss:
style_loss = StyleLoss(model(style_img).detach())
model.add_module("style_loss_{}".format(i), style_loss)
style_losses.append(style_loss)
# chop layers after the last content and style losses
for i in range(len(model) - 1, -1, -1): # iterate in reverse to find the last loss module we added
if isinstance(model[i], ContentLoss) or isinstance(model[i], StyleLoss):
break
model = model[:(i + 1)] # clear from after that last loss onward
return model, style_losses, content_losses
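# Hedged usage sketch on a tiny stand-in network and random images (so it runs
# without downloading VGG weights): with one content layer and one style layer
# requested, exactly one ContentLoss and one StyleLoss module should be inserted,
# and everything after the last loss module is trimmed away.
_tiny_cnn = nn.Sequential(nn.Conv2d(3, 4, 3, padding=1), nn.ReLU(),
                          nn.Conv2d(4, 4, 3, padding=1), nn.ReLU()).to(device)
_img = torch.rand(1, 3, 16, 16, device=device)
_m, _sls, _cls = get_model_and_losses(_tiny_cnn, _img, _img,
                                      content_layers=['conv_2'], style_layers=['conv_1'])
print(len(_sls), len(_cls))   # expected: 1 1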
"""Finally, we must define a function that performs the neural transfer. For
each iteration of the networks, it is fed an updated input and computes
new losses. We will run the backward
methods of each loss module to
dynamicaly compute their gradients. The optimizer requires a “closure”
function, which reevaluates the module and returns the loss.
We still have one final constraint to address. The network may try to optimize the input with values that exceed the 0 to 1 tensor range for the image. We can address this by correcting the input values to be between 0 to 1 each time the network is run.
"""
def run_optimization(cnn, content_img, style_img, input_img, use_content=True, use_style=True, num_steps=300,
style_weight=1000000, content_weight=1, content_layers=content_layers_default, style_layers=style_layers_default):
"""Run the image reconstruction, texture synthesis, or style transfer."""
print('Building the style transfer model..')
# get your model, style, and content losses
# get the optimizer
model, style_losses, content_losses = get_model_and_losses(cnn,
style_img, content_img, content_layers=content_layers, style_layers=style_layers)
optimizer = get_image_optimizer(input_img)
# run model training, with one weird caveat
# we recommend you use LBFGS, an algorithm which preconditions the gradient
# with an approximate Hessian taken from only gradient evaluations of the function
# this means that the optimizer might call your function multiple times per step, so as
# to numerically approximate the derivative of the gradients (the Hessian)
# so you need to define a function
# def closure():
# here
# which does the following:
# clear the gradients
# compute the loss and its gradient
# return the loss
run = [0]
while run[0] <= num_steps:
def closure():
# one more hint: the images must be in the range [0, 1]
# but the optimizer doesn't know that
# so you will need to clamp the img values to be in that range after every step
input_img.data.clamp_(0, 1) # optimizer is searching pixel values, it might give us something outside domain
optimizer.zero_grad() # clear current gradients
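# forward pass: the output itself is discarded; the inserted ContentLoss and
# StyleLoss modules record their losses as a side effect of this call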
model(input_img)
# compute losses
style_score = 0
content_score = 0
for sl in style_losses:
style_score += sl.loss
for cl in content_losses:
content_score += cl.loss
# weight them
style_score *= style_weight
content_score *= content_weight
# back-propagate; the optimizer step will then update the input image
loss = 0
if use_style:
loss += style_score
if use_content:
loss += content_score
loss.backward()
run[0] += 1
# if run[0] % 50 == 0:
# print("run {}:".format(run))
# print('Style Loss : {:4f} Content Loss: {:4f}'.format(
# style_score.item(), content_score.item()))
# print()
return style_score + content_score
optimizer.step(closure)
# clamp again, since optimizer doesn't know lower and upper bounds
input_img.data.clamp_(0, 1)
# make sure to clamp once you are done
return input_img
def main(style_img_path, content_img_path, content_layers=content_layers_default, style_layers=style_layers_default):
# we've loaded the images for you
style_img = load_image(style_img_path)
content_img = load_image(content_img_path)
# interactive matplotlib mode
plt.ion()
minWidth = min(style_img.shape[3],content_img.shape[3])
minHeight = min(style_img.shape[2],content_img.shape[2])
style_img = style_img[:,:,0:minHeight,0:minWidth]
content_img = content_img[:,:,0:minHeight,0:minWidth]
assert style_img.size() == content_img.size(), \
"we need to import style and content images of the same size"
# plot the original input image:
plt.figure()
imshow(style_img, title='Style Image')
plt.figure()
imshow(content_img, title='Content Image')
# we load a pretrained VGG19 model from the PyTorch models library
# but only the feature extraction part (conv layers)
# and configure it for evaluation
cnn = models.vgg19(pretrained=True).features.to(device).eval()
# image reconstruction
print("Performing Image Reconstruction from white noise initialization")
input_img = torch.rand(content_img.shape, device=device)# random noise of the size of content_img on the correct device
output = run_optimization(cnn, content_img, style_img, input_img, use_content=True, use_style=False) # reconstruct the image from the noise
plt.figure()
imshow(output, title='Reconstructed Image')
# texture synthesis
print("Performing Texture Synthesis from white noise initialization")
input_img = torch.rand(content_img.shape, device=device)# random noise of the size of content_img on the correct device
output = run_optimization(cnn, content_img, style_img, input_img, use_content=False, use_style=True)# synthesize a texture like style_image
plt.figure()
imshow(output, title='Synthesized Texture')
# style transfer
input_img = torch.rand(content_img.shape, device=device)# random noise of the size of content_img on the correct device
output = run_optimization(cnn, content_img, style_img, input_img, use_content=True, use_style=True, style_weight=10000)# transfer the style from the style_img to the content image
plt.figure()
imshow(output, title='Output Image from noise')
print("Performing Style Transfer from content image initialization")
input_img = content_img.clone()
output = run_optimization(cnn, content_img, style_img, input_img, use_content=True, use_style=True) #transfer the style from the style_img to the content image
plt.figure()
imshow(output, title='Output Image from content')
plt.ioff()
plt.show()
# desired depth layers to compute style/content losses :
content_layers = ['conv_4']
style_layers = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']
style_image_path = 'images/style/frida_kahlo.jpeg'
content_img_path = 'images/content/wally.jpg'
main(style_image_path, content_img_path)
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
Performing Texture Synthesis from white noise initialization
Building the style transfer model..
Building the style transfer model..
Performing Style Transfer from content image initialization
Building the style transfer model..
def mainContent(style_img_path, content_img_path, content_layers=content_layers_default, style_layers=style_layers_default):
# we've loaded the images for you
style_img = load_image(style_img_path)
content_img = load_image(content_img_path)
# interactive matplotlib mode
plt.ion()
minWidth = min(style_img.shape[3],content_img.shape[3])
minHeight = min(style_img.shape[2],content_img.shape[2])
style_img = style_img[:,:,0:minHeight,0:minWidth]
content_img = content_img[:,:,0:minHeight,0:minWidth]
assert style_img.size() == content_img.size(), \
"we need to import style and content images of the same size"
# we load a pretrained VGG19 model from the PyTorch models library
# but only the feature extraction part (conv layers)
# and configure it for evaluation
cnn = models.vgg19(pretrained=True).features.to(device).eval()
# image reconstruction
print("Performing Image Reconstruction from white noise initialization")
input_img = torch.rand(content_img.shape, device=device)# random noise of the size of content_img on the correct device
output = run_optimization(cnn, content_img, style_img, input_img, use_content=True, use_style=False, content_layers=content_layers) # reconstruct the image from the noise
plt.figure()
imshow(output, title='Reconstructed Image')
# desired depth layers to compute style/content losses :
for singleLayer in ['conv_2', 'conv_4', 'conv_8', 'conv_16']:
print(singleLayer)
content_layers = [singleLayer]
style_image_path = 'images/style/frida_kahlo.jpeg'
content_img_path = 'images/content/wally.jpg'
mainContent(style_image_path, content_img_path, content_layers=content_layers)
conv_2
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
conv_4
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
conv_8
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
conv_16
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
The layer we were directed to use, conv_4, seems to offer a good reconstruction. Later layers are hard to optimize for reconstruction. I believe that the later the layer, the less convex the optimization problem, allowing for many (perhaps effectively infinite) local minima. Interestingly, conv_16 is able to take a few steps in its optimization before blowing up.
def mainContent(style_img_path, content_img_path, content_layers=content_layers_default, style_layers=style_layers_default):
# we've loaded the images for you
style_img = load_image(style_img_path)
content_img = load_image(content_img_path)
# interactive matplotlib mode
plt.ion()
minWidth = min(style_img.shape[3],content_img.shape[3])
minHeight = min(style_img.shape[2],content_img.shape[2])
style_img = style_img[:,:,0:minHeight,0:minWidth]
content_img = content_img[:,:,0:minHeight,0:minWidth]
assert style_img.size() == content_img.size(), \
"we need to import style and content images of the same size"
# we load a pretrained VGG19 model from the PyTorch models library
# but only the feature extraction part (conv layers)
# and configure it for evaluation
cnn = models.vgg19(pretrained=True).features.to(device).eval()
# image reconstruction
print("Performing Image Reconstruction from white noise initialization")
input_img = torch.rand(content_img.shape, device=device)# random noise of the size of content_img on the correct device
plt.figure()
imshow(input_img, title='Initialization')
output = run_optimization(cnn, content_img, style_img, input_img, use_content=True, use_style=False, content_layers=content_layers, num_steps=50) # reconstruct the image from the noise
plt.figure()
imshow(output, title='After 50 iterations')
output = run_optimization(cnn, content_img, style_img, output, use_content=True, use_style=False, content_layers=content_layers, num_steps=150) # reconstruct the image from the noise
plt.figure()
imshow(output, title='Reconstructed Image')
content_img_path = 'images/content/phipps.jpeg'
content_img = load_image(content_img_path)
plt.figure()
imshow(content_img, title='Original Image')
for seed in [6,17]:
torch.manual_seed(seed)
print("seed:", seed)
content_layers = ['conv_4']
style_image_path = 'images/style/frida_kahlo.jpeg'
mainContent(style_image_path, content_img_path, content_layers=content_layers)
conv_16
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
Building the style transfer model..
conv_16
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
Building the style transfer model..
To better highlight the differences due to initialization, I chose to include an intermediate result after fewer iterations. However, with different initializations the optimization seems to follow a very similar path to the final result.
import random
def mainStyle(style_img_path, content_img_path, content_layers=content_layers_default, style_layers=style_layers_default):
# we've loaded the images for you
style_img = load_image(style_img_path)
content_img = load_image(content_img_path)
# interactive matplotlib mode
plt.ion()
minWidth = min(style_img.shape[3],content_img.shape[3])
minHeight = min(style_img.shape[2],content_img.shape[2])
style_img = style_img[:,:,0:minHeight,0:minWidth]
content_img = content_img[:,:,0:minHeight,0:minWidth]
assert style_img.size() == content_img.size(), \
"we need to import style and content images of the same size"
# we load a pretrained VGG19 model from the PyTorch models library
# but only the feature extraction part (conv layers)
# and configure it for evaluation
cnn = models.vgg19(pretrained=True).features.to(device).eval()
# image reconstruction
print("Performing Image Reconstruction from white noise initialization")
input_img = torch.rand(content_img.shape, device=device)# random noise of the size of content_img on the correct device
output = run_optimization(cnn, content_img, style_img, input_img, use_content=False, use_style=True, content_layers=content_layers, num_steps=150) # reconstruct the image from the noise
plt.figure()
imshow(output, title='Reconstructed Image')
# From https://stackoverflow.com/questions/22229796/choose-at-random-from-combinations
def random_combination(iterable, r):
"Random selection from itertools.combinations(iterable, r)"
pool = tuple(iterable)
n = len(pool)
indices = sorted(random.sample(range(n), r))
return tuple(pool[i] for i in indices)
# desired depth layers to compute style/content losses :
for i in range(5):
style_layers = random_combination(['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5','conv_7','conv_11','conv_15'],random.randint(2,4))
print(style_layers)
style_image_path = 'images/style/frida_kahlo.jpeg'
content_img_path = 'images/content/wally.jpg'
mainStyle(style_image_path, content_img_path, style_layers=style_layers)
('conv_2', 'conv_7', 'conv_15')
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
('conv_3', 'conv_4')
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
('conv_4', 'conv_11', 'conv_15')
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
('conv_2', 'conv_4', 'conv_5')
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
('conv_4', 'conv_5', 'conv_11')
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
It is hard to compare the different texture images. I think that all of them look similar across the different combinations of layers. It seems the important thing is using the gram matrix, which captures a distribution of features, instead of optimizing the raw feature maps directly.
def mainStyle(style_img_path, content_img_path, content_layers=content_layers_default, style_layers=style_layers_default):
# we've loaded the images for you
style_img = load_image(style_img_path)
content_img = load_image(content_img_path)
# interactive matplotlib mode
plt.ion()
minWidth = min(style_img.shape[3],content_img.shape[3])
minHeight = min(style_img.shape[2],content_img.shape[2])
style_img = style_img[:,:,0:minHeight,0:minWidth]
content_img = content_img[:,:,0:minHeight,0:minWidth]
assert style_img.size() == content_img.size(), \
"we need to import style and content images of the same size"
# we load a pretrained VGG19 model from the PyTorch models library
# but only the feature extraction part (conv layers)
# and configure it for evaluation
cnn = models.vgg19(pretrained=True).features.to(device).eval()
# image reconstruction
print("Performing Image Reconstruction from white noise initialization")
input_img = torch.rand(content_img.shape, device=device)# random noise of the size of content_img on the correct device
plt.figure()
imshow(input_img, title='Initialization')
output = run_optimization(cnn, content_img, style_img, input_img, use_content=False, use_style=True, content_layers=content_layers, num_steps=50) # reconstruct the image from the noise
plt.figure()
imshow(output, title='After 50 iterations')
output = run_optimization(cnn, content_img, style_img, output, use_content=False, use_style=True, content_layers=content_layers, num_steps=150) # reconstruct the image from the noise
plt.figure()
imshow(output, title='Reconstructed Image')
style_image_path = 'images/style/frida_kahlo.jpeg'
style_image = load_image(style_image_path)
plt.figure()
imshow(style_image, title='Original Image')
for seed in [6,17]:
torch.manual_seed(seed)
print("seed:", seed)
mainStyle(style_image_path, content_img_path)
conv_16
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
Building the style transfer model..
conv_16
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
Building the style transfer model..
In this case, the texture images are clearly different, but seem to be sampled from similar distributions. It is interesting to me that the intermediate results again look similar.
def mainStyle(style_img_path, content_img_path, content_layers=content_layers_default, style_layers=style_layers_default, style_weight=10000):
# we've loaded the images for you
style_img = load_image(style_img_path)
content_img = load_image(content_img_path)
# interactive matplotlib mode
plt.ion()
minWidth = min(style_img.shape[3],content_img.shape[3])
minHeight = min(style_img.shape[2],content_img.shape[2])
style_img = style_img[:,:,0:minHeight,0:minWidth]
content_img = content_img[:,:,0:minHeight,0:minWidth]
assert style_img.size() == content_img.size(), \
"we need to import style and content images of the same size"
# we load a pretrained VGG19 model from the PyTorch models library
# but only the feature extraction part (conv layers)
# and configure it for evaluation
cnn = models.vgg19(pretrained=True).features.to(device).eval()
# style transfer
input_img = torch.rand(content_img.shape, device=device)# random noise of the size of content_img on the correct device
output = run_optimization(cnn, content_img, style_img, input_img, use_content=True, use_style=True, style_weight=style_weight)# transfer the style from the style_img to the content image
plt.figure()
imshow(output, title='Output Image from noise')
print("Performing Style Transfer from content image initialization")
input_img = content_img.clone()
output = run_optimization(cnn, content_img, style_img, input_img, use_content=True, use_style=True) #transfer the style from the style_img to the content image
plt.figure()
imshow(output, title='Output Image from content')
plt.ioff()
plt.show()
content_layers = ['conv_4']
style_layers = ['conv_1', 'conv_3', 'conv_5', 'conv_7', 'conv_9']
style_image_path = 'images/style/picasso.jpg'
content_img_path = 'images/content/phipps.jpeg'
# plot the original input image:
plt.figure()
imshow(load_image(style_image_path), title='Style Image')
plt.figure()
imshow(load_image(content_img_path), title='Content Image')
for style_weight in [10000,100000,1000000]:
print("style_weight:",style_weight)
mainStyle(style_image_path, content_img_path, style_weight=style_weight)
style_weight: 10000
Building the style transfer model..
Performing Style Transfer from content image initialization
Building the style transfer model..
style_weight: 100000
Building the style transfer model..
Performing Style Transfer from content image initialization
Building the style transfer model..
style_weight: 1000000
Building the style transfer model..
Performing Style Transfer from content image initialization
Building the style transfer model..
Modifying the style weight affects the final image only slightly when starting from the content image, but it is interesting that the weight affects the result much more when starting from random noise.
def mainStyle(style_img_path, content_img_path, content_layers=content_layers_default, style_layers=style_layers_default, style_weight=10000):
# we've loaded the images for you
style_img = load_image(style_img_path)
content_img = load_image(content_img_path)
# interactive matplotlib mode
plt.ion()
minWidth = min(style_img.shape[3],content_img.shape[3])
minHeight = min(style_img.shape[2],content_img.shape[2])
style_img = style_img[:,:,0:minHeight,0:minWidth]
content_img = content_img[:,:,0:minHeight,0:minWidth]
assert style_img.size() == content_img.size(), \
"we need to import style and content images of the same size"
# we load a pretrained VGG19 model from the PyTorch models library
# but only the feature extraction part (conv layers)
# and configure it for evaluation
cnn = models.vgg19(pretrained=True).features.to(device).eval()
# style transfer
input_img = torch.rand(content_img.shape, device=device)# random noise of the size of content_img on the correct device
output = run_optimization(cnn, content_img, style_img, input_img, use_content=True, use_style=True, style_weight=style_weight)# transfer the style from the style_img to the content image
return output
fig, axs = plt.subplots(3, 3, figsize=[11,11])
for indc,content_path in enumerate(['images/content/tubingen.jpeg','images/content/dancing.jpg']):
for inds,style_path in enumerate(['images/style/frida_kahlo.jpeg','images/style/starry_night.jpeg']):
output = mainStyle(style_path, content_path)
axs[2, indc].imshow(imConvertForDisplay(load_image(content_path)))
axs[2, indc].set_title(content_path.split('/')[-1])
axs[inds, 2].imshow(imConvertForDisplay(load_image(style_path)))
axs[inds, 2].set_title(style_path.split('/')[-1])
axs[inds, indc].imshow(imConvertForDisplay(output))
axs[inds, indc].set_title(content_path.split('/')[-1]+' '+style_path.split('/')[-1])
Building the style transfer model..
Building the style transfer model..
Building the style transfer model..
Building the style transfer model..
I really like the dancing image, which always gave interesting results. The Tubingen image seemed to produce less interesting texture in my experiments here.
def main(style_img_path, content_img_path, content_layers=content_layers_default, style_layers=style_layers_default):
# we've loaded the images for you
style_img = load_image(style_img_path)
content_img = load_image(content_img_path)
# interactive matplotlib mode
plt.ion()
minWidth = min(style_img.shape[3],content_img.shape[3])
minHeight = min(style_img.shape[2],content_img.shape[2])
style_img = style_img[:,:,0:minHeight,0:minWidth]
content_img = content_img[:,:,0:minHeight,0:minWidth]
assert style_img.size() == content_img.size(), \
"we need to import style and content images of the same size"
# plot the original input image:
plt.figure()
imshow(style_img, title='Style Image')
plt.figure()
imshow(content_img, title='Content Image')
# we load a pretrained VGG19 model from the PyTorch models library
# but only the feature extraction part (conv layers)
# and configure it for evaluation
cnn = models.vgg19(pretrained=True).features.to(device).eval()
# image reconstruction
print("Performing Image Reconstruction from white noise initialization")
input_img = torch.rand(content_img.shape, device=device)# random noise of the size of content_img on the correct device
output = run_optimization(cnn, content_img, style_img, input_img, use_content=True, use_style=False) # reconstruct the image from the noise
plt.figure()
imshow(output, title='Reconstructed Image')
# texture synthesis
print("Performing Texture Synthesis from white noise initialization")
input_img = torch.rand(content_img.shape, device=device)# random noise of the size of content_img on the correct device
output = run_optimization(cnn, content_img, style_img, input_img, use_content=False, use_style=True)# synthesize a texture like style_image
plt.figure()
imshow(output, title='Synthesized Texture')
# style transfer
input_img = torch.rand(content_img.shape, device=device)# random noise of the size of content_img on the correct device
output = run_optimization(cnn, content_img, style_img, input_img, use_content=True, use_style=True, style_weight=10000)# transfer the style from the style_img to the content image
plt.figure()
imshow(output, title='Output Image from noise')
print("Performing Style Transfer from content image initialization")
input_img = content_img.clone()
output = run_optimization(cnn, content_img, style_img, input_img, use_content=True, use_style=True) #transfer the style from the style_img to the content image
plt.figure()
imshow(output, title='Output Image from content')
plt.ioff()
plt.show()
# desired depth layers to compute style/content losses :
content_layers = ['conv_4']
style_layers = ['conv_1', 'conv_3', 'conv_5', 'conv_7', 'conv_9']
style_image_path = 'images/style/frida_kahlo.jpeg'
content_img_path = 'images/content/dancing.jpg'
main(style_image_path, content_img_path)
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
Performing Texture Synthesis from white noise initialization
Building the style transfer model..
Building the style transfer model..
Performing Style Transfer from content image initialization
Building the style transfer model..
Both starting from the content image and starting from random noise generated very interesting and artistic images in this case. In other tests I found that starting from the content image can converge much more quickly.
content_layers = ['conv_4']
style_layers = ['conv_1', 'conv_3', 'conv_5', 'conv_7', 'conv_9']
style_image_path = 'images/style/frida_kahlo.jpeg'
content_img_path = 'IMG_20200505_142925.jpeg'
main(style_image_path, content_img_path)
Performing Image Reconstruction from white noise initialization
Building the style transfer model..
Performing Texture Synthesis from white noise initialization
Building the style transfer model..
Building the style transfer model..
Performing Style Transfer from content image initialization
Building the style transfer model..
My image is one I took while working with imaging dummies at a test site recently. We staged the dummies during downtime to have them interact. In this case, the colors are closer to the Frida Kahlo style image than in some other images I tried. It converged, especially when initializing from the content image, to an interesting painting-like result.