import numpy as np
import skimage as sk
import skimage.io as skio
import skimage.transform
import torch.nn.functional as F
import torch
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
mpl.rcParams['figure.dpi']=150 # makes figures a little bigger
# name of the input file
imname = 'data/cathedral.jpg'
# read in the image
im = skio.imread(imname)
# convert to double (might want to do this later on to save memory)
im = sk.img_as_float(im)
# display the image
# skio.imshow(im)
# skio.show()
# compute the height of each part (just 1/3 of total)
height = np.floor(im.shape[0] / 3.0).astype(int)
# separate color channels
b = im[:height]
g = im[height: 2*height]
r = im[2*height: 3*height]
# align the images
# functions that might be useful for aligning the images include:
# np.roll, np.sum, sk.transform.rescale (for multiscale)
ag = g
ar = r
### ag = align(g, b)
### ar = align(r, b)
# create a color image
im_out = np.dstack([ar, ag, b])
# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)
# display the image
# skio.imshow(im_out)
# skio.show()
Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.
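That warning can be avoided by converting to 8-bit before saving, e.g. with img_as_ubyte (a minimal sketch of the idea; the results in this writeup were saved as float and let skimage do the conversion):
# convert to uint8 explicitly so imsave doesn't warn about the lossy float64 -> uint8 cast
# (assumes im_out is a float image in [0, 1], as above)
skio.imsave(fname, sk.img_as_ubyte(np.clip(im_out, 0, 1)))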
def sad(im1, im2):
    # given two images, computes the sum of absolute differences (SAD) over all pixels
    return np.sum(np.abs(im1 - im2))

def align(moving, ref, func=sad):
    # given two images, finds the translation of moving that best matches ref
    # initialize variables
    bestDiff = float('inf')
    bestx = 0
    besty = 0
    # exhaustive search for the best score
    for x in range(-20, 20):
        movinglocalx = np.roll(moving, x, 1)
        for y in range(-20, 20):
            movinglocaly = np.roll(movinglocalx, y, 0)
            # score this translation with the chosen metric (SAD by default)
            diff = func(movinglocaly, ref)
            if diff < bestDiff:
                bestx = x
                besty = y
                bestDiff = diff
    # return image with best offset applied
    return np.roll(np.roll(moving, besty, 0), bestx, 1)
###ag = g
###ar = r
ag = align(g, b)
ar = align(r, b)
# create a color image
im_out = np.dstack([ar, ag, b])
# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)
# display the image
skio.imshow(im_out)
skio.show()
We can also pass in another simple metric: the zero-normalized cross-correlation over the entire image.
def zncc(im1, im2):
    # given two images, computes a zero-normalized cross-correlation score over all pixels, neglecting constant terms
    # negated so that align(), which minimizes its metric, prefers high correlation
    return -np.sum(np.multiply(im1 / np.mean(im1), im2 / np.mean(im2)))
###ag = g
###ar = r
ag = align(g, b,zncc)
ar = align(r, b,zncc)
# create a color image
im_out = np.dstack([ar, ag, b])
# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)
# display the image
skio.imshow(im_out)
skio.show()
On this small image, the single-scale search found an answer quickly.
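As an aside, a fuller zero-normalized cross-correlation would subtract each image's mean and divide by its standard deviation. A sketch of that variant (my addition; the results in this writeup use the simplified version above), again negated so that align()'s minimization prefers high correlation:
def zncc_full(im1, im2):
    # full zero-normalized cross-correlation, negated so that smaller = better for align()
    z1 = (im1 - np.mean(im1)) / np.std(im1)
    z2 = (im2 - np.mean(im2)) / np.std(im2)
    return -np.sum(z1 * z2)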
# name of the input file
imname = 'data/three_generations.tif'
# read in the image
im = skio.imread(imname)
# convert to double (might want to do this later on to save memory)
im = sk.img_as_float(im)
# display the image
# skio.imshow(im)
# skio.show()
# compute the height of each part (just 1/3 of total)
height = np.floor(im.shape[0] / 3.0).astype(int)
# separate color channels
b = im[:height]
g = im[height: 2*height]
r = im[2*height: 3*height]
The function below takes two images of the same size. If the images are small enough, it searches for an alignment directly; if they are too large, it first finds an alignment on a downscaled version and then refines it at full resolution.
def alignRecursive(moving, ref, func=sad):
    # given two images, finds the translation of moving that best matches ref,
    # recursing on downscaled copies when the image is large
    # parameters
    scaleFactor = 2
    searchRange = 2
    maxHeight = 50
    # initialize variables
    bestDiff = float('inf')
    bestx = 0
    bestxLowRes = 0
    besty = 0
    bestyLowRes = 0
    # check height
    if ref.shape[0] > maxHeight:
        # resize inputs
        movingSmall = sk.transform.rescale(moving, 1/scaleFactor, anti_aliasing=True)
        refSmall = sk.transform.rescale(ref, 1/scaleFactor, anti_aliasing=True)
        # recurse with the same metric
        _, bestxLowRes, bestyLowRes = alignRecursive(movingSmall, refSmall, func)
        # scale the coarse offsets back up and apply them
        bestxLowRes *= scaleFactor
        bestyLowRes *= scaleFactor
        moving = np.roll(np.roll(moving, bestyLowRes, 0), bestxLowRes, 1)
    # refine with a small local search
    for x in range(-searchRange, searchRange):
        movinglocalx = np.roll(moving, x, 1)
        for y in range(-searchRange, searchRange):
            movinglocaly = np.roll(movinglocalx, y, 0)
            diff = func(movinglocaly, ref)
            if diff < bestDiff:
                # store new current best
                bestx = x
                besty = y
                bestDiff = diff
    # return aligned image and the total aligning offsets
    return np.roll(np.roll(moving, besty, 0), bestx, 1), bestx + bestxLowRes, besty + bestyLowRes
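With searchRange = 2 per level and scaleFactor = 2, the reach of the search at full resolution grows with the depth of the pyramid. A quick sanity check of that reach (my own sketch, reusing the parameter values above):
def effectiveReach(height, scaleFactor=2, searchRange=2, maxHeight=50):
    # effective search radius, in full-resolution pixels, of alignRecursive:
    # each pyramid level contributes +/- searchRange, scaled by the levels above it
    scale = 1
    while height > maxHeight:
        height //= scaleFactor
        scale *= scaleFactor
    reach = 0
    while scale >= 1:
        reach += searchRange * scale
        scale //= scaleFactor
    return reach

# a ~3400-pixel-tall channel gives a reach of about +/-510 pixels,
# comfortably more than the ~100-170 pixel shifts found for these plates
print(effectiveReach(3400))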
ag,bestxg,bestyg = alignRecursive(g, b)
ar,bestxr,bestyr = alignRecursive(r, b)
# create a color image
im_out = np.dstack([ar, ag, b])
# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)
print("offset g = ({},{})".format(bestxg, bestyg))
print("offset r = ({},{})".format(bestxr, bestyr))
# display the image
skio.imshow(im_out)
skio.show()
offset g = (7,53) offset r = (8,111)
ZNCC gives basically the same result on this image.
ag,bestxg,bestyg = alignRecursive(g, b, zncc)
ar,bestxr,bestyr = alignRecursive(r, b, zncc)
# create a color image
im_out = np.dstack([ar, ag, b])
# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)
print("offset g = ({},{})".format(bestxg, bestyg))
print("offset r = ({},{})".format(bestxr, bestyr))
# display the image
skio.imshow(im_out)
skio.show()
offset g = (9,55) offset r = (9,111)
Alignment turned out well on this image! There is a small difference between the SAD and ZNCC offsets, but I don't see a difference in the resulting images.
However, there is still a lot of border, which we can now try to crop out.
def cropOutMissingEdge(im):
    # searches for the major dark-to-bright edge closest to the maximum border, then crops the detected border off
    # parameters
    maxBorder = im.shape[0]//10  # how much of the image to search; very important for some images
    filtwidth = 5  # width of the difference used in edge detection (an offset rather than true averaging)
    bd = -50  # border threshold
    # consider the darkest channel, so the last border is the one detected
    colSum = np.amin(np.sum(im, axis=0), axis=1)  # per-column sums
    rowSum = np.amin(np.sum(im, axis=1), axis=1)  # per-row sums
    # search on top and left
    colStart = maxBorder-np.argmax((colSum[0:maxBorder-filtwidth]-colSum[filtwidth:maxBorder])[::-1] < bd)
    rowStart = maxBorder-np.argmax((rowSum[0:maxBorder-filtwidth]-rowSum[filtwidth:maxBorder])[::-1] < bd)
    # search on bottom and right
    colEnd = maxBorder-np.argmax((colSum[-1-filtwidth:-maxBorder-filtwidth:-1]-colSum[:-maxBorder:-1])[::-1] < bd)
    rowEnd = maxBorder-np.argmax((rowSum[-1-filtwidth:-maxBorder-filtwidth:-1]-rowSum[:-maxBorder:-1])[::-1] < bd)
    # crop and return (rows indexed by the row borders, columns by the column borders)
    return im[rowStart:-rowEnd, colStart:-colEnd]
# demo on 3 gen
imcropped = cropOutMissingEdge(im_out)
skio.imshow(imcropped)
skio.show()
The crop isn't tight and could likely be improved further.
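One possible improvement (a sketch of my own, not used for any of the results here) is to also crop away the rows and columns that np.roll wrapped around during alignment, using the offsets returned by alignRecursive:
def cropByOffsets(im, offsets):
    # crop away the rows/columns that np.roll wrapped around when shifting each channel
    # offsets is a list of (x, y) shifts, e.g. [(bestxg, bestyg), (bestxr, bestyr)]
    xs = [x for x, _ in offsets] + [0]
    ys = [y for _, y in offsets] + [0]
    top, bottom = max(ys), min(ys)   # top >= 0, bottom <= 0 because of the appended 0
    left, right = max(xs), min(xs)
    h, w = im.shape[:2]
    return im[top:h + bottom, left:w + right]

# e.g. cropByOffsets(im_out, [(bestxg, bestyg), (bestxr, bestyr)])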
# name of the input file
imname = 'data/train.tif'
# read in the image
im = skio.imread(imname)
# convert to double (might want to do this later on to save memory)
im = sk.img_as_float(im)
# display the image
# skio.imshow(im)
# skio.show()
# compute the height of each part (just 1/3 of total)
height = np.floor(im.shape[0] / 3.0).astype(int)
# separate color channels
b = im[:height]
g = im[height: 2*height]
r = im[2*height: 3*height]
ag,_,_ = alignRecursive(g, b, zncc)
ar,_,_ = alignRecursive(r, b, zncc)
# create a color image
im_out = np.dstack([ar, ag, b])
# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)
# display the image
# skio.imshow(im_out)
# skio.show()
imcropped = cropOutMissingEdge(im_out)
skio.imshow(imcropped)
skio.show()
Imperfect alignment, but a good crop.
# name of the input file
imname = 'data/icon.tif'
# read in the image
im = skio.imread(imname)
# convert to double (might want to do this later on to save memory)
im = sk.img_as_float(im)
# display the image
# skio.imshow(im)
# skio.show()
# compute the height of each part (just 1/3 of total)
height = np.floor(im.shape[0] / 3.0).astype(int)
# separate color channels
b = im[:height]
g = im[height: 2*height]
r = im[2*height: 3*height]
ag,_,_ = alignRecursive(g, b)
ar,_,_ = alignRecursive(r, b)
# create a color image
im_out = np.dstack([ar, ag, b])
# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)
# display the image
skio.imshow(im_out)
skio.show()
imcropped = cropOutMissingEdge(im_out)
skio.imshow(imcropped)
skio.show()
Alignment looks very good. The crop is tight on most sides, but a dark-to-white edge fooled the detector.
# name of the input file
imname = 'data/village.tif'
# read in the image
im = skio.imread(imname)
# convert to double (might want to do this later on to save memory)
im = sk.img_as_float(im)
# display the image
# skio.imshow(im)
# skio.show()
# compute the height of each part (just 1/3 of total)
height = np.floor(im.shape[0] / 3.0).astype(int)
# separate color channels
b = im[:height]
g = im[height: 2*height]
r = im[2*height: 3*height]
ag,_,_ = alignRecursive(g, b)
ar,_,_ = alignRecursive(r, b)
# create a color image
im_out = np.dstack([ar, ag, b])
# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)
# display the image
skio.imshow(im_out)
skio.show()
imcropped = cropOutMissingEdge(im_out)
skio.imshow(imcropped)
skio.show()
Alignment looks good on some parts of the image, but it seems like pure translation may not be able to solve this one. Cropping is tight on all sides.
The section below uses PyTorch to set up a gradient-descent search over scale and translation. Scale and translation are passed in through an affine transform, which is fully differentiable. When updating the affine transform, I discarded the parts of the gradient that would change parameters other than scale and translation. One enhancement, to widen the local cost basin, was to smooth the reference image.
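Concretely, the 2x3 affine matrix only keeps scale on the diagonal and translation in the last column, and the off-diagonal (shear/rotation) gradient entries are zeroed before each update. A small sketch of that masking idea (the mask tensor is my own restatement, not code from the cell further down):
import torch

# theta parameterizes the sampling grid: scale on the diagonal, translation in the last column
theta = torch.tensor([[[1.0, 0.0, 0.0],
                       [0.0, 1.0, 0.0]]], requires_grad=True)
# mask that keeps only the scale and translation entries of the gradient
gradMask = torch.tensor([[[1.0, 0.0, 1.0],
                          [0.0, 1.0, 1.0]]])
# after loss.backward():  theta.data -= step * gradMask * theta.grad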
# name of the input file
#imname = 'data/cathedral.jpg'
#imname = 'data/train.tif'
imname = 'data/icon.tif'
# read in the image
im = skio.imread(imname)
# convert to double (might want to do this later on to save memory)
im = sk.img_as_float(im)
# display the image
skio.imshow(im)
skio.show()
# compute the height of each part (just 1/3 of total)
height = np.floor(im.shape[0] / 3.0).astype(int)
# separate color channels
b = im[:height]
g = im[height: 2*height]
r = im[2*height: 3*height]
from scipy.ndimage import gaussian_filter
# convert to float32 to reduce memory use
gFloat = np.float32(g)
rFloat = np.float32(r)
bFloat = np.float32(b)
def alignGradientPT(moving, ref):
    # step sizes (tuned by hand)
    # small image
    #scalestep = .0000001
    #transstep = .0000001
    # large image
    scalestep = .0000000001
    transstep = .0000000001
    iters = 50
    filtRad = 21
    # convert to torch tensors; smoothing the reference widens the local cost basin
    ref = torch.from_numpy(gaussian_filter(ref, sigma=filtRad))
    moving = torch.from_numpy(np.expand_dims(np.expand_dims(moving, 0), 0))
    # initialize transform as identity
    theta = torch.zeros(1, 2, 3, dtype=torch.float32)
    theta[:, :, :] = torch.tensor([[1.0, 0, 0],
                                   [0, 1.0, 0]], requires_grad=True)
    theta.retain_grad()
    # initialize for plotting
    lossVals = []
    for i in range(iters):
        # pytorch affine grid, allows for a differentiable scale/translation
        grid = F.affine_grid(theta, torch.Size([1, 1, ref.shape[0], ref.shape[1]]), align_corners=False)
        movingScaled = F.grid_sample(moving, grid, align_corners=False)
        loss = torch.sum(torch.abs(ref - torch.squeeze(movingScaled)))
        #print(loss)
        loss.backward(gradient=torch.tensor(1.0), retain_graph=True)
        # condition the gradient, mostly to avoid shear and rotation
        # (note: theta.grad is never zeroed, so gradients accumulate across iterations)
        relevantGrad = theta.grad.data
        relevantGrad[0, 0, 1] = 0
        relevantGrad[0, 1, 0] = 0
        relevantGrad[0, 0, 0] = relevantGrad[0, 0, 0] * scalestep
        relevantGrad[0, 1, 1] = relevantGrad[0, 1, 1] * scalestep
        relevantGrad[0, 0, 2] = relevantGrad[0, 0, 2] * transstep
        relevantGrad[0, 1, 2] = relevantGrad[0, 1, 2] * transstep
        # gradient descent step
        theta.data = (-relevantGrad + theta.data)
        # store loss for plotting
        lossVals.append(loss.detach().numpy())
    return np.squeeze(np.float64(movingScaled.detach().numpy())), lossVals
rScaled,rloss = alignGradientPT(rFloat,gFloat)
plt.plot(rloss)
plt.title('Red SAD at iterations')
plt.xlabel('Iter')
plt.show()
bScaled,bloss = alignGradientPT(bFloat,gFloat)
plt.plot(bloss)
plt.title('Blue SAD at iterations')
plt.xlabel('Iter')
plt.show()
im_out = np.dstack([r, g, b])
print("Original")
skio.imshow(im_out)
skio.show()
im_out = np.dstack([np.squeeze(rScaled), gFloat, np.squeeze(bScaled)])
print("Aligned")
skio.imshow(im_out)
skio.show()
Original
Aligned
Alignment was pretty successful! This didn't work for all images, though.
For completeness, here is the multi-scale alignment for the full image set.
imlist = ['emir.tif','harvesters.tif','icon.tif','lady.tif','self_portrait.tif','three_generations.tif','train.tif','turkmen.tif','village.tif']
for name in imlist:
    # name of the input file
    imname = 'data/' + name
    # read in the image
    im = skio.imread(imname)
    # convert to double (might want to do this later on to save memory)
    im = sk.img_as_float(im)
    # display the image
    # skio.imshow(im)
    # skio.show()
    # compute the height of each part (just 1/3 of total)
    height = np.floor(im.shape[0] / 3.0).astype(int)
    # separate color channels
    b = im[:height]
    g = im[height: 2*height]
    r = im[2*height: 3*height]
    ag, bestxg, bestyg = alignRecursive(g, b, zncc)
    ar, bestxr, bestyr = alignRecursive(r, b, zncc)
    # create a color image
    im_out = np.dstack([ar, ag, b])
    # save the image
    fname = 'out_path/out_fname.jpg'
    skio.imsave(fname, im_out)
    print("offset g = ({},{})".format(bestxg, bestyg))
    print("offset r = ({},{})".format(bestxr, bestyr))
    # display the image
    skio.imshow(im_out)
    skio.show()
Offsets found (x, y), relative to the blue channel:
emir.tif: offset g = (11,17), offset r = (19,109)
harvesters.tif: offset g = (15,58), offset r = (5,92)
icon.tif: offset g = (14,40), offset r = (20,91)
lady.tif: offset g = (-3,59), offset r = (-15,98)
self_portrait.tif: offset g = (1,73), offset r = (-1,169)
three_generations.tif: offset g = (9,55), offset r = (9,111)
train.tif: offset g = (-4,43), offset r = (5,109)
turkmen.tif: offset g = (15,52), offset r = (-2,76)
village.tif: offset g = (11,62), offset r = (-16,137)