import numpy as np
import skimage as sk
import skimage.io as skio
import skimage.transform
import torch.nn.functional as F
import torch
import matplotlib.pyplot as plt
import matplotlib as mpl

%matplotlib inline


mpl.rcParams['figure.dpi']=150 # makes figures a little bigger


# name of the input file
imname = 'data/cathedral.jpg'

# read in the image
im = skio.imread(imname)

# convert to double (might want to do this later on to save memory)    
im = sk.img_as_float(im)
    
# display the image
# skio.imshow(im)
# skio.show()


# compute the height of each part (just 1/3 of total)
height = np.floor(im.shape[0] / 3.0).astype(np.int)

# separate color channels
b = im[:height]
g = im[height: 2*height]
r = im[2*height: 3*height]

# align the images
# functions that might be useful for aligning the images include:
# np.roll, np.sum, sk.transform.rescale (for multiscale)

ag = g
ar = r
### ag = align(g, b)
### ar = align(r, b)
# create a color image
im_out = np.dstack([ar, ag, b])

# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)

# display the image
# skio.imshow(im_out)
# skio.show()

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.


def sad(im1,im2):
    # given two images, computes SAD for all pixels
    return np.sum(np.abs(im1-im2))

def align(moving,ref,func=sad):
    # given two images, finds the translation of moving to match ref
    # initialize variables
    bestDiff = float('inf')
    bestx = 0
    besty = 0
    # search for best SAD
    for x in range(-20,20):
        movinglocalx = np.roll(moving,x,1)
        for y in range(-20,20):
            movinglocaly = np.roll(movinglocalx,y,0)
            # SAD Sum of Absolute Differences
            diff = sad(movinglocaly,ref)
            if diff < bestDiff:
                bestx = x
                besty = y
                bestDiff = diff
    # return image with best offset applied
    return np.roll(np.roll(moving,besty,0),bestx,1)


###ag = g
###ar = r
ag = align(g, b)
ar = align(r, b)
# create a color image
im_out = np.dstack([ar, ag, b])

# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)

# display the image
skio.imshow(im_out)
skio.show()

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.


def zncc(im1,im2):
    # given two images, computes zero normalized cross-correlation for all pixels, neglecting constant terms
    return np.sum(np.multiply(im1/np.mean(im1),im2/np.mean(im2)))

###ag = g
###ar = r
ag = align(g, b,zncc)
ar = align(r, b,zncc)
# create a color image
im_out = np.dstack([ar, ag, b])

# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)

# display the image
skio.imshow(im_out)
skio.show()

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.


# name of the input file
imname = 'data/three_generations.tif'

# read in the image
im = skio.imread(imname)

# convert to double (might want to do this later on to save memory)    
im = sk.img_as_float(im)
    
# display the image
# skio.imshow(im)
# skio.show()

# compute the height of each part (just 1/3 of total)
height = np.floor(im.shape[0] / 3.0).astype(np.int)

# separate color channels
b = im[:height]
g = im[height: 2*height]
r = im[2*height: 3*height]


def alignRecursive(moving,ref,func=sad):
    # given two images, finds the translation of moving to match ref
    
    # parameter
    scaleFactor = 2
    searchRange = 2
    maxHeight = 50
    # initialize variables
    bestDiff = float('inf')
    bestx = 0
    bestxLowRes = 0
    besty = 0
    bestyLowRes = 0
    # check height
    if ref.shape[0] > maxHeight:
        # Resize inputs
        movingSmall = sk.transform.rescale(moving,1/scaleFactor,anti_aliasing=1)
        refSmall = sk.transform.rescale(ref,1/scaleFactor,anti_aliasing=1)
        # recurse
        _,bestxLowRes,bestyLowRes = alignRecursive(movingSmall,refSmall)
        # resize outputs of recurse and apply
        bestxLowRes *= scaleFactor
        bestyLowRes *= scaleFactor
        moving = np.roll(np.roll(moving,bestyLowRes,0),bestxLowRes,1)
    # perform search for best SAD
    for x in range(-searchRange,searchRange):
        movinglocalx = np.roll(moving,x,1)
        for y in range(-searchRange,searchRange):
            movinglocaly = np.roll(movinglocalx,y,0)
            diff = func(movinglocaly,ref)
            if diff < bestDiff:
                # store new current best
                bestx = x
                besty = y
                bestDiff = diff
    # return aligned image and aligning offsets
    return np.roll(np.roll(moving,besty,0),bestx,1), bestx+bestxLowRes, besty + bestyLowRes


ag,bestxg,bestyg = alignRecursive(g, b)
ar,bestxr,bestyr = alignRecursive(r, b)
# create a color image
im_out = np.dstack([ar, ag, b])

# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)

print("offset g = ({},{})".format(bestxg, bestyg))
print("offset r = ({},{})".format(bestxr, bestyr))

# display the image
skio.imshow(im_out)
skio.show()

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.

offset g = (7,53)
offset r = (8,111)


ag,bestxg,bestyg = alignRecursive(g, b, zncc)
ar,bestxr,bestyr = alignRecursive(r, b, zncc)
# create a color image
im_out = np.dstack([ar, ag, b])

# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)

print("offset g = ({},{})".format(bestxg, bestyg))
print("offset r = ({},{})".format(bestxr, bestyr))

# display the image
skio.imshow(im_out)
skio.show()

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.

offset g = (9,55)
offset r = (9,111)


def cropOutMissingEdge(im):
    # Searches for major dark to bright edge closest to maximum border, then crops detected border off
    
    # parameters
    maxBorder = im.shape[0]//10 # how much of the image to search. For some images, very important
    filtwidth = 5  # how much uniform averaging to do in edge detection, actually do equivalent offset
    bd = -50 # border threshold
    
    # consider darkest channel, so last border is considered
    colSum = np.amin(np.sum(im,axis=0),axis=1)
    rowSum = np.amin(np.sum(im,axis=1),axis=1)
    
    # search on top and left
    colStart = maxBorder-np.argmax((colSum[0:maxBorder-filtwidth]-colSum[filtwidth:maxBorder])[::-1] < bd)
    rowStart = maxBorder-np.argmax((rowSum[0:maxBorder-filtwidth]-rowSum[filtwidth:maxBorder])[::-1] < bd)
  
    # search bottom and right
    colEnd = maxBorder-np.argmax((colSum[-1-filtwidth:-maxBorder-filtwidth:-1]-colSum[:-maxBorder:-1])[::-1] < bd)
    rowEnd = maxBorder-np.argmax((rowSum[-1-filtwidth:-maxBorder-filtwidth:-1]-rowSum[:-maxBorder:-1])[::-1] < bd)
    
    # crop and return
    return im[colStart:-colEnd,rowStart:-rowEnd]

# demo on 3 gen
imcropped = cropOutMissingEdge(im_out)
skio.imshow(imcropped)
skio.show()


# name of the input file
imname = 'data/train.tif'

# read in the image
im = skio.imread(imname)

# convert to double (might want to do this later on to save memory)    
im = sk.img_as_float(im)
    
# display the image
# skio.imshow(im)
# skio.show()

# compute the height of each part (just 1/3 of total)
height = np.floor(im.shape[0] / 3.0).astype(np.int)

# separate color channels
b = im[:height]
g = im[height: 2*height]
r = im[2*height: 3*height]
ag,_,_ = alignRecursive(g, b, zncc)
ar,_,_ = alignRecursive(r, b, zncc)
# create a color image
im_out = np.dstack([ar, ag, b])

# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)

# display the image
# skio.imshow(im_out)
# skio.show()

imcropped = cropOutMissingEdge(im_out)
skio.imshow(imcropped)
skio.show()

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.


# name of the input file
imname = 'data/icon.tif'

# read in the image
im = skio.imread(imname)

# convert to double (might want to do this later on to save memory)    
im = sk.img_as_float(im)
    
# display the image
# skio.imshow(im)
# skio.show()

# compute the height of each part (just 1/3 of total)
height = np.floor(im.shape[0] / 3.0).astype(np.int)

# separate color channels
b = im[:height]
g = im[height: 2*height]
r = im[2*height: 3*height]
ag,_,_ = alignRecursive(g, b)
ar,_,_ = alignRecursive(r, b)
# create a color image
im_out = np.dstack([ar, ag, b])

# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)

# display the image
skio.imshow(im_out)
skio.show()

imcropped = cropOutMissingEdge(im_out)
skio.imshow(imcropped)
skio.show()

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.


# name of the input file
imname = 'data/village.tif'

# read in the image
im = skio.imread(imname)

# convert to double (might want to do this later on to save memory)    
im = sk.img_as_float(im)
    
# display the image
# skio.imshow(im)
# skio.show()

# compute the height of each part (just 1/3 of total)
height = np.floor(im.shape[0] / 3.0).astype(np.int)

# separate color channels
b = im[:height]
g = im[height: 2*height]
r = im[2*height: 3*height]
ag,_,_ = alignRecursive(g, b)
ar,_,_ = alignRecursive(r, b)
# create a color image
im_out = np.dstack([ar, ag, b])

# save the image
fname = 'out_path/out_fname.jpg'
skio.imsave(fname, im_out)

# display the image
skio.imshow(im_out)
skio.show()

imcropped = cropOutMissingEdge(im_out)
skio.imshow(imcropped)
skio.show()

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.


# name of the input file
#imname = 'data/cathedral.jpg'
#imname = 'data/train.tif'
imname = 'data/icon.tif'
# read in the image
im = skio.imread(imname)

# convert to double (might want to do this later on to save memory)    
im = sk.img_as_float(im)
    
# display the image
skio.imshow(im)
skio.show()

# compute the height of each part (just 1/3 of total)
height = np.floor(im.shape[0] / 3.0).astype(np.int)

# separate color channels
b = im[:height]
g = im[height: 2*height]
r = im[2*height: 3*height]


from scipy.ndimage.filters import gaussian_filter

# reduce image size for memory reasons
gFloat = np.float32(g)
rFloat = np.float32(r)
bFloat = np.float32(b)

def alignGradientPT(moving,ref):
    
    
    # small image
    #scalestep = .0000001 
    #transstep = .0000001
    # large image
    scalestep = .0000000001
    transstep = .0000000001
    iters = 50
    filtRad = 21
    
    # convert to torch tensors
    ref = torch.from_numpy(gaussian_filter(ref, sigma=filtRad))
    moving = torch.from_numpy(np.expand_dims(np.expand_dims(moving,0),0))
    
    # initialize transform as identity
    theta = torch.zeros(1, 2, 3, dtype=torch.float32)
    theta[:,:,:] = torch.tensor([[1.0, 0, 0],
                                 [0, 1.0, 0]],requires_grad=True)
    theta.retain_grad()

    # initialize for plotting
    lossVals = []
    

    for i in range(iters):
        # pytorch affine, allows for differentiable scale/translation
        grid = F.affine_grid(theta, torch.Size([1, 1, np.int32(g.shape[0]), g.shape[1]]))
        movingScaled = F.grid_sample(moving, grid)
        loss = torch.sum(torch.abs(ref-torch.squeeze(movingScaled)))
        #print(loss)
        loss.backward(gradient=torch.tensor(1.0),retain_graph=True)

        # perform some conditioning on gradient, mostly to avoid shear and rotation
        relevantGrad = theta.grad.data
        relevantGrad[0,0,1] = 0
        relevantGrad[0,1,0] = 0
        relevantGrad[0,0,0] = relevantGrad[0,0,0] * scalestep
        relevantGrad[0,1,1] = relevantGrad[0,1,1] * scalestep
        relevantGrad[0,0,2] = relevantGrad[0,0,2] * transstep
        relevantGrad[0,1,2] = relevantGrad[0,1,2] * transstep
        # gradient descent!
        theta.data = (-relevantGrad  + theta.data)
        # store loss for plotting
        lossVals.append(loss.detach().numpy())
        
    return np.squeeze(np.float64(movingScaled.detach().numpy())), lossVals    

rScaled,rloss = alignGradientPT(rFloat,gFloat)

plt.plot(rloss)
plt.title('Red SAD at iterations')
plt.xlabel('Iter')
plt.show()


bScaled,bloss = alignGradientPT(bFloat,gFloat)

plt.plot(bloss)
plt.title('Blue SAD at iterations')
plt.xlabel('Iter')
plt.show()

/home/ttabor/anaconda3/envs/imSynth/lib/python3.7/site-packages/torch/nn/functional.py:3448: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.
  warnings.warn("Default grid_sample and affine_grid behavior has changed "
/home/ttabor/anaconda3/envs/imSynth/lib/python3.7/site-packages/torch/nn/functional.py:3385: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.
  warnings.warn("Default grid_sample and affine_grid behavior has changed "


im_out = np.dstack([r, g, b])
print("Original")
skio.imshow(im_out)
skio.show()


im_out = np.dstack([np.squeeze(rScaled), gFloat, np.squeeze(bScaled)])
print("Aligned")
skio.imshow(im_out)
skio.show()

Original

Aligned


imlist=['emir.tif','harvesters.tif','icon.tif','lady.tif','self_portrait.tif','three_generations.tif','train.tif','turkmen.tif','village.tif']

for im in imlist:
    # name of the input file
    imname = 'data/' + im

    # read in the image
    im = skio.imread(imname)

    # convert to double (might want to do this later on to save memory)    
    im = sk.img_as_float(im)

    # display the image
    # skio.imshow(im)
    # skio.show()

    # compute the height of each part (just 1/3 of total)
    height = np.floor(im.shape[0] / 3.0).astype(np.int)

    # separate color channels
    b = im[:height]
    g = im[height: 2*height]
    r = im[2*height: 3*height]
    ag,bestxg,bestyg = alignRecursive(g, b, zncc)
    ar,bestxr,bestyr = alignRecursive(r, b, zncc)
    # create a color image
    im_out = np.dstack([ar, ag, b])

    # save the image
    fname = 'out_path/out_fname.jpg'
    skio.imsave(fname, im_out)

    print("offset g = ({},{})".format(bestxg, bestyg))
    print("offset r = ({},{})".format(bestxr, bestyr))

    # display the image
    skio.imshow(im_out)
    skio.show()

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.

offset g = (11,17)
offset r = (19,109)

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.

offset g = (15,58)
offset r = (5,92)

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.

offset g = (14,40)
offset r = (20,91)

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.

offset g = (-3,59)
offset r = (-15,98)

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.

offset g = (1,73)
offset r = (-1,169)

Part 1, single Scale alignment, using SAD¶

function implementing single scale alignment¶

Applying single scale function to small scale image¶

ZNCC alignment result¶

Larger images, requires multiscale¶

Implementation of multiscale, course to fine alignment with recursion¶

Cropping using dark to light edge detection¶

Alignment using Gradient Descent in Pytorch¶

Full Results¶