# (16-726): Project 1 starter Python code
# credit to https://inst.eecs.berkeley.edu/~cs194-26/fa18/hw/proj1/data/colorize_skel.py
# these are just some suggested libraries
# instead of scikit-image you could use matplotlib and opencv to read, write, and display images

import numpy as np
import skimage as sk
import skimage.io as skio
import skimage.transform
from os import listdir

import torch
import torch.nn.functional as F

def convolve(image, kernel, stride, padding=0):
    width, height = image.shape[0], image.shape[1]
    k_width, k_height = kernel.shape[0], kernel.shape[1]
    out_width = int(((width - k_width + 2*padding) / stride) + 1)
    out_height = int(((height - k_height + 2*padding) / stride) + 1)
    output = np.zeros((out_width, out_height))
    if padding != 0:
        padded = np.zeros((width + 2*padding, height + 2*padding))
        padded[padding:padding + width, padding:padding + height] = image
        image = padded
    for w in range(0, width, stride):
        for h in range(0, height, stride):
            output[w, h] = np.sum(kernel * image[w:w + k_width, h:h + k_height])
    return output


# low res simple alignment, align source to target
# ver 1, edge information with ssd
def align_once(source, target, window=15):
    width, height = target.shape[0], target.shape[1]
    
    # sobel filter to detect edge feature
    sobel_x = np.array([[1, 0, -1], [2, 0, -2], [1, 0, -1]])
    sobel_y = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])
    # below is the vanilla version that is implemented from scratch, because it runs too slow the results for tif
    # are obtained by the pytorch version

#     s_x = convolve(source, sobel_x, 1, 1)
#     s_y = convolve(source, sobel_y, 1, 1)
#     t_x = convolve(target, sobel_x, 1, 1)
#     t_y = convolve(target, sobel_y, 1, 1)
#     s = np.sqrt(np.square(s_x) + np.square(s_y))
#     t = np.sqrt(np.square(t_x) + np.square(t_y))
    
    tensor_source = torch.tensor(source).double().view(1, 1, width, height)
    tensor_target = torch.tensor(target).double().view(1, 1, width, height)
    tensor_sobelx = torch.tensor(sobel_x).double().view(1, 1, 3, 3).repeat(1, 1, 1, 1)
    tensor_sobely = torch.tensor(sobel_y).double().view(1, 1, 3, 3).repeat(1, 1, 1, 1)
    tensor_sx = F.conv2d(tensor_source, tensor_sobelx, stride=1, padding=1).view(width, height)
    tensor_sy = F.conv2d(tensor_source, tensor_sobely, stride=1, padding=1).view(width, height)
    tensor_tx = F.conv2d(tensor_target, tensor_sobelx, stride=1, padding=1).view(width, height)
    tensor_ty = F.conv2d(tensor_target, tensor_sobely, stride=1, padding=1).view(width, height)
    
    tensor_s = torch.sqrt(torch.square(tensor_sx) + torch.square(tensor_sy))
    tensor_t = torch.sqrt(torch.square(tensor_tx) + torch.square(tensor_ty))
    
    s = tensor_s.numpy()
    t = tensor_t.numpy()
    
    roi_s = np.zeros((width, height))
    roi_s[window:-window, window:-window] = s[window:-window, window:-window]
    # multiply image size to prevent extremely small values
    min_x, min_y = 0, 0
    min_val = float('inf')
    for x in range(-1*window, window):
        for y in range(-1*window, window):
            x_shift = np.roll(roi_s, x, axis=1)
            roll_s = np.roll(x_shift, y, axis=0)
            roi_t = np.zeros((width, height))
            roi_t[window + x:-window - x, window + y: -window - y] = t[window + x:-window - x, window + y: -window - y]
            val = np.sum(np.square((roll_s-roi_t)))
            if val < min_val:
                min_val = val
                min_x, min_y = x, y
    return np.roll(np.roll(source, min_x, axis=1), min_y, axis=0), min_x, min_y, min_val


def pyramid_align(source, target, level, window=15):
    width, height = target.shape[0], target.shape[1]
    if level <= 0:
        img, min_x, min_y, val = align_once(source, target, window)
        return align_once(source, target, window)
    else:
        source_half = sk.transform.rescale(source, 0.5, anti_aliasing=True)
        target_half = sk.transform.rescale(target, 0.5, anti_aliasing=True)
        _, min_half_x, min_half_y, _ = pyramid_align(source_half, target_half, level-1)
        source_rolled = np.roll(np.roll(source, min_half_x*2, axis=1), min_half_y*2, axis=0)
        img, min_x, min_y, val = align_once(source_rolled, target, window=1)
        return img, min_half_x*2 + min_x, min_half_y*2+min_y, val


# A wrapper for reading image, dividing them into B, G and R channels and stacking and saving aligned images.
# Minor modification from base code
def align_image(imname):
    # name of the input file
    with_path = "data/"+imname

    # read in the image
    im = skio.imread(with_path)

    # convert to double (might want to do this later on to save memory)    
    im = sk.img_as_float(im)

    # compute the height of each part (just 1/3 of total)
    height = np.floor(im.shape[0] / 3.0).astype(np.int)

    # separate color channels
    b = im[:height]
    g = im[height: 2*height]
    r = im[2*height: 3*height]

    # align the images
    # functions that might be useful for aligning the images include:
    # np.roll, np.sum, sk.transform.rescale (for multiscale)
    if imname.endswith(".jpg"):
        ag, gx, gy, _ = align_once(g, b)
        ar, rx, ry, rval = align_once(r, b)
    else:
        ag, gx, gy, gval = pyramid_align(g, b, 3)
        ar, rx, ry, rval = pyramid_align(r, b, 3)
    

    ### ag = align(g, b)
    ### ar = align(r, b)
    # create a color image
    im_out = np.dstack([ar, ag, b])

    # save the image
    fname = "output/" + imname.replace(".tif", ".jpg")
    skio.imsave(fname, im_out)

    # display the image
    skio.imshow(im_out)
    skio.show()
    print("Green to blue horizontal offset:" + str(gx))
    print("Green to blue vertical offset:" + str(gy))
    print("Red to blue horizontal offset:" + str(rx))
    print("Red to blue vertical offset:" + str(ry))


def main():
    images = [f for f in listdir("data")]
    for im in images:
        align_image(im)

main()

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.

Green to blue horizontal offset:8
Green to blue vertical offset:48
Red to blue horizontal offset:7
Red to blue vertical offset:112

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.

Green to blue horizontal offset:32
Green to blue vertical offset:80
Red to blue horizontal offset:108
Red to blue vertical offset:105

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.

Green to blue horizontal offset:6
Green to blue vertical offset:58
Red to blue horizontal offset:78
Red to blue vertical offset:75

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.

Green to blue horizontal offset:1
Green to blue vertical offset:53
Red to blue horizontal offset:10
Red to blue vertical offset:105

Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.

Green to blue horizontal offset:2
Green to blue vertical offset:5
Red to blue horizontal offset:3
Red to blue vertical offset:12

Colorizing the Prokudin-Gorskii Photo Collection¶

Overview¶

Bells and Whistles¶

Results¶

Successful Alignments¶

Failed Alignments¶