Author: Zhe Huang (zhehuang@andrew.cmu.edu)
!pwd
/home/zheh/Documents/CMU_16_889_22spring/assignment2
!which python
/home/zheh/anaconda3/envs/16889/bin/python
import torch
import pytorch3d
import imageio
import numpy as np
import mediapy as media
from pytorch3d.ops import sample_points_from_meshes
from utils_vox import (
render_voxel_mesh_with_transform,
render_pointcloud_with_transform,
render_mesh_with_transform,
render_pointcloud_loss_with_transform,
)
# --- Problem 1.1: voxel-grid fitting, source vs. target turntable GIFs ---
p1_1_result = torch.load('results/p1_1.pth')

# One camera pose every 5 degrees of azimuth (distance 1, elevation 0).
p1_1_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Render the fitted source voxel grid from every viewpoint as uint8 frames.
rendered_p1_1_src_list = [
    (render_voxel_mesh_with_transform(p1_1_result['voxels_src'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p1_1_cams
]
imageio.mimsave('results/p1_1_src.gif', rendered_p1_1_src_list, fps=15)
p1_1_src_gif = media.read_video('results/p1_1_src.gif')

# Same turntable for the target voxel grid.
rendered_p1_1_tgt_list = [
    (render_voxel_mesh_with_transform(p1_1_result['voxels_tgt'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p1_1_cams
]
imageio.mimsave('results/p1_1_tgt.gif', rendered_p1_1_tgt_list, fps=15)
p1_1_tgt_gif = media.read_video('results/p1_1_tgt.gif')

# Show target and fitted source side by side.
p1_1_gifs = {
    'target': p1_1_tgt_gif,
    'source': p1_1_src_gif,
}
media.show_videos(p1_1_gifs, height=500)
target | source |
# --- Problem 1.2: point-cloud fitting, source vs. target turntable GIFs ---
p1_2_result = torch.load('results/p1_2.pth')

# One camera pose every 5 degrees of azimuth (distance 1, elevation 0).
p1_2_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Fitted source point cloud from every viewpoint, as uint8 frames.
rendered_p1_2_src_list = [
    (render_pointcloud_with_transform(p1_2_result['pointclouds_src'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p1_2_cams
]
imageio.mimsave('results/p1_2_src.gif', rendered_p1_2_src_list, fps=15)
p1_2_src_gif = media.read_video('results/p1_2_src.gif')

# Same turntable for the target point cloud.
rendered_p1_2_tgt_list = [
    (render_pointcloud_with_transform(p1_2_result['pointclouds_tgt'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p1_2_cams
]
imageio.mimsave('results/p1_2_tgt.gif', rendered_p1_2_tgt_list, fps=15)
p1_2_tgt_gif = media.read_video('results/p1_2_tgt.gif')

# Show target and fitted source side by side.
p1_2_gifs = {
    'target': p1_2_tgt_gif,
    'source': p1_2_src_gif,
}
media.show_videos(p1_2_gifs, height=500)
target | source |
# --- Problem 1.3: mesh fitting, source vs. target turntable GIFs ---
p1_3_result = torch.load('results/p1_3.pth')

# One camera pose every 5 degrees of azimuth (distance 1, elevation 0).
p1_3_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Fitted source mesh from every viewpoint, as uint8 frames.
rendered_p1_3_src_list = [
    (render_mesh_with_transform(p1_3_result['mesh_src'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p1_3_cams
]
imageio.mimsave('results/p1_3_src.gif', rendered_p1_3_src_list, fps=15)
p1_3_src_gif = media.read_video('results/p1_3_src.gif')

# Same turntable for the target mesh.
rendered_p1_3_tgt_list = [
    (render_mesh_with_transform(p1_3_result['mesh_tgt'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p1_3_cams
]
imageio.mimsave('results/p1_3_tgt.gif', rendered_p1_3_tgt_list, fps=15)
p1_3_tgt_gif = media.read_video('results/p1_3_tgt.gif')

# Show target and fitted source side by side.
p1_3_gifs = {
    'target': p1_3_tgt_gif,
    'source': p1_3_src_gif,
}
media.show_videos(p1_3_gifs, height=500)
target | source |
# --- Problem 2.1, example 1: single-view image -> voxel grid prediction ---
p2_1_1 = torch.load('results/vox/vox_eval_1.pth')

p2_1_1_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Groundtruth mesh turntable.
rendered_p2_1_1_gt_mesh_list = [
    (render_mesh_with_transform(p2_1_1['gt_mesh'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_1_1_cams
]
imageio.mimsave('results/p2_1_1_mesh.gif', rendered_p2_1_1_gt_mesh_list, fps=15)
p2_1_1_gt_mesh_gif = media.read_video('results/p2_1_1_mesh.gif')

# Predicted voxel grid; use_sigmoid=True asks the renderer to sigmoid the
# predictions first — presumably they are raw network outputs (see utils_vox).
rendered_p2_1_1_pred_list = [
    (render_voxel_mesh_with_transform(p2_1_1['pred'], R=R, T=T, use_sigmoid=True) * 255).astype(np.uint8)
    for R, T in p2_1_1_cams
]
imageio.mimsave('results/p2_1_1_pred.gif', rendered_p2_1_1_pred_list, fps=15)
p2_1_1_pred_gif = media.read_video('results/p2_1_1_pred.gif')

# Input image (single still frame) beside the groundtruth and prediction turntables.
p2_1_1_gifs = {
    'gt_2d_image': [p2_1_1['gt_image'].numpy()],
    'gt_mesh': p2_1_1_gt_mesh_gif,
    'pred_voxel_mesh': p2_1_1_pred_gif
}
media.show_videos(p2_1_1_gifs, height=334)
gt_2d_image | gt_mesh | pred_voxel_mesh |
# --- Problem 2.1, example 91: single-view image -> voxel grid prediction ---
p2_1_91 = torch.load('results/vox/vox_eval_91.pth')

p2_1_91_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Groundtruth mesh turntable.
rendered_p2_1_91_gt_mesh_list = [
    (render_mesh_with_transform(p2_1_91['gt_mesh'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_1_91_cams
]
imageio.mimsave('results/p2_1_91_mesh.gif', rendered_p2_1_91_gt_mesh_list, fps=15)
p2_1_91_gt_mesh_gif = media.read_video('results/p2_1_91_mesh.gif')

# Predicted voxel grid; use_sigmoid=True asks the renderer to sigmoid the
# predictions first — presumably they are raw network outputs (see utils_vox).
rendered_p2_1_91_pred_list = [
    (render_voxel_mesh_with_transform(p2_1_91['pred'], R=R, T=T, use_sigmoid=True) * 255).astype(np.uint8)
    for R, T in p2_1_91_cams
]
imageio.mimsave('results/p2_1_91_pred.gif', rendered_p2_1_91_pred_list, fps=15)
p2_1_91_pred_gif = media.read_video('results/p2_1_91_pred.gif')

# Input image (single still frame) beside the groundtruth and prediction turntables.
p2_1_91_gifs = {
    'gt_2d_image': [p2_1_91['gt_image'].numpy()],
    'gt_mesh': p2_1_91_gt_mesh_gif,
    'pred_voxel_mesh': p2_1_91_pred_gif
}
media.show_videos(p2_1_91_gifs, height=334)
gt_2d_image | gt_mesh | pred_voxel_mesh |
# --- Problem 2.1, example 485: single-view image -> voxel grid prediction ---
p2_1_485 = torch.load('results/vox/vox_eval_485.pth')

p2_1_485_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Groundtruth mesh turntable.
rendered_p2_1_485_gt_mesh_list = [
    (render_mesh_with_transform(p2_1_485['gt_mesh'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_1_485_cams
]
imageio.mimsave('results/p2_1_485_mesh.gif', rendered_p2_1_485_gt_mesh_list, fps=15)
p2_1_485_gt_mesh_gif = media.read_video('results/p2_1_485_mesh.gif')

# Predicted voxel grid; use_sigmoid=True asks the renderer to sigmoid the
# predictions first — presumably they are raw network outputs (see utils_vox).
rendered_p2_1_485_pred_list = [
    (render_voxel_mesh_with_transform(p2_1_485['pred'], R=R, T=T, use_sigmoid=True) * 255).astype(np.uint8)
    for R, T in p2_1_485_cams
]
imageio.mimsave('results/p2_1_485_pred.gif', rendered_p2_1_485_pred_list, fps=15)
p2_1_485_pred_gif = media.read_video('results/p2_1_485_pred.gif')

# Input image (single still frame) beside the groundtruth and prediction turntables.
p2_1_485_gifs = {
    'gt_2d_image': [p2_1_485['gt_image'].numpy()],
    'gt_mesh': p2_1_485_gt_mesh_gif,
    'pred_voxel_mesh': p2_1_485_pred_gif
}
media.show_videos(p2_1_485_gifs, height=334)
gt_2d_image | gt_mesh | pred_voxel_mesh |
# --- Problem 2.2, example 388: single-view image -> point cloud prediction ---
p2_2_388 = torch.load('results/point/point_eval_388.pth')

p2_2_388_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Groundtruth mesh turntable.
rendered_p2_2_388_gt_mesh_list = [
    (render_mesh_with_transform(p2_2_388['gt_mesh'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_2_388_cams
]
imageio.mimsave('results/p2_2_388_mesh.gif', rendered_p2_2_388_gt_mesh_list, fps=15)
p2_2_388_gt_mesh_gif = media.read_video('results/p2_2_388_mesh.gif')

# Predicted point cloud turntable.
rendered_p2_2_388_pred_list = [
    (render_pointcloud_with_transform(p2_2_388['pred'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_2_388_cams
]
imageio.mimsave('results/p2_2_388_pred.gif', rendered_p2_2_388_pred_list, fps=15)
p2_2_388_pred_gif = media.read_video('results/p2_2_388_pred.gif')

# Input image (single still frame) beside the groundtruth and prediction turntables.
p2_2_388_gifs = {
    'gt_2d_image': [p2_2_388['gt_image'].numpy()],
    'gt_mesh': p2_2_388_gt_mesh_gif,
    'pred_pointcloud': p2_2_388_pred_gif
}
media.show_videos(p2_2_388_gifs, height=334)
gt_2d_image | gt_mesh | pred_pointcloud |
# --- Problem 2.2, example 113: single-view image -> point cloud prediction ---
p2_2_113 = torch.load('results/point/point_eval_113.pth')

p2_2_113_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Groundtruth mesh turntable.
rendered_p2_2_113_gt_mesh_list = [
    (render_mesh_with_transform(p2_2_113['gt_mesh'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_2_113_cams
]
imageio.mimsave('results/p2_2_113_mesh.gif', rendered_p2_2_113_gt_mesh_list, fps=15)
p2_2_113_gt_mesh_gif = media.read_video('results/p2_2_113_mesh.gif')

# Predicted point cloud turntable.
rendered_p2_2_113_pred_list = [
    (render_pointcloud_with_transform(p2_2_113['pred'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_2_113_cams
]
imageio.mimsave('results/p2_2_113_pred.gif', rendered_p2_2_113_pred_list, fps=15)
p2_2_113_pred_gif = media.read_video('results/p2_2_113_pred.gif')

# Input image (single still frame) beside the groundtruth and prediction turntables.
p2_2_113_gifs = {
    'gt_2d_image': [p2_2_113['gt_image'].numpy()],
    'gt_mesh': p2_2_113_gt_mesh_gif,
    'pred_pointcloud': p2_2_113_pred_gif
}
media.show_videos(p2_2_113_gifs, height=334)
gt_2d_image | gt_mesh | pred_pointcloud |
# --- Problem 2.2, example 233: single-view image -> point cloud prediction ---
p2_2_233 = torch.load('results/point/point_eval_233.pth')

p2_2_233_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Groundtruth mesh turntable.
rendered_p2_2_233_gt_mesh_list = [
    (render_mesh_with_transform(p2_2_233['gt_mesh'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_2_233_cams
]
imageio.mimsave('results/p2_2_233_mesh.gif', rendered_p2_2_233_gt_mesh_list, fps=15)
p2_2_233_gt_mesh_gif = media.read_video('results/p2_2_233_mesh.gif')

# Predicted point cloud turntable.
rendered_p2_2_233_pred_list = [
    (render_pointcloud_with_transform(p2_2_233['pred'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_2_233_cams
]
imageio.mimsave('results/p2_2_233_pred.gif', rendered_p2_2_233_pred_list, fps=15)
p2_2_233_pred_gif = media.read_video('results/p2_2_233_pred.gif')

# Input image (single still frame) beside the groundtruth and prediction turntables.
p2_2_233_gifs = {
    'gt_2d_image': [p2_2_233['gt_image'].numpy()],
    'gt_mesh': p2_2_233_gt_mesh_gif,
    'pred_pointcloud': p2_2_233_pred_gif
}
media.show_videos(p2_2_233_gifs, height=334)
gt_2d_image | gt_mesh | pred_pointcloud |
# --- Problem 2.3, example 670: single-view image -> mesh prediction ---
p2_3_670 = torch.load('results/mesh/mesh_eval_670.pth')

p2_3_670_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Groundtruth mesh turntable.
rendered_p2_3_670_gt_mesh_list = [
    (render_mesh_with_transform(p2_3_670['gt_mesh'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_3_670_cams
]
imageio.mimsave('results/p2_3_670_mesh.gif', rendered_p2_3_670_gt_mesh_list, fps=15)
p2_3_670_gt_mesh_gif = media.read_video('results/p2_3_670_mesh.gif')

# Predicted mesh turntable.
rendered_p2_3_670_pred_list = [
    (render_mesh_with_transform(p2_3_670['pred'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_3_670_cams
]
imageio.mimsave('results/p2_3_670_pred.gif', rendered_p2_3_670_pred_list, fps=15)
p2_3_670_pred_gif = media.read_video('results/p2_3_670_pred.gif')

# Input image (single still frame) beside the groundtruth and prediction turntables.
p2_3_670_gifs = {
    'gt_2d_image': [p2_3_670['gt_image'].numpy()],
    'gt_mesh': p2_3_670_gt_mesh_gif,
    'pred_mesh': p2_3_670_pred_gif
}
media.show_videos(p2_3_670_gifs, height=334)
gt_2d_image | gt_mesh | pred_mesh |
# --- Problem 2.3, example 432: single-view image -> mesh prediction ---
p2_3_432 = torch.load('results/mesh/mesh_eval_432.pth')

p2_3_432_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Groundtruth mesh turntable.
rendered_p2_3_432_gt_mesh_list = [
    (render_mesh_with_transform(p2_3_432['gt_mesh'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_3_432_cams
]
imageio.mimsave('results/p2_3_432_mesh.gif', rendered_p2_3_432_gt_mesh_list, fps=15)
p2_3_432_gt_mesh_gif = media.read_video('results/p2_3_432_mesh.gif')

# Predicted mesh turntable.
rendered_p2_3_432_pred_list = [
    (render_mesh_with_transform(p2_3_432['pred'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_3_432_cams
]
imageio.mimsave('results/p2_3_432_pred.gif', rendered_p2_3_432_pred_list, fps=15)
p2_3_432_pred_gif = media.read_video('results/p2_3_432_pred.gif')

# Input image (single still frame) beside the groundtruth and prediction turntables.
p2_3_432_gifs = {
    'gt_2d_image': [p2_3_432['gt_image'].numpy()],
    'gt_mesh': p2_3_432_gt_mesh_gif,
    'pred_mesh': p2_3_432_pred_gif
}
media.show_videos(p2_3_432_gifs, height=334)
gt_2d_image | gt_mesh | pred_mesh |
# --- Problem 2.3, example 231: single-view image -> mesh prediction ---
p2_3_231 = torch.load('results/mesh/mesh_eval_231.pth')

p2_3_231_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Groundtruth mesh turntable.
rendered_p2_3_231_gt_mesh_list = [
    (render_mesh_with_transform(p2_3_231['gt_mesh'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_3_231_cams
]
imageio.mimsave('results/p2_3_231_mesh.gif', rendered_p2_3_231_gt_mesh_list, fps=15)
p2_3_231_gt_mesh_gif = media.read_video('results/p2_3_231_mesh.gif')

# Predicted mesh turntable.
rendered_p2_3_231_pred_list = [
    (render_mesh_with_transform(p2_3_231['pred'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_3_231_cams
]
imageio.mimsave('results/p2_3_231_pred.gif', rendered_p2_3_231_pred_list, fps=15)
p2_3_231_pred_gif = media.read_video('results/p2_3_231_pred.gif')

# Input image (single still frame) beside the groundtruth and prediction turntables.
p2_3_231_gifs = {
    'gt_2d_image': [p2_3_231['gt_image'].numpy()],
    'gt_mesh': p2_3_231_gt_mesh_gif,
    'pred_mesh': p2_3_231_pred_gif
}
media.show_videos(p2_3_231_gifs, height=334)
gt_2d_image | gt_mesh | pred_mesh |
Here are the quantitative results for all three methods, generated using the evaluation script. All models are trained for 32,000 steps with the batch size set to 16. The image-to-mesh model performs the best, as it achieves the highest average F1 score. This might be because the image-to-mesh model reaches the lowest final training loss (0.001) among the three methods. Since the image-to-voxelgrid and image-to-point cloud models have similar final training losses, their F1 scores are also similar.
Voxel | Point Cloud | Mesh | |
---|---|---|---|
Batch size | 16 | 16 | 16 |
# training steps | 32,000 | 32,000 | 32,000 |
Final loss value | 0.002 | 0.002 | 0.001 |
Avg. F1\@0.05 | 90.921 | 90.353 | 93.984 |
Here we tweak the level of ico_sphere from 4 to 3, reducing the total number of mesh vertices from 2,562 to 642. Keeping all other hyperparameters the same, we train this relatively "low-resolution" model for 32,000 steps with the batch size set to 16.
The example results are shown below. Although the pred_mesh_2562 and pred_mesh_642 results generally look alike, they differ subtly. The pred_mesh_2562 results are more detailed, and their surfaces are smoother overall. The chair legs in pred_mesh_2562 are less sharp, resembling the groundtruth better than those in pred_mesh_642. The backs of the pred_mesh_2562 chairs also match the groundtruth better in terms of overall style and shape. Thus, we can conclude that training with an initial mesh that has more vertices yields a better result.
# --- Problem 2.5, example 11: 2,562-vertex vs. 642-vertex initial ico_sphere ---
p2_5_642_11 = torch.load('results/mesh/mesh_eval_642_11.pth')
p2_5_2562_11 = torch.load('results/mesh/mesh_eval_11.pth')

p2_5_11_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Groundtruth mesh turntable (taken from the 642 checkpoint; presumably
# identical in both files since they evaluate the same example — TODO confirm).
rendered_p2_5_11_gt_mesh_list = [
    (render_mesh_with_transform(p2_5_642_11['gt_mesh'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_5_11_cams
]
imageio.mimsave('results/p2_5_11_mesh.gif', rendered_p2_5_11_gt_mesh_list, fps=15)
p2_5_11_gt_mesh_gif = media.read_video('results/p2_5_11_mesh.gif')

# Prediction from the low-resolution (642-vertex) model.
rendered_p2_5_642_11_pred_list = [
    (render_mesh_with_transform(p2_5_642_11['pred'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_5_11_cams
]
imageio.mimsave('results/p2_5_642_11_pred.gif', rendered_p2_5_642_11_pred_list, fps=15)
p2_5_642_11_pred_gif = media.read_video('results/p2_5_642_11_pred.gif')

# Prediction from the high-resolution (2,562-vertex) model.
rendered_p2_5_2562_11_pred_list = [
    (render_mesh_with_transform(p2_5_2562_11['pred'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_5_11_cams
]
imageio.mimsave('results/p2_5_2562_11_pred.gif', rendered_p2_5_2562_11_pred_list, fps=15)
p2_5_2562_11_pred_gif = media.read_video('results/p2_5_2562_11_pred.gif')

# Groundtruth beside both predictions for visual comparison.
p2_5_11_gifs = {
    'gt_mesh': p2_5_11_gt_mesh_gif,
    'pred_mesh_2562': p2_5_2562_11_pred_gif,
    'pred_mesh_642': p2_5_642_11_pred_gif,
}
media.show_videos(p2_5_11_gifs, height=334)
gt_mesh | pred_mesh_2562 | pred_mesh_642 |
# --- Problem 2.5, example 231: 2,562-vertex vs. 642-vertex initial ico_sphere ---
p2_5_642_231 = torch.load('results/mesh/mesh_eval_642_231.pth')
p2_5_2562_231 = torch.load('results/mesh/mesh_eval_231.pth')

p2_5_231_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Groundtruth mesh turntable (taken from the 642 checkpoint; presumably
# identical in both files since they evaluate the same example — TODO confirm).
rendered_p2_5_231_gt_mesh_list = [
    (render_mesh_with_transform(p2_5_642_231['gt_mesh'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_5_231_cams
]
imageio.mimsave('results/p2_5_231_mesh.gif', rendered_p2_5_231_gt_mesh_list, fps=15)
p2_5_231_gt_mesh_gif = media.read_video('results/p2_5_231_mesh.gif')

# Prediction from the low-resolution (642-vertex) model.
rendered_p2_5_642_231_pred_list = [
    (render_mesh_with_transform(p2_5_642_231['pred'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_5_231_cams
]
imageio.mimsave('results/p2_5_642_231_pred.gif', rendered_p2_5_642_231_pred_list, fps=15)
p2_5_642_231_pred_gif = media.read_video('results/p2_5_642_231_pred.gif')

# Prediction from the high-resolution (2,562-vertex) model.
rendered_p2_5_2562_231_pred_list = [
    (render_mesh_with_transform(p2_5_2562_231['pred'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_5_231_cams
]
imageio.mimsave('results/p2_5_2562_231_pred.gif', rendered_p2_5_2562_231_pred_list, fps=15)
p2_5_2562_231_pred_gif = media.read_video('results/p2_5_2562_231_pred.gif')

# Groundtruth beside both predictions for visual comparison.
p2_5_231_gifs = {
    'gt_mesh': p2_5_231_gt_mesh_gif,
    'pred_mesh_2562': p2_5_2562_231_pred_gif,
    'pred_mesh_642': p2_5_642_231_pred_gif,
}
media.show_videos(p2_5_231_gifs, height=334)
gt_mesh | pred_mesh_2562 | pred_mesh_642 |
One interesting discovery is that for the image-to-point cloud model, it looks like most of the 5,000 predicted points are "squeezed" onto the seat part of the chairs. This results in the appearance where the seat is densely populated while the other parts of a chair are sparsely represented by the predicted point cloud. One possible interpretation of this artifact is that crowding at the seat lowers the overall training loss, and is thus preferred by the model since its objective is to minimize the loss.
To demonstrate this, we sample a point cloud of 5,000 points from the groundtruth mesh, generating the groundtruth point cloud in the same way as during training. We then examine the mutual Chamfer distance between the groundtruth and the predicted point cloud, coloring points from both by their loss value. Specifically, points with small losses are colored red, whereas points with large losses are colored blue. The smaller the loss an individual point generates, the redder it appears in the figure.
As illustrated in the group of GIFs below, many points in the seating area generate low losses, as they appear red. This could be the reason why, after training, the seating area is the densest part of the predicted point cloud: it carries the lowest energy. Hence, the behavior of the image-to-point cloud model can be interpreted from the perspective of training-loss optimization.
# --- Problem 2.6: explain the seat-crowding artifact by coloring points by loss ---
p2_6_111 = torch.load('results/point/point_eval_111.pth')

p2_6_cams = [pytorch3d.renderer.look_at_view_transform(1, 0, a) for a in range(0, 360, 5)]

# Groundtruth mesh turntable.
rendered_p2_6_111_gt_mesh_list = [
    (render_mesh_with_transform(p2_6_111['gt_mesh'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_6_cams
]
imageio.mimsave('results/p2_6_111_mesh.gif', rendered_p2_6_111_gt_mesh_list, fps=15)
p2_6_111_gt_mesh_gif = media.read_video('results/p2_6_111_mesh.gif')

# Sample 5,000 groundtruth points from the mesh, mirroring the training setup.
sampled_gt = sample_points_from_meshes(p2_6_111['gt_mesh'], 5000).squeeze()
rendered_p2_6_111_gt_list = [
    (render_pointcloud_with_transform(sampled_gt, R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_6_cams
]
imageio.mimsave('results/p2_6_111_gt.gif', rendered_p2_6_111_gt_list, fps=15)
p2_6_111_gt_gif = media.read_video('results/p2_6_111_gt.gif')

# Predicted point cloud turntable.
rendered_p2_6_111_pred_list = [
    (render_pointcloud_with_transform(p2_6_111['pred'], R=R, T=T) * 255).astype(np.uint8)
    for R, T in p2_6_cams
]
imageio.mimsave('results/p2_6_111_pred.gif', rendered_p2_6_111_pred_list, fps=15)
p2_6_111_pred_gif = media.read_video('results/p2_6_111_pred.gif')

# Re-render both clouds colored by per-point Chamfer loss; the helper returns
# one frame for the groundtruth cloud and one for the prediction per camera.
rendered_p2_6_111_gt_list, rendered_p2_6_111_pred_list = [], []
for R, T in p2_6_cams:
    gt_frame, pred_frame = render_pointcloud_loss_with_transform(
        sampled_gt, p2_6_111['pred'], R=R, T=T)
    rendered_p2_6_111_gt_list.append((gt_frame * 255).astype(np.uint8))
    rendered_p2_6_111_pred_list.append((pred_frame * 255).astype(np.uint8))
imageio.mimsave('results/p2_6_111_gt_loss.gif', rendered_p2_6_111_gt_list, fps=15)
p2_6_111_gt_loss_gif = media.read_video('results/p2_6_111_gt_loss.gif')
imageio.mimsave('results/p2_6_111_pred_loss.gif', rendered_p2_6_111_pred_list, fps=15)
p2_6_111_pred_loss_gif = media.read_video('results/p2_6_111_pred_loss.gif')

# Two rows of three: plain visualizations, then the loss-colored renderings.
p2_6_111_gifs = {
    'gt_mesh': p2_6_111_gt_mesh_gif,
    'gt_sampled_points': p2_6_111_gt_gif,
    'pred_pointcloud': p2_6_111_pred_gif,
    'gt_2d_image': [p2_6_111['gt_image'].numpy()],
    'gt_loss_dist': p2_6_111_gt_loss_gif,
    'pred_pointcloud_loss_dist': p2_6_111_pred_loss_gif,
}
media.show_videos(p2_6_111_gifs, height=334, columns=3)
gt_mesh | gt_sampled_points | pred_pointcloud |
gt_2d_image | gt_loss_dist | pred_pointcloud_loss_dist |