import math

import numpy as np
import torch
from PIL import Image, ImageOps
from torchvision import transforms


RATIOS = [
    (512, 512),
    (384, 512),
    (512, 384),
    (384, 768),
    (768, 384),
    (384, 576),
    (576, 384),
    (320, 960),
    (960, 320),
    (256, 1024),
    (1024, 256),
]

RATIO_TYPES = [
    'ratio_h512_w512',
    'ratio_h384_w512',
    'ratio_h512_w384',
    'ratio_h384_w768',
    'ratio_h768_w384',
    'ratio_h384_w576',
    'ratio_h576_w384',
    'ratio_h320_w960',
    'ratio_h960_w320',
    'ratio_h256_w1024',
    'ratio_h1024_w256',
]
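

# Sanity-check sketch (illustrative, not part of the original pipeline): each
# RATIO_TYPES entry is assumed to encode the (h, w) pair at the same index in
# RATIOS.
def _check_ratio_names():
    for (h, w), name in zip(RATIOS, RATIO_TYPES):
        assert name == f'ratio_h{h}_w{w}', (name, h, w)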


def center_crop_and_resize(img, output_size=(256, 256)):
    """Resize `img` so it covers `output_size` (h, w), then center-crop to it."""
    target_h, target_w = output_size
    img_w, img_h = img.size

    # Scale so the image fully covers the target; the dimension with the
    # smaller relative scale is matched exactly, the other overflows.
    scale_w, scale_h = img_w / target_w, img_h / target_h
    if scale_h > scale_w:
        new_w, new_h = target_w, int(target_w / img_w * img_h)
    else:
        new_w, new_h = int(target_h / img_h * img_w), target_h

    img = img.resize((new_w, new_h), Image.LANCZOS)

    # Center-crop the overflow down to the exact target size.
    left = (new_w - target_w) // 2
    top = (new_h - target_h) // 2
    right = left + target_w
    bottom = top + target_h

    img = img.crop((left, top, right, bottom))

    return img
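
# Usage sketch (the file path and sizes are hypothetical):
#   img = Image.open('example.jpg')                          # e.g. 1920x1080
#   out = center_crop_and_resize(img, output_size=(384, 512))
#   assert out.size == (512, 384)                            # PIL reports (w, h)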


def resize_with_padding(img, output_size=(256, 256), fill_color=(255, 255, 255)):
    """Resize `img` to fit inside `output_size` (h, w), padding the rest with `fill_color`."""
    target_height, target_width = output_size

    # Scale so the image fits entirely inside the target (letterboxing).
    original_width, original_height = img.size
    ratio = min(target_width / original_width, target_height / original_height)
    new_size = (int(original_width * ratio), int(original_height * ratio))
    resized_image = img.resize(new_size, Image.LANCZOS)

    # Distribute the leftover space symmetrically as padding.
    delta_w = target_width - new_size[0]
    delta_h = target_height - new_size[1]
    padding = (delta_w // 2, delta_h // 2, delta_w - (delta_w // 2), delta_h - (delta_h // 2))
    padded_image = ImageOps.expand(resized_image, padding, fill=fill_color)

    return padded_image
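
# Usage sketch (hypothetical input): a 1024x512 image letterboxed to 512x512
# gains 128 px of white padding on top and bottom.
#   img = Image.new('RGB', (1024, 512))
#   out = resize_with_padding(img, output_size=(512, 512))
#   assert out.size == (512, 512)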


def unpad_and_resize_back(padded_image, original_width, original_height):
    """
    Revert a padded + resized image back to its original size.

    Args:
        padded_image (PIL.Image): Image produced by resize_with_padding.
        original_width (int): Original image width before resize & pad.
        original_height (int): Original image height before resize & pad.

    Returns:
        PIL.Image: Image resized back to the original resolution.
    """
    # Recompute the scale and the size of the content region inside the padding.
    target_width, target_height = padded_image.size
    ratio = min(target_width / original_width, target_height / original_height)
    resized_w = int(original_width * ratio)
    resized_h = int(original_height * ratio)

    # Crop away the symmetric padding added by resize_with_padding.
    left = (target_width - resized_w) // 2
    upper = (target_height - resized_h) // 2
    right = left + resized_w
    lower = upper + resized_h

    cropped_image = padded_image.crop((left, upper, right, lower))

    # Scale the content back up to the original resolution.
    recovered_image = cropped_image.resize((original_width, original_height), Image.LANCZOS)
    return recovered_image
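
# Round-trip sketch (hypothetical sizes): padding then unpadding recovers the
# original resolution, up to resampling loss.
#   img = Image.new('RGB', (800, 600))
#   padded = resize_with_padding(img, output_size=(512, 512))
#   restored = unpad_and_resize_back(padded, 800, 600)
#   assert restored.size == (800, 600)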


def calculate_ratio():
    # Enumerate candidate (h, w) base sizes: for each aspect ratio, pick the
    # multiple-of-64 side length whose total area is closest to max_area.
    max_area = 512 * 512
    ratios = [(2, 2), (3, 4), (4, 3), (2, 4), (4, 2), (1, 4), (4, 1), (2, 3), (3, 2), (1, 3), (3, 1)]
    ratio_candidates = []
    for ratio in ratios:
        x = math.sqrt(max_area / ratio[0] / ratio[1])
        x = round(x / 64) * 64
        ratio_candidates.append((x * ratio[0], x * ratio[1]))

    print("ratio_candidates", ratio_candidates)
    return ratio_candidates
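
# Usage sketch: the enumerated candidates match the hard-coded RATIOS table
# above (as a set; the enumeration order differs). Each side is a multiple of
# 64 and the area stays close to 512 * 512.
#   assert set(calculate_ratio()) == set(RATIOS)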


class AspectRatioCrop(object):
    """
    Aspect Ratio Crop transform.

    For a given image, find the best-matching aspect ratio among `base_sizes`
    and resize (plus center crop or padding) to the corresponding base size.

    Args:
        base_sizes: list[tuple], the (h, w) base sizes of the final output,
            e.g. [(512, 512), (512, 768), (768, 512)].
        crop_percent_thresh: float. If the center crop would discard more than
            this fraction of the resized image, the sample is flagged as
            unmatched (flag_matched=False).
    """

    def __init__(self, base_sizes, crop_percent_thresh=0.2):
        self.base_sizes = [(math.floor(h), math.floor(w)) for (h, w) in base_sizes]
        self.aspect_ratios = [x[1] / x[0] for x in self.base_sizes]  # w / h
        self.crop_percent_thresh = crop_percent_thresh

    def _find_size(self, w, h):
        # Choose the base size whose aspect ratio is closest to the image's;
        # ties are broken by the squared distance between resolutions.
        base_size_indexes = list(range(len(self.base_sizes)))
        aspect_ratios = [self.aspect_ratios[i] for i in base_size_indexes]
        aspect_ratio = w / h
        ratio_diff = [abs(ratio - aspect_ratio) for ratio in aspect_ratios]
        min_diff = np.min(ratio_diff)
        match_diff_indexes = [j for j in range(len(ratio_diff)) if ratio_diff[j] == min_diff]
        match_diff_indexes = sorted(
            match_diff_indexes,
            key=lambda x: (h - self.base_sizes[base_size_indexes[x]][0]) ** 2
            + (w - self.base_sizes[base_size_indexes[x]][1]) ** 2,
        )
        corr_index = base_size_indexes[match_diff_indexes[0]]
        return corr_index

    def get_pred_target_w_h(self, w, h):
        aspect_ratio = w / h
        aspect_index = self._find_size(w, h)
        pred_h, pred_w = self.base_sizes[aspect_index]

        # Two aspect-preserving resizes: match the target width or match the
        # target height. Keep the one that covers the target in both
        # dimensions, so a center crop can reach exactly (pred_h, pred_w).
        solutions = [
            (pred_w, int(pred_w / aspect_ratio)),
            (int(pred_h * aspect_ratio), pred_h),
        ]
        w_tar = None
        h_tar = None
        for solution in solutions:
            w_s, h_s = solution
            if w_s >= pred_w and h_s >= pred_h:
                w_tar = w_s
                h_tar = h_s

        return pred_w, pred_h, w_tar, h_tar, aspect_index

    def __call__(self, image, is_inference=False):
        flag_matched = True
        w, h = image.size
        pred_w, pred_h, w_tar, h_tar, aspect_index = self.get_pred_target_w_h(w, h)

        # Fraction of the covering resize that a center crop would discard.
        crop_percent = 1 - pred_w * pred_h / (w_tar * h_tar)
        if self.crop_percent_thresh > 0 and crop_percent > self.crop_percent_thresh:
            flag_matched = False

        if not is_inference:
            # Training: cover the base size, then center-crop.
            image = center_crop_and_resize(image, output_size=(pred_h, pred_w))
        else:
            # Inference: letterbox with padding so no content is lost.
            image = resize_with_padding(image, output_size=(pred_h, pred_w))

        original_size = [h, w]
        target_size = [pred_h, pred_w]

        return image, original_size, target_size, flag_matched
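
# Usage sketch (hypothetical sizes): a 1000x600 image has aspect ratio 5:3,
# closest to the (384, 576) base size (3:2), so it is resized to 640x384 and
# then center-cropped to 576x384, discarding 10% of the area (< 0.2).
#   transform = AspectRatioCrop(RATIOS, crop_percent_thresh=0.2)
#   img = Image.new('RGB', (1000, 600))
#   out, original_size, target_size, matched = transform(img)
#   assert out.size == (576, 384) and target_size == [384, 576] and matched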