from PIL import Image, ImageOps
import math
import numpy as np

# (h, w) base resolutions near a 512 * 512 pixel budget, every side a
# multiple of 64; the list matches the output of calculate_ratio() below
# (up to ordering).
RATIOS = [
    (512, 512),
    (384, 512), (512, 384),
    (384, 768), (768, 384),
    (384, 576), (576, 384),
    (320, 960), (960, 320),
    (256, 1024), (1024, 256),
]
# String tags aligned index-wise with RATIOS.
RATIO_TYPES = [
    'ratio_h512_w512',
    'ratio_h384_w512', 'ratio_h512_w384',
    'ratio_h384_w768', 'ratio_h768_w384',
    'ratio_h384_w576', 'ratio_h576_w384',
    'ratio_h320_w960', 'ratio_h960_w320',
    'ratio_h256_w1024', 'ratio_h1024_w256',
]


def center_crop_and_resize(img, output_size=(256, 256)):
    target_h, target_w = output_size
    img_w, img_h = img.size
    scale_w, scale_h = img_w / target_w, img_h / target_h
    # Match the dimension with the smaller scale factor to the target exactly;
    # the other dimension overshoots and is center-cropped below.
    if scale_h > scale_w:
        new_w, new_h = target_w, int(target_w / img_w * img_h)
    else:
        new_w, new_h = int(target_h / img_h * img_w), target_h
    # Resize the image, keeping the aspect ratio
    img = img.resize((new_w, new_h), Image.LANCZOS)
    # Calculate the center cropping area
    left = (new_w - target_w) // 2
    top = (new_h - target_h) // 2
    right = left + target_w
    bottom = top + target_h
    # Crop the extra part
    img = img.crop((left, top, right, bottom))
    return img


def resize_with_padding(img, output_size=(256, 256), fill_color=(255, 255, 255)):
    target_height, target_width = output_size
    # Step 1: resize with the aspect ratio preserved
    original_width, original_height = img.size
    ratio = min(target_width / original_width, target_height / original_height)
    new_size = (int(original_width * ratio), int(original_height * ratio))
    resized_image = img.resize(new_size, Image.LANCZOS)
    # Step 2: pad symmetrically to reach the target size
    delta_w = target_width - new_size[0]
    delta_h = target_height - new_size[1]
    padding = (delta_w // 2, delta_h // 2, delta_w - (delta_w // 2), delta_h - (delta_h // 2))
    padded_image = ImageOps.expand(resized_image, padding, fill=fill_color)
    return padded_image
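
# Illustrative sketch (not used by the module): contrasts the two helpers
# above. center_crop_and_resize fills the whole target and discards the
# overshooting border, while resize_with_padding keeps every source pixel
# and pads with fill_color instead. The sizes below are arbitrary demo values.
def _demo_crop_vs_pad():
    img = Image.new('RGB', (1920, 1080), (0, 128, 255))  # stand-in for a real photo
    cropped = center_crop_and_resize(img, output_size=(512, 384))  # trims left/right
    padded = resize_with_padding(img, output_size=(512, 384))      # pads top/bottom
    # Both land on the same target; note PIL's .size is (w, h) while
    # output_size here is (h, w).
    assert cropped.size == padded.size == (384, 512)
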
""" # Compute the scale factor used during the first resize target_width, target_height = padded_image.size ratio = min(target_width / original_width, target_height / original_height) resized_w = int(original_width * ratio) resized_h = int(original_height * ratio) # Compute cropping box on padded image left = (target_width - resized_w) // 2 upper = (target_height - resized_h) // 2 right = left + resized_w lower = upper + resized_h # Crop out the resized region (before padding) cropped_image = padded_image.crop((left, upper, right, lower)) # Resize back to original resolution recovered_image = cropped_image.resize((original_width, original_height), Image.LANCZOS) return recovered_image def resize_with_padding(img, output_size=(256, 256), fill_color=(255, 255, 255)): target_height, target_width = output_size # Step 1: Resize with aspect ratio preserved original_width, original_height = img.size ratio = min(target_width / original_width, target_height / original_height) new_size = (int(original_width * ratio), int(original_height * ratio)) resized_image = img.resize(new_size, Image.LANCZOS) # Step 2: Add padding to reach target size delta_w = target_width - new_size[0] delta_h = target_height - new_size[1] padding = (delta_w // 2, delta_h // 2, delta_w - (delta_w // 2), delta_h - (delta_h // 2)) padded_image = ImageOps.expand(resized_image, padding, fill=fill_color) return padded_image def calculate_ratio(): max_area = 512 * 512 ratios = [(2, 2), (3, 4), (4, 3), (2, 4), (4, 2), (1, 4), (4, 1), (2, 3), (3, 2), (1, 3), (3, 1)] ratio_candicates = [] for ratio in ratios: x = math.sqrt(max_area / ratio[0] / ratio[1]) x = round(x / 64) * 64 tmp = (x*ratio[0], x*ratio[1]) # print(ratio, x, tmp) ratio_candicates.append(tmp) print("ratio_candicates", ratio_candicates) return ratio_candicates class AspectRatioCrop(object): """ Aspect Ratio Crop transform. For a given image, find the corresponding aspect ratio and resize / resize + crop to the corresponding base sizes Args: base_sizes: list[tuple], the base sizes of final output. For example, [(512, 512), (512, 768), (768, 512)] resize_and_crop: bool .If False, find the matched aspect ratio and resize to base size. 
""" def __init__(self, base_sizes, crop_percent_thresh=0.2): self.base_sizes = [(math.floor(h), math.floor(w)) for (h, w) in base_sizes] self.aspect_ratios = [x[1] / x[0] for x in self.base_sizes] # w / h self.crop_percent_thresh = crop_percent_thresh def _find_size(self, w, h): base_size_indexes = list(range(len(self.base_sizes))) aspect_ratios = [self.aspect_ratios[i] for i in base_size_indexes] aspect_ratio = w / h ratio_diff = [abs(ratio - aspect_ratio) for ratio in aspect_ratios] min_diff = np.min(ratio_diff) match_diff_indexes = [j for j in range(len(ratio_diff)) if ratio_diff[j] == min_diff] match_diff_indexes = sorted(match_diff_indexes, key=lambda x: (h-self.base_sizes[base_size_indexes[x]][0])**2 + (w-self.base_sizes[base_size_indexes[x]][1])**2) # pick the area most match one corr_index = base_size_indexes[match_diff_indexes[0]] return corr_index def get_pred_target_w_h(self, w, h): aspect_ratio = w / h aspect_index = self._find_size(w, h) pred_h, pred_w = self.base_sizes[aspect_index] solutions = [ (pred_w, int(pred_w / aspect_ratio)), (int(pred_h * aspect_ratio), pred_h), ] w_tar = None h_tar = None for solution in solutions: w_s, h_s = solution if w_s >= pred_w and h_s >= pred_h: w_tar = w_s h_tar = h_s return pred_w, pred_h, w_tar, h_tar, aspect_index def __call__(self, image, is_inference=False): ## step 1: find the cloest aspect ratios flag_matched = True w, h = image.size pred_w, pred_h, w_tar, h_tar, aspect_index = self.get_pred_target_w_h(w, h) crop_percent = 1 - pred_w * pred_h / (w_tar * h_tar) if self.crop_percent_thresh > 0 and crop_percent > self.crop_percent_thresh: flag_matched = False # filter data if not is_inference: ## step 2: train: crop and resize image = center_crop_and_resize(image, output_size=(pred_h, pred_w)) else: ## step 2: inference: resize and padding image = resize_with_padding(image, output_size=(pred_h, pred_w)) original_size = [h, w] target_size = [pred_h, pred_w] return image, original_size, target_size, flag_matched