# illume_plus-qwen2_5-7b-hf / aspect_ratio_utils.py

import math

import numpy as np
from PIL import Image, ImageOps
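
# (height, width) base resolutions, paired index-for-index with the string
# identifiers in RATIO_TYPES. The values match what calculate_ratio() below
# produces: multiples of 64 whose area stays close to 512 * 512.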
RATIOS = [
(512, 512),
(384, 512),
(512, 384),
(384, 768),
(768, 384),
(384, 576),
(576, 384),
(320, 960),
(960, 320),
(256, 1024),
(1024, 256),
]
RATIO_TYPES = [
'ratio_h512_w512',
'ratio_h384_w512',
'ratio_h512_w384',
'ratio_h384_w768',
'ratio_h768_w384',
'ratio_h384_w576',
'ratio_h576_w384',
'ratio_h320_w960',
'ratio_h960_w320',
'ratio_h256_w1024',
'ratio_h1024_w256',
]


def center_crop_and_resize(img, output_size=(256, 256)):
    """Resize `img` so it covers `output_size` (height, width), then center-crop."""
    target_h, target_w = output_size
    img_w, img_h = img.size
    scale_w, scale_h = img_w / target_w, img_h / target_h
    if scale_h > scale_w:
        # Relatively taller than the target: match the target width.
        new_w, new_h = target_w, int(target_w / img_w * img_h)
    else:
        # Relatively wider than the target: match the target height.
        new_w, new_h = int(target_h / img_h * img_w), target_h
    # Resize the image, keeping the aspect ratio
    img = img.resize((new_w, new_h), Image.LANCZOS)
    # Calculate the center cropping area
    left = (new_w - target_w) // 2
    top = (new_h - target_h) // 2
    right = left + target_w
    bottom = top + target_h
    # Crop the extra part
    img = img.crop((left, top, right, bottom))
    return img
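
# Worked example (my illustration, not in the original file): an 800x480 input
# mapped to output_size=(384, 512) is resized to 640x384 (matching the target
# height), then center-cropped with box (64, 0, 576, 384) down to 512x384.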


def resize_with_padding(img, output_size=(256, 256), fill_color=(255, 255, 255)):
    """Resize `img` to fit inside `output_size` (height, width), then pad to it."""
    target_height, target_width = output_size
    # Step 1: Resize with aspect ratio preserved
    original_width, original_height = img.size
    ratio = min(target_width / original_width, target_height / original_height)
    new_size = (int(original_width * ratio), int(original_height * ratio))
    resized_image = img.resize(new_size, Image.LANCZOS)
    # Step 2: Add padding to reach target size
    delta_w = target_width - new_size[0]
    delta_h = target_height - new_size[1]
    padding = (delta_w // 2, delta_h // 2, delta_w - (delta_w // 2), delta_h - (delta_h // 2))
    padded_image = ImageOps.expand(resized_image, padding, fill=fill_color)
    return padded_image


def unpad_and_resize_back(padded_image, original_width, original_height):
    """
    Revert the padded+resized image back to its original size.

    Args:
        padded_image (PIL.Image): Image after padding.
        original_width (int): Original image width before resize & pad.
        original_height (int): Original image height before resize & pad.

    Returns:
        PIL.Image: Image resized back to the original resolution.
    """
    # Compute the scale factor used during the first resize
    target_width, target_height = padded_image.size
    ratio = min(target_width / original_width, target_height / original_height)
    resized_w = int(original_width * ratio)
    resized_h = int(original_height * ratio)
    # Compute cropping box on padded image
    left = (target_width - resized_w) // 2
    upper = (target_height - resized_h) // 2
    right = left + resized_w
    lower = upper + resized_h
    # Crop out the resized region (before padding)
    cropped_image = padded_image.crop((left, upper, right, lower))
    # Resize back to original resolution
    recovered_image = cropped_image.resize((original_width, original_height), Image.LANCZOS)
    return recovered_image
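
# Round-trip sketch (my illustration, not in the original file): a 1000x500
# image padded into (512, 512) is first resized by ratio 0.512 to 512x256 and
# padded with 128 px above and below; unpad_and_resize_back then crops the box
# (0, 128, 512, 384) and resizes it back to 1000x500.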


def calculate_ratio():
    """Enumerate (height, width) candidates: multiples of 64 near a 512*512 area."""
    max_area = 512 * 512
    ratios = [(2, 2), (3, 4), (4, 3), (2, 4), (4, 2), (1, 4), (4, 1), (2, 3), (3, 2), (1, 3), (3, 1)]
    ratio_candidates = []
    for ratio in ratios:
        # Scale factor so that (x * ratio[0]) * (x * ratio[1]) ~= max_area,
        # rounded to the nearest multiple of 64.
        x = math.sqrt(max_area / ratio[0] / ratio[1])
        x = round(x / 64) * 64
        ratio_candidates.append((x * ratio[0], x * ratio[1]))
    print("ratio_candidates", ratio_candidates)
    return ratio_candidates
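
# Note: calling calculate_ratio() reproduces the RATIOS table at the top of
# this file, so the table can be regenerated if max_area or the aspect ratio
# list changes.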


class AspectRatioCrop(object):
    """
    Aspect Ratio Crop transform.

    For a given image, find the base size with the closest aspect ratio and
    map the image onto it (training: resize + center crop; inference:
    resize + pad).

    Args:
        base_sizes: list[tuple], the (height, width) base sizes of the final
            output, e.g. [(512, 512), (512, 768), (768, 512)].
        crop_percent_thresh: float. If the fraction of the image that would be
            cropped away exceeds this threshold, the sample is flagged as
            unmatched so it can be filtered out.
    """

    def __init__(self, base_sizes, crop_percent_thresh=0.2):
        self.base_sizes = [(math.floor(h), math.floor(w)) for (h, w) in base_sizes]
        self.aspect_ratios = [x[1] / x[0] for x in self.base_sizes]  # w / h
        self.crop_percent_thresh = crop_percent_thresh

    def _find_size(self, w, h):
        """Return the index of the base size whose aspect ratio best matches w/h."""
        aspect_ratio = w / h
        ratio_diff = [abs(ratio - aspect_ratio) for ratio in self.aspect_ratios]
        min_diff = np.min(ratio_diff)
        match_indexes = [j for j in range(len(ratio_diff)) if ratio_diff[j] == min_diff]
        # Among equally good ratio matches, pick the base size closest in (h, w).
        match_indexes = sorted(match_indexes, key=lambda j: (h - self.base_sizes[j][0]) ** 2
                               + (w - self.base_sizes[j][1]) ** 2)
        return match_indexes[0]

    def get_pred_target_w_h(self, w, h):
        """Return the base size (pred_w, pred_h), the aspect-preserving size
        (w_tar, h_tar) that fully covers it, and the base-size index."""
        aspect_ratio = w / h
        aspect_index = self._find_size(w, h)
        pred_h, pred_w = self.base_sizes[aspect_index]
        # Two candidate resizes that preserve the aspect ratio: match the base
        # width or match the base height; keep the one that covers the base size.
        solutions = [
            (pred_w, int(pred_w / aspect_ratio)),
            (int(pred_h * aspect_ratio), pred_h),
        ]
        w_tar = None
        h_tar = None
        for solution in solutions:
            w_s, h_s = solution
            if w_s >= pred_w and h_s >= pred_h:
                w_tar = w_s
                h_tar = h_s
        return pred_w, pred_h, w_tar, h_tar, aspect_index
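
    # Worked example (my illustration, not in the original file): for a 640x480
    # image, aspect_ratio = 4/3 exactly matches base size (384, 512), so
    # get_pred_target_w_h returns pred (512, 384) and target (512, 384), and no
    # pixels are cropped (crop_percent = 0 in __call__ below).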

    def __call__(self, image, is_inference=False):
        # Step 1: find the closest aspect ratio among the base sizes.
        flag_matched = True
        w, h = image.size
        pred_w, pred_h, w_tar, h_tar, aspect_index = self.get_pred_target_w_h(w, h)
        # Fraction of the covering resize (w_tar, h_tar) that cropping discards.
        crop_percent = 1 - pred_w * pred_h / (w_tar * h_tar)
        if self.crop_percent_thresh > 0 and crop_percent > self.crop_percent_thresh:
            flag_matched = False  # flag the sample so it can be filtered out
        if not is_inference:
            # Step 2 (train): crop and resize.
            image = center_crop_and_resize(image, output_size=(pred_h, pred_w))
        else:
            # Step 2 (inference): resize and pad.
            image = resize_with_padding(image, output_size=(pred_h, pred_w))
        original_size = [h, w]
        target_size = [pred_h, pred_w]
        return image, original_size, target_size, flag_matched
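

# A minimal usage sketch (my addition, not part of the original upload). It
# builds a synthetic PIL image and runs AspectRatioCrop in both training and
# inference modes, then maps the padded inference output back to the source
# resolution with unpad_and_resize_back.
if __name__ == "__main__":
    transform = AspectRatioCrop(RATIOS)
    demo = Image.new("RGB", (800, 480), color=(128, 128, 128))

    # Training path: resize + center crop to the matched base size.
    cropped, original_size, target_size, matched = transform(demo)
    print("train:", cropped.size, original_size, target_size, matched)

    # Inference path: resize + pad, then undo the padding afterwards.
    padded, original_size, target_size, matched = transform(demo, is_inference=True)
    restored = unpad_and_resize_back(padded, original_size[1], original_size[0])
    print("inference:", padded.size, "->", restored.size)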