# illume_plus-qwen2_5-7b-hf / aspect_ratio_utils.py

import math

import numpy as np
from PIL import Image, ImageOps
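
# (height, width) base resolutions, paired index-for-index with the string
# identifiers in RATIO_TYPES. The values match what calculate_ratio() below
# produces: multiples of 64 whose area stays close to 512 * 512.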
RATIOS = [
(512, 512),
(384, 512),
(512, 384),
(384, 768),
(768, 384),
(384, 576),
(576, 384),
(320, 960),
(960, 320),
(256, 1024),
(1024, 256),
]
RATIO_TYPES = [
'ratio_h512_w512',
'ratio_h384_w512',
'ratio_h512_w384',
'ratio_h384_w768',
'ratio_h768_w384',
'ratio_h384_w576',
'ratio_h576_w384',
'ratio_h320_w960',
'ratio_h960_w320',
'ratio_h256_w1024',
'ratio_h1024_w256',
]


def center_crop_and_resize(img, output_size=(256, 256)):
    """Resize `img` so it covers `output_size` (height, width), then center-crop."""
    target_h, target_w = output_size
    img_w, img_h = img.size
    scale_w, scale_h = img_w / target_w, img_h / target_h
    if scale_h > scale_w:
        # Relatively taller than the target: match the target width.
        new_w, new_h = target_w, int(target_w / img_w * img_h)
    else:
        # Relatively wider than the target: match the target height.
        new_w, new_h = int(target_h / img_h * img_w), target_h
    # Resize the image, keeping the aspect ratio
    img = img.resize((new_w, new_h), Image.LANCZOS)
    # Calculate the center cropping area
    left = (new_w - target_w) // 2
    top = (new_h - target_h) // 2
    right = left + target_w
    bottom = top + target_h
    # Crop the extra part
    img = img.crop((left, top, right, bottom))
    return img
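
# Worked example (my illustration, not in the original file): an 800x480 input
# mapped to output_size=(384, 512) is resized to 640x384 (matching the target
# height), then center-cropped with box (64, 0, 576, 384) down to 512x384.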


def resize_with_padding(img, output_size=(256, 256), fill_color=(255, 255, 255)):
    """Resize `img` to fit inside `output_size` (height, width), then pad to it."""
    target_height, target_width = output_size
    # Step 1: Resize with aspect ratio preserved
    original_width, original_height = img.size
    ratio = min(target_width / original_width, target_height / original_height)
    new_size = (int(original_width * ratio), int(original_height * ratio))
    resized_image = img.resize(new_size, Image.LANCZOS)
    # Step 2: Add padding to reach target size
    delta_w = target_width - new_size[0]
    delta_h = target_height - new_size[1]
    padding = (delta_w // 2, delta_h // 2, delta_w - (delta_w // 2), delta_h - (delta_h // 2))
    padded_image = ImageOps.expand(resized_image, padding, fill=fill_color)
    return padded_image


def unpad_and_resize_back(padded_image, original_width, original_height):
    """
    Revert the padded+resized image back to its original size.

    Args:
        padded_image (PIL.Image): Image after padding.
        original_width (int): Original image width before resize & pad.
        original_height (int): Original image height before resize & pad.

    Returns:
        PIL.Image: Image resized back to the original resolution.
    """
    # Compute the scale factor used during the first resize
    target_width, target_height = padded_image.size
    ratio = min(target_width / original_width, target_height / original_height)
    resized_w = int(original_width * ratio)
    resized_h = int(original_height * ratio)
    # Compute cropping box on padded image
    left = (target_width - resized_w) // 2
    upper = (target_height - resized_h) // 2
    right = left + resized_w
    lower = upper + resized_h
    # Crop out the resized region (before padding)
    cropped_image = padded_image.crop((left, upper, right, lower))
    # Resize back to original resolution
    recovered_image = cropped_image.resize((original_width, original_height), Image.LANCZOS)
    return recovered_image
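
# Round-trip sketch (my illustration, not in the original file): a 1000x500
# image padded into (512, 512) is first resized by ratio 0.512 to 512x256 and
# padded with 128 px above and below; unpad_and_resize_back then crops the box
# (0, 128, 512, 384) and resizes it back to 1000x500.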


def calculate_ratio():
    """Enumerate (height, width) candidates: multiples of 64 near a 512*512 area."""
    max_area = 512 * 512
    ratios = [(2, 2), (3, 4), (4, 3), (2, 4), (4, 2), (1, 4), (4, 1), (2, 3), (3, 2), (1, 3), (3, 1)]
    ratio_candidates = []
    for ratio in ratios:
        # Scale factor so that (x * ratio[0]) * (x * ratio[1]) ~= max_area,
        # rounded to the nearest multiple of 64.
        x = math.sqrt(max_area / ratio[0] / ratio[1])
        x = round(x / 64) * 64
        ratio_candidates.append((x * ratio[0], x * ratio[1]))
    print("ratio_candidates", ratio_candidates)
    return ratio_candidates
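
# Note: calling calculate_ratio() reproduces the RATIOS table at the top of
# this file, so the table can be regenerated if max_area or the aspect ratio
# list changes.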


class AspectRatioCrop(object):
    """
    Aspect Ratio Crop transform.

    For a given image, find the base size with the closest aspect ratio and
    map the image onto it (training: resize + center crop; inference:
    resize + pad).

    Args:
        base_sizes: list[tuple], the (height, width) base sizes of the final
            output, e.g. [(512, 512), (512, 768), (768, 512)].
        crop_percent_thresh: float. If the fraction of the image that would be
            cropped away exceeds this threshold, the sample is flagged as
            unmatched so it can be filtered out.
    """

    def __init__(self, base_sizes, crop_percent_thresh=0.2):
        self.base_sizes = [(math.floor(h), math.floor(w)) for (h, w) in base_sizes]
        self.aspect_ratios = [x[1] / x[0] for x in self.base_sizes]  # w / h
        self.crop_percent_thresh = crop_percent_thresh

    def _find_size(self, w, h):
        """Return the index of the base size whose aspect ratio best matches w/h."""
        aspect_ratio = w / h
        ratio_diff = [abs(ratio - aspect_ratio) for ratio in self.aspect_ratios]
        min_diff = np.min(ratio_diff)
        match_indexes = [j for j in range(len(ratio_diff)) if ratio_diff[j] == min_diff]
        # Among equally good ratio matches, pick the base size closest in (h, w).
        match_indexes = sorted(match_indexes, key=lambda j: (h - self.base_sizes[j][0]) ** 2
                               + (w - self.base_sizes[j][1]) ** 2)
        return match_indexes[0]

    def get_pred_target_w_h(self, w, h):
        """Return the base size (pred_w, pred_h), the aspect-preserving size
        (w_tar, h_tar) that fully covers it, and the base-size index."""
        aspect_ratio = w / h
        aspect_index = self._find_size(w, h)
        pred_h, pred_w = self.base_sizes[aspect_index]
        # Two candidate resizes that preserve the aspect ratio: match the base
        # width or match the base height; keep the one that covers the base size.
        solutions = [
            (pred_w, int(pred_w / aspect_ratio)),
            (int(pred_h * aspect_ratio), pred_h),
        ]
        w_tar = None
        h_tar = None
        for solution in solutions:
            w_s, h_s = solution
            if w_s >= pred_w and h_s >= pred_h:
                w_tar = w_s
                h_tar = h_s
        return pred_w, pred_h, w_tar, h_tar, aspect_index
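
    # Worked example (my illustration, not in the original file): for a 640x480
    # image, aspect_ratio = 4/3 exactly matches base size (384, 512), so
    # get_pred_target_w_h returns pred (512, 384) and target (512, 384), and no
    # pixels are cropped (crop_percent = 0 in __call__ below).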

    def __call__(self, image, is_inference=False):
        # Step 1: find the closest aspect ratio among the base sizes.
        flag_matched = True
        w, h = image.size
        pred_w, pred_h, w_tar, h_tar, aspect_index = self.get_pred_target_w_h(w, h)
        # Fraction of the covering resize (w_tar, h_tar) that cropping discards.
        crop_percent = 1 - pred_w * pred_h / (w_tar * h_tar)
        if self.crop_percent_thresh > 0 and crop_percent > self.crop_percent_thresh:
            flag_matched = False  # flag the sample so it can be filtered out
        if not is_inference:
            # Step 2 (train): crop and resize.
            image = center_crop_and_resize(image, output_size=(pred_h, pred_w))
        else:
            # Step 2 (inference): resize and pad.
            image = resize_with_padding(image, output_size=(pred_h, pred_w))
        original_size = [h, w]
        target_size = [pred_h, pred_w]
        return image, original_size, target_size, flag_matched
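

# A minimal usage sketch (my addition, not part of the original upload). It
# builds a synthetic PIL image and runs AspectRatioCrop in both training and
# inference modes, then maps the padded inference output back to the source
# resolution with unpad_and_resize_back.
if __name__ == "__main__":
    transform = AspectRatioCrop(RATIOS)
    demo = Image.new("RGB", (800, 480), color=(128, 128, 128))

    # Training path: resize + center crop to the matched base size.
    cropped, original_size, target_size, matched = transform(demo)
    print("train:", cropped.size, original_size, target_size, matched)

    # Inference path: resize + pad, then undo the padding afterwards.
    padded, original_size, target_size, matched = transform(demo, is_inference=True)
    restored = unpad_and_resize_back(padded, original_size[1], original_size[0])
    print("inference:", padded.size, "->", restored.size)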