import torch
from torch import nn
from torchvision import transforms
from facenet_pytorch import InceptionResnetV1


def create_vggface2_model(num_classes: int = 2, seed: int = 42):
    """Creates an InceptionResnetV1 (VGGFace2) model and transforms.

    Args:
        num_classes (int, optional): number of classes in the classifier head.
            Defaults to 2.
        seed (int, optional): random seed value. Defaults to 42.

    Returns:
        model (torch.nn.Module): VGGFace2 feature extractor model.
        transforms (torchvision.transforms.Compose): VGGFace2 image transforms.
    """
    # Load the pretrained model with a classification head
    model_pred = InceptionResnetV1(pretrained='vggface2',
                                   classify=True,
                                   num_classes=num_classes)

    # Drop the final five layers (pooling, dropout, bottleneck linear,
    # batch norm, logits) and keep the backbone as a feature extractor
    model_pred = nn.Sequential(*list(model_pred.children())[:-5])

    # Freeze the backbone so only the new classifier head is trained
    for param in model_pred.parameters():
        param.requires_grad = False

    # Seed before creating the new head so its initial weights are reproducible
    torch.manual_seed(seed)

    # Recreate the classifier head; assigning it as an attribute of the
    # nn.Sequential registers it as the last submodule, so it runs after
    # the backbone layers in forward()
    model_pred.classifier = nn.Sequential(
        nn.AdaptiveAvgPool2d(output_size=1),
        nn.Dropout(p=0.6, inplace=False),
        nn.Flatten(),
        nn.Linear(in_features=1792, out_features=512, bias=False),
        nn.BatchNorm1d(512, eps=0.001, momentum=0.1, affine=True,
                       track_running_stats=True),
        nn.Linear(in_features=512,
                  out_features=num_classes,  # one output unit per class
                  bias=True))

    # Write transform for image data
    data_transform = transforms.Compose([
        # Resize to 160x160, the input size InceptionResnetV1 was trained on
        transforms.Resize(size=(160, 160)),
        # Flip the images randomly on the horizontal (p = probability of flip)
        transforms.RandomHorizontalFlip(p=0.5),
        # Convert to a torch.Tensor, scaling pixel values from [0, 255] to [0.0, 1.0]
        transforms.ToTensor()
    ])

    return model_pred, data_transform
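
# A minimal usage sketch, assuming facenet-pytorch and Pillow are installed;
# "face.jpg" is a hypothetical example file, not part of the original module.
if __name__ == "__main__":
    from PIL import Image

    model, transform = create_vggface2_model(num_classes=2, seed=42)
    model.eval()

    img = Image.open("face.jpg")          # hypothetical input image
    batch = transform(img).unsqueeze(0)   # add a batch dimension: (1, 3, 160, 160)

    with torch.no_grad():
        logits = model(batch)             # shape: (1, num_classes)
        probs = torch.softmax(logits, dim=1)
    print(probs)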