Upload 29 files
- .gitattributes +2 -9
- .gitignore +160 -0
- Dockerfile +29 -0
- README.md +6 -6
- align/__init__.py +1 -0
- align/align_trans.py +296 -0
- align/box_utils.py +238 -0
- align/detector.py +126 -0
- align/first_stage.py +97 -0
- align/get_nets.py +169 -0
- align/matlab_cp2tform.py +345 -0
- align/onet.npy +3 -0
- align/pnet.npy +3 -0
- align/rnet.npy +3 -0
- app.py +80 -0
- backbone/__init__.py +1 -0
- backbone/model_irse.py +237 -0
- backbone/model_resnet.py +195 -0
- backbone/models2.py +305 -0
- models/Backbone_IR_152_Arcface_Epoch_112.pth +3 -0
- models/Backbone_IR_152_Cosface_Epoch_70.pth +3 -0
- models/Backbone_ResNet_152_Arcface_Epoch_65.pth +3 -0
- models/Backbone_ResNet_152_Cosface_Epoch_68.pth +3 -0
- requirements-min.txt +5 -0
- requirements.txt +103 -0
- util/__init__.py +1 -0
- util/attack_utils.py +127 -0
- util/feature_extraction_utils.py +112 -0
- util/prepare_utils.py +254 -0
.gitattributes
CHANGED
@@ -2,34 +2,27 @@
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
-*.
+*.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,160 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
Dockerfile
ADDED
@@ -0,0 +1,29 @@
# Stage 1: Build
FROM python:3.10.12-slim as builder

RUN apt update && \
    apt install --no-install-recommends -y build-essential gcc git

COPY requirements-min.txt /requirements.txt

RUN pip install --no-cache-dir --no-warn-script-location --user -r requirements.txt

# Stage 2: Runtime
FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
ENV GRADIO_SERVER_NAME=0.0.0.0

RUN apt update && \
    apt install --no-install-recommends -y python3 python3-pip libgl1-mesa-glx libglib2.0-0 && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

COPY models/ /models
COPY util/ /util
COPY backbone/ /backbone
COPY align/ /align
COPY app.py app.py

COPY --from=builder /root/.local/lib/python3.10/site-packages /root/.local/lib/python3.10/site-packages

CMD ["python3", "-u", "app.py"]
EXPOSE 7860
README.md
CHANGED
@@ -1,12 +1,12 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: LowKey
+emoji: 😒
+colorFrom: pink
+colorTo: blue
 sdk: gradio
-sdk_version:
+sdk_version: 3.0.20
 app_file: app.py
 pinned: false
 ---
 
-
+Needs git lfs for the model weights
align/__init__.py
ADDED
@@ -0,0 +1 @@
+
align/align_trans.py
ADDED
@@ -0,0 +1,296 @@
import numpy as np
import cv2
from align.matlab_cp2tform import get_similarity_transform_for_cv2


# reference facial points, a list of (x, y) coordinates; these defaults
# are for crop_size = (96, 112) and should be adjusted for other crop sizes
REFERENCE_FACIAL_POINTS = [
    [30.29459953, 51.69630051],
    [65.53179932, 51.50139999],
    [48.02519989, 71.73660278],
    [33.54930115, 92.3655014],
    [62.72990036, 92.20410156]
]

DEFAULT_CROP_SIZE = (96, 112)


class FaceWarpException(Exception):
    def __str__(self):
        return 'In File {}:{}'.format(__file__, super().__str__())


def get_reference_facial_points(output_size=None,
                                inner_padding_factor=0.0,
                                outer_padding=(0, 0),
                                default_square=False):
    """
    Function:
    ----------
    get reference 5 key points according to crop settings:
    0. Set default crop_size:
        if default_square:
            crop_size = (112, 112)
        else:
            crop_size = (96, 112)
    1. Pad the crop_size by inner_padding_factor on each side;
    2. Resize crop_size into (output_size - outer_padding*2),
       pad into output_size with outer_padding;
    3. Output reference_5point;
    Parameters:
    ----------
    @output_size: (w, h) or None
        size of aligned face image
    @inner_padding_factor: (w_factor, h_factor)
        padding factor for inner (w, h)
    @outer_padding: (w_pad, h_pad)
        each row is a pair of coordinates (x, y)
    @default_square: True or False
        if True:
            default crop_size = (112, 112)
        else:
            default crop_size = (96, 112)
    !!! make sure, if output_size is not None:
            (output_size - outer_padding)
            = some_scale * (default crop_size * (1.0 + inner_padding_factor))
    Returns:
    ----------
    @reference_5point: 5x2 np.array
        each row is a pair of transformed coordinates (x, y)
    """
    tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
    tmp_crop_size = np.array(DEFAULT_CROP_SIZE)

    # 0) make the inner region a square
    if default_square:
        size_diff = max(tmp_crop_size) - tmp_crop_size
        tmp_5pts += size_diff / 2
        tmp_crop_size += size_diff

    if (output_size and
            output_size[0] == tmp_crop_size[0] and
            output_size[1] == tmp_crop_size[1]):
        # output_size == default crop_size: return default reference points
        return tmp_5pts

    if (inner_padding_factor == 0 and
            outer_padding == (0, 0)):
        if output_size is None:
            # no paddings to do: return default reference points
            return tmp_5pts
        else:
            raise FaceWarpException(
                'No paddings to do, output_size must be None or {}'.format(tmp_crop_size))

    # check output size
    if not (0 <= inner_padding_factor <= 1.0):
        raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')

    if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0)
            and output_size is None):
        # deduce output_size from the paddings
        output_size = (tmp_crop_size * (1 + inner_padding_factor * 2)).astype(np.int32)
        output_size += np.array(outer_padding)

    if not (outer_padding[0] < output_size[0]
            and outer_padding[1] < output_size[1]):
        raise FaceWarpException('Not (outer_padding[0] < output_size[0] '
                                'and outer_padding[1] < output_size[1])')

    # 1) pad the inner region according to inner_padding_factor
    if inner_padding_factor > 0:
        size_diff = tmp_crop_size * inner_padding_factor * 2
        tmp_5pts += size_diff / 2
        tmp_crop_size += np.round(size_diff).astype(np.int32)

    # 2) resize the padded inner region
    size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2

    if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]:
        raise FaceWarpException('Must have (output_size - outer_padding) '
                                '= some_scale * (crop_size * (1.0 + inner_padding_factor))')

    scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
    tmp_5pts = tmp_5pts * scale_factor
    tmp_crop_size = size_bf_outer_pad

    # 3) add outer_padding to make output_size
    reference_5point = tmp_5pts + np.array(outer_padding)
    tmp_crop_size = output_size

    return reference_5point


def get_affine_transform_matrix(src_pts, dst_pts):
    """
    Function:
    ----------
    get affine transform matrix 'tfm' from src_pts to dst_pts
    Parameters:
    ----------
    @src_pts: Kx2 np.array
        source points matrix, each row is a pair of coordinates (x, y)
    @dst_pts: Kx2 np.array
        destination points matrix, each row is a pair of coordinates (x, y)
    Returns:
    ----------
    @tfm: 2x3 np.array
        transform matrix from src_pts to dst_pts
    """
    tfm = np.float32([[1, 0, 0], [0, 1, 0]])
    n_pts = src_pts.shape[0]
    ones = np.ones((n_pts, 1), src_pts.dtype)
    src_pts_ = np.hstack([src_pts, ones])
    dst_pts_ = np.hstack([dst_pts, ones])

    A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_, rcond=None)

    if rank == 3:
        tfm = np.float32([
            [A[0, 0], A[1, 0], A[2, 0]],
            [A[0, 1], A[1, 1], A[2, 1]]
        ])
    elif rank == 2:
        tfm = np.float32([
            [A[0, 0], A[1, 0], 0],
            [A[0, 1], A[1, 1], 0]
        ])

    return tfm


def warp_and_crop_face(src_img,
                       facial_pts,
                       reference_pts=None,
                       crop_size=(96, 112),
                       align_type='similarity'):
    """
    Function:
    ----------
    apply an affine transform to src_img so that facial_pts land on reference_pts
    Parameters:
    ----------
    @src_img: HxWx3 np.array
        input image
    @facial_pts: could be
        1) a list of K coordinates (x, y)
        or
        2) Kx2 or 2xK np.array
            each row or col is a pair of coordinates (x, y)
    @reference_pts: could be
        1) a list of K coordinates (x, y)
        or
        2) Kx2 or 2xK np.array
            each row or col is a pair of coordinates (x, y)
        or
        3) None
            if None, use default reference facial points
    @crop_size: (w, h)
        output face image size
    @align_type: transform type, could be one of
        1) 'similarity': use similarity transform
        2) 'cv2_affine': use the first 3 points to do affine transform,
            by calling cv2.getAffineTransform()
        3) 'affine': use all points to do affine transform
    Returns:
    ----------
    @face_img: output face image with size (w, h) = @crop_size
    """
    if reference_pts is None:
        if crop_size[0] == 96 and crop_size[1] == 112:
            reference_pts = REFERENCE_FACIAL_POINTS
        else:
            default_square = False
            inner_padding_factor = 0
            outer_padding = (0, 0)
            output_size = crop_size

            reference_pts = get_reference_facial_points(output_size,
                                                        inner_padding_factor,
                                                        outer_padding,
                                                        default_square)

    ref_pts = np.float32(reference_pts)
    ref_pts_shp = ref_pts.shape
    if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2:
        raise FaceWarpException(
            'reference_pts.shape must be (K,2) or (2,K) and K>2')

    if ref_pts_shp[0] == 2:
        ref_pts = ref_pts.T

    src_pts = np.float32(facial_pts)
    src_pts_shp = src_pts.shape
    if max(src_pts_shp) < 3 or min(src_pts_shp) != 2:
        raise FaceWarpException(
            'facial_pts.shape must be (K,2) or (2,K) and K>2')

    if src_pts_shp[0] == 2:
        src_pts = src_pts.T

    if src_pts.shape != ref_pts.shape:
        raise FaceWarpException(
            'facial_pts and reference_pts must have the same shape')

    if align_type == 'cv2_affine':
        tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
    elif align_type == 'affine':
        tfm = get_affine_transform_matrix(src_pts, ref_pts)
    else:
        tfm, tfm_inv = get_similarity_transform_for_cv2(src_pts, ref_pts)

    face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]))

    return face_img, tfm
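For orientation, here is a minimal sketch of how the aligner above is typically driven. The image path and the five landmark coordinates (left eye, right eye, nose, left and right mouth corner) are placeholders, not values from this repository; in practice the landmarks come from the detector below.

import cv2
import numpy as np
from align.align_trans import get_reference_facial_points, warp_and_crop_face

img = cv2.imread('face.jpg')  # hypothetical input image

# hypothetical 5-point landmarks, one (x, y) row per point
facial_5pts = np.array([[126.0, 148.0], [186.0, 146.0], [157.0, 185.0],
                        [133.0, 220.0], [180.0, 218.0]])

# reference points for a square 112x112 crop
reference = get_reference_facial_points(default_square=True)

warped, tfm = warp_and_crop_face(img, facial_5pts,
                                 reference_pts=reference,
                                 crop_size=(112, 112))
cv2.imwrite('face_aligned.jpg', warped)  # aligned 112x112 face chip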
align/box_utils.py
ADDED
@@ -0,0 +1,238 @@
import numpy as np
from PIL import Image


def nms(boxes, overlap_threshold=0.5, mode='union'):
    """Non-maximum suppression.

    Arguments:
        boxes: a float numpy array of shape [n, 5],
            where each row is (xmin, ymin, xmax, ymax, score).
        overlap_threshold: a float number.
        mode: 'union' or 'min'.

    Returns:
        list with indices of the selected boxes
    """

    # if there are no boxes, return the empty list
    if len(boxes) == 0:
        return []

    # list of picked indices
    pick = []

    # grab the coordinates of the bounding boxes
    x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]

    area = (x2 - x1 + 1.0)*(y2 - y1 + 1.0)
    ids = np.argsort(score)  # in increasing order

    while len(ids) > 0:

        # grab index of the largest value
        last = len(ids) - 1
        i = ids[last]
        pick.append(i)

        # compute intersections
        # of the box with the largest score
        # with the rest of boxes

        # left top corner of intersection boxes
        ix1 = np.maximum(x1[i], x1[ids[:last]])
        iy1 = np.maximum(y1[i], y1[ids[:last]])

        # right bottom corner of intersection boxes
        ix2 = np.minimum(x2[i], x2[ids[:last]])
        iy2 = np.minimum(y2[i], y2[ids[:last]])

        # width and height of intersection boxes
        w = np.maximum(0.0, ix2 - ix1 + 1.0)
        h = np.maximum(0.0, iy2 - iy1 + 1.0)

        # intersections' areas
        inter = w * h
        if mode == 'min':
            overlap = inter/np.minimum(area[i], area[ids[:last]])
        elif mode == 'union':
            # intersection over union (IoU)
            overlap = inter/(area[i] + area[ids[:last]] - inter)

        # delete all boxes where overlap is too big
        ids = np.delete(
            ids,
            np.concatenate([[last], np.where(overlap > overlap_threshold)[0]])
        )

    return pick


def convert_to_square(bboxes):
    """Convert bounding boxes to a square form.

    Arguments:
        bboxes: a float numpy array of shape [n, 5].

    Returns:
        a float numpy array of shape [n, 5],
        squared bounding boxes.
    """

    square_bboxes = np.zeros_like(bboxes)
    x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
    h = y2 - y1 + 1.0
    w = x2 - x1 + 1.0
    max_side = np.maximum(h, w)
    square_bboxes[:, 0] = x1 + w*0.5 - max_side*0.5
    square_bboxes[:, 1] = y1 + h*0.5 - max_side*0.5
    square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0
    square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0
    return square_bboxes


def calibrate_box(bboxes, offsets):
    """Transform bounding boxes to be more like true bounding boxes.
    'offsets' is one of the outputs of the nets.

    Arguments:
        bboxes: a float numpy array of shape [n, 5].
        offsets: a float numpy array of shape [n, 4].

    Returns:
        a float numpy array of shape [n, 5].
    """
    x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
    w = x2 - x1 + 1.0
    h = y2 - y1 + 1.0
    w = np.expand_dims(w, 1)
    h = np.expand_dims(h, 1)

    # this is what is happening here:
    # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)]
    # x1_true = x1 + tx1*w
    # y1_true = y1 + ty1*h
    # x2_true = x2 + tx2*w
    # y2_true = y2 + ty2*h
    # below is just a more compact form of this

    # are offsets always such that
    # x1 < x2 and y1 < y2 ?

    translation = np.hstack([w, h, w, h])*offsets
    bboxes[:, 0:4] = bboxes[:, 0:4] + translation
    return bboxes


def get_image_boxes(bounding_boxes, img, size=24):
    """Cut out boxes from the image.

    Arguments:
        bounding_boxes: a float numpy array of shape [n, 5].
        img: an instance of PIL.Image.
        size: an integer, size of cutouts.

    Returns:
        a float numpy array of shape [n, 3, size, size].
    """

    num_boxes = len(bounding_boxes)
    width, height = img.size

    [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(bounding_boxes, width, height)
    img_boxes = np.zeros((num_boxes, 3, size, size), 'float32')

    for i in range(num_boxes):
        img_box = np.zeros((h[i], w[i], 3), 'uint8')

        img_array = np.asarray(img, 'uint8')
        img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] = \
            img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :]

        # resize
        img_box = Image.fromarray(img_box)
        img_box = img_box.resize((size, size), Image.BILINEAR)
        img_box = np.asarray(img_box, 'float32')

        img_boxes[i, :, :, :] = _preprocess(img_box)

    return img_boxes


def correct_bboxes(bboxes, width, height):
    """Crop boxes that are too big and get coordinates
    with respect to cutouts.

    Arguments:
        bboxes: a float numpy array of shape [n, 5],
            where each row is (xmin, ymin, xmax, ymax, score).
        width: a float number.
        height: a float number.

    Returns:
        dy, dx, edy, edx: int numpy arrays of shape [n],
            coordinates of the boxes with respect to the cutouts.
        y, x, ey, ex: int numpy arrays of shape [n],
            corrected ymin, xmin, ymax, xmax.
        h, w: int numpy arrays of shape [n],
            just heights and widths of boxes.

        in the following order:
            [dy, edy, dx, edx, y, ey, x, ex, w, h].
    """

    x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
    w, h = x2 - x1 + 1.0, y2 - y1 + 1.0
    num_boxes = bboxes.shape[0]

    # 'e' stands for end
    # (x, y) -> (ex, ey)
    x, y, ex, ey = x1, y1, x2, y2

    # we need to cut out a box from the image.
    # (x, y, ex, ey) are corrected coordinates of the box
    # in the image.
    # (dx, dy, edx, edy) are coordinates of the box in the cutout
    # from the image.
    dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,))
    edx, edy = w.copy() - 1.0, h.copy() - 1.0

    # if box's bottom right corner is too far right
    ind = np.where(ex > width - 1.0)[0]
    edx[ind] = w[ind] + width - 2.0 - ex[ind]
    ex[ind] = width - 1.0

    # if box's bottom right corner is too low
    ind = np.where(ey > height - 1.0)[0]
    edy[ind] = h[ind] + height - 2.0 - ey[ind]
    ey[ind] = height - 1.0

    # if box's top left corner is too far left
    ind = np.where(x < 0.0)[0]
    dx[ind] = 0.0 - x[ind]
    x[ind] = 0.0

    # if box's top left corner is too high
    ind = np.where(y < 0.0)[0]
    dy[ind] = 0.0 - y[ind]
    y[ind] = 0.0

    return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h]
    return_list = [i.astype('int32') for i in return_list]

    return return_list


def _preprocess(img):
    """Preprocessing step before feeding the network.

    Arguments:
        img: a float numpy array of shape [h, w, c].

    Returns:
        a float numpy array of shape [1, c, h, w].
    """
    img = img.transpose((2, 0, 1))
    img = np.expand_dims(img, 0)
    img = (img - 127.5) * 0.0078125
    return img
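A quick sanity check on the helpers above, using hand-made coordinates: nms should drop the low-scoring near-duplicate of the first box, and convert_to_square grows each box's short side around its center (note that it zeroes the score column rather than carrying it over).

import numpy as np
from align.box_utils import nms, convert_to_square

# each row is (xmin, ymin, xmax, ymax, score); values are made up
boxes = np.array([
    [10.0, 10.0, 50.0, 60.0, 0.9],
    [12.0, 11.0, 52.0, 63.0, 0.6],    # mostly the same region, lower score
    [100.0, 100.0, 140.0, 150.0, 0.8],
])

keep = nms(boxes, overlap_threshold=0.5, mode='union')
print(keep)  # [0, 2]: the low-scoring duplicate is suppressed

print(convert_to_square(boxes[keep]))  # squared versions of the kept boxes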
align/detector.py
ADDED
@@ -0,0 +1,126 @@
import numpy as np
import torch
import sys
sys.path.append('./')
from align.get_nets import PNet, RNet, ONet
from align.box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
from align.first_stage import run_first_stage


def detect_faces(image, min_face_size=20.0,
                 thresholds=[0.6, 0.7, 0.8],
                 nms_thresholds=[0.7, 0.7, 0.7]):
    """
    Arguments:
        image: an instance of PIL.Image.
        min_face_size: a float number.
        thresholds: a list of length 3.
        nms_thresholds: a list of length 3.

    Returns:
        two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
        bounding boxes and facial landmarks.
    """
    # LOAD MODELS
    pnet = PNet()
    rnet = RNet()
    onet = ONet()
    onet.eval()

    # BUILD AN IMAGE PYRAMID
    width, height = image.size
    min_length = min(height, width)

    min_detection_size = 12
    factor = 0.707  # sqrt(0.5)

    # scales for scaling the image
    scales = []

    # scale the image so that the minimum size we can detect
    # equals the minimum face size we want to detect
    m = min_detection_size/min_face_size
    min_length *= m

    factor_count = 0
    while min_length > min_detection_size:
        scales.append(m*factor**factor_count)
        min_length *= factor
        factor_count += 1

    # STAGE 1

    # it will be returned
    bounding_boxes = []

    # run P-Net on different scales
    for s in scales:
        boxes = run_first_stage(image, pnet, scale=s, threshold=thresholds[0])
        bounding_boxes.append(boxes)

    # collect boxes (and offsets, and scores) from different scales
    bounding_boxes = [i for i in bounding_boxes if i is not None]
    bounding_boxes = np.vstack(bounding_boxes)

    keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
    bounding_boxes = bounding_boxes[keep]

    # use offsets predicted by pnet to transform bounding boxes
    bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
    # shape [n_boxes, 5]

    bounding_boxes = convert_to_square(bounding_boxes)
    bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

    # STAGE 2

    img_boxes = get_image_boxes(bounding_boxes, image, size=24)
    with torch.no_grad():
        output = rnet(torch.FloatTensor(img_boxes))
    offsets = output[0].data.numpy()  # shape [n_boxes, 4]
    probs = output[1].data.numpy()  # shape [n_boxes, 2]

    keep = np.where(probs[:, 1] > thresholds[1])[0]
    bounding_boxes = bounding_boxes[keep]
    bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
    offsets = offsets[keep]

    keep = nms(bounding_boxes, nms_thresholds[1])
    bounding_boxes = bounding_boxes[keep]
    bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
    bounding_boxes = convert_to_square(bounding_boxes)
    bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

    # STAGE 3

    img_boxes = get_image_boxes(bounding_boxes, image, size=48)
    if len(img_boxes) == 0:
        return [], []
    with torch.no_grad():
        output = onet(torch.FloatTensor(img_boxes))
    landmarks = output[0].data.numpy()  # shape [n_boxes, 10]
    offsets = output[1].data.numpy()  # shape [n_boxes, 4]
    probs = output[2].data.numpy()  # shape [n_boxes, 2]

    keep = np.where(probs[:, 1] > thresholds[2])[0]
    bounding_boxes = bounding_boxes[keep]
    bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
    offsets = offsets[keep]
    landmarks = landmarks[keep]

    # compute landmark points
    width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
    height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
    xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
    landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1)*landmarks[:, 0:5]
    landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1)*landmarks[:, 5:10]

    bounding_boxes = calibrate_box(bounding_boxes, offsets)
    keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
    bounding_boxes = bounding_boxes[keep]
    landmarks = landmarks[keep]

    return bounding_boxes, landmarks
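A minimal sketch of running the full three-stage cascade; the image path is a placeholder and the constants are the defaults from the file above.

from PIL import Image
from align.detector import detect_faces

img = Image.open('group_photo.jpg').convert('RGB')  # hypothetical test image

bounding_boxes, landmarks = detect_faces(img, min_face_size=20.0)

# each box is (xmin, ymin, xmax, ymax, score); each landmark row packs
# five x coordinates followed by the five matching y coordinates
for box, lm in zip(bounding_boxes, landmarks):
    print('face at (%.0f, %.0f), score %.2f' % (box[0], box[1], box[4]))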
align/first_stage.py
ADDED
@@ -0,0 +1,97 @@
import torch
import math
from PIL import Image
import numpy as np
from align.box_utils import nms, _preprocess


def run_first_stage(image, net, scale, threshold):
    """Run P-Net, generate bounding boxes, and do NMS.

    Arguments:
        image: an instance of PIL.Image.
        net: an instance of pytorch's nn.Module, P-Net.
        scale: a float number,
            scale width and height of the image by this number.
        threshold: a float number,
            threshold on the probability of a face when generating
            bounding boxes from predictions of the net.

    Returns:
        a float numpy array of shape [n_boxes, 9],
        bounding boxes with scores and offsets (4 + 1 + 4).
    """

    # scale the image and convert it to a float array
    width, height = image.size
    sw, sh = math.ceil(width*scale), math.ceil(height*scale)
    img = image.resize((sw, sh), Image.BILINEAR)
    img = np.asarray(img, 'float32')

    with torch.no_grad():
        output = net(torch.FloatTensor(_preprocess(img)))
    probs = output[1].data.numpy()[0, 1, :, :]
    offsets = output[0].data.numpy()
    # probs: probability of a face at each sliding window
    # offsets: transformations to true bounding boxes

    boxes = _generate_bboxes(probs, offsets, scale, threshold)
    if len(boxes) == 0:
        return None

    keep = nms(boxes[:, 0:5], overlap_threshold=0.5)
    return boxes[keep]


def _generate_bboxes(probs, offsets, scale, threshold):
    """Generate bounding boxes at places
    where there is probably a face.

    Arguments:
        probs: a float numpy array of shape [n, m].
        offsets: a float numpy array of shape [1, 4, n, m].
        scale: a float number,
            width and height of the image were scaled by this number.
        threshold: a float number.

    Returns:
        a float numpy array of shape [n_boxes, 9]
    """

    # applying P-Net is equivalent, in some sense, to
    # moving a 12x12 window with stride 2
    stride = 2
    cell_size = 12

    # indices of boxes where there is probably a face
    inds = np.where(probs > threshold)

    if inds[0].size == 0:
        return np.array([])

    # transformations of bounding boxes
    tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)]
    # they are defined as:
    # w = x2 - x1 + 1
    # h = y2 - y1 + 1
    # x1_true = x1 + tx1*w
    # x2_true = x2 + tx2*w
    # y1_true = y1 + ty1*h
    # y2_true = y2 + ty2*h

    offsets = np.array([tx1, ty1, tx2, ty2])
    score = probs[inds[0], inds[1]]

    # P-Net is applied to scaled images,
    # so we need to rescale bounding boxes back
    bounding_boxes = np.vstack([
        np.round((stride*inds[1] + 1.0)/scale),
        np.round((stride*inds[0] + 1.0)/scale),
        np.round((stride*inds[1] + 1.0 + cell_size)/scale),
        np.round((stride*inds[0] + 1.0 + cell_size)/scale),
        score, offsets
    ])
    # one is added to map the zero-based output indices to the
    # one-based pixel coordinates used by the original implementation

    return bounding_boxes.T
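To make the pyramid and stride arithmetic concrete, this standalone sketch reproduces the scale schedule that detect_faces feeds into run_first_stage, for a hypothetical 1024x768 input (the constants match detector.py above).

min_face_size = 20.0
min_detection_size = 12
factor = 0.707  # ~sqrt(0.5): each pyramid level halves the image area

# initial shrink so that a face of min_face_size just fills
# P-Net's fixed 12x12 receptive field
m = min_detection_size / min_face_size

min_length = min(1024, 768) * m  # hypothetical input size
scales = []
while min_length > min_detection_size:
    scales.append(m * factor ** len(scales))
    min_length *= factor

print(['%.3f' % s for s in scales])
# at pyramid scale s, P-Net output cell (row, col) corresponds to a 12x12
# window whose top-left corner is about ((2*col + 1)/s, (2*row + 1)/s) in
# the original image, which is exactly what _generate_bboxes computes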
align/get_nets.py
ADDED
@@ -0,0 +1,169 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
import numpy as np


class Flatten(nn.Module):

    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [batch_size, c, h, w].
        Returns:
            a float tensor with shape [batch_size, c*h*w].
        """

        # without this the pretrained model isn't working
        x = x.transpose(3, 2).contiguous()

        return x.view(x.size(0), -1)


class PNet(nn.Module):

    def __init__(self):

        super(PNet, self).__init__()

        # suppose we have input with size HxW, then
        # after first layer: H - 2,
        # after pool: ceil((H - 2)/2),
        # after second conv: ceil((H - 2)/2) - 2,
        # after last conv: ceil((H - 2)/2) - 4,
        # and the same for W

        self.features = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(3, 10, 3, 1)),
            ('prelu1', nn.PReLU(10)),
            ('pool1', nn.MaxPool2d(2, 2, ceil_mode=True)),

            ('conv2', nn.Conv2d(10, 16, 3, 1)),
            ('prelu2', nn.PReLU(16)),

            ('conv3', nn.Conv2d(16, 32, 3, 1)),
            ('prelu3', nn.PReLU(32))
        ]))

        self.conv4_1 = nn.Conv2d(32, 2, 1, 1)
        self.conv4_2 = nn.Conv2d(32, 4, 1, 1)

        weights = np.load("align/pnet.npy", allow_pickle=True)[()]
        for n, p in self.named_parameters():
            p.data = torch.FloatTensor(weights[n])

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [batch_size, 3, h, w].
        Returns:
            b: a float tensor with shape [batch_size, 4, h', w'].
            a: a float tensor with shape [batch_size, 2, h', w'].
        """
        x = self.features(x)
        a = self.conv4_1(x)
        b = self.conv4_2(x)
        a = F.softmax(a, dim=1)
        return b, a


class RNet(nn.Module):

    def __init__(self):

        super(RNet, self).__init__()

        self.features = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(3, 28, 3, 1)),
            ('prelu1', nn.PReLU(28)),
            ('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)),

            ('conv2', nn.Conv2d(28, 48, 3, 1)),
            ('prelu2', nn.PReLU(48)),
            ('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)),

            ('conv3', nn.Conv2d(48, 64, 2, 1)),
            ('prelu3', nn.PReLU(64)),

            ('flatten', Flatten()),
            ('conv4', nn.Linear(576, 128)),
            ('prelu4', nn.PReLU(128))
        ]))

        self.conv5_1 = nn.Linear(128, 2)
        self.conv5_2 = nn.Linear(128, 4)

        weights = np.load("align/rnet.npy", allow_pickle=True)[()]
        for n, p in self.named_parameters():
            p.data = torch.FloatTensor(weights[n])

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [batch_size, 3, h, w].
        Returns:
            b: a float tensor with shape [batch_size, 4].
            a: a float tensor with shape [batch_size, 2].
        """
        x = self.features(x)
        a = self.conv5_1(x)
        b = self.conv5_2(x)
        a = F.softmax(a, dim=1)
        return b, a


class ONet(nn.Module):

    def __init__(self):

        super(ONet, self).__init__()

        self.features = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(3, 32, 3, 1)),
            ('prelu1', nn.PReLU(32)),
            ('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)),

            ('conv2', nn.Conv2d(32, 64, 3, 1)),
            ('prelu2', nn.PReLU(64)),
            ('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)),

            ('conv3', nn.Conv2d(64, 64, 3, 1)),
            ('prelu3', nn.PReLU(64)),
            ('pool3', nn.MaxPool2d(2, 2, ceil_mode=True)),

            ('conv4', nn.Conv2d(64, 128, 2, 1)),
            ('prelu4', nn.PReLU(128)),

            ('flatten', Flatten()),
            ('conv5', nn.Linear(1152, 256)),
            ('drop5', nn.Dropout(0.25)),
            ('prelu5', nn.PReLU(256)),
        ]))

        self.conv6_1 = nn.Linear(256, 2)
        self.conv6_2 = nn.Linear(256, 4)
        self.conv6_3 = nn.Linear(256, 10)

        weights = np.load("align/onet.npy", allow_pickle=True)[()]
        for n, p in self.named_parameters():
            p.data = torch.FloatTensor(weights[n])

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [batch_size, 3, h, w].
        Returns:
            c: a float tensor with shape [batch_size, 10].
            b: a float tensor with shape [batch_size, 4].
            a: a float tensor with shape [batch_size, 2].
        """
        x = self.features(x)
        a = self.conv6_1(x)
        b = self.conv6_2(x)
        c = self.conv6_3(x)
        a = F.softmax(a, dim=1)
        return c, b, a
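As a shape check on the networks above, this sketch runs P-Net on a dummy batch; it assumes the align/pnet.npy weights from this commit are present relative to the working directory, since PNet() loads them in its constructor.

import torch
from align.get_nets import PNet

net = PNet()  # loads align/pnet.npy
net.eval()

with torch.no_grad():
    x = torch.zeros(1, 3, 24, 24)  # dummy 24x24 RGB input
    offsets, probs = net(x)

# a 24x24 input yields a 7x7 map of 12x12 windows at stride 2,
# matching the layer-size comment in PNet: ceil((24 - 2)/2) - 4 = 7
print(offsets.shape, probs.shape)  # torch.Size([1, 4, 7, 7]) torch.Size([1, 2, 7, 7])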
align/matlab_cp2tform.py
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import numpy as np
from numpy.linalg import inv, norm, lstsq
from numpy.linalg import matrix_rank as rank


class MatlabCp2tormException(Exception):
    def __str__(self):
        return "In File {}:{}".format(
            __file__, super().__str__())


def tformfwd(trans, uv):
    """
    Function:
    ----------
        apply affine transform 'trans' to uv

    Parameters:
    ----------
        @trans: 3x3 np.array
            transform matrix
        @uv: Kx2 np.array
            each row is a pair of coordinates (x, y)

    Returns:
    ----------
        @xy: Kx2 np.array
            each row is a pair of transformed coordinates (x, y)
    """
    uv = np.hstack((
        uv, np.ones((uv.shape[0], 1))
    ))
    xy = np.dot(uv, trans)
    xy = xy[:, 0:-1]
    return xy


def tforminv(trans, uv):
    """
    Function:
    ----------
        apply the inverse of affine transform 'trans' to uv

    Parameters:
    ----------
        @trans: 3x3 np.array
            transform matrix
        @uv: Kx2 np.array
            each row is a pair of coordinates (x, y)

    Returns:
    ----------
        @xy: Kx2 np.array
            each row is a pair of inverse-transformed coordinates (x, y)
    """
    Tinv = inv(trans)
    xy = tformfwd(Tinv, uv)
    return xy


def findNonreflectiveSimilarity(uv, xy, options=None):

    if options is None:  # default matches MATLAB's cp2tform: K = 2
        options = {'K': 2}

    K = options['K']
    M = xy.shape[0]
    x = xy[:, 0].reshape((-1, 1))  # use reshape to keep a column vector
    y = xy[:, 1].reshape((-1, 1))  # use reshape to keep a column vector
    # print('--->x, y:\n', x, y

    tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1))))
    tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1))))
    X = np.vstack((tmp1, tmp2))
    # print('--->X.shape: ', X.shape
    # print('X:\n', X

    u = uv[:, 0].reshape((-1, 1))  # use reshape to keep a column vector
    v = uv[:, 1].reshape((-1, 1))  # use reshape to keep a column vector
    U = np.vstack((u, v))
    # print('--->U.shape: ', U.shape
    # print('U:\n', U

    # We know that X * r = U
    if rank(X) >= 2 * K:
        r, _, _, _ = lstsq(X, U, rcond=None)  # rcond=None silences NumPy's FutureWarning
        r = np.squeeze(r)
    else:
        raise Exception("cp2tform: two Unique Points Req")

    # print('--->r:\n', r

    sc = r[0]
    ss = r[1]
    tx = r[2]
    ty = r[3]

    Tinv = np.array([
        [sc, -ss, 0],
        [ss, sc, 0],
        [tx, ty, 1]
    ])

    # print('--->Tinv:\n', Tinv

    T = inv(Tinv)
    # print('--->T:\n', T

    T[:, 2] = np.array([0, 0, 1])

    return T, Tinv


def findSimilarity(uv, xy, options=None):

    if options is None:
        options = {'K': 2}

    # uv = np.array(uv)
    # xy = np.array(xy)

    # Solve for trans1
    trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options)

    # Solve for trans2

    # manually reflect the xy data across the Y-axis
    xyR = xy.copy()  # copy first: reflecting in place would corrupt the caller's xy
    xyR[:, 0] = -1 * xyR[:, 0]

    trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options)

    # manually reflect the tform to undo the reflection done on xyR
    TreflectY = np.array([
        [-1, 0, 0],
        [0, 1, 0],
        [0, 0, 1]
    ])

    trans2 = np.dot(trans2r, TreflectY)

    # Figure out if trans1 or trans2 is better
    xy1 = tformfwd(trans1, uv)
    norm1 = norm(xy1 - xy)

    xy2 = tformfwd(trans2, uv)
    norm2 = norm(xy2 - xy)

    if norm1 <= norm2:
        return trans1, trans1_inv
    else:
        trans2_inv = inv(trans2)
        return trans2, trans2_inv


def get_similarity_transform(src_pts, dst_pts, reflective=True):
    """
    Function:
    ----------
        Find Similarity Transform Matrix 'trans':
            u = src_pts[:, 0]
            v = src_pts[:, 1]
            x = dst_pts[:, 0]
            y = dst_pts[:, 1]
            [x, y, 1] = [u, v, 1] * trans

    Parameters:
    ----------
        @src_pts: Kx2 np.array
            source points, each row is a pair of coordinates (x, y)
        @dst_pts: Kx2 np.array
            destination points, each row is a pair of transformed
            coordinates (x, y)
        @reflective: True or False
            if True:
                use reflective similarity transform
            else:
                use non-reflective similarity transform

    Returns:
    ----------
        @trans: 3x3 np.array
            transform matrix from uv to xy
        @trans_inv: 3x3 np.array
            inverse of trans, transform matrix from xy to uv
    """

    if reflective:
        trans, trans_inv = findSimilarity(src_pts, dst_pts)
    else:
        trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts)

    return trans, trans_inv


def cvt_tform_mat_for_cv2(trans):
    """
    Function:
    ----------
        Convert Transform Matrix 'trans' into 'cv2_trans' which could be
        directly used by cv2.warpAffine():
            u = src_pts[:, 0]
            v = src_pts[:, 1]
            x = dst_pts[:, 0]
            y = dst_pts[:, 1]
            [x, y].T = cv_trans * [u, v, 1].T

    Parameters:
    ----------
        @trans: 3x3 np.array
            transform matrix from uv to xy

    Returns:
    ----------
        @cv2_trans: 2x3 np.array
            transform matrix from src_pts to dst_pts, could be directly used
            for cv2.warpAffine()
    """
    cv2_trans = trans[:, 0:2].T

    return cv2_trans


def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True):
    """
    Function:
    ----------
        Find Similarity Transform Matrix 'cv2_trans' which could be
        directly used by cv2.warpAffine():
            u = src_pts[:, 0]
            v = src_pts[:, 1]
            x = dst_pts[:, 0]
            y = dst_pts[:, 1]
            [x, y].T = cv_trans * [u, v, 1].T

    Parameters:
    ----------
        @src_pts: Kx2 np.array
            source points, each row is a pair of coordinates (x, y)
        @dst_pts: Kx2 np.array
            destination points, each row is a pair of transformed
            coordinates (x, y)
        @reflective: True or False
            if True:
                use reflective similarity transform
            else:
                use non-reflective similarity transform

    Returns:
    ----------
        @cv2_trans: 2x3 np.array
            transform matrix from src_pts to dst_pts, could be directly used
            for cv2.warpAffine()
    """
    trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective)
    cv2_trans = cvt_tform_mat_for_cv2(trans)
    cv2_trans_inv = cvt_tform_mat_for_cv2(trans_inv)

    return cv2_trans, cv2_trans_inv


if __name__ == '__main__':
    """
    u = [0, 6, -2]
    v = [0, 3, 5]
    x = [-1, 0, 4]
    y = [-1, -10, 4]

    # In Matlab, run:
    #
    #   uv = [u'; v'];
    #   xy = [x'; y'];
    #   tform_sim = cp2tform(uv, xy, 'similarity');
    #
    #   trans = tform_sim.tdata.T
    #   ans =
    #       -0.0764   -1.6190         0
    #        1.6190   -0.0764         0
    #       -3.2156    0.0290    1.0000
    #   trans_inv = tform_sim.tdata.Tinv
    #   ans =
    #
    #       -0.0291    0.6163         0
    #       -0.6163   -0.0291         0
    #       -0.0756    1.9826    1.0000
    #   xy_m = tformfwd(tform_sim, u, v)
    #
    #   xy_m =
    #
    #       -3.2156    0.0290
    #        1.1833   -9.9143
    #        5.0323    2.8853
    #   uv_m = tforminv(tform_sim, x, y)
    #
    #   uv_m =
    #
    #        0.5698    1.3953
    #        6.0872    2.2733
    #       -2.6570    4.3314
    """
    u = [0, 6, -2]
    v = [0, 3, 5]
    x = [-1, 0, 4]
    y = [-1, -10, 4]

    uv = np.array((u, v)).T
    xy = np.array((x, y)).T

    print("\n--->uv:")
    print(uv)
    print("\n--->xy:")
    print(xy)

    trans, trans_inv = get_similarity_transform(uv, xy)

    print("\n--->trans matrix:")
    print(trans)

    print("\n--->trans_inv matrix:")
    print(trans_inv)

    print("\n---> apply transform to uv")
    print("\nxy_m = uv_augmented * trans")
    uv_aug = np.hstack((
        uv, np.ones((uv.shape[0], 1))
    ))
    xy_m = np.dot(uv_aug, trans)
    print(xy_m)

    print("\nxy_m = tformfwd(trans, uv)")
    xy_m = tformfwd(trans, uv)
    print(xy_m)

    print("\n---> apply inverse transform to xy")
    print("\nuv_m = xy_augmented * trans_inv")
    xy_aug = np.hstack((
        xy, np.ones((xy.shape[0], 1))
    ))
    uv_m = np.dot(xy_aug, trans_inv)
    print(uv_m)

    print("\nuv_m = tformfwd(trans_inv, xy)")
    uv_m = tformfwd(trans_inv, xy)
    print(uv_m)

    uv_m = tforminv(trans, xy)
    print("\nuv_m = tforminv(trans, xy)")
    print(uv_m)
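For orientation, this is how the module above is consumed downstream: get_similarity_transform_for_cv2 turns matched landmark pairs into the 2x3 matrix that cv2.warpAffine expects. A minimal sketch, where the landmark coordinates and image path are illustrative only (not taken from the repo):

# Hypothetical usage sketch for align/matlab_cp2tform.py; the point
# coordinates and 'face.jpg' below are illustrative, not from the repo.
import cv2
import numpy as np
from align.matlab_cp2tform import get_similarity_transform_for_cv2

src_pts = np.array([[30.3, 51.7], [65.5, 51.5], [48.0, 71.7],
                    [33.5, 92.4], [62.7, 92.2]])      # detected 5-point landmarks
dst_pts = np.array([[38.3, 51.7], [73.5, 51.5], [56.0, 71.7],
                    [41.5, 92.4], [70.7, 92.2]])      # reference landmark template

cv2_trans, cv2_trans_inv = get_similarity_transform_for_cv2(src_pts, dst_pts)
img = cv2.imread('face.jpg')                          # any BGR image with a face
aligned = cv2.warpAffine(img, cv2_trans, (112, 112))  # 112x112 aligned crop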
align/onet.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:09b2ec49f8e6210dc4203e898a6fcf9f9f1330d6750f3c8c7a1d009aa75b3c6e
size 132
align/pnet.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8364064b9199f3df5a01c72b88e6f01024d1c76657e55d2524ca8a87b97bea5d
size 130
align/rnet.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:80917ce9d27de824859196469e37680b81504e99310b581e96d524c533476803
size 131
app.py
ADDED
@@ -0,0 +1,80 @@
import gradio as gr
import torch
from PIL import Image
import numpy as np
from util.feature_extraction_utils import normalize_transforms
from util.attack_utils import Attack
from util.prepare_utils import prepare_models, prepare_dir_vec, get_ensemble
from align.detector import detect_faces
from align.align_trans import get_reference_facial_points, warp_and_crop_face
import torchvision.transforms as transforms

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
to_tensor = transforms.ToTensor()

eps = 0.05
n_iters = 50
input_size = [112, 112]
attack_type = 'lpips'
c_tv = None
c_sim = 0.05
lr = 0.0025
net_type = 'alex'
noise_size = 0.005
n_starts = 1
kernel_size_gf = 7
sigma_gf = 3
combination = True
using_subspace = False
V_reduction_root = './'
model_backbones = ['IR_152', 'IR_152', 'ResNet_152', 'ResNet_152']
model_roots = ['models/Backbone_IR_152_Arcface_Epoch_112.pth', 'models/Backbone_IR_152_Cosface_Epoch_70.pth',
               'models/Backbone_ResNet_152_Arcface_Epoch_65.pth', 'models/Backbone_ResNet_152_Cosface_Epoch_68.pth']
direction = 1
crop_size = 112
scale = crop_size / 112.

models_attack, V_reduction, dim = prepare_models(model_backbones,
                                                 input_size,
                                                 model_roots,
                                                 kernel_size_gf,
                                                 sigma_gf,
                                                 combination,
                                                 using_subspace,
                                                 V_reduction_root)

def protect(img):
    img = Image.fromarray(img)
    reference = get_reference_facial_points(default_square=True) * scale
    h, w, c = np.array(img).shape

    _, landmarks = detect_faces(img)
    facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)]

    _, tfm = warp_and_crop_face(np.array(img), facial5points, reference, crop_size=(crop_size, crop_size))

    # pytorch transform
    theta = normalize_transforms(tfm, w, h)
    tensor_img = to_tensor(img).unsqueeze(0).to(device)

    V_reduction = None
    dim = 512

    # Find gradient direction vector
    dir_vec_extractor = get_ensemble(models=models_attack, sigma_gf=None, kernel_size_gf=None, combination=False, V_reduction=V_reduction, warp=True, theta_warp=theta)
    dir_vec = prepare_dir_vec(dir_vec_extractor, tensor_img, dim, combination)

    img_attacked = tensor_img.clone()
    attack = Attack(models_attack, dim, attack_type,
                    eps, c_sim, net_type, lr, n_iters,
                    noise_size, n_starts, c_tv, sigma_gf,
                    kernel_size_gf, combination, warp=True,
                    theta_warp=theta, V_reduction=V_reduction)
    img_attacked = attack.execute(tensor_img, dir_vec, direction).detach().cpu()

    img_attacked_pil = transforms.ToPILImage()(img_attacked[0])
    return img_attacked_pil

gr.Interface(fn=protect, inputs=gr.components.Image(shape=(512, 512)),
             outputs=gr.components.Image(type="pil"), allow_flagging="never").launch(show_error=True, quiet=False, share=True)
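Because app.py as written calls launch() at import time (which blocks), protect() is easiest to reuse by first factoring it into a module of its own. A hypothetical local driver then looks like this; the module and image path are illustrative, not part of the repo:

# Hypothetical driver, assuming protect() has been moved into its own module.
import numpy as np
from PIL import Image
from my_protect_module import protect  # hypothetical module holding protect()

img = np.array(Image.open('selfie.jpg').convert('RGB'))  # illustrative path; HxWx3 uint8
protected = protect(img)                                  # adversarially perturbed PIL image
protected.save('selfie_protected.png')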
backbone/__init__.py
ADDED
@@ -0,0 +1 @@

backbone/model_irse.py
ADDED
@@ -0,0 +1,237 @@
import torch
import torch.nn as nn
from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, Dropout, MaxPool2d, \
    AdaptiveAvgPool2d, Sequential, Module
from collections import namedtuple


# Support: ['IR_50', 'IR_101', 'IR_152', 'IR_SE_50', 'IR_SE_101', 'IR_SE_152']


class Flatten(Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


def l2_norm(input, axis=1):
    norm = torch.norm(input, 2, axis, True)
    output = torch.div(input, norm)

    return output


class SEModule(Module):
    def __init__(self, channels, reduction):
        super(SEModule, self).__init__()
        self.avg_pool = AdaptiveAvgPool2d(1)
        self.fc1 = Conv2d(
            channels, channels // reduction, kernel_size=1, padding=0, bias=False)

        nn.init.xavier_uniform_(self.fc1.weight.data)

        self.relu = ReLU(inplace=True)
        self.fc2 = Conv2d(
            channels // reduction, channels, kernel_size=1, padding=0, bias=False)

        self.sigmoid = Sigmoid()

    def forward(self, x):
        module_input = x
        x = self.avg_pool(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)

        return module_input * x


class bottleneck_IR(Module):
    def __init__(self, in_channel, depth, stride):
        super(bottleneck_IR, self).__init__()
        if in_channel == depth:
            self.shortcut_layer = MaxPool2d(1, stride)
        else:
            self.shortcut_layer = Sequential(
                Conv2d(in_channel, depth, (1, 1), stride, bias=False), BatchNorm2d(depth))
        self.res_layer = Sequential(
            BatchNorm2d(in_channel),
            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), PReLU(depth),
            Conv2d(depth, depth, (3, 3), stride, 1, bias=False), BatchNorm2d(depth))

    def forward(self, x):
        shortcut = self.shortcut_layer(x)
        res = self.res_layer(x)

        return res + shortcut


class bottleneck_IR_SE(Module):
    def __init__(self, in_channel, depth, stride):
        super(bottleneck_IR_SE, self).__init__()
        if in_channel == depth:
            self.shortcut_layer = MaxPool2d(1, stride)
        else:
            self.shortcut_layer = Sequential(
                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
                BatchNorm2d(depth))
        self.res_layer = Sequential(
            BatchNorm2d(in_channel),
            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
            PReLU(depth),
            Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
            BatchNorm2d(depth),
            SEModule(depth, 16)
        )

    def forward(self, x):
        shortcut = self.shortcut_layer(x)
        res = self.res_layer(x)

        return res + shortcut


class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])):
    '''A named tuple describing a ResNet block.'''


def get_block(in_channel, depth, num_units, stride=2):

    return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)]


def get_blocks(num_layers):
    if num_layers == 50:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=4),
            get_block(in_channel=128, depth=256, num_units=14),
            get_block(in_channel=256, depth=512, num_units=3)
        ]
    elif num_layers == 100:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=13),
            get_block(in_channel=128, depth=256, num_units=30),
            get_block(in_channel=256, depth=512, num_units=3)
        ]
    elif num_layers == 152:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=8),
            get_block(in_channel=128, depth=256, num_units=36),
            get_block(in_channel=256, depth=512, num_units=3)
        ]

    return blocks


class Backbone(Module):
    def __init__(self, input_size, num_layers, mode='ir'):
        super(Backbone, self).__init__()
        assert input_size[0] in [112, 224], "input_size should be [112, 112] or [224, 224]"
        assert num_layers in [50, 100, 152], "num_layers should be 50, 100 or 152"
        assert mode in ['ir', 'ir_se'], "mode should be ir or ir_se"
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        if input_size[0] == 112:
            self.output_layer = Sequential(BatchNorm2d(512),
                                           Dropout(),
                                           Flatten(),
                                           Linear(512 * 7 * 7, 512),
                                           BatchNorm1d(512))
        else:
            self.output_layer = Sequential(BatchNorm2d(512),
                                           Dropout(),
                                           Flatten(),
                                           Linear(512 * 14 * 14, 512),
                                           BatchNorm1d(512))

        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(
                    unit_module(bottleneck.in_channel,
                                bottleneck.depth,
                                bottleneck.stride))
        self.body = Sequential(*modules)

        self._initialize_weights()

    def forward(self, x):
        x = self.input_layer(x)
        x = self.body(x)
        x = self.output_layer(x)

        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()


def IR_50(input_size):
    """Constructs a ir-50 model.
    """
    model = Backbone(input_size, 50, 'ir')

    return model


def IR_101(input_size):
    """Constructs a ir-101 model.
    """
    model = Backbone(input_size, 100, 'ir')

    return model


def IR_152(input_size):
    """Constructs a ir-152 model.
    """
    model = Backbone(input_size, 152, 'ir')

    return model


def IR_SE_50(input_size):
    """Constructs a ir_se-50 model.
    """
    model = Backbone(input_size, 50, 'ir_se')

    return model


def IR_SE_101(input_size):
    """Constructs a ir_se-101 model.
    """
    model = Backbone(input_size, 100, 'ir_se')

    return model


def IR_SE_152(input_size):
    """Constructs a ir_se-152 model.
    """
    model = Backbone(input_size, 152, 'ir_se')

    return model
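A quick smoke test for the IR backbones above, assuming random weights; eval() matters because BatchNorm1d on the 512-d output rejects a batch of one in training mode:

# Minimal shape check for backbone/model_irse.py (random weights).
import torch
from backbone.model_irse import IR_152

net = IR_152([112, 112]).eval()          # eval: Dropout off, BatchNorm uses running stats
with torch.no_grad():
    emb = net(torch.randn(1, 3, 112, 112))  # one 112x112 RGB face crop
print(emb.shape)                          # torch.Size([1, 512])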
backbone/model_resnet.py
ADDED
@@ -0,0 +1,195 @@
import torch.nn as nn
from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, ReLU, Dropout, MaxPool2d, Sequential, Module


# Support: ['ResNet_50', 'ResNet_101', 'ResNet_152']


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""

    return Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                  padding=1, bias=False)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""

    return Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = BatchNorm2d(planes)
        self.relu = ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class Bottleneck(Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = BatchNorm2d(planes * self.expansion)
        self.relu = ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class ResNet(Module):

    def __init__(self, input_size, block, layers, zero_init_residual=True):
        super(ResNet, self).__init__()
        assert input_size[0] in [112, 224], "input_size should be [112, 112] or [224, 224]"
        self.inplanes = 64
        self.conv1 = Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = BatchNorm2d(64)
        self.relu = ReLU(inplace=True)
        self.maxpool = MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.bn_o1 = BatchNorm2d(2048)
        self.dropout = Dropout()
        if input_size[0] == 112:
            self.fc = Linear(2048 * 4 * 4, 512)
        else:
            self.fc = Linear(2048 * 8 * 8, 512)
        self.bn_o2 = BatchNorm1d(512)

        for m in self.modules():
            if isinstance(m, Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.bn_o1(x)
        x = self.dropout(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        x = self.bn_o2(x)

        return x


def ResNet_18(input_size, **kwargs):
    """Constructs a ResNet-18 style model (Bottleneck blocks, [2, 2, 2, 2]).
    """
    model = ResNet(input_size, Bottleneck, [2, 2, 2, 2], **kwargs)

    return model


def ResNet_50(input_size, **kwargs):
    """Constructs a ResNet-50 model.
    """
    model = ResNet(input_size, Bottleneck, [3, 4, 6, 3], **kwargs)

    return model


def ResNet_101(input_size, **kwargs):
    """Constructs a ResNet-101 model.
    """
    model = ResNet(input_size, Bottleneck, [3, 4, 23, 3], **kwargs)

    return model


def ResNet_152(input_size, **kwargs):
    """Constructs a ResNet-152 model.
    """
    model = ResNet(input_size, Bottleneck, [3, 8, 36, 3], **kwargs)

    return model
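The same kind of shape check applies to the ResNet variant above; a 112x112 input reaches layer4 at 4x4 spatial resolution, which is why the fully connected layer is sized 2048 * 4 * 4. A minimal sketch with random weights:

# Minimal shape check for backbone/model_resnet.py (random weights).
import torch
from backbone.model_resnet import ResNet_152

net = ResNet_152([112, 112]).eval()
with torch.no_grad():
    emb = net(torch.randn(2, 3, 112, 112))  # 112 -> 56 -> 28 -> 14 -> 7 -> 4 spatially
print(emb.shape)                             # torch.Size([2, 512])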
backbone/models2.py
ADDED
@@ -0,0 +1,305 @@
from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, Dropout2d, Dropout, AvgPool2d, MaxPool2d, AdaptiveAvgPool2d, Sequential, Module, Parameter
import torch.nn.functional as F
import torch
from collections import namedtuple
import math
import pdb

################################## Original Arcface Model #############################################################

class Flatten(Module):
    def forward(self, input):
        return input.view(input.size(0), -1)

def l2_norm(input, axis=1):
    norm = torch.norm(input, 2, axis, True)
    output = torch.div(input, norm)
    return output

class SEModule(Module):
    def __init__(self, channels, reduction):
        super(SEModule, self).__init__()
        self.avg_pool = AdaptiveAvgPool2d(1)
        self.fc1 = Conv2d(
            channels, channels // reduction, kernel_size=1, padding=0, bias=False)
        self.relu = ReLU(inplace=True)
        self.fc2 = Conv2d(
            channels // reduction, channels, kernel_size=1, padding=0, bias=False)
        self.sigmoid = Sigmoid()

    def forward(self, x):
        module_input = x
        x = self.avg_pool(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return module_input * x

class bottleneck_IR(Module):
    def __init__(self, in_channel, depth, stride):
        super(bottleneck_IR, self).__init__()
        if in_channel == depth:
            self.shortcut_layer = MaxPool2d(1, stride)
        else:
            self.shortcut_layer = Sequential(
                Conv2d(in_channel, depth, (1, 1), stride, bias=False), BatchNorm2d(depth))
        self.res_layer = Sequential(
            BatchNorm2d(in_channel),
            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), PReLU(depth),
            Conv2d(depth, depth, (3, 3), stride, 1, bias=False), BatchNorm2d(depth))

    def forward(self, x):
        shortcut = self.shortcut_layer(x)
        res = self.res_layer(x)
        return res + shortcut

class bottleneck_IR_SE(Module):
    def __init__(self, in_channel, depth, stride):
        super(bottleneck_IR_SE, self).__init__()
        if in_channel == depth:
            self.shortcut_layer = MaxPool2d(1, stride)
        else:
            self.shortcut_layer = Sequential(
                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
                BatchNorm2d(depth))
        self.res_layer = Sequential(
            BatchNorm2d(in_channel),
            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
            PReLU(depth),
            Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
            BatchNorm2d(depth),
            SEModule(depth, 16)
        )

    def forward(self, x):
        shortcut = self.shortcut_layer(x)
        res = self.res_layer(x)
        return res + shortcut

class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])):
    '''A named tuple describing a ResNet block.'''

def get_block(in_channel, depth, num_units, stride=2):
    return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)]

def get_blocks(num_layers):
    if num_layers == 50:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=4),
            get_block(in_channel=128, depth=256, num_units=14),
            get_block(in_channel=256, depth=512, num_units=3)
        ]
    elif num_layers == 100:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=13),
            get_block(in_channel=128, depth=256, num_units=30),
            get_block(in_channel=256, depth=512, num_units=3)
        ]
    elif num_layers == 152:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=8),
            get_block(in_channel=128, depth=256, num_units=36),
            get_block(in_channel=256, depth=512, num_units=3)
        ]
    return blocks

class Backbone(Module):
    def __init__(self, num_layers, drop_ratio, mode='ir'):
        super(Backbone, self).__init__()
        assert num_layers in [50, 100, 152], 'num_layers should be 50, 100, or 152'
        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        self.output_layer = Sequential(BatchNorm2d(512),
                                       Dropout(drop_ratio),
                                       Flatten(),
                                       Linear(512 * 7 * 7, 512),
                                       BatchNorm1d(512))
        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(
                    unit_module(bottleneck.in_channel,
                                bottleneck.depth,
                                bottleneck.stride))
        self.body = Sequential(*modules)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.body(x)
        x = self.output_layer(x)
        return l2_norm(x)

################################## MobileFaceNet #############################################################

class Conv_block(Module):
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super(Conv_block, self).__init__()
        self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False)
        self.bn = BatchNorm2d(out_c)
        self.prelu = PReLU(out_c)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.prelu(x)
        return x

class Linear_block(Module):
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super(Linear_block, self).__init__()
        self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False)
        self.bn = BatchNorm2d(out_c)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x

class Depth_Wise(Module):
    def __init__(self, in_c, out_c, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
        super(Depth_Wise, self).__init__()
        self.conv = Conv_block(in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.conv_dw = Conv_block(groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride)
        self.project = Linear_block(groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.residual = residual

    def forward(self, x):
        if self.residual:
            short_cut = x
        x = self.conv(x)
        x = self.conv_dw(x)
        x = self.project(x)
        if self.residual:
            output = short_cut + x
        else:
            output = x
        return output

class Residual(Module):
    def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
        super(Residual, self).__init__()
        modules = []
        for _ in range(num_block):
            modules.append(Depth_Wise(c, c, residual=True, kernel=kernel, padding=padding, stride=stride, groups=groups))
        self.model = Sequential(*modules)

    def forward(self, x):
        return self.model(x)

class MobileFaceNet(Module):
    def __init__(self, embedding_size):
        super(MobileFaceNet, self).__init__()
        self.conv1 = Conv_block(3, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
        self.conv2_dw = Conv_block(64, 64, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
        self.conv_23 = Depth_Wise(64, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128)
        self.conv_3 = Residual(64, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_34 = Depth_Wise(64, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256)
        self.conv_4 = Residual(128, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_45 = Depth_Wise(128, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512)
        self.conv_5 = Residual(128, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_6_sep = Conv_block(128, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
        self.conv_6_dw = Linear_block(512, 512, groups=512, kernel=(7, 7), stride=(1, 1), padding=(0, 0))
        self.conv_6_flatten = Flatten()
        self.linear = Linear(512, embedding_size, bias=False)
        self.bn = BatchNorm1d(embedding_size)

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2_dw(out)
        out = self.conv_23(out)
        out = self.conv_3(out)
        out = self.conv_34(out)
        out = self.conv_4(out)
        out = self.conv_45(out)
        out = self.conv_5(out)
        out = self.conv_6_sep(out)
        out = self.conv_6_dw(out)
        out = self.conv_6_flatten(out)
        out = self.linear(out)
        out = self.bn(out)
        return l2_norm(out)

################################## Arcface head #############################################################

class Arcface(Module):
    # implementation of the ArcFace additive angular margin loss, https://arxiv.org/abs/1801.07698
    def __init__(self, embedding_size=512, classnum=51332, s=64., m=0.5):
        super(Arcface, self).__init__()
        self.classnum = classnum
        self.kernel = Parameter(torch.Tensor(embedding_size, classnum))
        # initial kernel
        self.kernel.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)
        self.m = m  # the margin value, default is 0.5
        self.s = s  # scalar value, default is 64; see normface https://arxiv.org/abs/1704.06369
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.mm = self.sin_m * m  # issue 1
        self.threshold = math.cos(math.pi - m)

    def forward(self, embbedings, label):
        # weights norm
        nB = len(embbedings)
        kernel_norm = l2_norm(self.kernel, axis=0)
        # cos(theta+m)
        cos_theta = torch.mm(embbedings, kernel_norm)
        # output = torch.mm(embbedings, kernel_norm)
        cos_theta = cos_theta.clamp(-1, 1)  # for numerical stability
        cos_theta_2 = torch.pow(cos_theta, 2)
        sin_theta_2 = 1 - cos_theta_2
        sin_theta = torch.sqrt(sin_theta_2)
        cos_theta_m = (cos_theta * self.cos_m - sin_theta * self.sin_m)
        # this condition controls that theta+m stays in range [0, pi]
        # 0 <= theta+m <= pi
        # -m <= theta <= pi-m
        cond_v = cos_theta - self.threshold
        cond_mask = cond_v <= 0
        keep_val = (cos_theta - self.mm)  # when theta is not in [0, pi], use cosface instead
        cos_theta_m[cond_mask] = keep_val[cond_mask]
        output = cos_theta * 1.0  # a little bit hacky way to prevent an in-place operation on cos_theta
        idx_ = torch.arange(0, nB, dtype=torch.long)
        output[idx_, label] = cos_theta_m[idx_, label]
        output *= self.s  # scale up in order to make softmax work, first introduced in normface
        return output

################################## Cosface head #############################################################

class Am_softmax(Module):
    # implementation of additive margin softmax loss in https://arxiv.org/abs/1801.05599
    def __init__(self, embedding_size=512, classnum=51332):
        super(Am_softmax, self).__init__()
        self.classnum = classnum
        self.kernel = Parameter(torch.Tensor(embedding_size, classnum))
        # initial kernel
        self.kernel.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)
        self.m = 0.35  # additive margin recommended by the paper
        self.s = 30.  # see normface https://arxiv.org/abs/1704.06369

    def forward(self, embbedings, label):
        kernel_norm = l2_norm(self.kernel, axis=0)
        cos_theta = torch.mm(embbedings, kernel_norm)
        cos_theta = cos_theta.clamp(-1, 1)  # for numerical stability
        phi = cos_theta - self.m
        label = label.view(-1, 1)  # size=(B, 1)
        index = cos_theta.data * 0.0  # size=(B, Classnum)
        index.scatter_(1, label.data.view(-1, 1), 1)
        index = index.bool()  # boolean mask (was .byte(), which is deprecated for indexing)
        output = cos_theta * 1.0
        output[index] = phi[index]  # only change the correctly labeled output
        output *= self.s  # scale up in order to make softmax work, first introduced in normface
        return output
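For orientation, both heads above are consumed the same way during training: the backbone emits L2-normalized 512-d embeddings and the head turns them into margin-adjusted logits for cross-entropy. A hypothetical training-step sketch; batch size, class count, and drop_ratio are illustrative, not values from the repo:

# Hypothetical training-step sketch for the Arcface head in backbone/models2.py.
import torch
import torch.nn.functional as F
from backbone.models2 import Backbone, Arcface

backbone = Backbone(num_layers=50, drop_ratio=0.4, mode='ir').train()  # drop_ratio illustrative
head = Arcface(embedding_size=512, classnum=1000)                      # classnum illustrative

imgs = torch.randn(4, 3, 112, 112)
labels = torch.randint(0, 1000, (4,))
logits = head(backbone(imgs), labels)   # angular margin applied at each label's column
loss = F.cross_entropy(logits, labels)
loss.backward()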
models/Backbone_IR_152_Arcface_Epoch_112.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2e906b5405eba33e2b90887832ebfc72369df4e349fef65d354d3e6ca9e709fb
size 134
models/Backbone_IR_152_Cosface_Epoch_70.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3c89ffc23eec6c3adb66067a74ffb42b3d90a54ee5e8da2d058190bc65b4333b
size 134
models/Backbone_ResNet_152_Arcface_Epoch_65.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:64fd31b1bb52e89b10662d4d2a33c246b7058311b5eb4065509584f15322460b
size 134
models/Backbone_ResNet_152_Cosface_Epoch_68.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e13a1c8d6df7f6999a54e56f3e6398ed4da17ca8d63b665d0d749ee9dc3aedad
size 134
requirements-min.txt
ADDED
@@ -0,0 +1,5 @@
scikit-learn
git+https://github.com/logasja/lpips-pytorch.git
gradio
pillow
opencv-python
requirements.txt
ADDED
@@ -0,0 +1,103 @@
aiofiles==23.1.0
aiohttp==3.8.1
aiosignal==1.2.0
altair==4.2.2
analytics-python==1.4.0
anyio==3.6.1
asgiref==3.5.2
async-timeout==4.0.2
attrs==21.4.0
backoff==1.10.0
bcrypt==3.2.2
blis==0.7.7
catalogue==2.0.7
certifi==2022.6.15
cffi==1.15.0
charset-normalizer==2.0.12
click==8.1.3
confection==0.0.4
contourpy==1.0.7
cryptography==37.0.2
cycler==0.11.0
cymem==2.0.6
entrypoints==0.4
fastapi==0.78.0
fastcore==1.4.5
fastdownload==0.0.6
fastprogress==1.0.2
ffmpy==0.3.0
filelock==3.9.1
fonttools==4.33.3
frozenlist==1.3.0
fsspec==2022.5.0
gradio==3.21.0
h11==0.13.0
httpcore==0.16.3
httpx==0.23.3
huggingface-hub==0.13.2
idna==3.3
Jinja2==3.1.2
joblib==1.1.0
jsonschema==4.17.3
kiwisolver==1.4.3
langcodes==3.3.0
linkify-it-py==1.0.3
git+https://github.com/logasja/lpips-pytorch.git
markdown-it-py==2.1.0
MarkupSafe==2.1.1
matplotlib==3.5.2
mdit-py-plugins==0.3.0
mdurl==0.1.1
monotonic==1.6
multidict==6.0.2
murmurhash==1.0.7
numpy==1.22.4
nvidia-cublas-cu11==11.10.3.66
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cudnn-cu11==8.5.0.96
opencv-python==4.6.0.66
orjson==3.7.2
packaging==21.3
pandas==1.4.2
paramiko==2.11.0
pathy==0.6.1
Pillow==9.1.1
preshed==3.0.6
pycparser==2.21
pycryptodome==3.14.1
pydantic==1.8.2
pydub==0.25.1
PyNaCl==1.5.0
pyparsing==3.0.9
pyrsistent==0.19.3
python-dateutil==2.8.2
python-multipart==0.0.5
pytz==2022.1
PyYAML==6.0
requests==2.28.0
rfc3986==1.5.0
scikit-learn==1.1.1
scipy==1.8.1
six==1.16.0
smart-open==5.2.1
sniffio==1.2.0
spacy==3.3.1
spacy-legacy==3.0.9
spacy-loggers==1.0.2
srsly==2.4.3
starlette==0.19.1
thinc==8.0.17
threadpoolctl==3.1.0
toolz==0.12.0
torch==1.11.0
torchvision==0.12.0
tqdm==4.64.0
typer==0.4.1
typing_extensions==4.2.0
uc-micro-py==1.0.1
urllib3==1.26.9
uvicorn==0.17.6
wasabi==0.9.1
websockets==10.4
yarl==1.7.2
util/__init__.py
ADDED
@@ -0,0 +1 @@

util/attack_utils.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Helper function for extracting features from pre-trained models
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import math
|
| 5 |
+
import numbers
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
import torch.nn.functional as F
|
| 9 |
+
import torchvision.transforms as transforms
|
| 10 |
+
from torch.autograd import Variable
|
| 11 |
+
import torch.nn as nn
|
| 12 |
+
from PIL import Image
|
| 13 |
+
import numpy as np
|
| 14 |
+
from util.feature_extraction_utils import warp_image, normalize_batch
|
| 15 |
+
from util.prepare_utils import get_ensemble, extract_features
|
| 16 |
+
from lpips_pytorch import LPIPS, lpips
|
| 17 |
+
from tqdm import tqdm
|
| 18 |
+
|
| 19 |
+
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 20 |
+
tensor_transform = transforms.ToTensor()
|
| 21 |
+
pil_transform = transforms.ToPILImage()
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class Attack(nn.Module):
|
| 29 |
+
|
| 30 |
+
def __init__(self, models, dim, attack_type, eps, c_sim=0.5, net_type='alex', lr=0.05,
|
| 31 |
+
n_iters=100, noise_size=0.001, n_starts=10, c_tv=None, sigma_gf=None, kernel_size_gf=None,
|
| 32 |
+
combination=False, warp=False, theta_warp=None, V_reduction=None):
|
| 33 |
+
super(Attack, self).__init__()
|
| 34 |
+
self.extractor_ens = get_ensemble(models, sigma_gf, kernel_size_gf, combination, V_reduction, warp, theta_warp)
|
| 35 |
+
#print("There are '{}'' models in the attack ensemble".format(len(self.extractor_ens)))
|
| 36 |
+
self.dim = dim
|
| 37 |
+
self.eps = eps
|
| 38 |
+
self.c_sim = c_sim
|
| 39 |
+
self.net_type = net_type
|
| 40 |
+
self.lr = lr
|
| 41 |
+
self.n_iters = n_iters
|
| 42 |
+
self.noise_size = noise_size
|
| 43 |
+
self.n_starts = n_starts
|
| 44 |
+
self.c_tv=None
|
| 45 |
+
self.attack_type = attack_type
|
| 46 |
+
self.warp = warp
|
| 47 |
+
self.theta_warp = theta_warp
|
| 48 |
+
if self.attack_type == 'lpips':
|
| 49 |
+
self.lpips_loss = LPIPS(self.net_type).to(device)
|
| 50 |
+
|
| 51 |
+
|
    def execute(self, images, dir_vec, direction):

        images = Variable(images).to(device)
        dir_vec = dir_vec.to(device)
        # norm of each target feature vector, taken over the feature dimension
        dir_vec_norm = dir_vec.norm(dim=2).unsqueeze(2).to(device)
        dist = torch.zeros(images.shape[0]).to(device)
        adv_images = images.detach().clone()

        if self.warp:
            self.face_img = warp_image(images, self.theta_warp)

        for start in range(self.n_starts):
            # remember the best adversarial images and distances found so far
            adv_images_old = adv_images.detach().clone()
            dist_old = dist.clone()
            # random restart: initialize with uniform noise in (-noise_size, noise_size)
            noise_uniform = Variable(2 * self.noise_size * torch.rand(images.size()) - self.noise_size).to(device)
            adv_images = Variable(images.detach().clone() + noise_uniform, requires_grad=True).to(device)

            for i in range(self.n_iters):
                adv_features = extract_features(adv_images, self.extractor_ens, self.dim).to(device)
                # squared distance to the target features, normalized per extractor; the sign of
                # `direction` selects whether the step pushes away from or pulls towards dir_vec
                loss = direction * torch.mean((adv_features - dir_vec) ** 2 / dir_vec_norm)

                if self.c_tv is not None:
                    tv_out = self.total_var_reg(images, adv_images)
                    loss -= self.c_tv * tv_out

                if self.attack_type == 'lpips':
                    lpips_out = self.lpips_reg(images, adv_images)
                    loss -= self.c_sim * lpips_out

                grad = torch.autograd.grad(loss, [adv_images])
                adv_images = adv_images + self.lr * grad[0].sign()
                perturbation = adv_images - images

                if self.attack_type == 'sgd':
                    # project the perturbation back onto the L-infinity ball of radius eps
                    perturbation = torch.clamp(perturbation, min=-self.eps, max=self.eps)
                adv_images = images + perturbation

            adv_images = torch.clamp(adv_images, min=0, max=1)
            adv_features = extract_features(adv_images, self.extractor_ens, self.dim).to(device)
            dist = torch.mean((adv_features - dir_vec) ** 2 / dir_vec_norm, dim=[1, 2])

            if direction == 1:
                # keep whichever restart pushed the features further from dir_vec
                adv_images[dist < dist_old] = adv_images_old[dist < dist_old]
                dist[dist < dist_old] = dist_old[dist < dist_old]
            else:
                # keep whichever restart pulled the features closer to dir_vec
                adv_images[dist > dist_old] = adv_images_old[dist > dist_old]
                dist[dist > dist_old] = dist_old[dist > dist_old]

        return adv_images.detach().cpu()

    def lpips_reg(self, images, adv_images):
        # perceptual-similarity penalty between clean and adversarial images
        if self.warp:
            face_adv = warp_image(adv_images, self.theta_warp)
            lpips_out = self.lpips_loss(normalize_batch(self.face_img).to(device), normalize_batch(face_adv).to(device))[0][0][0][0] / (2 * adv_images.shape[0])
            lpips_out += self.lpips_loss(normalize_batch(images).to(device), normalize_batch(adv_images).to(device))[0][0][0][0] / (2 * adv_images.shape[0])
        else:
            lpips_out = self.lpips_loss(normalize_batch(images).to(device), normalize_batch(adv_images).to(device))[0][0][0][0] / adv_images.shape[0]

        return lpips_out

    def total_var_reg(self, images, adv_images):
        # total variation of the perturbation, encouraging spatially smooth noise
        perturbation = adv_images - images
        tv = torch.mean(torch.abs(perturbation[:, :, :, :-1] - perturbation[:, :, :, 1:])) + \
             torch.mean(torch.abs(perturbation[:, :, :-1, :] - perturbation[:, :, 1:, :]))

        return tv
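
For orientation, here is a minimal end-to-end sketch of how Attack is meant to be driven. It is an illustration only: the checkpoint and image paths are placeholders, and prepare_models/prepare_dir_vec are the helpers defined in util/prepare_utils.py below.

import torchvision.transforms as transforms
from PIL import Image
from util.attack_utils import Attack
from util.prepare_utils import prepare_models, prepare_dir_vec

# load one backbone; the checkpoint path is a placeholder
models_attack, V_reduction, dim = prepare_models(
    ['IR_152'], [112, 112], ['models/Backbone_IR_152_Arcface_Epoch_112.pth'],
    kernel_size_attack=7, sigma_attack=3, combination=False,
    using_subspace=False, V_reduction_root=None)

attack = Attack(models_attack, dim, attack_type='lpips', eps=0.05)

to_tensor = transforms.ToTensor()
img = to_tensor(Image.open('query_face.jpg')).unsqueeze(0)      # placeholder path, aligned 112x112 crop
target = to_tensor(Image.open('target_face.jpg')).unsqueeze(0)  # placeholder path

dir_vec = prepare_dir_vec(attack.extractor_ens, target, dim, combination=False)
adv = attack.execute(img, dir_vec, direction=1)  # direction=1 pushes features away from dir_vec
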
util/feature_extraction_utils.py
ADDED
@@ -0,0 +1,112 @@
# Helper functions for extracting features from pre-trained models
import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
import torch.nn as nn
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def warp_image(tensor_img, theta_warp, crop_size=112):
    # applies the affine transform theta to the image batch and crops the face region

    theta_warp = torch.Tensor(theta_warp).unsqueeze(0).to(device)
    grid = F.affine_grid(theta_warp, tensor_img.size())
    img_warped = F.grid_sample(tensor_img, grid)
    img_cropped = img_warped[:, :, 0:crop_size, 0:crop_size]
    return img_cropped

def normalize_transforms(tfm, W, H):
    # converts a 2x3 cv2 affine matrix into the normalized [-1, 1] coordinates
    # expected by F.affine_grid; the matrix is inverted because affine_grid
    # maps output coordinates back to input coordinates
    tfm_t = np.concatenate((tfm, np.array([[0, 0, 1]])), axis=0)
    theta = np.linalg.inv(tfm_t)[0:2, :]
    theta[0, 1] = theta[0, 1] * H / W
    theta[0, 2] = theta[0, 2] * 2 / W + theta[0, 0] + theta[0, 1] - 1

    theta[1, 0] = theta[1, 0] * W / H
    theta[1, 2] = theta[1, 2] * 2 / H + theta[1, 0] + theta[1, 1] - 1

    return theta
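
# Usage sketch (values illustrative): convert a 2x3 cv2 alignment matrix into a
# theta accepted by warp_image, then crop the aligned face.
#   theta = normalize_transforms(tfm, W=img_width, H=img_height)   # tfm from cv2 alignment
#   face = warp_image(img_tensor.unsqueeze(0), theta, crop_size=112)
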
def l2_norm(input, axis=1):
    # normalizes the input along `axis` with respect to the L2 norm
    norm = torch.norm(input, 2, axis, True)
    output = torch.div(input, norm)
    return output


def de_preprocess(tensor):
    # map images from [-1, 1] back to [0, 1]
    return tensor * 0.5 + 0.5

# normalize image from [0, 1] to [-1, 1]
normalize = transforms.Compose([
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

def normalize_batch(imgs_tensor):
    normalized_imgs = torch.empty_like(imgs_tensor)
    for i, img_ten in enumerate(imgs_tensor):
        normalized_imgs[i] = normalize(img_ten)

    return normalized_imgs

def resize2d(img, size):
    # resizes an image batch via adaptive average pooling
    return F.adaptive_avg_pool2d(img, size)

class face_extractor(nn.Module):
    def __init__(self, crop_size=112, warp=False, theta_warp=None):
        super(face_extractor, self).__init__()
        self.crop_size = crop_size
        self.warp = warp
        self.theta_warp = theta_warp

    def forward(self, input):
        if self.warp:
            assert input.shape[0] == 1
            input = warp_image(input, self.theta_warp, self.crop_size)

        return input

class feature_extractor(nn.Module):
    def __init__(self, model, crop_size=112, tta=True, warp=False, theta_warp=None):
        super(feature_extractor, self).__init__()
        self.model = model
        self.crop_size = crop_size
        self.tta = tta
        self.warp = warp
        self.theta_warp = theta_warp

    def forward(self, input):
        if self.warp:
            assert input.shape[0] == 1
            input = warp_image(input, self.theta_warp, self.crop_size)

        batch_normalized = normalize_batch(input)
        batch_flipped = torch.flip(batch_normalized, [3])
        # extract features
        self.model.eval()  # set to evaluation mode
        if self.tta:
            # test-time augmentation: add the embedding of the horizontally flipped image
            embed = self.model(batch_normalized) + self.model(batch_flipped)
            features = l2_norm(embed)
        else:
            features = l2_norm(self.model(batch_normalized))
        return features
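
For reference, a short sketch of running feature_extractor on its own; the checkpoint path is a placeholder and the random batch stands in for aligned 112x112 face crops.

import torch
from backbone.model_irse import IR_152
from util.feature_extraction_utils import feature_extractor

model = IR_152([112, 112])
model.load_state_dict(torch.load('models/Backbone_IR_152_Cosface_Epoch_70.pth', map_location='cpu'))

extractor = feature_extractor(model, tta=True)  # adds the flipped-image embedding before normalizing
batch = torch.rand(4, 3, 112, 112)              # stand-in for four aligned face crops
with torch.no_grad():
    features = extractor(batch)                 # (4, 512), L2-normalized
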
util/prepare_utils.py
ADDED
@@ -0,0 +1,254 @@
# Helper functions for preparing models, data and feature-extractor ensembles
import math
import numbers
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import numpy as np
import torchvision.datasets as datasets
from util.feature_extraction_utils import feature_extractor, face_extractor, warp_image, de_preprocess, normalize_batch
from backbone.model_irse import IR_50, IR_101, IR_152, IR_SE_50, IR_SE_101, IR_SE_152
from backbone.model_resnet import ResNet_50, ResNet_101, ResNet_152

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tensor_transform = transforms.ToTensor()
pil_transform = transforms.ToPILImage()

class ImageFolderWithPaths(datasets.ImageFolder):
    """Custom dataset that includes image file paths. Extends
    torchvision.datasets.ImageFolder
    """

    # override the __getitem__ method. this is the method that dataloader calls
    def __getitem__(self, index):
        # this is what ImageFolder normally returns
        original_tuple = super(ImageFolderWithPaths, self).__getitem__(index)
        # the image file path
        path = self.imgs[index][0]
        # make a new tuple that includes original and the path
        tuple_with_path = (original_tuple + (path,))
        return tuple_with_path

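
# Usage sketch: behaves like ImageFolder, but batches also carry the file paths.
#   dataset = ImageFolderWithPaths('data/faces', tensor_transform)   # placeholder root
#   loader = torch.utils.data.DataLoader(dataset, batch_size=8)
#   for imgs, labels, paths in loader:
#       ...
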
class GaussianSmoothing(nn.Module):
    """
    Apply gaussian smoothing on a
    1d, 2d or 3d tensor. Filtering is performed separately for each channel
    in the input using a depthwise convolution.
    Arguments:
        channels (int, sequence): Number of channels of the input tensors. Output will
            have this number of channels as well.
        kernel_size (int, sequence): Size of the gaussian kernel.
        sigma (float, sequence): Standard deviation of the gaussian kernel.
        dim (int, optional): The number of dimensions of the data.
            Default value is 2 (spatial).
    """

    def __init__(self, channels, kernel_size, sigma, dim=2):
        super(GaussianSmoothing, self).__init__()
        if isinstance(kernel_size, numbers.Number):
            kernel_size = [kernel_size] * dim
        if isinstance(sigma, numbers.Number):
            sigma = [sigma] * dim

        # The gaussian kernel is the product of the
        # gaussian function of each dimension.
        kernel = 1
        meshgrids = torch.meshgrid(
            [
                torch.arange(size, dtype=torch.float32)
                for size in kernel_size
            ]
        )
        for size, std, mgrid in zip(kernel_size, sigma, meshgrids):
            mean = (size - 1) / 2
            kernel *= 1 / (std * math.sqrt(2 * math.pi)) * \
                torch.exp(-((mgrid - mean) / std) ** 2 / 2)

        # Make sure sum of values in gaussian kernel equals 1.
        kernel = kernel / torch.sum(kernel)

        # Reshape to depthwise convolutional weight
        kernel = kernel.view(1, 1, *kernel.size())
        kernel = kernel.repeat(channels, *[1] * (kernel.dim() - 1))

        self.register_buffer('weight', kernel)
        self.groups = channels

        if dim == 1:
            self.conv = F.conv1d
        elif dim == 2:
            self.conv = F.conv2d
        elif dim == 3:
            self.conv = F.conv3d
        else:
            raise RuntimeError(
                'Only 1, 2 and 3 dimensions are supported. Received {}.'.format(dim)
            )
        self.pad_size = int(kernel_size[0] / 2)

    def forward(self, input):
        """
        Apply gaussian filter to input.
        Arguments:
            input (torch.Tensor): Input to apply gaussian filter on.
        Returns:
            filtered (torch.Tensor): Filtered output.
        """
        input = F.pad(input, (self.pad_size, self.pad_size, self.pad_size, self.pad_size), mode='reflect')
        return self.conv(input, weight=self.weight, groups=self.groups)

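
# Usage sketch: blur a batch of RGB images with a 5x5 Gaussian kernel (sigma=1);
# the reflection padding in forward() keeps the spatial size unchanged.
#   smoothing = GaussianSmoothing(channels=3, kernel_size=5, sigma=1)
#   blurred = smoothing(torch.rand(2, 3, 112, 112))   # -> (2, 3, 112, 112)
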
class dim_reduction(nn.Module):
    def __init__(self, V):
        super(dim_reduction, self).__init__()
        self.V = V

    def forward(self, input):
        return torch.matmul(input, self.V.to(input.device))

def get_ensemble(models, sigma_gf, kernel_size_gf, combination, V_reduction, warp=False,
                 theta_warp=None):
    # prepares the ensemble of feature extractors
    # outputs a list of pytorch nn models
    feature_extractor_ensemble = []
    if sigma_gf is not None:
        # apply gaussian filtering to the input during the attack
        gaussian_filtering = GaussianSmoothing(3, kernel_size_gf, sigma_gf)
        if V_reduction is None:
            for model in models:
                feature_extractor_model = nn.DataParallel(nn.Sequential(
                    gaussian_filtering,
                    feature_extractor(model=model, warp=warp, theta_warp=theta_warp))).to(device)
                feature_extractor_ensemble.append(feature_extractor_model)
                if combination:
                    # additionally include the unfiltered extractor for this model
                    feature_extractor_model = nn.DataParallel(
                        feature_extractor(model=model, warp=warp, theta_warp=theta_warp)).to(device)
                    feature_extractor_ensemble.append(feature_extractor_model)

        else:
            for i, model in enumerate(models):
                feature_extractor_model = nn.DataParallel(
                    nn.Sequential(gaussian_filtering, feature_extractor(model=model, warp=warp, theta_warp=theta_warp),
                                  dim_reduction(V_reduction[i]))).to(device)
                feature_extractor_ensemble.append(feature_extractor_model)
                if combination:
                    feature_extractor_model = nn.DataParallel(
                        nn.Sequential(feature_extractor(model=model, warp=warp, theta_warp=theta_warp),
                                      dim_reduction(V_reduction[i]))).to(device)
                    feature_extractor_ensemble.append(feature_extractor_model)

    else:
        if V_reduction is None:
            for model in models:
                feature_extractor_model = nn.DataParallel(
                    feature_extractor(model=model, warp=warp, theta_warp=theta_warp)).to(device)
                feature_extractor_ensemble.append(feature_extractor_model)
        else:
            for i, model in enumerate(models):
                feature_extractor_model = nn.DataParallel(
                    nn.Sequential(feature_extractor(model=model, warp=warp, theta_warp=theta_warp),
                                  dim_reduction(V_reduction[i]))).to(device)
                feature_extractor_ensemble.append(feature_extractor_model)

    return feature_extractor_ensemble

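
# Usage sketch (models_attack as returned by prepare_models below): with
# combination=True every model contributes two extractors, one with Gaussian
# filtering at the input and one without.
#   ens = get_ensemble(models_attack, sigma_gf=3, kernel_size_gf=7,
#                      combination=True, V_reduction=None)
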
def extract_features(imgs, feature_extractor_ensemble, dim):
    # stacks the feature vectors of each extractor in the ensemble
    # output shape: (batch size, model in ensemble, dim)
    features = torch.zeros(imgs.shape[0], len(feature_extractor_ensemble), dim).to(imgs.device)
    for i, feature_extractor_model in enumerate(feature_extractor_ensemble):
        features_model = feature_extractor_model(imgs)
        # move the model output onto the buffer's device before writing it in
        features[:, i, :] = features_model.to(features.device)

    return features

def prepare_models(model_backbones,
                   input_size,
                   model_roots,
                   kernel_size_attack,
                   sigma_attack,
                   combination,
                   using_subspace,
                   V_reduction_root):

    # map backbone names to constructors so only the requested models are instantiated
    backbone_dict = {'IR_50': IR_50, 'IR_152': IR_152, 'ResNet_50': ResNet_50,
                     'ResNet_152': ResNet_152}

    print("Loading Attack Backbone Checkpoint '{}'".format(model_roots))
    print('=' * 20)

    models_attack = []
    for i in range(len(model_backbones)):
        model = backbone_dict[model_backbones[i]](input_size)
        model.load_state_dict(torch.load(model_roots[i], map_location=device))
        models_attack.append(model)

    if using_subspace:
        # load the projection matrices used for feature dimension reduction
        V_reduction = []
        for i in range(len(model_backbones)):
            V_reduction.append(torch.tensor(np.load(V_reduction_root[i])))

        dim = V_reduction[0].shape[1]
    else:
        V_reduction = None
        dim = 512

    return models_attack, V_reduction, dim

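
# Usage sketch (checkpoint paths are placeholders):
#   models_attack, V_reduction, dim = prepare_models(
#       ['IR_152', 'ResNet_152'], [112, 112],
#       ['models/Backbone_IR_152_Arcface_Epoch_112.pth',
#        'models/Backbone_ResNet_152_Arcface_Epoch_65.pth'],
#       kernel_size_attack=7, sigma_attack=3, combination=False,
#       using_subspace=False, V_reduction_root=None)   # dim == 512
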
def prepare_data(query_data_root, target_data_root, freq, batch_size, warp=False, theta_warp=None):
    # splits the query folder into query/gallery subsets: every freq-th image is a query
    data = datasets.ImageFolder(query_data_root, tensor_transform)

    subset_query = list(range(0, len(data), freq))
    subset_gallery = [x for x in list(range(0, len(data))) if x not in subset_query]
    query_set = torch.utils.data.Subset(data, subset_query)
    gallery_set = torch.utils.data.Subset(data, subset_gallery)

    if target_data_root is not None:
        target_data = datasets.ImageFolder(target_data_root, tensor_transform)
        target_loader = torch.utils.data.DataLoader(
            target_data, batch_size=batch_size)
    else:
        target_loader = None

    query_loader = torch.utils.data.DataLoader(
        query_set, batch_size=batch_size)
    gallery_loader = torch.utils.data.DataLoader(
        gallery_set, batch_size=batch_size)

    return query_loader, gallery_loader, target_loader


def prepare_dir_vec(dir_vec_extractor, imgs, dim, combination):
    # target feature direction; duplicated per model when combination doubles the ensemble
    dir_vec = extract_features(imgs, dir_vec_extractor, dim).detach().cpu()
    if combination:
        dir_vec = torch.repeat_interleave(dir_vec, 2, 1)
    return dir_vec

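
Putting the data helpers together, a minimal sketch of the intended flow (folder roots, split frequency and batch size are placeholders; models_attack and dim come from prepare_models as above):

from util.prepare_utils import prepare_data, get_ensemble, prepare_dir_vec

query_loader, gallery_loader, target_loader = prepare_data(
    'data/queries', 'data/targets', freq=5, batch_size=4)

ens = get_ensemble(models_attack, sigma_gf=None, kernel_size_gf=None,
                   combination=False, V_reduction=None)

for target_imgs, _ in target_loader:
    dir_vec = prepare_dir_vec(ens, target_imgs, dim, combination=False)
    break  # one batch of target directions is enough for a single attack run
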