Upload folder using huggingface_hub
- .gitignore +22 -0
- .gitmodules +6 -0
- README.md +153 -0
- arguments/__init__.py +118 -0
- assets/main.png +0 -0
- data/.gitkeep +0 -0
- data_utils/deepspeech_features/README.md +20 -0
- data_utils/deepspeech_features/deepspeech_features.py +274 -0
- data_utils/deepspeech_features/deepspeech_store.py +172 -0
- data_utils/deepspeech_features/extract_ds_features.py +130 -0
- data_utils/deepspeech_features/extract_wav.py +87 -0
- data_utils/deepspeech_features/fea_win.py +11 -0
- data_utils/easyportrait/create_teeth_mask.py +34 -0
- data_utils/easyportrait/local_configs/__base__/datasets/easyportrait_1024x1024.py +59 -0
- data_utils/easyportrait/local_configs/__base__/datasets/easyportrait_384x384.py +59 -0
- data_utils/easyportrait/local_configs/__base__/datasets/easyportrait_512x512.py +59 -0
- data_utils/easyportrait/local_configs/__base__/default_runtime.py +14 -0
- data_utils/easyportrait/local_configs/__base__/models/bisenetv2.py +80 -0
- data_utils/easyportrait/local_configs/__base__/models/fcn_resnet50.py +45 -0
- data_utils/easyportrait/local_configs/__base__/models/fpn_resnet50.py +36 -0
- data_utils/easyportrait/local_configs/__base__/models/lraspp.py +25 -0
- data_utils/easyportrait/local_configs/__base__/models/segformer.py +34 -0
- data_utils/easyportrait/local_configs/__base__/schedules/schedule_10k_adamw.py +11 -0
- data_utils/easyportrait/local_configs/__base__/schedules/schedule_160k_adamw.py +9 -0
- data_utils/easyportrait/local_configs/__base__/schedules/schedule_20k_adamw.py +11 -0
- data_utils/easyportrait/local_configs/__base__/schedules/schedule_40k_adamw.py +9 -0
- data_utils/easyportrait/local_configs/__base__/schedules/schedule_80k_adamw.py +9 -0
- data_utils/easyportrait/local_configs/easyportrait_experiments_v2/bisenet-fp/bisenetv2-fp.py +221 -0
- data_utils/easyportrait/local_configs/easyportrait_experiments_v2/bisenet-ps/bisenetv2-ps.py +218 -0
- data_utils/easyportrait/local_configs/easyportrait_experiments_v2/danet-fp/danet-fp.py +174 -0
- data_utils/easyportrait/local_configs/easyportrait_experiments_v2/danet-ps/danet-ps.py +171 -0
- data_utils/easyportrait/local_configs/easyportrait_experiments_v2/deeplab-fp/deeplabv3-fp.py +174 -0
- data_utils/easyportrait/local_configs/easyportrait_experiments_v2/deeplab-ps/deeplabv3-ps.py +171 -0
- data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fastscnn-fp/fastscnn-fp.py +165 -0
- data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fastscnn-ps/fastscnn-ps.py +162 -0
- data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fcn-fp/fcn-fp.py +187 -0
- data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fcn-ps/fcn-ps.py +184 -0
- data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fpn-fp/fpn-fp.py +182 -0
- data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fpn-ps/fpn-ps.py +179 -0
- data_utils/easyportrait/local_configs/easyportrait_experiments_v2/segformer-fp/segformer-fp.py +182 -0
- data_utils/easyportrait/local_configs/easyportrait_experiments_v2/segformer-ps/segformer-ps.py +179 -0
- data_utils/easyportrait/mmseg/.mim/configs +0 -0
- data_utils/easyportrait/mmseg/.mim/tools +0 -0
- data_utils/easyportrait/mmseg/__init__.py +62 -0
- data_utils/easyportrait/mmseg/apis/__init__.py +11 -0
- data_utils/easyportrait/mmseg/apis/inference.py +145 -0
- data_utils/easyportrait/mmseg/apis/test.py +233 -0
- data_utils/easyportrait/mmseg/apis/train.py +194 -0
- data_utils/easyportrait/mmseg/core/__init__.py +12 -0
- data_utils/easyportrait/mmseg/core/builder.py +33 -0
.gitignore
ADDED

```
__pycache__/
build/
*.egg-info/
*.so
*.mp4
*.pth

data_utils/face_tracking/3DMM/*
data_utils/face_parsing/79999_iter.pth

*.pyc
.vscode
output*
build
gridencoder/gridencoder.egg-info
diff_rasterization/diff_rast.egg-info
diff_rasterization/dist
tensorboard_3d
screenshots

data/*
!*.gitkeep
```
.gitmodules
ADDED

```
[submodule "submodules/simple-knn"]
	path = submodules/simple-knn
	url = https://gitlab.inria.fr/bkerbl/simple-knn.git
[submodule "submodules/diff-gaussian-rasterization"]
	path = submodules/diff-gaussian-rasterization
	url = https://github.com/ashawkey/diff-gaussian-rasterization.git
```
README.md
ADDED

# TalkingGaussian: Structure-Persistent 3D Talking Head Synthesis via Gaussian Splatting

This is the official repository for our ECCV 2024 paper **TalkingGaussian: Structure-Persistent 3D Talking Head Synthesis via Gaussian Splatting**.

[Paper](https://arxiv.org/abs/2404.15264) | [Project](https://fictionarry.github.io/TalkingGaussian/) | [Video](https://youtu.be/c5VG7HkDs8I)

![](assets/main.png)

## Installation

Tested on Ubuntu 18.04, CUDA 11.3, PyTorch 1.12.1.

```
git clone git@github.com:Fictionarry/TalkingGaussian.git --recursive

conda env create --file environment.yml
conda activate talking_gaussian
pip install "git+https://github.com/facebookresearch/pytorch3d.git"
pip install tensorflow-gpu==2.8.0
```

If you encounter installation problems with `diff-gaussian-rasterization` or `gridencoder`, please refer to [gaussian-splatting](https://github.com/graphdeco-inria/gaussian-splatting) and [torch-ngp](https://github.com/ashawkey/torch-ngp).

### Preparation

- Prepare the face-parsing model and the 3DMM model for head pose estimation:

```bash
bash scripts/prepare.sh
```

- Download the 3DMM model from [Basel Face Model 2009](https://faces.dmi.unibas.ch/bfm/main.php?nav=1-1-0&id=details):

```bash
# 1. copy 01_MorphableModel.mat to data_utils/face_tracking/3DMM/
# 2. run the following
cd data_utils/face_tracking
python convert_BFM.py
```

- Prepare the environment for [EasyPortrait](https://github.com/hukenovs/easyportrait):

```bash
# prepare mmcv
conda activate talking_gaussian
pip install -U openmim
mim install mmcv-full==1.7.1

# download the model weights
cd data_utils/easyportrait
wget "https://n-ws-620xz-pd11.s3pd11.sbercloud.ru/b-ws-620xz-pd11-jux/easyportrait/experiments/models/fpn-fp-512.pth"
```

## Usage

### Important Notice

- This code is provided for research purposes only. The author makes no warranties, express or implied, as to the accuracy, completeness, or fitness for a particular purpose of the code. Use this code at your own risk.

- The author explicitly prohibits the use of this code for any malicious or illegal activities. By using this code, you agree to comply with all applicable laws and regulations, and you agree not to use it to harm others or to perform any actions that would be considered unethical or illegal.

- The author will not be responsible for any damages, losses, or issues that arise from the use of this code.

- Users are encouraged to use this code responsibly and ethically.

### Video Dataset

[Here](https://drive.google.com/drive/folders/1E_8W805lioIznqbkvTQHWWi5IFXUG7Er?usp=drive_link) we provide two video clips used in our experiments, captured from YouTube. Please respect the original content creators' rights and comply with YouTube's copyright policies when using them.

Other videos we used can be found in [GeneFace](https://github.com/yerfor/GeneFace) and [AD-NeRF](https://github.com/YudongGuo/AD-NeRF).

### Pre-processing Training Video

* Put the training video under `data/<ID>/<ID>.mp4`.

  The video **must be 25 FPS, with all frames containing the talking person**. The resolution should be about 512x512, and the duration about 1-5 min.

* Run the script to process the video:

```bash
python data_utils/process.py data/<ID>/<ID>.mp4
```

* Obtain Action Units

  Run `FeatureExtraction` in [OpenFace](https://github.com/TadasBaltrusaitis/OpenFace), then rename and move the output CSV file to `data/<ID>/au.csv`.

* Generate teeth masks

```bash
export PYTHONPATH=./data_utils/easyportrait
python ./data_utils/easyportrait/create_teeth_mask.py ./data/<ID>
```

### Audio Pre-process

In our paper, we use DeepSpeech features for evaluation.

* DeepSpeech

```bash
python data_utils/deepspeech_features/extract_ds_features.py --input data/<name>.wav # saved to data/<name>.npy
```

- HuBERT

  As in ER-NeRF, HuBERT features are also supported, and are recommended when the audio is not in English.

  Specify `--audio_extractor hubert` when training and testing.

```
python data_utils/hubert.py --wav data/<name>.wav # saved to data/<name>_hu.npy
```

### Train

```bash
# If resources are sufficient, parts of the training can run in parallel to speed it up. See the script.
bash scripts/train_xx.sh data/<ID> output/<project_name> <GPU_ID>
```

### Test

```bash
# saved to output/<project_name>/test/ours_None/renders
python synthesize_fuse.py -S data/<ID> -M output/<project_name> --eval
```

### Inference with target audio

```bash
python synthesize_fuse.py -S data/<ID> -M output/<project_name> --use_train --audio <preprocessed_audio_feature>.npy
```

## Citation

Please consider citing as below if you find this repository helpful to your project:

```
@article{li2024talkinggaussian,
  title={TalkingGaussian: Structure-Persistent 3D Talking Head Synthesis via Gaussian Splatting},
  author={Jiahe Li and Jiawei Zhang and Xiao Bai and Jin Zheng and Xin Ning and Jun Zhou and Lin Gu},
  journal={arXiv preprint arXiv:2404.15264},
  year={2024}
}
```

## Acknowledgement

This code is developed on [gaussian-splatting](https://github.com/graphdeco-inria/gaussian-splatting) with [simple-knn](https://gitlab.inria.fr/bkerbl/simple-knn), and a modified [diff-gaussian-rasterization](https://github.com/ashawkey/diff-gaussian-rasterization). Partial codes are from [RAD-NeRF](https://github.com/ashawkey/RAD-NeRF), [DFRF](https://github.com/sstzal/DFRF), [GeneFace](https://github.com/yerfor/GeneFace), and [AD-NeRF](https://github.com/YudongGuo/AD-NeRF). The teeth mask is from [EasyPortrait](https://github.com/hukenovs/easyportrait). Thanks for these great projects!
arguments/__init__.py
ADDED

```python
#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact george.drettakis@inria.fr
#

from argparse import ArgumentParser, Namespace
import sys
import os

class GroupParams:
    pass

class ParamGroup:
    def __init__(self, parser: ArgumentParser, name: str, fill_none=False):
        group = parser.add_argument_group(name)
        for key, value in vars(self).items():
            shorthand = False
            if key.startswith("_"):
                shorthand = True
                key = key[1:]
            t = type(value)
            value = value if not fill_none else None
            if shorthand:
                # Underscore-prefixed attributes also get lower- and
                # upper-case short flags, e.g. _source_path -> -s / -S.
                if t == bool:
                    group.add_argument("--" + key, ("-" + key[0:1]), ("-" + key[0:1].upper()), default=value, action="store_true")
                else:
                    group.add_argument("--" + key, ("-" + key[0:1]), ("-" + key[0:1].upper()), default=value, type=t)
            else:
                if t == bool:
                    group.add_argument("--" + key, default=value, action="store_true")
                else:
                    group.add_argument("--" + key, default=value, type=t)

    def extract(self, args):
        group = GroupParams()
        for arg in vars(args).items():
            if arg[0] in vars(self) or ("_" + arg[0]) in vars(self):
                setattr(group, arg[0], arg[1])
        return group

class ModelParams(ParamGroup):
    def __init__(self, parser, sentinel=False):
        self.sh_degree = 2
        self._source_path = ""
        self._model_path = ""
        self._images = "images"
        self._resolution = -1
        self._white_background = False
        self.data_device = "cpu"
        self.eval = False
        self.audio = ""
        self.init_num = 10_000
        self.audio_extractor = "deepspeech"
        super().__init__(parser, "Loading Parameters", sentinel)

    def extract(self, args):
        g = super().extract(args)
        g.source_path = os.path.abspath(g.source_path)
        return g

class PipelineParams(ParamGroup):
    def __init__(self, parser):
        self.convert_SHs_python = False
        self.compute_cov3D_python = False
        self.debug = False
        super().__init__(parser, "Pipeline Parameters")

class OptimizationParams(ParamGroup):
    def __init__(self, parser):
        self.iterations = 50_000
        self.position_lr_init = 0.00016
        self.position_lr_final = 0.0000016
        self.position_lr_delay_mult = 0.01
        self.position_lr_max_steps = 45_000
        self.feature_lr = 0.0025
        self.opacity_lr = 0.05
        self.scaling_lr = 0.003
        self.rotation_lr = 0.001
        self.percent_dense = 0.005
        self.lambda_dssim = 0.2
        self.densification_interval = 100
        self.opacity_reset_interval = 3000
        self.densify_from_iter = 500
        self.densify_until_iter = 45_000
        self.densify_grad_threshold = 0.0002
        self.random_background = False
        super().__init__(parser, "Optimization Parameters")

def get_combined_args(parser: ArgumentParser):
    cmdlne_string = sys.argv[1:]
    cfgfile_string = "Namespace()"
    args_cmdline = parser.parse_args(cmdlne_string)

    try:
        cfgfilepath = os.path.join(args_cmdline.model_path, "cfg_args")
        print("Looking for config file in", cfgfilepath)
        with open(cfgfilepath) as cfg_file:
            print("Config file found: {}".format(cfgfilepath))
            cfgfile_string = cfg_file.read()
    except TypeError:
        print("Config file not found")
    # The stored cfg_args file contains the repr of a Namespace; evaluating
    # it recovers the training-time arguments.
    args_cfgfile = eval(cfgfile_string)

    merged_dict = vars(args_cfgfile).copy()
    for k, v in vars(args_cmdline).items():
        if v is not None:
            merged_dict[k] = v
    return Namespace(**merged_dict)
```
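For orientation, here is a minimal usage sketch (not part of this diff) of how these parameter groups are typically wired into a script, following the gaussian-splatting convention this file inherits; the data and output paths are hypothetical:

```python
# Register the groups on one parser, parse, then extract plain GroupParams
# objects. Underscore-prefixed attributes get short flags, which is why the
# README can use -S and -M.
from argparse import ArgumentParser
from arguments import ModelParams, OptimizationParams, PipelineParams

parser = ArgumentParser(description="Training script parameters")
lp = ModelParams(parser)         # --source_path/-s/-S, --model_path/-m/-M, --audio, ...
op = OptimizationParams(parser)  # --iterations, learning rates, densification knobs
pp = PipelineParams(parser)      # --debug and SH/covariance toggles

args = parser.parse_args(["-S", "data/obama", "-M", "output/obama"])
dataset = lp.extract(args)       # source_path is made absolute here
opt = op.extract(args)
pipe = pp.extract(args)
print(dataset.source_path, opt.iterations)
```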
assets/main.png
ADDED

data/.gitkeep
ADDED
File without changes
data_utils/deepspeech_features/README.md
ADDED

# Routines for DeepSpeech features processing

Several routines for [DeepSpeech](https://github.com/mozilla/DeepSpeech) features processing, such as speech feature generation for the [VOCA](https://github.com/TimoBolkart/voca) model.

## Installation

```
pip3 install -r requirements.txt
```

## Usage

Generate wav files:
```
python3 extract_wav.py --in-video=<your_data_dir>
```

Generate files with DeepSpeech features:
```
python3 extract_ds_features.py --input=<your_data_dir>
```
data_utils/deepspeech_features/deepspeech_features.py
ADDED

```python
"""
DeepSpeech features processing routines.
NB: Based on VOCA code. See the corresponding license restrictions.
"""

__all__ = ['conv_audios_to_deepspeech']

import numpy as np
import warnings
import resampy
from scipy.io import wavfile
from python_speech_features import mfcc
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()


def conv_audios_to_deepspeech(audios,
                              out_files,
                              num_frames_info,
                              deepspeech_pb_path,
                              audio_window_size=1,
                              audio_window_stride=1):
    """
    Convert a list of audio files into files with DeepSpeech features.

    Parameters
    ----------
    audios : list of str or list of None
        Paths to input audio files.
    out_files : list of str
        Paths to output files with DeepSpeech features.
    num_frames_info : list of int
        List of numbers of frames.
    deepspeech_pb_path : str
        Path to DeepSpeech 0.1.0 frozen model.
    audio_window_size : int, default 1
        Audio window size.
    audio_window_stride : int, default 1
        Audio window stride.
    """
    graph, logits_ph, input_node_ph, input_lengths_ph = prepare_deepspeech_net(
        deepspeech_pb_path)

    with tf.compat.v1.Session(graph=graph) as sess:
        for audio_file_path, out_file_path, num_frames in zip(audios, out_files, num_frames_info):
            print(audio_file_path)
            print(out_file_path)
            audio_sample_rate, audio = wavfile.read(audio_file_path)
            if audio.ndim != 1:
                warnings.warn(
                    "Audio has multiple channels, the first channel is used")
                audio = audio[:, 0]
            ds_features = pure_conv_audio_to_deepspeech(
                audio=audio,
                audio_sample_rate=audio_sample_rate,
                audio_window_size=audio_window_size,
                audio_window_stride=audio_window_stride,
                num_frames=num_frames,
                net_fn=lambda x: sess.run(
                    logits_ph,
                    feed_dict={
                        input_node_ph: x[np.newaxis, ...],
                        input_lengths_ph: [x.shape[0]]}))

            # Cut the per-frame logits into 16-row windows with stride 2.
            net_output = ds_features.reshape(-1, 29)
            win_size = 16
            zero_pad = np.zeros((int(win_size / 2), net_output.shape[1]))
            net_output = np.concatenate(
                (zero_pad, net_output, zero_pad), axis=0)
            windows = []
            for window_index in range(0, net_output.shape[0] - win_size, 2):
                windows.append(
                    net_output[window_index:window_index + win_size])
            print(np.array(windows).shape)
            np.save(out_file_path, np.array(windows))


def prepare_deepspeech_net(deepspeech_pb_path):
    """
    Load and prepare the DeepSpeech network.

    Parameters
    ----------
    deepspeech_pb_path : str
        Path to DeepSpeech 0.1.0 frozen model.

    Returns
    -------
    graph : obj
        TensorFlow graph.
    logits_ph : obj
        TensorFlow placeholder for `logits`.
    input_node_ph : obj
        TensorFlow placeholder for `input_node`.
    input_lengths_ph : obj
        TensorFlow placeholder for `input_lengths`.
    """
    # Load graph and placeholders:
    with tf.io.gfile.GFile(deepspeech_pb_path, "rb") as f:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(f.read())

    graph = tf.compat.v1.get_default_graph()
    tf.import_graph_def(graph_def, name="deepspeech")
    logits_ph = graph.get_tensor_by_name("deepspeech/logits:0")
    input_node_ph = graph.get_tensor_by_name("deepspeech/input_node:0")
    input_lengths_ph = graph.get_tensor_by_name("deepspeech/input_lengths:0")

    return graph, logits_ph, input_node_ph, input_lengths_ph


def pure_conv_audio_to_deepspeech(audio,
                                  audio_sample_rate,
                                  audio_window_size,
                                  audio_window_stride,
                                  num_frames,
                                  net_fn):
    """
    Core routine for converting audio into DeepSpeech features.

    Parameters
    ----------
    audio : np.array
        Audio data.
    audio_sample_rate : int
        Audio sample rate.
    audio_window_size : int
        Audio window size.
    audio_window_stride : int
        Audio window stride.
    num_frames : int or None
        Number of frames.
    net_fn : func
        Function for DeepSpeech model call.

    Returns
    -------
    np.array
        DeepSpeech features.
    """
    target_sample_rate = 16000
    if audio_sample_rate != target_sample_rate:
        resampled_audio = resampy.resample(
            x=audio.astype(np.float64),  # np.float was removed in NumPy >= 1.24
            sr_orig=audio_sample_rate,
            sr_new=target_sample_rate)
    else:
        resampled_audio = audio.astype(np.float32)
    input_vector = conv_audio_to_deepspeech_input_vector(
        audio=resampled_audio.astype(np.int16),
        sample_rate=target_sample_rate,
        num_cepstrum=26,
        num_context=9)

    network_output = net_fn(input_vector)
    # print(network_output.shape)

    deepspeech_fps = 50
    video_fps = 50  # Change this option if the video fps is different
    audio_len_s = float(audio.shape[0]) / audio_sample_rate
    if num_frames is None:
        num_frames = int(round(audio_len_s * video_fps))
    else:
        video_fps = num_frames / audio_len_s
    network_output = interpolate_features(
        features=network_output[:, 0],
        input_rate=deepspeech_fps,
        output_rate=video_fps,
        output_len=num_frames)

    # Make windows:
    zero_pad = np.zeros((int(audio_window_size / 2), network_output.shape[1]))
    network_output = np.concatenate(
        (zero_pad, network_output, zero_pad), axis=0)
    windows = []
    for window_index in range(0, network_output.shape[0] - audio_window_size, audio_window_stride):
        windows.append(
            network_output[window_index:window_index + audio_window_size])

    return np.array(windows)


def conv_audio_to_deepspeech_input_vector(audio,
                                          sample_rate,
                                          num_cepstrum,
                                          num_context):
    """
    Convert raw audio data into a DeepSpeech input vector.

    Parameters
    ----------
    audio : np.array
        Audio data.
    sample_rate : int
        Audio sample rate.
    num_cepstrum : int
        Number of cepstra.
    num_context : int
        Number of context frames.

    Returns
    -------
    np.array
        DeepSpeech input vector.
    """
    # Get mfcc coefficients:
    features = mfcc(
        signal=audio,
        samplerate=sample_rate,
        numcep=num_cepstrum)

    # We only keep every second feature (BiRNN stride = 2):
    features = features[::2]

    # One stride per time step in the input:
    num_strides = len(features)

    # Add empty initial and final contexts:
    empty_context = np.zeros((num_context, num_cepstrum), dtype=features.dtype)
    features = np.concatenate((empty_context, features, empty_context))

    # Create a view into the array with overlapping strides of size
    # numcontext (past) + 1 (present) + numcontext (future):
    window_size = 2 * num_context + 1
    train_inputs = np.lib.stride_tricks.as_strided(
        features,
        shape=(num_strides, window_size, num_cepstrum),
        strides=(features.strides[0],
                 features.strides[0], features.strides[1]),
        writeable=False)

    # Flatten the second and third dimensions:
    train_inputs = np.reshape(train_inputs, [num_strides, -1])

    # Whiten inputs (copy the strided view first to make it writeable):
    train_inputs = np.copy(train_inputs)
    train_inputs = (train_inputs - np.mean(train_inputs)) / np.std(train_inputs)

    return train_inputs


def interpolate_features(features,
                         input_rate,
                         output_rate,
                         output_len):
    """
    Interpolate DeepSpeech features.

    Parameters
    ----------
    features : np.array
        DeepSpeech features.
    input_rate : int
        Input rate (FPS).
    output_rate : int
        Output rate (FPS).
    output_len : int
        Output data length.

    Returns
    -------
    np.array
        Interpolated data.
    """
    input_len = features.shape[0]
    num_features = features.shape[1]
    input_timestamps = np.arange(input_len) / float(input_rate)
    output_timestamps = np.arange(output_len) / float(output_rate)
    output_features = np.zeros((output_len, num_features))
    for feature_idx in range(num_features):
        output_features[:, feature_idx] = np.interp(
            x=output_timestamps,
            xp=input_timestamps,
            fp=features[:, feature_idx])
    return output_features
```
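The window arithmetic above is easy to misread, so here is a small self-contained shape check (an assumed example, not repository code): with `num_frames=None` the logits are interpolated to 50 rows per second, and the 16-row, stride-2 windowing then yields exactly one `(16, 29)` window per frame of a 25 FPS video.

```python
import numpy as np

# 10 s of fake DeepSpeech logits at 50 rows/s (29 = alphabet size).
seconds = 10
net_output = np.random.randn(50 * seconds, 29)

win_size = 16
zero_pad = np.zeros((win_size // 2, net_output.shape[1]))
padded = np.concatenate((zero_pad, net_output, zero_pad), axis=0)
windows = [padded[i:i + win_size]
           for i in range(0, padded.shape[0] - win_size, 2)]
print(np.array(windows).shape)  # (250, 16, 29): one window per 25 fps frame
```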
data_utils/deepspeech_features/deepspeech_store.py
ADDED

```python
"""
Routines for loading the DeepSpeech model.
"""

__all__ = ['get_deepspeech_model_file']

import os
import zipfile
import logging
import hashlib


deepspeech_features_repo_url = 'https://github.com/osmr/deepspeech_features'


def get_deepspeech_model_file(local_model_store_dir_path=os.path.join("~", ".tensorflow", "models")):
    """
    Return the location of the pretrained model on the local file system. This function downloads from the
    online model zoo when the model cannot be found or has a hash mismatch. The root directory is created
    if it doesn't exist.

    Parameters
    ----------
    local_model_store_dir_path : str, default $TENSORFLOW_HOME/models
        Location for keeping the model parameters.

    Returns
    -------
    file_path
        Path to the requested pretrained model file.
    """
    sha1_hash = "b90017e816572ddce84f5843f1fa21e6a377975e"
    file_name = "deepspeech-0_1_0-b90017e8.pb"
    local_model_store_dir_path = os.path.expanduser(local_model_store_dir_path)
    file_path = os.path.join(local_model_store_dir_path, file_name)
    if os.path.exists(file_path):
        if _check_sha1(file_path, sha1_hash):
            return file_path
        else:
            logging.warning("Mismatch in the content of model file detected. Downloading again.")
    else:
        logging.info("Model file not found. Downloading to {}.".format(file_path))

    if not os.path.exists(local_model_store_dir_path):
        os.makedirs(local_model_store_dir_path)

    zip_file_path = file_path + ".zip"
    _download(
        url="{repo_url}/releases/download/{repo_release_tag}/{file_name}.zip".format(
            repo_url=deepspeech_features_repo_url,
            repo_release_tag="v0.0.1",
            file_name=file_name),
        path=zip_file_path,
        overwrite=True)
    with zipfile.ZipFile(zip_file_path) as zf:
        zf.extractall(local_model_store_dir_path)
    os.remove(zip_file_path)

    if _check_sha1(file_path, sha1_hash):
        return file_path
    else:
        raise ValueError("Downloaded file has different hash. Please try again.")


def _download(url, path=None, overwrite=False, sha1_hash=None, retries=5, verify_ssl=True):
    """
    Download a given URL.

    Parameters
    ----------
    url : str
        URL to download.
    path : str, optional
        Destination path to store the downloaded file. By default stores to the
        current directory with the same name as in the url.
    overwrite : bool, optional
        Whether to overwrite the destination file if it already exists.
    sha1_hash : str, optional
        Expected sha1 hash in hexadecimal digits. Will ignore an existing file when the hash is specified
        but doesn't match.
    retries : integer, default 5
        The number of times to attempt the download in case of failure or non-200 return codes.
    verify_ssl : bool, default True
        Verify SSL certificates.

    Returns
    -------
    str
        The file path of the downloaded file.
    """
    import warnings
    try:
        import requests
    except ImportError:
        class requests_failed_to_import(object):
            pass
        requests = requests_failed_to_import

    if path is None:
        fname = url.split("/")[-1]
        # Empty filenames are invalid
        assert fname, "Can't construct file-name from this URL. Please set the `path` option manually."
    else:
        path = os.path.expanduser(path)
        if os.path.isdir(path):
            fname = os.path.join(path, url.split("/")[-1])
        else:
            fname = path
    assert retries >= 0, "Number of retries should be at least 0"

    if not verify_ssl:
        warnings.warn(
            "Unverified HTTPS request is being made (verify_ssl=False). "
            "Adding certificate verification is strongly advised.")

    if overwrite or not os.path.exists(fname) or (sha1_hash and not _check_sha1(fname, sha1_hash)):
        dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname)))
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        while retries + 1 > 0:
            # Disable pylint too-broad Exception
            # pylint: disable=W0703
            try:
                print("Downloading {} from {}...".format(fname, url))
                r = requests.get(url, stream=True, verify=verify_ssl)
                if r.status_code != 200:
                    raise RuntimeError("Failed downloading url {}".format(url))
                with open(fname, "wb") as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:  # filter out keep-alive new chunks
                            f.write(chunk)
                if sha1_hash and not _check_sha1(fname, sha1_hash):
                    raise UserWarning("File {} is downloaded but the content hash does not match."
                                      " The repo may be outdated or download may be incomplete. "
                                      "If the `repo_url` is overridden, consider switching to "
                                      "the default repo.".format(fname))
                break
            except Exception as e:
                retries -= 1
                if retries <= 0:
                    raise e
                else:
                    print("download failed, retrying, {} attempt{} left"
                          .format(retries, "s" if retries > 1 else ""))

    return fname


def _check_sha1(filename, sha1_hash):
    """
    Check whether the sha1 hash of the file content matches the expected hash.

    Parameters
    ----------
    filename : str
        Path to the file.
    sha1_hash : str
        Expected sha1 hash in hexadecimal digits.

    Returns
    -------
    bool
        Whether the file content matches the expected hash.
    """
    sha1 = hashlib.sha1()
    with open(filename, "rb") as f:
        while True:
            data = f.read(1048576)
            if not data:
                break
            sha1.update(data)

    return sha1.hexdigest() == sha1_hash
```
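A one-line usage sketch (assuming network access on the first call): the helper caches the frozen graph under `~/.tensorflow/models` and re-verifies its SHA-1 on every call.

```python
from deepspeech_store import get_deepspeech_model_file

# Downloads deepspeech-0_1_0-b90017e8.pb on first use, then reuses the cache.
pb_path = get_deepspeech_model_file()
print(pb_path)
```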
data_utils/deepspeech_features/extract_ds_features.py
ADDED

```python
"""
Script for extracting DeepSpeech features from an audio file.
"""

import os
import argparse
import numpy as np
import pandas as pd
from deepspeech_store import get_deepspeech_model_file
from deepspeech_features import conv_audios_to_deepspeech


def parse_args():
    """
    Create python script parameters.
    Returns
    -------
    ArgumentParser
        Resulting args.
    """
    parser = argparse.ArgumentParser(
        description="Extract DeepSpeech features from audio file",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--input",
        type=str,
        required=True,
        help="path to input audio file or directory")
    parser.add_argument(
        "--output",
        type=str,
        help="path to output file with DeepSpeech features")
    parser.add_argument(
        "--deepspeech",
        type=str,
        help="path to DeepSpeech 0.1.0 frozen model")
    parser.add_argument(
        "--metainfo",
        type=str,
        help="path to file with meta-information")

    args = parser.parse_args()
    return args


def extract_features(in_audios,
                     out_files,
                     deepspeech_pb_path,
                     metainfo_file_path=None):
    """
    Extract DeepSpeech features for the given audio files.
    Parameters
    ----------
    in_audios : list of str
        Paths to input audio files.
    out_files : list of str
        Paths to output files with DeepSpeech features.
    deepspeech_pb_path : str
        Path to DeepSpeech 0.1.0 frozen model.
    metainfo_file_path : str, default None
        Path to file with meta-information.
    """
    if metainfo_file_path is None:
        num_frames_info = [None] * len(in_audios)
    else:
        train_df = pd.read_csv(
            metainfo_file_path,
            sep="\t",
            index_col=False,
            dtype={"Id": int, "File": str, "Count": int})  # np.int/np.unicode were removed in NumPy >= 1.24
        num_frames_info = train_df["Count"].values
        assert (len(num_frames_info) == len(in_audios))

    for i, in_audio in enumerate(in_audios):
        if not out_files[i]:
            file_stem, _ = os.path.splitext(in_audio)
            out_files[i] = file_stem + ".npy"
            # print(out_files[i])
    conv_audios_to_deepspeech(
        audios=in_audios,
        out_files=out_files,
        num_frames_info=num_frames_info,
        deepspeech_pb_path=deepspeech_pb_path)


def main():
    """
    Main body of script.
    """
    args = parse_args()
    in_audio = os.path.expanduser(args.input)
    if not os.path.exists(in_audio):
        raise Exception("Input file/directory doesn't exist: {}".format(in_audio))
    deepspeech_pb_path = args.deepspeech
    # Hard-coded override kept from the original script: always point at the
    # cached model and let the fallback below download it if it is missing.
    deepspeech_pb_path = True
    args.deepspeech = '~/.tensorflow/models/deepspeech-0_1_0-b90017e8.pb'
    if deepspeech_pb_path is None:
        deepspeech_pb_path = ""
    if deepspeech_pb_path:
        deepspeech_pb_path = os.path.expanduser(args.deepspeech)
    if not os.path.exists(deepspeech_pb_path):
        deepspeech_pb_path = get_deepspeech_model_file()
    if os.path.isfile(in_audio):
        extract_features(
            in_audios=[in_audio],
            out_files=[args.output],
            deepspeech_pb_path=deepspeech_pb_path,
            metainfo_file_path=args.metainfo)
    else:
        audio_file_paths = []
        for file_name in os.listdir(in_audio):
            if not os.path.isfile(os.path.join(in_audio, file_name)):
                continue
            _, file_ext = os.path.splitext(file_name)
            if file_ext.lower() == ".wav":
                audio_file_path = os.path.join(in_audio, file_name)
                audio_file_paths.append(audio_file_path)
        audio_file_paths = sorted(audio_file_paths)
        out_file_paths = [""] * len(audio_file_paths)
        extract_features(
            in_audios=audio_file_paths,
            out_files=out_file_paths,
            deepspeech_pb_path=deepspeech_pb_path,
            metainfo_file_path=args.metainfo)


if __name__ == "__main__":
    main()
```
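The `--metainfo` file format is only implied by the `pd.read_csv` call above; a sketch of a matching tab-separated file (file names and frame counts are hypothetical) could be produced like this:

```python
import pandas as pd

# One row per audio clip; "Count" fixes the number of output feature frames.
pd.DataFrame({
    "Id": [0, 1],
    "File": ["clip0.wav", "clip1.wav"],
    "Count": [250, 512],
}).to_csv("metainfo.tsv", sep="\t", index=False)
```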
data_utils/deepspeech_features/extract_wav.py
ADDED

```python
"""
Script for extracting audio (16-bit, mono, 16000 Hz) from a video file.
"""

import os
import argparse
import subprocess


def parse_args():
    """
    Create python script parameters.

    Returns
    -------
    ArgumentParser
        Resulting args.
    """
    parser = argparse.ArgumentParser(
        description="Extract audio from video file",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--in-video",
        type=str,
        required=True,
        help="path to input video file or directory")
    parser.add_argument(
        "--out-audio",
        type=str,
        help="path to output audio file")

    args = parser.parse_args()
    return args


def extract_audio(in_video,
                  out_audio):
    """
    Actually extract the audio from a video file.

    Parameters
    ----------
    in_video : str
        Path to input video file.
    out_audio : str
        Path to output audio file.
    """
    if not out_audio:
        file_stem, _ = os.path.splitext(in_video)
        out_audio = file_stem + ".wav"
    # command1 = "ffmpeg -i {in_video} -vn -acodec copy {aac_audio}"
    # command2 = "ffmpeg -i {aac_audio} -vn -acodec pcm_s16le -ac 1 -ar 22000 {out_audio}"
    # command = "ffmpeg -i {in_video} -vn -acodec pcm_s16le -ac 1 -ar 22000 {out_audio}"
    command = "ffmpeg -i {in_video} -vn -acodec pcm_s16le -ac 1 -ar 16000 {out_audio}"
    subprocess.call([command.format(in_video=in_video, out_audio=out_audio)], shell=True)


def main():
    """
    Main body of script.
    """
    args = parse_args()
    in_video = os.path.expanduser(args.in_video)
    if not os.path.exists(in_video):
        raise Exception("Input file/directory doesn't exist: {}".format(in_video))
    if os.path.isfile(in_video):
        extract_audio(
            in_video=in_video,
            out_audio=args.out_audio)
    else:
        video_file_paths = []
        for file_name in os.listdir(in_video):
            if not os.path.isfile(os.path.join(in_video, file_name)):
                continue
            _, file_ext = os.path.splitext(file_name)
            if file_ext.lower() in (".mp4", ".mkv", ".avi"):
                video_file_path = os.path.join(in_video, file_name)
                video_file_paths.append(video_file_path)
        video_file_paths = sorted(video_file_paths)
        for video_file_path in video_file_paths:
            extract_audio(
                in_video=video_file_path,
                out_audio="")


if __name__ == "__main__":
    main()
```
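Because the script formats a shell command string, paths containing spaces or quotes will break it; a safer equivalent (an alternative sketch, not the repository's code) passes the same ffmpeg arguments as an argv list:

```python
import subprocess

def extract_audio_safe(in_video: str, out_audio: str) -> None:
    # Same ffmpeg flags as above, but no shell string interpolation.
    subprocess.check_call([
        "ffmpeg", "-i", in_video, "-vn",
        "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
        out_audio,
    ])
```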
data_utils/deepspeech_features/fea_win.py
ADDED

```python
import numpy as np

net_output = np.load('french.ds.npy').reshape(-1, 29)
win_size = 16
zero_pad = np.zeros((int(win_size / 2), net_output.shape[1]))
net_output = np.concatenate((zero_pad, net_output, zero_pad), axis=0)
windows = []
for window_index in range(0, net_output.shape[0] - win_size, 2):
    windows.append(net_output[window_index:window_index + win_size])
print(np.array(windows).shape)
np.save('aud_french.npy', np.array(windows))
```
data_utils/easyportrait/create_teeth_mask.py
ADDED

```python
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser

from mmseg.apis import inference_segmentor, init_segmentor, show_result_pyplot

import os
import glob
from tqdm import tqdm
import numpy as np

def main():
    parser = ArgumentParser()
    parser.add_argument('dataset', help='Path to the dataset directory')
    parser.add_argument('--config', default="./data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fpn-fp/fpn-fp.py", help='Config file')
    parser.add_argument('--checkpoint', default="./data_utils/easyportrait/fpn-fp-512.pth", help='Checkpoint file')

    args = parser.parse_args()

    # build the model from a config file and a checkpoint file
    model = init_segmentor(args.config, args.checkpoint, device='cuda:0')

    # run inference over every extracted frame
    dataset_path = os.path.join(args.dataset, 'ori_imgs')
    out_path = os.path.join(args.dataset, 'teeth_mask')
    os.makedirs(out_path, exist_ok=True)

    for file in tqdm(glob.glob(os.path.join(dataset_path, '*.jpg'))):
        result = inference_segmentor(model, file)
        # EasyPortrait class index 7 is "teeth"; zero out everything else
        # and store the result as a boolean mask.
        result[0][result[0] != 7] = 0
        np.save(file.replace('jpg', 'npy').replace('ori_imgs', 'teeth_mask'), result[0].astype(np.bool_))


if __name__ == '__main__':
    main()
```
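Each saved file is thus a boolean array that is True exactly on teeth pixels (EasyPortrait class 7); a quick sanity check on one output (the path here is hypothetical) might be:

```python
import numpy as np

mask = np.load("data/obama/teeth_mask/0.npy")  # hypothetical frame
print(mask.shape, mask.dtype)  # (H, W) bool
print(mask.mean())             # fraction of pixels labeled as teeth
```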
data_utils/easyportrait/local_configs/__base__/datasets/easyportrait_1024x1024.py
ADDED

```python
# dataset settings
dataset_type = 'EasyPortraitDataset'
data_root = 'path/to/data/EasyPortrait'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Pad', size=(1920, 1920), pad_val=0, seg_pad_val=255),
    dict(type='Resize', img_scale=(1024, 1024)),

    # We don't use RandomFlip, but need it in the code to fix error: https://github.com/open-mmlab/mmsegmentation/issues/231
    dict(type='RandomFlip', prob=0.0),
    dict(type='PhotoMetricDistortion',
         brightness_delta=16,
         contrast_range=(0.5, 1.0),
         saturation_range=(0.5, 1.0),
         hue_delta=9),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1024, 1024),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/train',
        ann_dir='annotations/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/val',
        ann_dir='annotations/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/test',
        ann_dir='annotations/test',
        pipeline=test_pipeline))
```
data_utils/easyportrait/local_configs/__base__/datasets/easyportrait_384x384.py
ADDED

```python
# dataset settings
dataset_type = 'EasyPortraitDataset'
data_root = 'path/to/data/EasyPortrait'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Pad', size=(1920, 1920), pad_val=0, seg_pad_val=255),
    dict(type='Resize', img_scale=(384, 384)),

    # We don't use RandomFlip, but need it in the code to fix error: https://github.com/open-mmlab/mmsegmentation/issues/231
    dict(type='RandomFlip', prob=0.0),
    dict(type='PhotoMetricDistortion',
         brightness_delta=16,
         contrast_range=(0.5, 1.0),
         saturation_range=(0.5, 1.0),
         hue_delta=9),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(384, 384),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/train',
        ann_dir='annotations/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/val',
        ann_dir='annotations/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/test',
        ann_dir='annotations/test',
        pipeline=test_pipeline))
```
data_utils/easyportrait/local_configs/__base__/datasets/easyportrait_512x512.py
ADDED
@@ -0,0 +1,59 @@
# dataset settings
dataset_type = 'EasyPortraitDataset'
data_root = 'path/to/data/EasyPortrait'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Pad', size=(1920, 1920), pad_val=0, seg_pad_val=255),
    dict(type='Resize', img_scale=(512, 512)),

    # We don't use RandomFlip, but need it in the code to fix error: https://github.com/open-mmlab/mmsegmentation/issues/231
    dict(type='RandomFlip', prob=0.0),
    dict(type='PhotoMetricDistortion',
         brightness_delta=16,
         contrast_range=(0.5, 1.0),
         saturation_range=(0.5, 1.0),
         hue_delta=9),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(512, 512),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/train',
        ann_dir='annotations/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/val',
        ann_dir='annotations/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/test',
        ann_dir='annotations/test',
        pipeline=test_pipeline))
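These __base__ dataset files are not run directly; in the mmsegmentation config style used throughout this folder they are composed into experiment configs through `_base_` inheritance. A minimal sketch of a derived config (the filename, relative paths, and the overrides below are illustrative assumptions, not files from this commit):

_base_ = [
    '../../__base__/models/fcn_resnet50.py',
    '../../__base__/datasets/easyportrait_512x512.py',
    '../../__base__/default_runtime.py',
    '../../__base__/schedules/schedule_20k_adamw.py',
]
# Keys defined here override the inherited values, e.g. the dataset root
# (note that data_root = 'path/to/data/EasyPortrait' above is a placeholder
# the user is expected to replace) and the number of classes:
data_root = '/path/to/EasyPortrait'
model = dict(decode_head=dict(num_classes=9))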
data_utils/easyportrait/local_configs/__base__/default_runtime.py
ADDED
@@ -0,0 +1,14 @@
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', by_epoch=False),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
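Uncommenting the TensorboardLoggerHook line enables TensorBoard logging alongside the text logger. For reference, a runtime file like this is loaded (and merged with the other `_base_` parts) through mmcv's config loader; a minimal sketch, assuming mmcv 1.x is installed and the path is adjusted to your checkout:

from mmcv import Config

cfg = Config.fromfile(
    'data_utils/easyportrait/local_configs/__base__/default_runtime.py')
print(cfg.log_config.interval)  # -> 50
print(cfg.workflow)             # -> [('train', 1)]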
data_utils/easyportrait/local_configs/__base__/models/bisenetv2.py
ADDED
@@ -0,0 +1,80 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='BiSeNetV2',
        detail_channels=(64, 64, 128),
        semantic_channels=(16, 32, 64, 128),
        semantic_expansion_ratio=6,
        bga_channels=128,
        out_indices=(0, 1, 2, 3, 4),
        init_cfg=None,
        align_corners=False),
    decode_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=0,
        channels=1024,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=[
        dict(
            type='FCNHead',
            in_channels=16,
            channels=16,
            num_convs=2,
            num_classes=19,
            in_index=1,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='FCNHead',
            in_channels=32,
            channels=64,
            num_convs=2,
            num_classes=19,
            in_index=2,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='FCNHead',
            in_channels=64,
            channels=256,
            num_convs=2,
            num_classes=19,
            in_index=3,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='FCNHead',
            in_channels=128,
            channels=1024,
            num_convs=2,
            num_classes=19,
            in_index=4,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    ],
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
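A sketch of how a model section like the one above is turned into an actual network, assuming the mmseg 0.x registry API that matches this config syntax (the path is relative to this repo):

from mmcv import Config
from mmseg.models import build_segmentor

cfg = Config.fromfile(
    'data_utils/easyportrait/local_configs/__base__/models/bisenetv2.py')
# train_cfg/test_cfg already live inside cfg.model in this config style.
model = build_segmentor(cfg.model)
model.init_weights()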
data_utils/easyportrait/local_configs/__base__/models/fcn_resnet50.py
ADDED
@@ -0,0 +1,45 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='FCNHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        num_convs=2,
        concat_input=True,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
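The auxiliary head above implements deep supervision: during training its cross-entropy loss on the stage-3 features is added to the decode head's loss with weight 0.4, and the head is discarded at inference. A self-contained sketch of the weighting (plain PyTorch, not mmseg's internal loss bookkeeping):

import torch.nn.functional as F

def segmentor_loss(decode_logits, aux_logits, gt_seg, aux_weight=0.4):
    # Mirrors loss_weight=1.0 on the decode head and 0.4 on the auxiliary
    # head; 255 is the seg_pad_val / ignore label used in these configs.
    main = F.cross_entropy(decode_logits, gt_seg, ignore_index=255)
    aux = F.cross_entropy(aux_logits, gt_seg, ignore_index=255)
    return main + aux_weight * aux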
data_utils/easyportrait/local_configs/__base__/models/fpn_resnet50.py
ADDED
@@ -0,0 +1,36 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=4),
    decode_head=dict(
        type='FPNHead',
        in_channels=[256, 256, 256, 256],
        in_index=[0, 1, 2, 3],
        feature_strides=[4, 8, 16, 32],
        channels=128,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
data_utils/easyportrait/local_configs/__base__/models/lraspp.py
ADDED
@@ -0,0 +1,25 @@
# model settings
norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='MobileNetV3',
        arch='large',
        out_indices=(1, 3, 16),
        norm_cfg=norm_cfg),
    decode_head=dict(
        type='LRASPPHead',
        in_channels=(16, 24, 960),
        in_index=(0, 1, 2),
        channels=128,
        input_transform='multiple_select',
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
data_utils/easyportrait/local_configs/__base__/models/segformer.py
ADDED
@@ -0,0 +1,34 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='MixVisionTransformer',
        in_channels=3,
        embed_dims=32,
        num_stages=4,
        num_layers=[2, 2, 2, 2],
        num_heads=[1, 2, 5, 8],
        patch_sizes=[7, 3, 3, 3],
        sr_ratios=[8, 4, 2, 1],
        out_indices=(0, 1, 2, 3),
        mlp_ratio=4,
        qkv_bias=True,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.1),
    decode_head=dict(
        type='SegformerHead',
        in_channels=[32, 64, 160, 256],
        in_index=[0, 1, 2, 3],
        channels=256,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
data_utils/easyportrait/local_configs/__base__/schedules/schedule_10k_adamw.py
ADDED
@@ -0,0 +1,11 @@
# optimizer
optimizer = dict(type='AdamW', lr=0.0002, weight_decay=0.0001)
optimizer_config = dict()

# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=False)

# runtime settings
runner = dict(type='IterBasedRunner', max_iters=10000)
checkpoint_config = dict(by_epoch=False, interval=2000)
evaluation = dict(interval=2000, metric='mIoU')
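The five schedule files in this folder differ only in max_iters and the checkpoint/evaluation intervals; all share the AdamW optimizer and the 'poly' decay, which scales the learning rate per iteration as lr(t) = (base_lr - min_lr) * (1 - t / max_iters) ** power + min_lr. A sketch restating that rule with the values from schedule_10k_adamw.py:

def poly_lr(t, base_lr=2e-4, min_lr=0.0, power=0.9, max_iters=10000):
    # mmcv's 'poly' LrUpdater rule, written out for clarity.
    return (base_lr - min_lr) * (1 - t / max_iters) ** power + min_lr

assert poly_lr(0) == 2e-4        # starts at the base learning rate
assert poly_lr(10000) == 0.0     # decays to min_lr at the last iteration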
data_utils/easyportrait/local_configs/__base__/schedules/schedule_160k_adamw.py
ADDED
@@ -0,0 +1,9 @@
# optimizer
optimizer = dict(type='AdamW', lr=0.0002, weight_decay=0.0001)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=False)
# runtime settings
runner = dict(type='IterBasedRunner', max_iters=160000)
checkpoint_config = dict(by_epoch=False, interval=4000)
evaluation = dict(interval=4000, metric='mIoU')
data_utils/easyportrait/local_configs/__base__/schedules/schedule_20k_adamw.py
ADDED
@@ -0,0 +1,11 @@
# optimizer
optimizer = dict(type='AdamW', lr=0.0002, weight_decay=0.0001)
optimizer_config = dict()

# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=False)

# runtime settings
runner = dict(type='IterBasedRunner', max_iters=20000)
checkpoint_config = dict(by_epoch=False, interval=2000)
evaluation = dict(interval=2000, metric='mIoU')
data_utils/easyportrait/local_configs/__base__/schedules/schedule_40k_adamw.py
ADDED
@@ -0,0 +1,9 @@
# optimizer
optimizer = dict(type='AdamW', lr=0.0002, weight_decay=0.0001)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=False)
# runtime settings
runner = dict(type='IterBasedRunner', max_iters=40000)
checkpoint_config = dict(by_epoch=False, interval=4000)
evaluation = dict(interval=4000, metric='mIoU')
data_utils/easyportrait/local_configs/__base__/schedules/schedule_80k_adamw.py
ADDED
@@ -0,0 +1,9 @@
# optimizer
optimizer = dict(type='AdamW', lr=0.0002, weight_decay=0.0001)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=False)
# runtime settings
runner = dict(type='IterBasedRunner', max_iters=80000)
checkpoint_config = dict(by_epoch=False, interval=4000)
evaluation = dict(interval=4000, metric='mIoU')
data_utils/easyportrait/local_configs/easyportrait_experiments_v2/bisenet-fp/bisenetv2-fp.py
ADDED
@@ -0,0 +1,221 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='BiSeNetV2',
        detail_channels=(64, 64, 128),
        semantic_channels=(16, 32, 64, 128),
        semantic_expansion_ratio=6,
        bga_channels=128,
        out_indices=(0, 1, 2, 3, 4),
        init_cfg=None,
        align_corners=False),
    decode_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=0,
        channels=1024,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000)),
    auxiliary_head=[
        dict(
            type='FCNHead',
            in_channels=16,
            channels=16,
            num_convs=2,
            num_classes=8,
            in_index=1,
            norm_cfg=dict(type='SyncBN', requires_grad=True),
            concat_input=False,
            align_corners=False,
            sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='FCNHead',
            in_channels=32,
            channels=64,
            num_convs=2,
            num_classes=8,
            in_index=2,
            norm_cfg=dict(type='SyncBN', requires_grad=True),
            concat_input=False,
            align_corners=False,
            sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='FCNHead',
            in_channels=64,
            channels=256,
            num_convs=2,
            num_classes=8,
            in_index=3,
            norm_cfg=dict(type='SyncBN', requires_grad=True),
            concat_input=False,
            align_corners=False,
            sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='FCNHead',
            in_channels=128,
            channels=1024,
            num_convs=2,
            num_classes=8,
            in_index=4,
            norm_cfg=dict(type='SyncBN', requires_grad=True),
            concat_input=False,
            align_corners=False,
            sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
    ],
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
dataset_type = 'EasyPortraitFPDataset'
data_root = '/home/jovyan/datasets/wacv_24/'
img_norm_cfg = dict(
    mean=[143.55267075, 132.96705975, 126.94924335],
    std=[60.2625333, 60.32740275, 59.30988645],
    to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='RandomFlip', prob=0.0),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=16,
        contrast_range=(0.5, 1.0),
        saturation_range=(0.5, 1.0),
        hue_delta=5),
    dict(
        type='Normalize',
        mean=[143.55267075, 132.96705975, 126.94924335],
        std=[60.2625333, 60.32740275, 59.30988645],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(384, 384),
        flip=False,
        transforms=[
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    train=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/train',
        ann_dir='easyportrait_384/annotations_fp/train',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='RandomFlip', prob=0.0),
            dict(
                type='PhotoMetricDistortion',
                brightness_delta=16,
                contrast_range=(0.5, 1.0),
                saturation_range=(0.5, 1.0),
                hue_delta=5),
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/val',
        ann_dir='easyportrait_384/annotations_fp/val',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/test',
        ann_dir='easyportrait_384/annotations_fp/test',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    samples_per_gpu=32,
    workers_per_gpu=8)
log_config = dict(
    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(type='AdamW', lr=0.05, weight_decay=0.0001)
optimizer_config = dict()
lr_config = dict(
    policy='poly',
    power=0.9,
    min_lr=0.0,
    by_epoch=True,
    warmup='linear',
    warmup_iters=1000)
default_hooks = dict(stop=dict(type='EarlyStoppingHook', monitor='mIoU'))
runner = dict(type='EpochBasedRunner', max_epochs=100)
checkpoint_config = dict(by_epoch=True, interval=100)
evaluation = dict(interval=1, metric='mIoU', save_best='mIoU')
work_dir = 'work_dirs/petrova/bisenet-fp'
gpu_ids = [0]
auto_resume = False
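Unlike the base bisenetv2.py, this experiment config attaches an OHEMPixelSampler (thresh=0.7, min_kept=10000) to every head, so cross-entropy is computed only over hard pixels. (Note that decode_head.num_classes stays at 19 here, apparently the Cityscapes default, while the auxiliary heads and the 8-class face-parsing dataset use 8.) A sketch of the OHEM selection rule, illustrating the idea rather than mmseg's exact implementation:

import torch

def ohem_mask(seg_logit, seg_label, thresh=0.7, min_kept=10000):
    # Keep the hardest pixels: those whose predicted probability for the
    # ground-truth class falls below `thresh`, but never fewer than
    # `min_kept` pixels overall.
    with torch.no_grad():
        prob = seg_logit.softmax(dim=1)                       # (N, C, H, W)
        gt_prob = prob.gather(1, seg_label.unsqueeze(1)).squeeze(1)
        k = min(min_kept, gt_prob.numel())
        kth_prob = gt_prob.flatten().sort().values[k - 1]
        threshold = max(kth_prob.item(), thresh)
        return gt_prob < threshold                            # (N, H, W) bool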
data_utils/easyportrait/local_configs/easyportrait_experiments_v2/bisenet-ps/bisenetv2-ps.py
ADDED
@@ -0,0 +1,218 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='BiSeNetV2',
        detail_channels=(64, 64, 128),
        semantic_channels=(16, 32, 64, 128),
        semantic_expansion_ratio=6,
        bga_channels=128,
        out_indices=(0, 1, 2, 3, 4),
        init_cfg=None,
        align_corners=False),
    decode_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=0,
        channels=1024,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000)),
    auxiliary_head=[
        dict(
            type='FCNHead',
            in_channels=16,
            channels=16,
            num_convs=2,
            num_classes=2,
            in_index=1,
            norm_cfg=dict(type='SyncBN', requires_grad=True),
            concat_input=False,
            align_corners=False,
            sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='FCNHead',
            in_channels=32,
            channels=64,
            num_convs=2,
            num_classes=2,
            in_index=2,
            norm_cfg=dict(type='SyncBN', requires_grad=True),
            concat_input=False,
            align_corners=False,
            sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='FCNHead',
            in_channels=64,
            channels=256,
            num_convs=2,
            num_classes=2,
            in_index=3,
            norm_cfg=dict(type='SyncBN', requires_grad=True),
            concat_input=False,
            align_corners=False,
            sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='FCNHead',
            in_channels=128,
            channels=1024,
            num_convs=2,
            num_classes=2,
            in_index=4,
            norm_cfg=dict(type='SyncBN', requires_grad=True),
            concat_input=False,
            align_corners=False,
            sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
    ],
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
dataset_type = 'EasyPortraitPSDataset'
data_root = '/home/jovyan/datasets/wacv_24/'
img_norm_cfg = dict(
    mean=[143.55267075, 132.96705975, 126.94924335],
    std=[60.2625333, 60.32740275, 59.30988645],
    to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='RandomFlip', prob=0.0),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=16,
        contrast_range=(0.5, 1.0),
        saturation_range=(0.5, 1.0),
        hue_delta=5),
    dict(
        type='Normalize',
        mean=[143.55267075, 132.96705975, 126.94924335],
        std=[60.2625333, 60.32740275, 59.30988645],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(384, 384),
        flip=False,
        transforms=[
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    train=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/train',
        ann_dir='easyportrait_384/annotations_ps/train',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='RandomFlip', prob=0.0),
            dict(
                type='PhotoMetricDistortion',
                brightness_delta=16,
                contrast_range=(0.5, 1.0),
                saturation_range=(0.5, 1.0),
                hue_delta=5),
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/val',
        ann_dir='easyportrait_384/annotations_ps/val',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/test',
        ann_dir='easyportrait_384/annotations_ps/test',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    samples_per_gpu=32,
    workers_per_gpu=8)
log_config = dict(
    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(type='AdamW', lr=0.05, weight_decay=0.0001)
optimizer_config = dict()
lr_config = dict(
    policy='poly',
    power=0.9,
    min_lr=0.0,
    by_epoch=True,
    warmup='linear',
    warmup_iters=1000)
default_hooks = dict(stop=dict(type='EarlyStoppingHook', monitor='mIoU'))
runner = dict(type='EpochBasedRunner', max_epochs=100)
checkpoint_config = dict(by_epoch=True, interval=100)
evaluation = dict(interval=1, metric='mIoU', save_best='mIoU')
work_dir = 'work_dirs/petrova/bisenet-ps/'
gpu_ids = [0]
auto_resume = False
data_utils/easyportrait/local_configs/easyportrait_experiments_v2/danet-fp/danet-fp.py
ADDED
@@ -0,0 +1,174 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DAHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pam_channels=64,
        dropout_ratio=0.1,
        num_classes=8,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=8,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
dataset_type = 'EasyPortraitFPDataset'
data_root = '/home/jovyan/datasets/wacv_24/'
img_norm_cfg = dict(
    mean=[143.55267075, 132.96705975, 126.94924335],
    std=[60.2625333, 60.32740275, 59.30988645],
    to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='RandomFlip', prob=0.0),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=16,
        contrast_range=(0.5, 1.0),
        saturation_range=(0.5, 1.0),
        hue_delta=5),
    dict(
        type='Normalize',
        mean=[143.55267075, 132.96705975, 126.94924335],
        std=[60.2625333, 60.32740275, 59.30988645],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(384, 384),
        flip=False,
        transforms=[
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    train=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/train',
        ann_dir='easyportrait_384/annotations_fp/train',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='RandomFlip', prob=0.0),
            dict(
                type='PhotoMetricDistortion',
                brightness_delta=16,
                contrast_range=(0.5, 1.0),
                saturation_range=(0.5, 1.0),
                hue_delta=5),
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/val',
        ann_dir='easyportrait_384/annotations_fp/val',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/test',
        ann_dir='easyportrait_384/annotations_fp/test',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    samples_per_gpu=32,
    workers_per_gpu=8)
log_config = dict(
    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(type='AdamW', lr=0.0002, weight_decay=0.0001)
optimizer_config = dict()
lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=True)
default_hooks = dict(stop=dict(type='EarlyStoppingHook', monitor='mIoU'))
runner = dict(type='EpochBasedRunner', max_epochs=100)
checkpoint_config = dict(by_epoch=True, interval=100)
evaluation = dict(interval=1, metric='mIoU', save_best='mIoU')
work_dir = 'work_dirs/petrova/danet-fp'
gpu_ids = [0]
auto_resume = False
data_utils/easyportrait/local_configs/easyportrait_experiments_v2/danet-ps/danet-ps.py
ADDED
@@ -0,0 +1,171 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DAHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pam_channels=64,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
dataset_type = 'EasyPortraitPSDataset'
data_root = '/home/jovyan/datasets/wacv_24/'
img_norm_cfg = dict(
    mean=[143.55267075, 132.96705975, 126.94924335],
    std=[60.2625333, 60.32740275, 59.30988645],
    to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='RandomFlip', prob=0.0),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=16,
        contrast_range=(0.5, 1.0),
        saturation_range=(0.5, 1.0),
        hue_delta=5),
    dict(
        type='Normalize',
        mean=[143.55267075, 132.96705975, 126.94924335],
        std=[60.2625333, 60.32740275, 59.30988645],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(384, 384),
        flip=False,
        transforms=[
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    train=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/train',
        ann_dir='easyportrait_384/annotations_ps/train',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='RandomFlip', prob=0.0),
            dict(
                type='PhotoMetricDistortion',
                brightness_delta=16,
                contrast_range=(0.5, 1.0),
                saturation_range=(0.5, 1.0),
                hue_delta=5),
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/val',
        ann_dir='easyportrait_384/annotations_ps/val',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/test',
        ann_dir='easyportrait_384/annotations_ps/test',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    samples_per_gpu=32,
    workers_per_gpu=8)
log_config = dict(
    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(type='AdamW', lr=0.0002, weight_decay=0.0001)
optimizer_config = dict()
lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=True)
default_hooks = dict(stop=dict(type='EarlyStoppingHook', monitor='mIoU'))
runner = dict(type='EpochBasedRunner', max_epochs=100)
checkpoint_config = dict(by_epoch=True, interval=100)
evaluation = dict(interval=1, metric='mIoU', save_best='mIoU')
work_dir = 'work_dirs/petrova/danet-ps'
gpu_ids = [0]
auto_resume = False
data_utils/easyportrait/local_configs/easyportrait_experiments_v2/deeplab-fp/deeplabv3-fp.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
| 2 |
+
model = dict(
|
| 3 |
+
type='EncoderDecoder',
|
| 4 |
+
pretrained='open-mmlab://resnet50_v1c',
|
| 5 |
+
backbone=dict(
|
| 6 |
+
type='ResNetV1c',
|
| 7 |
+
depth=50,
|
| 8 |
+
num_stages=4,
|
| 9 |
+
out_indices=(0, 1, 2, 3),
|
| 10 |
+
dilations=(1, 1, 2, 4),
|
| 11 |
+
strides=(1, 2, 1, 1),
|
| 12 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 13 |
+
norm_eval=False,
|
| 14 |
+
style='pytorch',
|
| 15 |
+
contract_dilation=True),
|
| 16 |
+
decode_head=dict(
|
| 17 |
+
type='ASPPHead',
|
| 18 |
+
in_channels=2048,
|
| 19 |
+
in_index=3,
|
| 20 |
+
channels=512,
|
| 21 |
+
dilations=(1, 12, 24, 36),
|
| 22 |
+
dropout_ratio=0.1,
|
| 23 |
+
num_classes=8,
|
| 24 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 25 |
+
align_corners=False,
|
| 26 |
+
loss_decode=dict(
|
| 27 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
| 28 |
+
auxiliary_head=dict(
|
| 29 |
+
type='FCNHead',
|
| 30 |
+
in_channels=1024,
|
| 31 |
+
in_index=2,
|
| 32 |
+
channels=256,
|
| 33 |
+
num_convs=1,
|
| 34 |
+
concat_input=False,
|
| 35 |
+
dropout_ratio=0.1,
|
| 36 |
+
num_classes=8,
|
| 37 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 38 |
+
align_corners=False,
|
| 39 |
+
loss_decode=dict(
|
| 40 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
| 41 |
+
train_cfg=dict(),
|
| 42 |
+
test_cfg=dict(mode='whole'))
|
| 43 |
+
dataset_type = 'EasyPortraitFPDataset'
|
| 44 |
+
data_root = '/home/jovyan/datasets/wacv_24/'
|
| 45 |
+
img_norm_cfg = dict(
|
| 46 |
+
mean=[143.55267075, 132.96705975, 126.94924335],
|
| 47 |
+
std=[60.2625333, 60.32740275, 59.30988645],
|
| 48 |
+
to_rgb=True)
|
| 49 |
+
train_pipeline = [
|
| 50 |
+
dict(type='LoadImageFromFile'),
|
| 51 |
+
dict(type='LoadAnnotations'),
|
| 52 |
+
dict(type='RandomFlip', prob=0.0),
|
| 53 |
+
dict(
|
| 54 |
+
type='PhotoMetricDistortion',
|
| 55 |
+
brightness_delta=16,
|
| 56 |
+
contrast_range=(0.5, 1.0),
|
| 57 |
+
saturation_range=(0.5, 1.0),
|
| 58 |
+
hue_delta=5),
|
| 59 |
+
dict(
|
| 60 |
+
type='Normalize',
|
| 61 |
+
mean=[143.55267075, 132.96705975, 126.94924335],
|
| 62 |
+
std=[60.2625333, 60.32740275, 59.30988645],
|
| 63 |
+
to_rgb=True),
|
| 64 |
+
dict(type='DefaultFormatBundle'),
|
| 65 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 66 |
+
]
|
| 67 |
+
test_pipeline = [
|
| 68 |
+
dict(type='LoadImageFromFile'),
|
| 69 |
+
dict(
|
| 70 |
+
type='MultiScaleFlipAug',
|
| 71 |
+
img_scale=(384, 384),
|
| 72 |
+
flip=False,
|
| 73 |
+
transforms=[
|
| 74 |
+
dict(
|
| 75 |
+
type='Normalize',
|
| 76 |
+
mean=[143.55267075, 132.96705975, 126.94924335],
|
| 77 |
+
std=[60.2625333, 60.32740275, 59.30988645],
|
| 78 |
+
to_rgb=True),
|
| 79 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 80 |
+
dict(type='Collect', keys=['img'])
|
| 81 |
+
])
|
| 82 |
+
]
|
| 83 |
+
data = dict(
|
| 84 |
+
train=dict(
|
| 85 |
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/train',
        ann_dir='easyportrait_384/annotations_fp/train',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='RandomFlip', prob=0.0),
            dict(
                type='PhotoMetricDistortion',
                brightness_delta=16,
                contrast_range=(0.5, 1.0),
                saturation_range=(0.5, 1.0),
                hue_delta=5),
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/val',
        ann_dir='easyportrait_384/annotations_fp/val',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/test',
        ann_dir='easyportrait_384/annotations_fp/test',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    samples_per_gpu=32,
    workers_per_gpu=8)
log_config = dict(
    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(type='AdamW', lr=0.0002, weight_decay=0.0001)
optimizer_config = dict()
lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=True)
default_hooks = dict(stop=dict(type='EarlyStoppingHook', monitor='mIoU'))
runner = dict(type='EpochBasedRunner', max_epochs=100)
checkpoint_config = dict(by_epoch=True, interval=100)
evaluation = dict(interval=1, metric='mIoU', save_best='mIoU')
work_dir = 'work_dirs/petrova/deeplabv3-fp'
gpu_ids = [0]
auto_resume = False
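Note: the configs in this listing are self-contained mmsegmentation (0.x-style) dumps, so they can be consumed directly by the mmseg Python APIs. A minimal inference sketch, assuming a trained checkpoint exists; the checkpoint and image paths are hypothetical placeholders:

from mmseg.apis import inference_segmentor, init_segmentor

config_file = 'data_utils/easyportrait/local_configs/easyportrait_experiments_v2/deeplab-fp/deeplabv3-fp.py'
checkpoint_file = 'work_dirs/petrova/deeplabv3-fp/best_mIoU.pth'  # hypothetical path
model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
result = inference_segmentor(model, 'portrait.jpg')  # list with one HxW label map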
data_utils/easyportrait/local_configs/easyportrait_experiments_v2/deeplab-ps/deeplabv3-ps.py
ADDED
@@ -0,0 +1,171 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='ASPPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dilations=(1, 12, 24, 36),
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
dataset_type = 'EasyPortraitPSDataset'
data_root = '/home/jovyan/datasets/wacv_24/'
img_norm_cfg = dict(
    mean=[143.55267075, 132.96705975, 126.94924335],
    std=[60.2625333, 60.32740275, 59.30988645],
    to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='RandomFlip', prob=0.0),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=16,
        contrast_range=(0.5, 1.0),
        saturation_range=(0.5, 1.0),
        hue_delta=5),
    dict(
        type='Normalize',
        mean=[143.55267075, 132.96705975, 126.94924335],
        std=[60.2625333, 60.32740275, 59.30988645],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(384, 384),
        flip=False,
        transforms=[
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    train=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/train',
        ann_dir='easyportrait_384/annotations_ps/train',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='RandomFlip', prob=0.0),
            dict(
                type='PhotoMetricDistortion',
                brightness_delta=16,
                contrast_range=(0.5, 1.0),
                saturation_range=(0.5, 1.0),
                hue_delta=5),
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/val',
        ann_dir='easyportrait_384/annotations_ps/val',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/test',
        ann_dir='easyportrait_384/annotations_ps/test',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    samples_per_gpu=32,
    workers_per_gpu=8)
log_config = dict(
    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(type='AdamW', lr=0.0002, weight_decay=0.0001)
optimizer_config = dict()
lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=True)
default_hooks = dict(stop=dict(type='EarlyStoppingHook', monitor='mIoU'))
runner = dict(type='EpochBasedRunner', max_epochs=100)
checkpoint_config = dict(by_epoch=True, interval=100)
evaluation = dict(interval=1, metric='mIoU', save_best='mIoU')
work_dir = 'work_dirs/petrova/deeplabv3-ps'
gpu_ids = [0]
auto_resume = False
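For reference, the Normalize step used in every pipeline above standardizes each channel with the dataset statistics from img_norm_cfg. A sketch of the arithmetic, assuming mmcv's convention that decoded images arrive as BGR and to_rgb=True swaps channels first:

import numpy as np

MEAN = np.array([143.55267075, 132.96705975, 126.94924335])
STD = np.array([60.2625333, 60.32740275, 59.30988645])

def normalize(img_bgr):
    img = img_bgr[..., ::-1].astype(np.float64)  # BGR -> RGB (to_rgb=True)
    return (img - MEAN) / STD                    # per-channel standardization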
data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fastscnn-fp/fastscnn-fp.py
ADDED
@@ -0,0 +1,165 @@
norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='FastSCNN',
        downsample_dw_channels=(32, 48),
        global_in_channels=64,
        global_block_channels=(64, 96, 128),
        global_block_strides=(2, 2, 1),
        global_out_channels=128,
        higher_in_channels=64,
        lower_in_channels=128,
        fusion_out_channels=128,
        out_indices=(0, 1, 2),
        norm_cfg=dict(type='SyncBN', requires_grad=True, momentum=0.01),
        align_corners=False),
    decode_head=dict(
        type='DepthwiseSeparableFCNHead',
        in_channels=128,
        channels=128,
        concat_input=False,
        num_classes=8,
        in_index=-1,
        norm_cfg=dict(type='SyncBN', requires_grad=True, momentum=0.01),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)),
    auxiliary_head=[
        dict(type='FCNHead', in_channels=128, channels=32, num_classes=8),
        dict(type='FCNHead', in_channels=128, channels=32, num_classes=8)
    ],
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
dataset_type = 'EasyPortraitFPDataset'
data_root = '/home/jovyan/datasets/wacv_24/'
img_norm_cfg = dict(
    mean=[143.55267075, 132.96705975, 126.94924335],
    std=[60.2625333, 60.32740275, 59.30988645],
    to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='RandomFlip', prob=0.0),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=16,
        contrast_range=(0.5, 1.0),
        saturation_range=(0.5, 1.0),
        hue_delta=5),
    dict(
        type='Normalize',
        mean=[143.55267075, 132.96705975, 126.94924335],
        std=[60.2625333, 60.32740275, 59.30988645],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(384, 384),
        flip=False,
        transforms=[
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    train=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/train',
        ann_dir='easyportrait_384/annotations_fp/train',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='RandomFlip', prob=0.0),
            dict(
                type='PhotoMetricDistortion',
                brightness_delta=16,
                contrast_range=(0.5, 1.0),
                saturation_range=(0.5, 1.0),
                hue_delta=5),
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/val',
        ann_dir='easyportrait_384/annotations_fp/val',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/test',
        ann_dir='easyportrait_384/annotations_fp/test',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    samples_per_gpu=32,
    workers_per_gpu=8)
log_config = dict(
    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(type='SGD', lr=0.12, weight_decay=4e-05, momentum=0.9)
optimizer_config = dict()
lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=True)
default_hooks = dict(stop=dict(type='EarlyStoppingHook', monitor='mIoU'))
runner = dict(type='EpochBasedRunner', max_epochs=100)
checkpoint_config = dict(by_epoch=True, interval=100)
evaluation = dict(interval=1, metric='mIoU', save_best='mIoU')
work_dir = 'work_dirs/petrova/fast_scnn-fp'
gpu_ids = [0]
auto_resume = False
data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fastscnn-ps/fastscnn-ps.py
ADDED
@@ -0,0 +1,162 @@
norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='FastSCNN',
        downsample_dw_channels=(32, 48),
        global_in_channels=64,
        global_block_channels=(64, 96, 128),
        global_block_strides=(2, 2, 1),
        global_out_channels=128,
        higher_in_channels=64,
        lower_in_channels=128,
        fusion_out_channels=128,
        out_indices=(0, 1, 2),
        norm_cfg=dict(type='SyncBN', requires_grad=True, momentum=0.01),
        align_corners=False),
    decode_head=dict(
        type='DepthwiseSeparableFCNHead',
        in_channels=128,
        channels=128,
        concat_input=False,
        num_classes=2,
        in_index=-1,
        norm_cfg=dict(type='SyncBN', requires_grad=True, momentum=0.01),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)),
    auxiliary_head=[
        dict(type='FCNHead', in_channels=128, channels=32, num_classes=2),
        dict(type='FCNHead', in_channels=128, channels=32, num_classes=2)
    ],
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
dataset_type = 'EasyPortraitPSDataset'
data_root = '/home/jovyan/datasets/wacv_24/'
img_norm_cfg = dict(
    mean=[143.55267075, 132.96705975, 126.94924335],
    std=[60.2625333, 60.32740275, 59.30988645],
    to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='RandomFlip', prob=0.0),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=16,
        contrast_range=(0.5, 1.0),
        saturation_range=(0.5, 1.0),
        hue_delta=5),
    dict(
        type='Normalize',
        mean=[143.55267075, 132.96705975, 126.94924335],
        std=[60.2625333, 60.32740275, 59.30988645],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(384, 384),
        flip=False,
        transforms=[
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    train=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/train',
        ann_dir='easyportrait_384/annotations_ps/train',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='RandomFlip', prob=0.0),
            dict(
                type='PhotoMetricDistortion',
                brightness_delta=16,
                contrast_range=(0.5, 1.0),
                saturation_range=(0.5, 1.0),
                hue_delta=5),
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/val',
        ann_dir='easyportrait_384/annotations_ps/val',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/test',
        ann_dir='easyportrait_384/annotations_ps/test',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    samples_per_gpu=32,
    workers_per_gpu=8)
log_config = dict(
    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(type='SGD', lr=0.12, weight_decay=4e-05, momentum=0.9)
optimizer_config = dict()
lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=True)
default_hooks = dict(stop=dict(type='EarlyStoppingHook', monitor='mIoU'))
runner = dict(type='EpochBasedRunner', max_epochs=100)
checkpoint_config = dict(by_epoch=True, interval=100)
evaluation = dict(interval=1, metric='mIoU', save_best='mIoU')
work_dir = 'work_dirs/petrova/fast_scnn-ps'
gpu_ids = [0]
auto_resume = False
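These files follow the dumped-config pattern consumed by mmseg's tools/train.py. A minimal single-GPU training sketch under that assumption (the training loop, work_dir, and gpu_ids all come from the config itself):

from mmcv import Config
from mmseg.apis import train_segmentor
from mmseg.datasets import build_dataset
from mmseg.models import build_segmentor

cfg = Config.fromfile(
    'data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fastscnn-ps/fastscnn-ps.py')
model = build_segmentor(
    cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
datasets = [build_dataset(cfg.data.train)]  # train split only; val is handled by the eval hook
train_segmentor(model, datasets, cfg, distributed=False, validate=True)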
data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fcn-fp/fcn-fp.py
ADDED
@@ -0,0 +1,187 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='mmcls://mobilenet_v2',
    backbone=dict(
        type='MobileNetV2',
        widen_factor=1.0,
        strides=(1, 2, 2, 1, 1, 1, 1),
        dilations=(1, 1, 1, 2, 2, 4, 4),
        out_indices=(1, 2, 4, 6),
        norm_cfg=dict(type='SyncBN', requires_grad=True)),
    decode_head=dict(
        type='FCNHead',
        in_channels=320,
        in_index=3,
        channels=512,
        num_convs=2,
        concat_input=True,
        dropout_ratio=0.1,
        num_classes=8,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=96,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=8,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
dataset_type = 'EasyPortraitFPDataset'
data_root = '/home/jovyan/datasets/wacv_24/'
img_norm_cfg = dict(
    mean=[143.55267075, 132.96705975, 126.94924335],
    std=[60.2625333, 60.32740275, 59.30988645],
    to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='RandomFlip', prob=0.0),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=16,
        contrast_range=(0.5, 1.0),
        saturation_range=(0.5, 1.0),
        hue_delta=5),
    dict(
        type='Normalize',
        mean=[143.55267075, 132.96705975, 126.94924335],
        std=[60.2625333, 60.32740275, 59.30988645],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(384, 384),
        flip=False,
        transforms=[
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    train=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/train',
        ann_dir='easyportrait_384/annotations_fp/train',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='RandomFlip', prob=0.0),
            dict(
                type='PhotoMetricDistortion',
                brightness_delta=16,
                contrast_range=(0.5, 1.0),
                saturation_range=(0.5, 1.0),
                hue_delta=5),
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/val',
        ann_dir='easyportrait_384/annotations_fp/val',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/test',
        ann_dir='easyportrait_384/annotations_fp/test',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    samples_per_gpu=32,
    workers_per_gpu=8)
log_config = dict(
    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(
    type='AdamW',
    lr=6e-05,
    betas=(0.9, 0.999),
    weight_decay=0.01,
    paramwise_cfg=dict(
        custom_keys=dict(
            pos_block=dict(decay_mult=0.0),
            norm=dict(decay_mult=0.0),
            head=dict(lr_mult=10.0))))
optimizer_config = dict()
lr_config = dict(
    policy='poly',
    warmup='linear',
    warmup_iters=1500,
    warmup_ratio=1e-06,
    power=1.0,
    min_lr=0.0,
    by_epoch=False)
default_hooks = dict(stop=dict(type='EarlyStoppingHook', monitor='mIoU'))
runner = dict(type='EpochBasedRunner', max_epochs=100)
checkpoint_config = dict(by_epoch=True, interval=100)
evaluation = dict(interval=1, metric='mIoU', save_best='mIoU')
work_dir = 'work_dirs/petrova/fcn-fp'
gpu_ids = [0]
auto_resume = False
data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fcn-ps/fcn-ps.py
ADDED
@@ -0,0 +1,184 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='mmcls://mobilenet_v2',
    backbone=dict(
        type='MobileNetV2',
        widen_factor=1.0,
        strides=(1, 2, 2, 1, 1, 1, 1),
        dilations=(1, 1, 1, 2, 2, 4, 4),
        out_indices=(1, 2, 4, 6),
        norm_cfg=dict(type='SyncBN', requires_grad=True)),
    decode_head=dict(
        type='FCNHead',
        in_channels=320,
        in_index=3,
        channels=512,
        num_convs=2,
        concat_input=True,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=96,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
dataset_type = 'EasyPortraitPSDataset'
data_root = '/home/jovyan/datasets/wacv_24/'
img_norm_cfg = dict(
    mean=[143.55267075, 132.96705975, 126.94924335],
    std=[60.2625333, 60.32740275, 59.30988645],
    to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='RandomFlip', prob=0.0),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=16,
        contrast_range=(0.5, 1.0),
        saturation_range=(0.5, 1.0),
        hue_delta=5),
    dict(
        type='Normalize',
        mean=[143.55267075, 132.96705975, 126.94924335],
        std=[60.2625333, 60.32740275, 59.30988645],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(384, 384),
        flip=False,
        transforms=[
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    train=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/train',
        ann_dir='easyportrait_384/annotations_ps/train',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='RandomFlip', prob=0.0),
            dict(
                type='PhotoMetricDistortion',
                brightness_delta=16,
                contrast_range=(0.5, 1.0),
                saturation_range=(0.5, 1.0),
                hue_delta=5),
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/val',
        ann_dir='easyportrait_384/annotations_ps/val',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/test',
        ann_dir='easyportrait_384/annotations_ps/test',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    samples_per_gpu=32,
    workers_per_gpu=8)
log_config = dict(
    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(
    type='AdamW',
    lr=6e-05,
    betas=(0.9, 0.999),
    weight_decay=0.01,
    paramwise_cfg=dict(
        custom_keys=dict(
            pos_block=dict(decay_mult=0.0),
            norm=dict(decay_mult=0.0),
            head=dict(lr_mult=10.0))))
optimizer_config = dict()
lr_config = dict(
    policy='poly',
    warmup='linear',
    warmup_iters=1500,
    warmup_ratio=1e-06,
    power=1.0,
    min_lr=0.0,
    by_epoch=False)
default_hooks = dict(stop=dict(type='EarlyStoppingHook', monitor='mIoU'))
runner = dict(type='EpochBasedRunner', max_epochs=100)
checkpoint_config = dict(by_epoch=True, interval=100)
evaluation = dict(interval=1, metric='mIoU', save_best='mIoU')
work_dir = 'work_dirs/petrova/fcn-ps'
gpu_ids = [0]
auto_resume = False
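The absolute data_root ('/home/jovyan/datasets/wacv_24/') is specific to the machine these configs were dumped on; rather than editing each file, it can be overridden after loading with mmcv's Config. A sketch, where the replacement paths are hypothetical:

from mmcv import Config

cfg = Config.fromfile(
    'data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fcn-ps/fcn-ps.py')
cfg.data_root = '/data/easyportrait/'           # hypothetical local path
for split in ('train', 'val', 'test'):
    cfg.data[split].data_root = cfg.data_root   # each split dict holds its own copy
cfg.work_dir = 'work_dirs/fcn-ps-local'         # hypothetical output directory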
data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fpn-fp/fpn-fp.py
ADDED
@@ -0,0 +1,182 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=4),
    decode_head=dict(
        type='FPNHead',
        in_channels=[256, 256, 256, 256],
        in_index=[0, 1, 2, 3],
        feature_strides=[4, 8, 16, 32],
        channels=128,
        dropout_ratio=0.1,
        num_classes=8,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
dataset_type = 'EasyPortraitFPDataset'
data_root = '/home/jovyan/datasets/wacv_24/'
img_norm_cfg = dict(
    mean=[143.55267075, 132.96705975, 126.94924335],
    std=[60.2625333, 60.32740275, 59.30988645],
    to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='RandomFlip', prob=0.0),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=16,
        contrast_range=(0.5, 1.0),
        saturation_range=(0.5, 1.0),
        hue_delta=5),
    dict(
        type='Normalize',
        mean=[143.55267075, 132.96705975, 126.94924335],
        std=[60.2625333, 60.32740275, 59.30988645],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(384, 384),
        flip=False,
        transforms=[
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    train=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/train',
        ann_dir='easyportrait_384/annotations_fp/train',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='RandomFlip', prob=0.0),
            dict(
                type='PhotoMetricDistortion',
                brightness_delta=16,
                contrast_range=(0.5, 1.0),
                saturation_range=(0.5, 1.0),
                hue_delta=5),
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/val',
        ann_dir='easyportrait_384/annotations_fp/val',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='EasyPortraitFPDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
                 'right eye', 'lips', 'teeth'),
        img_dir='easyportrait_384/images/test',
        ann_dir='easyportrait_384/annotations_fp/test',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    samples_per_gpu=32,
    workers_per_gpu=8)
log_config = dict(
    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(
    type='AdamW',
    lr=6e-05,
    betas=(0.9, 0.999),
    weight_decay=0.01,
    paramwise_cfg=dict(
        custom_keys=dict(
            pos_block=dict(decay_mult=0.0),
            norm=dict(decay_mult=0.0),
            head=dict(lr_mult=10.0))))
optimizer_config = dict()
lr_config = dict(
    policy='poly',
    warmup='linear',
    warmup_iters=1500,
    warmup_ratio=1e-06,
    power=1.0,
    min_lr=0.0,
    by_epoch=False)
default_hooks = dict(stop=dict(type='EarlyStoppingHook', monitor='mIoU'))
runner = dict(type='EpochBasedRunner', max_epochs=100)
checkpoint_config = dict(by_epoch=True, interval=100)
evaluation = dict(interval=1, metric='mIoU', save_best='mIoU')
work_dir = 'work_dirs/petrova/fpn-fp'
gpu_ids = [0]
auto_resume = False
data_utils/easyportrait/local_configs/easyportrait_experiments_v2/fpn-ps/fpn-ps.py
ADDED
@@ -0,0 +1,179 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=4),
    decode_head=dict(
        type='FPNHead',
        in_channels=[256, 256, 256, 256],
        in_index=[0, 1, 2, 3],
        feature_strides=[4, 8, 16, 32],
        channels=128,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
dataset_type = 'EasyPortraitPSDataset'
data_root = '/home/jovyan/datasets/wacv_24/'
img_norm_cfg = dict(
    mean=[143.55267075, 132.96705975, 126.94924335],
    std=[60.2625333, 60.32740275, 59.30988645],
    to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='RandomFlip', prob=0.0),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=16,
        contrast_range=(0.5, 1.0),
        saturation_range=(0.5, 1.0),
        hue_delta=5),
    dict(
        type='Normalize',
        mean=[143.55267075, 132.96705975, 126.94924335],
        std=[60.2625333, 60.32740275, 59.30988645],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(384, 384),
        flip=False,
        transforms=[
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    train=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/train',
        ann_dir='easyportrait_384/annotations_ps/train',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='RandomFlip', prob=0.0),
            dict(
                type='PhotoMetricDistortion',
                brightness_delta=16,
                contrast_range=(0.5, 1.0),
                saturation_range=(0.5, 1.0),
                hue_delta=5),
            dict(
                type='Normalize',
                mean=[143.55267075, 132.96705975, 126.94924335],
                std=[60.2625333, 60.32740275, 59.30988645],
                to_rgb=True),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/val',
        ann_dir='easyportrait_384/annotations_ps/val',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='EasyPortraitPSDataset',
        data_root='/home/jovyan/datasets/wacv_24/',
        classes=('background', 'person'),
        img_dir='easyportrait_384/images/test',
        ann_dir='easyportrait_384/annotations_ps/test',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(384, 384),
                flip=False,
                transforms=[
                    dict(
                        type='Normalize',
                        mean=[143.55267075, 132.96705975, 126.94924335],
                        std=[60.2625333, 60.32740275, 59.30988645],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    samples_per_gpu=32,
    workers_per_gpu=8)
log_config = dict(
    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(
    type='AdamW',
    lr=6e-05,
    betas=(0.9, 0.999),
    weight_decay=0.01,
    paramwise_cfg=dict(
        custom_keys=dict(
            pos_block=dict(decay_mult=0.0),
            norm=dict(decay_mult=0.0),
            head=dict(lr_mult=10.0))))
optimizer_config = dict()
lr_config = dict(
    policy='poly',
    warmup='linear',
    warmup_iters=1500,
    warmup_ratio=1e-06,
    power=1.0,
    min_lr=0.0,
    by_epoch=False)
default_hooks = dict(stop=dict(type='EarlyStoppingHook', monitor='mIoU'))
runner = dict(type='EpochBasedRunner', max_epochs=100)
checkpoint_config = dict(by_epoch=True, interval=100)
evaluation = dict(interval=1, metric='mIoU', save_best='mIoU')
work_dir = 'work_dirs/petrova/fpn-ps'
gpu_ids = [0]
auto_resume = False
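All of these configs select evaluation = dict(metric='mIoU', ...): per-class intersection-over-union averaged across classes. A self-contained sketch of that computation on a toy 2-class (background/person) confusion matrix:

import numpy as np

def mean_iou(conf):
    inter = np.diag(conf).astype(np.float64)            # per-class intersection
    union = conf.sum(axis=0) + conf.sum(axis=1) - inter  # per-class union
    return float(np.nanmean(inter / union))

conf = np.array([[900, 50],   # rows: ground truth, cols: prediction
                 [30, 20]])
print(mean_iou(conf))  # ~0.559 on this toy matrix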
data_utils/easyportrait/local_configs/easyportrait_experiments_v2/segformer-fp/segformer-fp.py
ADDED
@@ -0,0 +1,182 @@
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained=
+    'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth',
+    backbone=dict(
+        type='MixVisionTransformer',
+        in_channels=3,
+        embed_dims=32,
+        num_stages=4,
+        num_layers=[2, 2, 2, 2],
+        num_heads=[1, 2, 5, 8],
+        patch_sizes=[7, 3, 3, 3],
+        sr_ratios=[8, 4, 2, 1],
+        out_indices=(0, 1, 2, 3),
+        mlp_ratio=4,
+        qkv_bias=True,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.1),
+    decode_head=dict(
+        type='SegformerHead',
+        in_channels=[32, 64, 160, 256],
+        in_index=[0, 1, 2, 3],
+        channels=256,
+        dropout_ratio=0.1,
+        num_classes=8,
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+dataset_type = 'EasyPortraitFPDataset'
+data_root = '/home/jovyan/datasets/wacv_24/'
+img_norm_cfg = dict(
+    mean=[143.55267075, 132.96705975, 126.94924335],
+    std=[60.2625333, 60.32740275, 59.30988645],
+    to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='RandomFlip', prob=0.0),
+    dict(
+        type='PhotoMetricDistortion',
+        brightness_delta=16,
+        contrast_range=(0.5, 1.0),
+        saturation_range=(0.5, 1.0),
+        hue_delta=5),
+    dict(
+        type='Normalize',
+        mean=[143.55267075, 132.96705975, 126.94924335],
+        std=[60.2625333, 60.32740275, 59.30988645],
+        to_rgb=True),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(384, 384),
+        flip=False,
+        transforms=[
+            dict(
+                type='Normalize',
+                mean=[143.55267075, 132.96705975, 126.94924335],
+                std=[60.2625333, 60.32740275, 59.30988645],
+                to_rgb=True),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img'])
+        ])
+]
+data = dict(
+    train=dict(
+        type='EasyPortraitFPDataset',
+        data_root='/home/jovyan/datasets/wacv_24/',
+        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
+                 'right eye', 'lips', 'teeth'),
+        img_dir='easyportrait_384/images/train',
+        ann_dir='easyportrait_384/annotations_fp/train',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(type='LoadAnnotations'),
+            dict(type='RandomFlip', prob=0.0),
+            dict(
+                type='PhotoMetricDistortion',
+                brightness_delta=16,
+                contrast_range=(0.5, 1.0),
+                saturation_range=(0.5, 1.0),
+                hue_delta=5),
+            dict(
+                type='Normalize',
+                mean=[143.55267075, 132.96705975, 126.94924335],
+                std=[60.2625333, 60.32740275, 59.30988645],
+                to_rgb=True),
+            dict(type='DefaultFormatBundle'),
+            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+        ]),
+    val=dict(
+        type='EasyPortraitFPDataset',
+        data_root='/home/jovyan/datasets/wacv_24/',
+        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
+                 'right eye', 'lips', 'teeth'),
+        img_dir='easyportrait_384/images/val',
+        ann_dir='easyportrait_384/annotations_fp/val',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(
+                type='MultiScaleFlipAug',
+                img_scale=(384, 384),
+                flip=False,
+                transforms=[
+                    dict(
+                        type='Normalize',
+                        mean=[143.55267075, 132.96705975, 126.94924335],
+                        std=[60.2625333, 60.32740275, 59.30988645],
+                        to_rgb=True),
+                    dict(type='ImageToTensor', keys=['img']),
+                    dict(type='Collect', keys=['img'])
+                ])
+        ]),
+    test=dict(
+        type='EasyPortraitFPDataset',
+        data_root='/home/jovyan/datasets/wacv_24/',
+        classes=('background', 'skin', 'left brow', 'right brow', 'left eye',
+                 'right eye', 'lips', 'teeth'),
+        img_dir='easyportrait_384/images/test',
+        ann_dir='easyportrait_384/annotations_fp/test',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(
+                type='MultiScaleFlipAug',
+                img_scale=(384, 384),
+                flip=False,
+                transforms=[
+                    dict(
+                        type='Normalize',
+                        mean=[143.55267075, 132.96705975, 126.94924335],
+                        std=[60.2625333, 60.32740275, 59.30988645],
+                        to_rgb=True),
+                    dict(type='ImageToTensor', keys=['img']),
+                    dict(type='Collect', keys=['img'])
+                ])
+        ]),
+    samples_per_gpu=32,
+    workers_per_gpu=8)
+log_config = dict(
+    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+cudnn_benchmark = True
+optimizer = dict(
+    type='AdamW',
+    lr=6e-05,
+    betas=(0.9, 0.999),
+    weight_decay=0.01,
+    paramwise_cfg=dict(
+        custom_keys=dict(
+            pos_block=dict(decay_mult=0.0),
+            norm=dict(decay_mult=0.0),
+            head=dict(lr_mult=10.0))))
+optimizer_config = dict()
+lr_config = dict(
+    policy='poly',
+    warmup='linear',
+    warmup_iters=1500,
+    warmup_ratio=1e-06,
+    power=1.0,
+    min_lr=0.0,
+    by_epoch=False)
+default_hooks = dict(stop=dict(type='EarlyStoppingHook', monitor='mIoU'))
+runner = dict(type='EpochBasedRunner', max_epochs=100)
+checkpoint_config = dict(by_epoch=True, interval=100)
+evaluation = dict(interval=1, metric='mIoU', save_best='mIoU')
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth'
+work_dir = 'work_dirs/petrova/segformer-fp'
+gpu_ids = [0]
+auto_resume = False
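
As a quick sanity check, a dumped config like the one above loads directly with mmcv; a minimal sketch (the overrides are illustrative — note that data_root is repeated inside each split, so changing the top-level variable alone does not rewire the datasets):

import mmcv

cfg = mmcv.Config.fromfile(
    'data_utils/easyportrait/local_configs/easyportrait_experiments_v2/'
    'segformer-fp/segformer-fp.py')
for split in ('train', 'val', 'test'):
    cfg.data[split].data_root = 'data/easyportrait/'  # hypothetical local path
cfg.data.samples_per_gpu = 8        # shrink the batch for smaller GPUs
cfg.work_dir = 'work_dirs/segformer-fp-debug'
print(cfg.model.decode_head.num_classes)  # -> 8 face-parsing classes
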
data_utils/easyportrait/local_configs/easyportrait_experiments_v2/segformer-ps/segformer-ps.py
ADDED
@@ -0,0 +1,179 @@
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained=
+    'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth',
+    backbone=dict(
+        type='MixVisionTransformer',
+        in_channels=3,
+        embed_dims=32,
+        num_stages=4,
+        num_layers=[2, 2, 2, 2],
+        num_heads=[1, 2, 5, 8],
+        patch_sizes=[7, 3, 3, 3],
+        sr_ratios=[8, 4, 2, 1],
+        out_indices=(0, 1, 2, 3),
+        mlp_ratio=4,
+        qkv_bias=True,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.1),
+    decode_head=dict(
+        type='SegformerHead',
+        in_channels=[32, 64, 160, 256],
+        in_index=[0, 1, 2, 3],
+        channels=256,
+        dropout_ratio=0.1,
+        num_classes=2,
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+dataset_type = 'EasyPortraitPSDataset'
+data_root = '/home/jovyan/datasets/wacv_24/'
+img_norm_cfg = dict(
+    mean=[143.55267075, 132.96705975, 126.94924335],
+    std=[60.2625333, 60.32740275, 59.30988645],
+    to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='RandomFlip', prob=0.0),
+    dict(
+        type='PhotoMetricDistortion',
+        brightness_delta=16,
+        contrast_range=(0.5, 1.0),
+        saturation_range=(0.5, 1.0),
+        hue_delta=5),
+    dict(
+        type='Normalize',
+        mean=[143.55267075, 132.96705975, 126.94924335],
+        std=[60.2625333, 60.32740275, 59.30988645],
+        to_rgb=True),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(384, 384),
+        flip=False,
+        transforms=[
+            dict(
+                type='Normalize',
+                mean=[143.55267075, 132.96705975, 126.94924335],
+                std=[60.2625333, 60.32740275, 59.30988645],
+                to_rgb=True),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img'])
+        ])
+]
+data = dict(
+    train=dict(
+        type='EasyPortraitPSDataset',
+        data_root='/home/jovyan/datasets/wacv_24/',
+        classes=('background', 'person'),
+        img_dir='easyportrait_384/images/train',
+        ann_dir='easyportrait_384/annotations_ps/train',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(type='LoadAnnotations'),
+            dict(type='RandomFlip', prob=0.0),
+            dict(
+                type='PhotoMetricDistortion',
+                brightness_delta=16,
+                contrast_range=(0.5, 1.0),
+                saturation_range=(0.5, 1.0),
+                hue_delta=5),
+            dict(
+                type='Normalize',
+                mean=[143.55267075, 132.96705975, 126.94924335],
+                std=[60.2625333, 60.32740275, 59.30988645],
+                to_rgb=True),
+            dict(type='DefaultFormatBundle'),
+            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+        ]),
+    val=dict(
+        type='EasyPortraitPSDataset',
+        data_root='/home/jovyan/datasets/wacv_24/',
+        classes=('background', 'person'),
+        img_dir='easyportrait_384/images/val',
+        ann_dir='easyportrait_384/annotations_ps/val',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(
+                type='MultiScaleFlipAug',
+                img_scale=(384, 384),
+                flip=False,
+                transforms=[
+                    dict(
+                        type='Normalize',
+                        mean=[143.55267075, 132.96705975, 126.94924335],
+                        std=[60.2625333, 60.32740275, 59.30988645],
+                        to_rgb=True),
+                    dict(type='ImageToTensor', keys=['img']),
+                    dict(type='Collect', keys=['img'])
+                ])
+        ]),
+    test=dict(
+        type='EasyPortraitPSDataset',
+        data_root='/home/jovyan/datasets/wacv_24/',
+        classes=('background', 'person'),
+        img_dir='easyportrait_384/images/test',
+        ann_dir='easyportrait_384/annotations_ps/test',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(
+                type='MultiScaleFlipAug',
+                img_scale=(384, 384),
+                flip=False,
+                transforms=[
+                    dict(
+                        type='Normalize',
+                        mean=[143.55267075, 132.96705975, 126.94924335],
+                        std=[60.2625333, 60.32740275, 59.30988645],
+                        to_rgb=True),
+                    dict(type='ImageToTensor', keys=['img']),
+                    dict(type='Collect', keys=['img'])
+                ])
+        ]),
+    samples_per_gpu=32,
+    workers_per_gpu=8)
+log_config = dict(
+    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+cudnn_benchmark = True
+optimizer = dict(
+    type='AdamW',
+    lr=6e-05,
+    betas=(0.9, 0.999),
+    weight_decay=0.01,
+    paramwise_cfg=dict(
+        custom_keys=dict(
+            pos_block=dict(decay_mult=0.0),
+            norm=dict(decay_mult=0.0),
+            head=dict(lr_mult=10.0))))
+optimizer_config = dict()
+lr_config = dict(
+    policy='poly',
+    warmup='linear',
+    warmup_iters=1500,
+    warmup_ratio=1e-06,
+    power=1.0,
+    min_lr=0.0,
+    by_epoch=False)
+default_hooks = dict(stop=dict(type='EarlyStoppingHook', monitor='mIoU'))
+runner = dict(type='EpochBasedRunner', max_epochs=100)
+checkpoint_config = dict(by_epoch=True, interval=100)
+evaluation = dict(interval=1, metric='mIoU', save_best='mIoU')
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth'
+work_dir = 'work_dirs/petrova/segformer-ps'
+gpu_ids = [0]
+auto_resume = False
data_utils/easyportrait/mmseg/.mim/configs
ADDED
File without changes

data_utils/easyportrait/mmseg/.mim/tools
ADDED
File without changes
data_utils/easyportrait/mmseg/__init__.py
ADDED
@@ -0,0 +1,62 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import mmcv
+from packaging.version import parse
+
+from .version import __version__, version_info
+
+MMCV_MIN = '1.3.13'
+MMCV_MAX = '1.8.0'
+
+
+def digit_version(version_str: str, length: int = 4):
+    """Convert a version string into a tuple of integers.
+
+    This method is usually used for comparing two versions. For pre-release
+    versions: alpha < beta < rc.
+
+    Args:
+        version_str (str): The version string.
+        length (int): The maximum number of version levels. Default: 4.
+
+    Returns:
+        tuple[int]: The version info in digits (integers).
+    """
+    version = parse(version_str)
+    assert version.release, f'failed to parse version {version_str}'
+    release = list(version.release)
+    release = release[:length]
+    if len(release) < length:
+        release = release + [0] * (length - len(release))
+    if version.is_prerelease:
+        mapping = {'a': -3, 'b': -2, 'rc': -1}
+        val = -4
+        # version.pre can be None
+        if version.pre:
+            if version.pre[0] not in mapping:
+                warnings.warn(f'unknown prerelease version {version.pre[0]}, '
+                              'version checking may go wrong')
+            else:
+                val = mapping[version.pre[0]]
+            release.extend([val, version.pre[-1]])
+        else:
+            release.extend([val, 0])
+
+    elif version.is_postrelease:
+        release.extend([1, version.post])
+    else:
+        release.extend([0, 0])
+    return tuple(release)
+
+
+mmcv_min_version = digit_version(MMCV_MIN)
+mmcv_max_version = digit_version(MMCV_MAX)
+mmcv_version = digit_version(mmcv.__version__)
+
+
+assert (mmcv_min_version <= mmcv_version < mmcv_max_version), \
+    f'MMCV=={mmcv.__version__} is used but incompatible. ' \
+    f'Please install mmcv>={mmcv_min_version}, <{mmcv_max_version}.'
+
+__all__ = ['__version__', 'version_info', 'digit_version']
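
The tuple layout above makes pre-releases sort below their final release (alpha < beta < rc < release); illustrative values:

>>> digit_version('1.3.13')
(1, 3, 13, 0, 0, 0)
>>> digit_version('1.7.0rc1')
(1, 7, 0, 0, -1, 1)
>>> digit_version('1.7.0rc1') < digit_version('1.7.0')
True
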
data_utils/easyportrait/mmseg/apis/__init__.py
ADDED
@@ -0,0 +1,11 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .inference import inference_segmentor, init_segmentor, show_result_pyplot
+from .test import multi_gpu_test, single_gpu_test
+from .train import (get_root_logger, init_random_seed, set_random_seed,
+                    train_segmentor)
+
+__all__ = [
+    'get_root_logger', 'set_random_seed', 'train_segmentor', 'init_segmentor',
+    'inference_segmentor', 'multi_gpu_test', 'single_gpu_test',
+    'show_result_pyplot', 'init_random_seed'
+]
data_utils/easyportrait/mmseg/apis/inference.py
ADDED
@@ -0,0 +1,145 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import matplotlib.pyplot as plt
+import mmcv
+import torch
+from mmcv.parallel import collate, scatter
+from mmcv.runner import load_checkpoint
+
+from mmseg.datasets.pipelines import Compose
+from mmseg.models import build_segmentor
+
+
+def init_segmentor(config, checkpoint=None, device='cuda:0'):
+    """Initialize a segmentor from config file.
+
+    Args:
+        config (str or :obj:`mmcv.Config`): Config file path or the config
+            object.
+        checkpoint (str, optional): Checkpoint path. If left as None, the model
+            will not load any weights.
+        device (str, optional): CPU/CUDA device option. Default 'cuda:0'.
+            Use 'cpu' for loading the model on CPU.
+    Returns:
+        nn.Module: The constructed segmentor.
+    """
+    if isinstance(config, str):
+        config = mmcv.Config.fromfile(config)
+    elif not isinstance(config, mmcv.Config):
+        raise TypeError('config must be a filename or Config object, '
+                        'but got {}'.format(type(config)))
+    config.model.pretrained = None
+    config.model.train_cfg = None
+    model = build_segmentor(config.model, test_cfg=config.get('test_cfg'))
+    if checkpoint is not None:
+        checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
+        model.CLASSES = checkpoint['meta']['CLASSES']
+        model.PALETTE = checkpoint['meta']['PALETTE']
+    model.cfg = config  # save the config in the model for convenience
+    model.to(device)
+    model.eval()
+    return model
+
+
+class LoadImage:
+    """A simple pipeline to load image."""
+
+    def __call__(self, results):
+        """Call function to load images into results.
+
+        Args:
+            results (dict): A result dict contains the file name
+                of the image to be read.
+
+        Returns:
+            dict: ``results`` will be returned containing loaded image.
+        """
+
+        if isinstance(results['img'], str):
+            results['filename'] = results['img']
+            results['ori_filename'] = results['img']
+        else:
+            results['filename'] = None
+            results['ori_filename'] = None
+        img = mmcv.imread(results['img'])
+        results['img'] = img
+        results['img_shape'] = img.shape
+        results['ori_shape'] = img.shape
+        return results
+
+
+def inference_segmentor(model, imgs):
+    """Inference image(s) with the segmentor.
+
+    Args:
+        model (nn.Module): The loaded segmentor.
+        imgs (str/ndarray or list[str/ndarray]): Either image files or loaded
+            images.
+
+    Returns:
+        (list[Tensor]): The segmentation result.
+    """
+    cfg = model.cfg
+    device = next(model.parameters()).device  # model device
+    # build the data pipeline
+    test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
+    test_pipeline = Compose(test_pipeline)
+    # prepare data
+    data = []
+    imgs = imgs if isinstance(imgs, list) else [imgs]
+    for img in imgs:
+        img_data = dict(img=img)
+        img_data = test_pipeline(img_data)
+        data.append(img_data)
+    data = collate(data, samples_per_gpu=len(imgs))
+    if next(model.parameters()).is_cuda:
+        # scatter to specified GPU
+        data = scatter(data, [device])[0]
+    else:
+        data['img_metas'] = [i.data[0] for i in data['img_metas']]
+
+    # forward the model
+    with torch.no_grad():
+        result = model(return_loss=False, rescale=True, **data)
+    return result
+
+
+def show_result_pyplot(model,
+                       img,
+                       result,
+                       palette=None,
+                       fig_size=(15, 10),
+                       opacity=0.5,
+                       title='',
+                       block=True,
+                       out_file=None):
+    """Visualize the segmentation results on the image.
+
+    Args:
+        model (nn.Module): The loaded segmentor.
+        img (str or np.ndarray): Image filename or loaded image.
+        result (list): The segmentation result.
+        palette (list[list[int]] | None): The palette of segmentation
+            map. If None is given, random palette will be generated.
+            Default: None
+        fig_size (tuple): Figure size of the pyplot figure.
+        opacity (float): Opacity of painted segmentation map.
+            Default 0.5.
+            Must be in (0, 1] range.
+        title (str): The title of pyplot figure.
+            Default is ''.
+        block (bool): Whether to block the pyplot figure.
+            Default is True.
+        out_file (str or None): The path to write the image.
+            Default: None.
+    """
+    if hasattr(model, 'module'):
+        model = model.module
+    img = model.show_result(
+        img, result, palette=palette, show=False, opacity=opacity)
+    plt.figure(figsize=fig_size)
+    plt.imshow(mmcv.bgr2rgb(img))
+    plt.title(title)
+    plt.tight_layout()
+    plt.show(block=block)
+    if out_file is not None:
+        mmcv.imwrite(img, out_file)
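
Putting the three helpers together, single-image inference looks roughly like this (the checkpoint and demo image are placeholders, not files shipped with this repo):

from mmseg.apis import inference_segmentor, init_segmentor, show_result_pyplot

config_file = ('data_utils/easyportrait/local_configs/'
               'easyportrait_experiments_v2/segformer-fp/segformer-fp.py')
checkpoint_file = 'work_dirs/petrova/segformer-fp/latest.pth'  # placeholder
model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
result = inference_segmentor(model, 'demo/portrait.jpg')       # placeholder
show_result_pyplot(model, 'demo/portrait.jpg', result, opacity=0.5)
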
data_utils/easyportrait/mmseg/apis/test.py
ADDED
@@ -0,0 +1,233 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+
+import mmcv
+import numpy as np
+import torch
+from mmcv.engine import collect_results_cpu, collect_results_gpu
+from mmcv.image import tensor2imgs
+from mmcv.runner import get_dist_info
+
+
+def np2tmp(array, temp_file_name=None, tmpdir=None):
+    """Save ndarray to local numpy file.
+
+    Args:
+        array (ndarray): Ndarray to save.
+        temp_file_name (str): Numpy file name. If 'temp_file_name=None', this
+            function will generate a file name with tempfile.NamedTemporaryFile
+            to save ndarray. Default: None.
+        tmpdir (str): Temporary directory to save Ndarray files. Default: None.
+    Returns:
+        str: The numpy file name.
+    """
+
+    if temp_file_name is None:
+        temp_file_name = tempfile.NamedTemporaryFile(
+            suffix='.npy', delete=False, dir=tmpdir).name
+    np.save(temp_file_name, array)
+    return temp_file_name
+
+
+def single_gpu_test(model,
+                    data_loader,
+                    show=False,
+                    out_dir=None,
+                    efficient_test=False,
+                    opacity=0.5,
+                    pre_eval=False,
+                    format_only=False,
+                    format_args={}):
+    """Test with single GPU by progressive mode.
+
+    Args:
+        model (nn.Module): Model to be tested.
+        data_loader (utils.data.Dataloader): Pytorch data loader.
+        show (bool): Whether to show results during inference. Default: False.
+        out_dir (str, optional): If specified, the results will be dumped into
+            the directory to save output results.
+        efficient_test (bool): Whether save the results as local numpy files to
+            save CPU memory during evaluation. Mutually exclusive with
+            pre_eval and format_results. Default: False.
+        opacity (float): Opacity of painted segmentation map.
+            Default 0.5.
+            Must be in (0, 1] range.
+        pre_eval (bool): Use dataset.pre_eval() function to generate
+            pre_results for metric evaluation. Mutually exclusive with
+            efficient_test and format_results. Default: False.
+        format_only (bool): Only format result for results commit.
+            Mutually exclusive with pre_eval and efficient_test.
+            Default: False.
+        format_args (dict): The args for format_results. Default: {}.
+    Returns:
+        list: list of evaluation pre-results or list of save file names.
+    """
+    if efficient_test:
+        warnings.warn(
+            'DeprecationWarning: ``efficient_test`` will be deprecated, the '
+            'evaluation is CPU memory friendly with pre_eval=True')
+        mmcv.mkdir_or_exist('.efficient_test')
+    # when none of them is set true, return segmentation results as
+    # a list of np.array.
+    assert [efficient_test, pre_eval, format_only].count(True) <= 1, \
+        '``efficient_test``, ``pre_eval`` and ``format_only`` are mutually ' \
+        'exclusive, only one of them could be true.'
+
+    model.eval()
+    results = []
+    dataset = data_loader.dataset
+    prog_bar = mmcv.ProgressBar(len(dataset))
+    # The pipeline about how the data_loader retrieves samples from dataset:
+    # sampler -> batch_sampler -> indices
+    # The indices are passed to dataset_fetcher to get data from dataset.
+    # data_fetcher -> collate_fn(dataset[index]) -> data_sample
+    # we use batch_sampler to get correct data idx
+    loader_indices = data_loader.batch_sampler
+
+    for batch_indices, data in zip(loader_indices, data_loader):
+        with torch.no_grad():
+            result = model(return_loss=False, **data)
+
+        if show or out_dir:
+            img_tensor = data['img'][0]
+            img_metas = data['img_metas'][0].data[0]
+            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
+            assert len(imgs) == len(img_metas)
+
+            for img, img_meta in zip(imgs, img_metas):
+                h, w, _ = img_meta['img_shape']
+                img_show = img[:h, :w, :]
+
+                ori_h, ori_w = img_meta['ori_shape'][:-1]
+                img_show = mmcv.imresize(img_show, (ori_w, ori_h))
+
+                if out_dir:
+                    out_file = osp.join(out_dir, img_meta['ori_filename'])
+                else:
+                    out_file = None
+
+                model.module.show_result(
+                    img_show,
+                    result,
+                    palette=dataset.PALETTE,
+                    show=show,
+                    out_file=out_file,
+                    opacity=opacity)
+
+        if efficient_test:
+            result = [np2tmp(_, tmpdir='.efficient_test') for _ in result]
+
+        if format_only:
+            result = dataset.format_results(
+                result, indices=batch_indices, **format_args)
+        if pre_eval:
+            # TODO: adapt samples_per_gpu > 1.
+            # only samples_per_gpu=1 valid now
+            result = dataset.pre_eval(result, indices=batch_indices)
+            results.extend(result)
+        else:
+            results.extend(result)
+
+        batch_size = len(result)
+        for _ in range(batch_size):
+            prog_bar.update()
+
+    return results
+
+
+def multi_gpu_test(model,
+                   data_loader,
+                   tmpdir=None,
+                   gpu_collect=False,
+                   efficient_test=False,
+                   pre_eval=False,
+                   format_only=False,
+                   format_args={}):
+    """Test model with multiple gpus by progressive mode.
+
+    This method tests model with multiple gpus and collects the results
+    under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'
+    it encodes results to gpu tensors and uses gpu communication for results
+    collection. On cpu mode it saves the results on different gpus to 'tmpdir'
+    and collects them by the rank 0 worker.
+
+    Args:
+        model (nn.Module): Model to be tested.
+        data_loader (utils.data.Dataloader): Pytorch data loader.
+        tmpdir (str): Path of directory to save the temporary results from
+            different gpus under cpu mode. The same path is used for efficient
+            test. Default: None.
+        gpu_collect (bool): Option to use either gpu or cpu to collect results.
+            Default: False.
+        efficient_test (bool): Whether save the results as local numpy files to
+            save CPU memory during evaluation. Mutually exclusive with
+            pre_eval and format_results. Default: False.
+        pre_eval (bool): Use dataset.pre_eval() function to generate
+            pre_results for metric evaluation. Mutually exclusive with
+            efficient_test and format_results. Default: False.
+        format_only (bool): Only format result for results commit.
+            Mutually exclusive with pre_eval and efficient_test.
+            Default: False.
+        format_args (dict): The args for format_results. Default: {}.
+
+    Returns:
+        list: list of evaluation pre-results or list of save file names.
+    """
+    if efficient_test:
+        warnings.warn(
+            'DeprecationWarning: ``efficient_test`` will be deprecated, the '
+            'evaluation is CPU memory friendly with pre_eval=True')
+        mmcv.mkdir_or_exist('.efficient_test')
+    # when none of them is set true, return segmentation results as
+    # a list of np.array.
+    assert [efficient_test, pre_eval, format_only].count(True) <= 1, \
+        '``efficient_test``, ``pre_eval`` and ``format_only`` are mutually ' \
+        'exclusive, only one of them could be true.'
+
+    model.eval()
+    results = []
+    dataset = data_loader.dataset
+    # The pipeline about how the data_loader retrieves samples from dataset:
+    # sampler -> batch_sampler -> indices
+    # The indices are passed to dataset_fetcher to get data from dataset.
+    # data_fetcher -> collate_fn(dataset[index]) -> data_sample
+    # we use batch_sampler to get correct data idx
+
+    # batch_sampler based on DistributedSampler, the indices only point to data
+    # samples of related machine.
+    loader_indices = data_loader.batch_sampler
+
+    rank, world_size = get_dist_info()
+    if rank == 0:
+        prog_bar = mmcv.ProgressBar(len(dataset))
+
+    for batch_indices, data in zip(loader_indices, data_loader):
+        with torch.no_grad():
+            result = model(return_loss=False, rescale=True, **data)
+
+        if efficient_test:
+            result = [np2tmp(_, tmpdir='.efficient_test') for _ in result]
+
+        if format_only:
+            result = dataset.format_results(
+                result, indices=batch_indices, **format_args)
+        if pre_eval:
+            # TODO: adapt samples_per_gpu > 1.
+            # only samples_per_gpu=1 valid now
+            result = dataset.pre_eval(result, indices=batch_indices)
+
+        results.extend(result)
+
+        if rank == 0:
+            batch_size = len(result) * world_size
+            for _ in range(batch_size):
+                prog_bar.update()
+
+    # collect results from all ranks
+    if gpu_collect:
+        results = collect_results_gpu(results, len(dataset))
+    else:
+        results = collect_results_cpu(results, len(dataset), tmpdir)
+    return results
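
A rough sketch of how single_gpu_test is usually driven, mirroring mmseg's tools/test.py (the config and checkpoint paths are placeholders):

import mmcv
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint
from mmseg.apis import single_gpu_test
from mmseg.datasets import build_dataloader, build_dataset
from mmseg.models import build_segmentor

cfg = mmcv.Config.fromfile('path/to/config.py')  # placeholder
dataset = build_dataset(cfg.data.test)
data_loader = build_dataloader(
    dataset, samples_per_gpu=1, workers_per_gpu=2, dist=False, shuffle=False)
model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg'))
load_checkpoint(model, 'path/to/ckpt.pth', map_location='cpu')  # placeholder
model = MMDataParallel(model, device_ids=[0])
results = single_gpu_test(model, data_loader, pre_eval=True)
print(dataset.evaluate(results, metric='mIoU'))
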
data_utils/easyportrait/mmseg/apis/train.py
ADDED
@@ -0,0 +1,194 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import random
+import warnings
+
+import mmcv
+import numpy as np
+import torch
+import torch.distributed as dist
+from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner,
+                         build_runner, get_dist_info)
+from mmcv.utils import build_from_cfg
+
+from mmseg import digit_version
+from mmseg.core import DistEvalHook, EvalHook, build_optimizer
+from mmseg.datasets import build_dataloader, build_dataset
+from mmseg.utils import (build_ddp, build_dp, find_latest_checkpoint,
+                         get_root_logger)
+
+
+def init_random_seed(seed=None, device='cuda'):
+    """Initialize random seed.
+
+    If the seed is not set, the seed will be automatically randomized,
+    and then broadcast to all processes to prevent some potential bugs.
+    Args:
+        seed (int, Optional): The seed. Default to None.
+        device (str): The device where the seed will be put on.
+            Default to 'cuda'.
+    Returns:
+        int: Seed to be used.
+    """
+    if seed is not None:
+        return seed
+
+    # Make sure all ranks share the same random seed to prevent
+    # some potential bugs. Please refer to
+    # https://github.com/open-mmlab/mmdetection/issues/6339
+    rank, world_size = get_dist_info()
+    seed = np.random.randint(2**31)
+    if world_size == 1:
+        return seed
+
+    if rank == 0:
+        random_num = torch.tensor(seed, dtype=torch.int32, device=device)
+    else:
+        random_num = torch.tensor(0, dtype=torch.int32, device=device)
+    dist.broadcast(random_num, src=0)
+    return random_num.item()
+
+
+def set_random_seed(seed, deterministic=False):
+    """Set random seed.
+
+    Args:
+        seed (int): Seed to be used.
+        deterministic (bool): Whether to set the deterministic option for
+            CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
+            to True and `torch.backends.cudnn.benchmark` to False.
+            Default: False.
+    """
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    if deterministic:
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+
+
+def train_segmentor(model,
+                    dataset,
+                    cfg,
+                    distributed=False,
+                    validate=False,
+                    timestamp=None,
+                    meta=None):
+    """Launch segmentor training."""
+    logger = get_root_logger(cfg.log_level)
+
+    # prepare data loaders
+    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
+    # The default loader config
+    loader_cfg = dict(
+        # cfg.gpus will be ignored if distributed
+        num_gpus=len(cfg.gpu_ids),
+        dist=distributed,
+        seed=cfg.seed,
+        drop_last=True)
+    # The overall dataloader settings
+    loader_cfg.update({
+        k: v
+        for k, v in cfg.data.items() if k not in [
+            'train', 'val', 'test', 'train_dataloader', 'val_dataloader',
+            'test_dataloader'
+        ]
+    })
+
+    # The specific dataloader settings
+    train_loader_cfg = {**loader_cfg, **cfg.data.get('train_dataloader', {})}
+    data_loaders = [build_dataloader(ds, **train_loader_cfg) for ds in dataset]
+
+    # put model on devices
+    if distributed:
+        find_unused_parameters = cfg.get('find_unused_parameters', False)
+        # Sets the `find_unused_parameters` parameter in
+        # DDP wrapper
+        model = build_ddp(
+            model,
+            cfg.device,
+            device_ids=[int(os.environ['LOCAL_RANK'])],
+            broadcast_buffers=False,
+            find_unused_parameters=find_unused_parameters)
+    else:
+        if not torch.cuda.is_available():
+            assert digit_version(mmcv.__version__) >= digit_version('1.4.4'), \
+                'Please use MMCV >= 1.4.4 for CPU training!'
+        model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids)
+
+    # build runner
+    optimizer = build_optimizer(model, cfg.optimizer)
+
+    if cfg.get('runner') is None:
+        cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters}
+        warnings.warn(
+            'config is now expected to have a `runner` section, '
+            'please set `runner` in your config.', UserWarning)
+
+    runner = build_runner(
+        cfg.runner,
+        default_args=dict(
+            model=model,
+            batch_processor=None,
+            optimizer=optimizer,
+            work_dir=cfg.work_dir,
+            logger=logger,
+            meta=meta))
+
+    # register hooks
+    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
+                                   cfg.checkpoint_config, cfg.log_config,
+                                   cfg.get('momentum_config', None))
+    if distributed:
+        # when distributed training by epoch, use `DistSamplerSeedHook` to set
+        # a different seed on the distributed sampler for each epoch; it will
+        # shuffle the dataset at each epoch and avoid overfitting.
+        if isinstance(runner, EpochBasedRunner):
+            runner.register_hook(DistSamplerSeedHook())
+
+    # an ugly workaround to make the .log and .log.json filenames the same
+    runner.timestamp = timestamp
+
+    # register eval hooks
+    if validate:
+        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
+        # The specific dataloader settings
+        val_loader_cfg = {
+            **loader_cfg,
+            'samples_per_gpu': 1,
+            'shuffle': False,  # Not shuffle by default
+            **cfg.data.get('val_dataloader', {}),
+        }
+        val_dataloader = build_dataloader(val_dataset, **val_loader_cfg)
+        eval_cfg = cfg.get('evaluation', {})
+        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
+        eval_hook = DistEvalHook if distributed else EvalHook
+        # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the
+        # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'.
+        runner.register_hook(
+            eval_hook(val_dataloader, **eval_cfg), priority='LOW')
+
+    # user-defined hooks
+    if cfg.get('custom_hooks', None):
+        custom_hooks = cfg.custom_hooks
+        assert isinstance(custom_hooks, list), \
+            f'custom_hooks expect list type, but got {type(custom_hooks)}'
+        for hook_cfg in cfg.custom_hooks:
+            assert isinstance(hook_cfg, dict), \
+                'Each item in custom_hooks expects dict type, but got ' \
+                f'{type(hook_cfg)}'
+            hook_cfg = hook_cfg.copy()
+            priority = hook_cfg.pop('priority', 'NORMAL')
+            hook = build_from_cfg(hook_cfg, HOOKS)
+            runner.register_hook(hook, priority=priority)
+
+    if cfg.resume_from is None and cfg.get('auto_resume'):
+        resume_from = find_latest_checkpoint(cfg.work_dir)
+        if resume_from is not None:
+            cfg.resume_from = resume_from
+    if cfg.resume_from:
+        runner.resume(cfg.resume_from)
+    elif cfg.load_from:
+        runner.load_checkpoint(cfg.load_from)
+    runner.run(data_loaders, cfg.workflow)
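
train_segmentor is normally wired up the way mmseg's tools/train.py does it; a condensed sketch (the config path is a placeholder; cfg.device and cfg.seed must be set because the loader config above reads them):

import mmcv
from mmseg.apis import init_random_seed, set_random_seed, train_segmentor
from mmseg.datasets import build_dataset
from mmseg.models import build_segmentor

cfg = mmcv.Config.fromfile('path/to/config.py')  # placeholder
cfg.device = 'cuda'
cfg.seed = init_random_seed(0)
set_random_seed(cfg.seed, deterministic=False)
datasets = [build_dataset(cfg.data.train)]
model = build_segmentor(
    cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
model.CLASSES = datasets[0].CLASSES
train_segmentor(model, datasets, cfg, distributed=False, validate=True)
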
data_utils/easyportrait/mmseg/core/__init__.py
ADDED
@@ -0,0 +1,12 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .builder import (OPTIMIZER_BUILDERS, build_optimizer,
+                      build_optimizer_constructor)
+from .evaluation import *  # noqa: F401, F403
+from .hook import *  # noqa: F401, F403
+from .optimizers import *  # noqa: F401, F403
+from .seg import *  # noqa: F401, F403
+from .utils import *  # noqa: F401, F403
+
+__all__ = [
+    'OPTIMIZER_BUILDERS', 'build_optimizer', 'build_optimizer_constructor'
+]
data_utils/easyportrait/mmseg/core/builder.py
ADDED
@@ -0,0 +1,33 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+from mmcv.runner.optimizer import OPTIMIZER_BUILDERS as MMCV_OPTIMIZER_BUILDERS
+from mmcv.utils import Registry, build_from_cfg
+
+OPTIMIZER_BUILDERS = Registry(
+    'optimizer builder', parent=MMCV_OPTIMIZER_BUILDERS)
+
+
+def build_optimizer_constructor(cfg):
+    constructor_type = cfg.get('type')
+    if constructor_type in OPTIMIZER_BUILDERS:
+        return build_from_cfg(cfg, OPTIMIZER_BUILDERS)
+    elif constructor_type in MMCV_OPTIMIZER_BUILDERS:
+        return build_from_cfg(cfg, MMCV_OPTIMIZER_BUILDERS)
+    else:
+        raise KeyError(f'{constructor_type} is not registered '
+                       'in the optimizer builder registry.')
+
+
+def build_optimizer(model, cfg):
+    optimizer_cfg = copy.deepcopy(cfg)
+    constructor_type = optimizer_cfg.pop('constructor',
+                                         'DefaultOptimizerConstructor')
+    paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None)
+    optim_constructor = build_optimizer_constructor(
+        dict(
+            type=constructor_type,
+            optimizer_cfg=optimizer_cfg,
+            paramwise_cfg=paramwise_cfg))
+    optimizer = optim_constructor(model)
+    return optimizer
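
This builder is what resolves the optimizer dicts in the configs above (AdamW plus paramwise_cfg). A small sketch on a toy module, without paramwise_cfg for brevity:

import torch.nn as nn
from mmseg.core import build_optimizer

model = nn.Conv2d(3, 8, 3)  # stand-in for a real segmentor
optimizer = build_optimizer(
    model, dict(type='AdamW', lr=6e-05, betas=(0.9, 0.999), weight_decay=0.01))
print(type(optimizer).__name__)  # -> AdamW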