SAEHD: add 'Blur out mask' and 'Denoise DST faceset' options.

This commit is contained in:
iperov 2021-08-20 17:07:44 +04:00
parent e41f87e682
commit 8e63666390

View File

@ -29,7 +29,7 @@ class SAEHDModel(ModelBase):
yn_str = {True:'y',False:'n'}
min_res = 64
max_res = 640
#default_usefp16 = self.options['use_fp16'] = self.load_or_def_option('use_fp16', False)
default_resolution = self.options['resolution'] = self.load_or_def_option('resolution', 128)
default_face_type = self.options['face_type'] = self.load_or_def_option('face_type', 'f')
@ -46,6 +46,8 @@ class SAEHDModel(ModelBase):
default_masked_training = self.options['masked_training'] = self.load_or_def_option('masked_training', True)
default_eyes_mouth_prio = self.options['eyes_mouth_prio'] = self.load_or_def_option('eyes_mouth_prio', False)
default_uniform_yaw = self.options['uniform_yaw'] = self.load_or_def_option('uniform_yaw', False)
default_blur_out_mask = self.options['blur_out_mask'] = self.load_or_def_option('blur_out_mask', False)
default_dst_denoise = self.options['dst_denoise'] = self.load_or_def_option('dst_denoise', False)
default_adabelief = self.options['adabelief'] = self.load_or_def_option('adabelief', True)
@ -70,14 +72,14 @@ class SAEHDModel(ModelBase):
self.ask_random_dst_flip()
self.ask_batch_size(suggest_batch_size)
#self.options['use_fp16'] = io.input_bool ("Use fp16", default_usefp16, help_message='Increases training/inference speed, reduces model size. Model may crash. Enable it after 1-5k iters.')
if self.is_first_run():
resolution = io.input_int("Resolution", default_resolution, add_info="64-640", help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16 and 32 for -d archi.")
resolution = np.clip ( (resolution // 16) * 16, min_res, max_res)
self.options['resolution'] = resolution
self.options['face_type'] = io.input_str ("Face type", default_face_type, ['h','mf','f','wf','head'], help_message="Half / mid face / full face / whole face / head. Half face has better resolution, but covers less area of cheeks. Mid face is 30% wider than half face. 'Whole face' covers full area of face include forehead. 'head' covers full head, but requires XSeg for src and dst faceset.").lower()
while True:
@ -138,11 +140,13 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
self.options['eyes_mouth_prio'] = io.input_bool ("Eyes and mouth priority", default_eyes_mouth_prio, help_message='Helps to fix eye problems during training like "alien eyes" and wrong eyes direction. Also makes the detail of the teeth higher.')
self.options['uniform_yaw'] = io.input_bool ("Uniform yaw distribution of samples", default_uniform_yaw, help_message='Helps to fix blurry side faces due to small amount of them in the faceset.')
self.options['blur_out_mask'] = io.input_bool ("Blur out mask", default_blur_out_mask, help_message='Blurs nearby area outside of applied face mask of training samples. The result is the background near the face is smoothed and less noticeable on swapped face. The exact xseg mask in src and dst faceset is required.')
self.options['dst_denoise'] = io.input_bool ("Denoise DST faceset.", default_dst_denoise, help_message='Used in RTM(ReadyToMerge) training with RTM DST faceset. Removes high frequency noise keeping edges. Result is better face syncronization with any face. Can be enabled at any time.')
default_gan_power = self.options['gan_power'] = self.load_or_def_option('gan_power', 0.0)
default_gan_patch_size = self.options['gan_patch_size'] = self.load_or_def_option('gan_patch_size', self.options['resolution'] // 8)
default_gan_dims = self.options['gan_dims'] = self.load_or_def_option('gan_dims', 16)
if self.is_first_run() or ask_override:
self.options['models_opt_on_gpu'] = io.input_bool ("Place models and optimizer on GPU", default_models_opt_on_gpu, help_message="When you train on one GPU, by default model and optimizer weights are placed on GPU to accelerate the process. You can place they on CPU to free up extra VRAM, thus set bigger dimensions.")
@ -153,14 +157,14 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
self.options['random_warp'] = io.input_bool ("Enable random warp of samples", default_random_warp, help_message="Random warp is required to generalize facial expressions of both faces. When the face is trained enough, you can disable it to get extra sharpness and reduce subpixel shake for less amount of iterations.")
self.options['gan_power'] = np.clip ( io.input_number ("GAN power", default_gan_power, add_info="0.0 .. 5.0", help_message="Forces the neural network to learn small details of the face. Enable it only when the face is trained enough with lr_dropout(on) and random_warp(off), and don't disable. The higher the value, the higher the chances of artifacts. Typical fine value is 0.1"), 0.0, 5.0 )
if self.options['gan_power'] != 0.0:
if self.options['gan_power'] != 0.0:
gan_patch_size = np.clip ( io.input_int("GAN patch size", default_gan_patch_size, add_info="3-640", help_message="The higher patch size, the higher the quality, the more VRAM is required. You can get sharper edges even at the lowest setting. Typical fine value is resolution / 8." ), 3, 640 )
self.options['gan_patch_size'] = gan_patch_size
gan_dims = np.clip ( io.input_int("GAN dimensions", default_gan_dims, add_info="4-512", help_message="The dimensions of the GAN network. The higher dimensions, the more VRAM is required. You can get sharper edges even at the lowest setting. Typical fine value is 16." ), 4, 512 )
self.options['gan_dims'] = gan_dims
if 'df' in self.options['archi']:
self.options['true_face_power'] = np.clip ( io.input_number ("'True face' power.", default_true_face_power, add_info="0.0000 .. 1.0", help_message="Experimental option. Discriminates result face to be more like src face. Higher value - stronger discrimination. Typical value is 0.01 . Comparison - https://i.imgur.com/czScS9q.png"), 0.0, 1.0 )
else:
@ -176,7 +180,7 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
if self.options['pretrain'] and self.get_pretraining_data_path() is None:
raise Exception("pretraining_data_path is not defined")
self.gan_model_changed = (default_gan_patch_size != self.options['gan_patch_size']) or (default_gan_dims != self.options['gan_dims'])
self.pretrain_just_disabled = (default_pretrain == True and self.options['pretrain'] == False)
@ -198,7 +202,7 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
if 'eyes_prio' in self.options:
self.options.pop('eyes_prio')
eyes_mouth_prio = self.options['eyes_mouth_prio']
archi_split = self.options['archi'].split('-')
@ -207,7 +211,7 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
archi_type, archi_opts = archi_split
elif len(archi_split) == 1:
archi_type, archi_opts = archi_split[0], None
self.archi_type = archi_type
ae_dims = self.options['ae_dims']
@ -219,16 +223,18 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
self.set_iter(0)
adabelief = self.options['adabelief']
use_fp16 = False
if self.is_exporting:
use_fp16 = io.input_bool ("Export quantized?", False, help_message='Makes the exported model faster. If you have problems, disable this option.')
self.gan_power = gan_power = 0.0 if self.pretrain else self.options['gan_power']
random_warp = False if self.pretrain else self.options['random_warp']
random_src_flip = self.random_src_flip if not self.pretrain else True
random_dst_flip = self.random_dst_flip if not self.pretrain else True
blur_out_mask = self.options['blur_out_mask']
dst_denoise = self.options['dst_denoise']
if self.pretrain:
self.options_show_override['gan_power'] = 0.0
self.options_show_override['random_warp'] = False
@ -241,8 +247,8 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
ct_mode = self.options['ct_mode']
if ct_mode == 'none':
ct_mode = None
models_opt_on_gpu = False if len(devices) == 0 else self.options['models_opt_on_gpu']
models_opt_device = nn.tf_default_device_name if models_opt_on_gpu and self.is_training else '/CPU:0'
optimizer_vars_on_cpu = models_opt_device=='/CPU:0'
@ -356,7 +362,7 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
gpu_G_loss_gvs = []
gpu_D_code_loss_gvs = []
gpu_D_src_dst_loss_gvs = []
for gpu_id in range(gpu_count):
with tf.device( f'/{devices[gpu_id].tf_dev_type}:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ):
with tf.device(f'/CPU:0'):
@ -371,6 +377,22 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
gpu_target_dstm = self.target_dstm[batch_slice,:,:,:]
gpu_target_dstm_em = self.target_dstm_em[batch_slice,:,:,:]
gpu_target_srcm_anti = 1-gpu_target_srcm
gpu_target_dstm_anti = 1-gpu_target_dstm
if blur_out_mask:
#gpu_target_src = gpu_target_src*gpu_target_srcm_blur + nn.gaussian_blur(gpu_target_src, resolution // 32)*gpu_target_srcm_anti_blur
#gpu_target_dst = gpu_target_dst*gpu_target_dstm_blur + nn.gaussian_blur(gpu_target_dst, resolution // 32)*gpu_target_dstm_anti_blur
bg_blur_div = 128
gpu_target_src = gpu_target_src*gpu_target_srcm + \
tf.math.divide_no_nan(nn.gaussian_blur(gpu_target_src*gpu_target_srcm_anti, resolution / bg_blur_div),
(1-nn.gaussian_blur(gpu_target_srcm, resolution / bg_blur_div) ) ) * gpu_target_srcm_anti
gpu_target_dst = gpu_target_dst*gpu_target_dstm + \
tf.math.divide_no_nan(nn.gaussian_blur(gpu_target_dst*gpu_target_dstm_anti, resolution / bg_blur_div),
(1-nn.gaussian_blur(gpu_target_dstm, resolution / bg_blur_div)) ) * gpu_target_dstm_anti
# process model tensors
if 'df' in archi_type:
gpu_src_code = self.inter(self.encoder(gpu_warped_src))
@ -408,7 +430,7 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
gpu_target_dstm_style_blur = gpu_target_dstm_blur #default style mask is 0.5 on boundary
gpu_target_dstm_blur = tf.clip_by_value(gpu_target_dstm_blur, 0, 0.5) * 2
gpu_target_dst_masked = gpu_target_dst*gpu_target_dstm_blur
gpu_target_dst_masked = gpu_target_dst*gpu_target_dstm_blur
gpu_target_dst_style_masked = gpu_target_dst*gpu_target_dstm_style_blur
gpu_target_dst_style_anti_masked = gpu_target_dst*(1.0 - gpu_target_dstm_style_blur)
@ -503,14 +525,14 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
gpu_G_loss += gan_power*(DLoss(gpu_pred_src_src_d_ones, gpu_pred_src_src_d) + \
DLoss(gpu_pred_src_src_d2_ones, gpu_pred_src_src_d2))
if masked_training:
# Minimal src-src-bg rec with total_variation_mse to suppress random bright dots from gan
gpu_G_loss += 0.000001*nn.total_variation_mse(gpu_pred_src_src)
gpu_G_loss += 0.02*tf.reduce_mean(tf.square(gpu_pred_src_src_anti_masked-gpu_target_src_anti_masked),axis=[1,2,3] )
gpu_G_loss_gvs += [ nn.gradients ( gpu_G_loss, self.src_dst_trainable_weights ) ]
@ -620,10 +642,10 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
if do_init:
model.init_weights()
###############
# initializing sample generators
if self.is_training:
training_data_src_path = self.training_data_src_path if not self.pretrain else self.get_pretraining_data_path()
@ -641,7 +663,7 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
SampleGeneratorFace(training_data_src_path, random_ct_samples_path=random_ct_samples_path, debug=self.is_debug(), batch_size=self.get_batch_size(),
sample_process_options=SampleProcessor.Options(random_flip=random_src_flip),
output_sample_types = [ {'sample_type': SampleProcessor.SampleType.FACE_IMAGE,'warp':random_warp, 'transform':True, 'channel_type' : SampleProcessor.ChannelType.BGR, 'ct_mode': ct_mode, 'face_type':self.face_type, 'data_format':nn.data_format, 'resolution': resolution},
{'sample_type': SampleProcessor.SampleType.FACE_IMAGE,'warp':False , 'transform':True, 'channel_type' : SampleProcessor.ChannelType.BGR, 'ct_mode': ct_mode, 'face_type':self.face_type, 'data_format':nn.data_format, 'resolution': resolution},
{'sample_type': SampleProcessor.SampleType.FACE_IMAGE,'warp':False , 'transform':True, 'channel_type' : SampleProcessor.ChannelType.BGR, 'ct_mode': ct_mode, 'face_type':self.face_type, 'data_format':nn.data_format, 'resolution': resolution},
{'sample_type': SampleProcessor.SampleType.FACE_MASK, 'warp':False , 'transform':True, 'channel_type' : SampleProcessor.ChannelType.G, 'face_mask_type' : SampleProcessor.FaceMaskType.FULL_FACE, 'face_type':self.face_type, 'data_format':nn.data_format, 'resolution': resolution},
{'sample_type': SampleProcessor.SampleType.FACE_MASK, 'warp':False , 'transform':True, 'channel_type' : SampleProcessor.ChannelType.G, 'face_mask_type' : SampleProcessor.FaceMaskType.EYES_MOUTH, 'face_type':self.face_type, 'data_format':nn.data_format, 'resolution': resolution},
],
@ -651,7 +673,8 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.get_batch_size(),
sample_process_options=SampleProcessor.Options(random_flip=random_dst_flip),
output_sample_types = [ {'sample_type': SampleProcessor.SampleType.FACE_IMAGE,'warp':random_warp, 'transform':True, 'channel_type' : SampleProcessor.ChannelType.BGR, 'face_type':self.face_type, 'data_format':nn.data_format, 'resolution': resolution},
{'sample_type': SampleProcessor.SampleType.FACE_IMAGE,'warp':False , 'transform':True, 'channel_type' : SampleProcessor.ChannelType.BGR, 'face_type':self.face_type, 'data_format':nn.data_format, 'resolution': resolution},
{'sample_type': SampleProcessor.SampleType.FACE_IMAGE,'warp':False , 'transform':True, 'channel_type' : SampleProcessor.ChannelType.BGR, 'face_type':self.face_type, 'data_format':nn.data_format, 'resolution': resolution},
{'sample_type': SampleProcessor.SampleType.FACE_IMAGE,'warp':False , 'transform':True, 'channel_type' : SampleProcessor.ChannelType.BGR, 'denoise_filter' : dst_denoise, 'face_type':self.face_type, 'data_format':nn.data_format, 'resolution': resolution},
{'sample_type': SampleProcessor.SampleType.FACE_MASK, 'warp':False , 'transform':True, 'channel_type' : SampleProcessor.ChannelType.G, 'face_mask_type' : SampleProcessor.FaceMaskType.FULL_FACE, 'face_type':self.face_type, 'data_format':nn.data_format, 'resolution': resolution},
{'sample_type': SampleProcessor.SampleType.FACE_MASK, 'warp':False , 'transform':True, 'channel_type' : SampleProcessor.ChannelType.G, 'face_mask_type' : SampleProcessor.FaceMaskType.EYES_MOUTH, 'face_type':self.face_type, 'data_format':nn.data_format, 'resolution': resolution},
],
@ -664,20 +687,20 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
if self.pretrain_just_disabled:
self.update_sample_for_preview(force_new=True)
def export_dfm (self):
output_path=self.get_strpath_storage_for_file('model.dfm')
io.log_info(f'Dumping .dfm to {output_path}')
tf = nn.tf
nn.set_data_format('NCHW')
with tf.device (nn.tf_default_device_name):
warped_dst = tf.placeholder (nn.floatx, (None, self.resolution, self.resolution, 3), name='in_face')
warped_dst = tf.transpose(warped_dst, (0,3,1,2))
if 'df' in self.archi_type:
gpu_dst_code = self.inter(self.encoder(warped_dst))
gpu_pred_src_dst, gpu_pred_src_dstm = self.decoder_src(gpu_dst_code)
@ -692,21 +715,21 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
gpu_pred_src_dst, gpu_pred_src_dstm = self.decoder(gpu_src_dst_code)
_, gpu_pred_dst_dstm = self.decoder(gpu_dst_code)
gpu_pred_src_dst = tf.transpose(gpu_pred_src_dst, (0,2,3,1))
gpu_pred_dst_dstm = tf.transpose(gpu_pred_dst_dstm, (0,2,3,1))
gpu_pred_src_dstm = tf.transpose(gpu_pred_src_dstm, (0,2,3,1))
tf.identity(gpu_pred_dst_dstm, name='out_face_mask')
tf.identity(gpu_pred_src_dst, name='out_celeb_face')
tf.identity(gpu_pred_src_dstm, name='out_celeb_face_mask')
tf.identity(gpu_pred_src_dstm, name='out_celeb_face_mask')
output_graph_def = tf.graph_util.convert_variables_to_constants(
nn.tf_sess,
tf.get_default_graph().as_graph_def(),
nn.tf_sess,
tf.get_default_graph().as_graph_def(),
['out_face_mask','out_celeb_face','out_celeb_face_mask']
)
)
import tf2onnx
with tf.device("/CPU:0"):
model_proto, _ = tf2onnx.convert._convert_common(
@ -716,7 +739,7 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
output_names=['out_face_mask:0','out_celeb_face:0','out_celeb_face_mask:0'],
opset=13,
output_path=output_path)
#override
def get_model_filename_list(self):
return self.model_filename_list
@ -739,27 +762,28 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
bs = self.get_batch_size()
( (warped_src, target_src, target_srcm, target_srcm_em), \
(warped_dst, target_dst, target_dstm, target_dstm_em) ) = self.generate_next_samples()
(warped_dst, target_dst, target_dst_train, target_dstm, target_dstm_em) ) = self.generate_next_samples()
src_loss, dst_loss = self.src_dst_train (warped_src, target_src, target_srcm, target_srcm_em, warped_dst, target_dst, target_dstm, target_dstm_em)
src_loss, dst_loss = self.src_dst_train (warped_src, target_src, target_srcm, target_srcm_em, warped_dst, target_dst_train, target_dstm, target_dstm_em)
for i in range(bs):
self.last_src_samples_loss.append ( (target_src[i], target_srcm[i], target_srcm_em[i], src_loss[i] ) )
self.last_dst_samples_loss.append ( (target_dst[i], target_dstm[i], target_dstm_em[i], dst_loss[i] ) )
self.last_src_samples_loss.append ( (src_loss[i], target_src[i], target_srcm[i], target_srcm_em[i],) )
self.last_dst_samples_loss.append ( (dst_loss[i], target_dst[i], target_dst_train[i], target_dstm[i], target_dstm_em[i],) )
if len(self.last_src_samples_loss) >= bs*16:
src_samples_loss = sorted(self.last_src_samples_loss, key=operator.itemgetter(3), reverse=True)
dst_samples_loss = sorted(self.last_dst_samples_loss, key=operator.itemgetter(3), reverse=True)
src_samples_loss = sorted(self.last_src_samples_loss, key=operator.itemgetter(0), reverse=True)
dst_samples_loss = sorted(self.last_dst_samples_loss, key=operator.itemgetter(0), reverse=True)
target_src = np.stack( [ x[0] for x in src_samples_loss[:bs] ] )
target_srcm = np.stack( [ x[1] for x in src_samples_loss[:bs] ] )
target_srcm_em = np.stack( [ x[2] for x in src_samples_loss[:bs] ] )
target_src = np.stack( [ x[1] for x in src_samples_loss[:bs] ] )
target_srcm = np.stack( [ x[2] for x in src_samples_loss[:bs] ] )
target_srcm_em = np.stack( [ x[3] for x in src_samples_loss[:bs] ] )
target_dst = np.stack( [ x[0] for x in dst_samples_loss[:bs] ] )
target_dstm = np.stack( [ x[1] for x in dst_samples_loss[:bs] ] )
target_dstm_em = np.stack( [ x[2] for x in dst_samples_loss[:bs] ] )
target_dst = np.stack( [ x[1] for x in dst_samples_loss[:bs] ] )
target_dst_train = np.stack( [ x[2] for x in dst_samples_loss[:bs] ] )
target_dstm = np.stack( [ x[3] for x in dst_samples_loss[:bs] ] )
target_dstm_em = np.stack( [ x[4] for x in dst_samples_loss[:bs] ] )
src_loss, dst_loss = self.src_dst_train (target_src, target_src, target_srcm, target_srcm_em, target_dst, target_dst, target_dstm, target_dstm_em)
src_loss, dst_loss = self.src_dst_train (target_src, target_src, target_srcm, target_srcm_em, target_dst, target_dst_train, target_dstm, target_dstm_em)
self.last_src_samples_loss = []
self.last_dst_samples_loss = []
@ -767,14 +791,14 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
self.D_train (warped_src, warped_dst)
if self.gan_power != 0:
self.D_src_dst_train (warped_src, target_src, target_srcm, target_srcm_em, warped_dst, target_dst, target_dstm, target_dstm_em)
self.D_src_dst_train (warped_src, target_src, target_srcm, target_srcm_em, warped_dst, target_dst_train, target_dstm, target_dstm_em)
return ( ('src_loss', np.mean(src_loss) ), ('dst_loss', np.mean(dst_loss) ), )
#override
def onGetPreview(self, samples, for_history=False):
( (warped_src, target_src, target_srcm, target_srcm_em),
(warped_dst, target_dst, target_dstm, target_dstm_em) ) = samples
(warped_dst, target_dst, target_dst_train, target_dstm, target_dstm_em) ) = samples
S, D, SS, DD, DDM, SD, SDM = [ np.clip( nn.to_data_format(x,"NHWC", self.model_data_format), 0.0, 1.0) for x in ([target_src,target_dst] + self.AE_view (target_src, target_dst) ) ]
DDM, SDM, = [ np.repeat (x, (3,), -1) for x in [DDM, SDM] ]