Mirror of https://github.com/iperov/DeepFaceLab.git (synced 2024-03-22 13:10:55 +08:00)
More stable and precise version of the face transformation matrix.
Fixed bleeding mask on some samples.
commit 64021b9c62 (parent 068c7d0d55)
@@ -83,7 +83,7 @@ class FANExtractor(object):
         for i, lmrks in enumerate(landmarks):
             try:
                 if lmrks is not None:
-                    image_to_face_mat = LandmarksProcessor.get_transform_mat (lmrks, 256, FaceType.FULL)
+                    image_to_face_mat = LandmarksProcessor.get_transform_mat (lmrks, 256, FaceType.FULL, full_face_align_top=False)
                     face_image = cv2.warpAffine(input_image, image_to_face_mat, (256, 256), cv2.INTER_CUBIC )

                     rects2 = second_pass_extractor.extract(face_image, is_bgr=is_bgr)
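This is the second-pass refinement pattern: warp the frame into an aligned 256x256 crop with the matrix from get_transform_mat, run the extractor again on that easier canonical view, then map the results back through the inverse transform. A minimal sketch of the round trip; warp_face and landmarks_to_image_space are hypothetical helper names for illustration, not functions from this repo:

import cv2
import numpy as np

def warp_face(image, mat, size=256):
    # Forward warp: cut an aligned face patch out of the full frame
    # using a 2x3 affine matrix like the one get_transform_mat returns.
    return cv2.warpAffine(image, mat, (size, size), flags=cv2.INTER_CUBIC)

def landmarks_to_image_space(landmarks, mat):
    # Invert the 2x3 affine and map face-space landmarks back to frame space.
    inv = cv2.invertAffineTransform(mat)
    pts = np.float32(landmarks).reshape(-1, 1, 2)
    return cv2.transform(pts, inv).reshape(-1, 2)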
@@ -183,6 +183,15 @@ landmarks_68_3D = np.array( [
 [0.205322  , 31.408738  , -21.903670 ],
 [-7.198266  , 30.844876  , -20.328022 ] ], dtype=np.float32)

+FaceType_to_padding_remove_align = {
+    FaceType.HALF: (0.0, False),
+    FaceType.MID_FULL: (0.0675, False),
+    FaceType.FULL: (0.2109375, False),
+    FaceType.FULL_NO_ALIGN: (0.2109375, True),
+    FaceType.HEAD: (0.369140625, False),
+    FaceType.HEAD_NO_ALIGN: (0.369140625, True),
+}
+
 def convert_98_to_68(lmrks):
     #jaw
     result = [ lmrks[0] ]
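The new table replaces the per-face-type if/elif padding chain deleted in the next hunk: each face type maps to (padding as a fraction of the crop size, remove_align flag). The fractions are also slightly larger than the old pixel-based values (FULL: 0.2109375 = 13.5/64 versus the old 12/64). One reading of the new lookup, `FaceType_to_padding_remove_align.get(face_type, 0.0)`: the default is a scalar, so an unknown face type would fail at tuple unpacking; every FaceType the callers use is in the table. A small sketch of the lookup, with stand-in enum values (DeepFaceLab defines FaceType in its own module):

from enum import IntEnum

class FaceType(IntEnum):       # stand-in; values here are illustrative
    HALF = 0
    MID_FULL = 1
    FULL = 2
    FULL_NO_ALIGN = 3
    HEAD = 4
    HEAD_NO_ALIGN = 5

FaceType_to_padding_remove_align = {
    FaceType.HALF:          (0.0,         False),
    FaceType.MID_FULL:      (0.0675,      False),
    FaceType.FULL:          (0.2109375,   False),
    FaceType.FULL_NO_ALIGN: (0.2109375,   True),
    FaceType.HEAD:          (0.369140625, False),
    FaceType.HEAD_NO_ALIGN: (0.369140625, True),
}

padding, remove_align = FaceType_to_padding_remove_align[FaceType.FULL]
print(padding, remove_align)   # 0.2109375 False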
@@ -240,66 +249,62 @@ def transform_points(points, mat, invert=False):
     points = np.squeeze(points)
     return points

-def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0):
+def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0, full_face_align_top=True):
     if not isinstance(image_landmarks, np.ndarray):
         image_landmarks = np.array (image_landmarks)

-    """
-    if face_type == FaceType.AVATAR:
-        centroid = np.mean (image_landmarks, axis=0)
-
-        mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2]
-        a, c = mat[0,0], mat[1,0]
-        scale = math.sqrt((a * a) + (c * c))
-
-        padding = (output_size / 64) * 32
-
-        mat = np.eye ( 2,3 )
-        mat[0,2] = -centroid[0]
-        mat[1,2] = -centroid[1]
-        mat = mat * scale * (output_size / 3)
-        mat[:,2] += output_size / 2
-    else:
-    """
-    remove_align = False
-    if face_type == FaceType.FULL_NO_ALIGN:
-        face_type = FaceType.FULL
-        remove_align = True
-    elif face_type == FaceType.HEAD_NO_ALIGN:
-        face_type = FaceType.HEAD
-        remove_align = True
-
-    if face_type == FaceType.HALF:
-        padding = 0
-    elif face_type == FaceType.MID_FULL:
-        padding = int(output_size * 0.06)
-    elif face_type == FaceType.FULL:
-        padding = (output_size / 64) * 12
-    elif face_type == FaceType.HEAD:
-        padding = (output_size / 64) * 21
-    else:
-        raise ValueError ('wrong face_type: ', face_type)
-
-    mat = mat * (output_size - 2 * padding)
-    mat[:,2] += padding
-    mat *= (1 / scale)
-    mat[:,2] += -output_size*( ( (1 / scale) - 1.0 ) / 2 )
+    padding, remove_align = FaceType_to_padding_remove_align.get(face_type, 0.0)
+
+    #mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2]
+    mat = umeyama( np.concatenate ( [ image_landmarks[17:49] , image_landmarks[54:55] ] ) , landmarks_2D_new, True)[0:2]
+    l_p = transform_points ( np.float32([(0,0),(1,0),(1,1),(0,1),(0.5,0.5)]) , mat, True)
+    l_c = l_p[4]
+
+    tb_diag_vec = (l_p[2]-l_p[0]).astype(np.float32)
+    tb_diag_vec /= npla.norm(tb_diag_vec)
+    bt_diag_vec = (l_p[1]-l_p[3]).astype(np.float32)
+    bt_diag_vec /= npla.norm(bt_diag_vec)
+
+    mod = (1.0 / scale)* ( npla.norm(l_p[0]-l_p[2])*(padding*np.sqrt(2.0) + 0.5) )
+
+    l_t = np.array( [ np.round( l_c - tb_diag_vec*mod ),
+                      np.round( l_c + bt_diag_vec*mod ),
+                      np.round( l_c + tb_diag_vec*mod ) ] )
+
+    pts2 = np.float32(( (0,0),(output_size,0),(output_size,output_size) ))
+    mat = cv2.getAffineTransform(l_t,pts2)
+
+    #if full_face_align_top and (face_type == FaceType.FULL or face_type == FaceType.FULL_NO_ALIGN):
+    #    #lmrks2 = expand_eyebrows(image_landmarks)
+    #    #lmrks2_ = transform_points( [ lmrks2[19], lmrks2[24] ], mat, False )
+    #    #y_diff = np.float32( (0,np.min(lmrks2_[:,1])) )
+    #    #y_diff = transform_points( [ np.float32( (0,0) ), y_diff], mat, True)
+    #    #y_diff = y_diff[1]-y_diff[0]
+    #
+    #    x_diff = np.float32((0,0))
+    #
+    #    lmrks2_ = transform_points( [ image_landmarks[0], image_landmarks[16] ], mat, False )
+    #    if lmrks2_[0,0] < 0:
+    #        x_diff = lmrks2_[0,0]
+    #        x_diff = transform_points( [ np.float32( (0,0) ), np.float32((x_diff,0)) ], mat, True)
+    #        x_diff = x_diff[1]-x_diff[0]
+    #    elif lmrks2_[1,0] >= output_size:
+    #        x_diff = lmrks2_[1,0]-(output_size-1)
+    #        x_diff = transform_points( [ np.float32( (0,0) ), np.float32((x_diff,0)) ], mat, True)
+    #        x_diff = x_diff[1]-x_diff[0]
+    #
+    #    mat = cv2.getAffineTransform( l_t+y_diff+x_diff ,pts2)

     if remove_align:
-        bbox = transform_points ( [ (0,0), (0,output_size-1), (output_size-1, output_size-1), (output_size-1,0) ], mat, True)
+        bbox = transform_points ( [ (0,0), (0,output_size), (output_size, output_size), (output_size,0) ], mat, True)
         area = mathlib.polygon_area(bbox[:,0], bbox[:,1] )
         side = math.sqrt(area) / 2
         center = transform_points ( [(output_size/2,output_size/2)], mat, True)

-        pts1 = np.float32([ center+[-side,-side], center+[side,-side], center+[-side,side] ])
-        pts2 = np.float32([[0,0],[output_size-1,0],[0,output_size-1]])
+        pts1 = np.float32(( center+[-side,-side], center+[side,-side], center+[-side,side] ))
         mat = cv2.getAffineTransform(pts1,pts2)

     return mat


 def expand_eyebrows(lmrks, eyebrows_expand_mod=1.0):
     if len(lmrks) != 68:
         raise Exception('works only with 68 landmarks')
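This hunk is the heart of the commit. The old path solved umeyama against all inner landmarks (landmarks_2D) and then shifted and scaled the raw similarity matrix by per-face-type padding. The new path solves against a trimmed subset (brows, nose, eyes and the two mouth corners: indices 17:49 plus 54:55) and a new template (landmarks_2D_new), then rebuilds the crop square explicitly in image space: invert the similarity transform to find where the unit square lands, keep its center, and grow a square along the two diagonals by a padding-dependent amount. A runnable sketch of that geometry, using a toy similarity matrix in place of the umeyama result; transform_points mirrors the function named in the hunk header, and the padding value comes from the table added above:

import cv2
import numpy as np
import numpy.linalg as npla

def transform_points(points, mat, invert=False):
    # Apply (or invert) a 2x3 affine matrix to an (N, 2) point array.
    if invert:
        mat = cv2.invertAffineTransform(mat)
    points = np.expand_dims(np.float32(points), axis=1)  # (N, 1, 2) for cv2.transform
    return np.squeeze(cv2.transform(points, mat))

# Toy stand-in for the umeyama result: scale 0.5, translation (10, 20).
mat = np.float32([[0.5, 0.0, 10.0],
                  [0.0, 0.5, 20.0]])

# Where do the unit square's corners and center land in image space?
l_p = transform_points(np.float32([(0,0), (1,0), (1,1), (0,1), (0.5,0.5)]), mat, True)
l_c = l_p[4]                                   # crop center in image space
tb_diag_vec = (l_p[2] - l_p[0]).astype(np.float32)
tb_diag_vec /= npla.norm(tb_diag_vec)          # unit diagonal: top-left -> bottom-right
bt_diag_vec = (l_p[1] - l_p[3]).astype(np.float32)
bt_diag_vec /= npla.norm(bt_diag_vec)          # unit diagonal: bottom-left -> top-right

padding, scale = 0.2109375, 1.0                # FULL padding from the table above
mod = (1.0 / scale) * (npla.norm(l_p[0] - l_p[2]) * (padding * np.sqrt(2.0) + 0.5))

# Three corners of the grown crop square map to three corners of the output.
l_t = np.float32([l_c - tb_diag_vec * mod,
                  l_c + bt_diag_vec * mod,
                  l_c + tb_diag_vec * mod])
output_size = 256
pts2 = np.float32([(0,0), (output_size,0), (output_size,output_size)])
crop_mat = cv2.getAffineTransform(l_t, pts2)

Because all three target corners are rebuilt from one center point and two unit diagonals, jitter in individual landmarks moves the crop less than in the old formulation, which is presumably what "more stable and precise" in the commit message refers to.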
@@ -627,7 +632,7 @@ def draw_rect_landmarks (image, rect, image_landmarks, face_size, face_type, tra
     image_to_face_mat = get_transform_mat (image_landmarks, face_size, face_type)
     points = transform_points ( [ (0,0), (0,face_size-1), (face_size-1, face_size-1), (face_size-1,0) ], image_to_face_mat, True)
     imagelib.draw_polygon (image, points, (0,0,255), 2)

     points = transform_points ( [ ( int(face_size*0.05), 0), ( int(face_size*0.1), int(face_size*0.1) ), ( 0, int(face_size*0.1) ) ], image_to_face_mat, True)
     imagelib.draw_polygon (image, points, (0,0,255), 2)
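draw_rect_landmarks back-projects the aligned crop's outline, plus a small triangle near its top-left corner that shows the crop's orientation, onto the source frame via the inverted transform. imagelib.draw_polygon is DeepFaceLab's own helper; a stand-in sketch with plain OpenCV, under the assumption that it closes the polygon and draws it in the given BGR color:

import cv2
import numpy as np

def draw_polygon(image, points, color, thickness=1):
    # Stand-in for imagelib.draw_polygon: round to pixel coords, close the loop.
    pts = np.round(np.float32(points)).astype(np.int32).reshape(-1, 1, 2)
    cv2.polylines(image, [pts], isClosed=True, color=color, thickness=thickness)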
@@ -57,16 +57,16 @@ class TernausNet(object):
             real_t = Input ( (resolution, resolution, 1) )
             out_t = self.model(inp_t)

-            loss = K.mean(10*K.binary_crossentropy(real_t,out_t), axis=[1,2,3] )
+            loss = K.mean(10*K.binary_crossentropy(real_t,out_t) )

             out_t_diff1 = out_t[:, 1:, :, :] - out_t[:, :-1, :, :]
             out_t_diff2 = out_t[:, :, 1:, :] - out_t[:, :, :-1, :]

             total_var_loss = K.mean( 0.1*K.abs(out_t_diff1), axis=[1, 2, 3] ) + K.mean( 0.1*K.abs(out_t_diff2), axis=[1, 2, 3] )

-            opt = RMSprop(lr=0.0001, lr_dropout=0.3, tf_cpu_mode=2)
+            opt = Adam(lr=0.0001, beta_1=0.5, beta_2=0.999, tf_cpu_mode=2)

-            self.train_func = K.function ( [inp_t, real_t], [K.mean(loss)], opt.get_updates( loss, self.model.trainable_weights) )
+            self.train_func = K.function ( [inp_t, real_t], [K.mean(loss)], opt.get_updates( [loss], self.model.trainable_weights) )


     def __enter__(self):
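The mask model trains on a weighted binary cross-entropy (now reduced to a scalar mean, matching the [loss] list passed to opt.get_updates), with a total-variation term computed alongside it that penalizes differences between adjacent pixels and so discourages speckle in the predicted mask. A minimal sketch of that regularizer, assuming the standalone Keras backend K that DeepFaceLab's nnlib wraps and an NHWC mask tensor in [0, 1]:

import keras.backend as K

def total_variation_loss(out_t, weight=0.1):
    # Neighbor differences along height and width; penalizing their
    # magnitude smooths isolated speckle in the predicted mask.
    d1 = out_t[:, 1:, :, :] - out_t[:, :-1, :, :]
    d2 = out_t[:, :, 1:, :] - out_t[:, :, :-1, :]
    return ( K.mean(weight * K.abs(d1), axis=[1, 2, 3])
           + K.mean(weight * K.abs(d2), axis=[1, 2, 3]) )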
@@ -54,7 +54,7 @@ class SampleGeneratorFace(SampleGeneratorBase):
         if self.samples_len == 0:
             raise ValueError('No training data provided.')

-        ct_samples = SampleLoader.load (SampleType.FACE, random_ct_samples_path) if random_ct_samples_path is not None else None
+        ct_samples = SampleLoader.load (SampleType.FACE, random_ct_samples_path, use_caching=use_caching) if random_ct_samples_path is not None else None

         if self.debug:
             self.generators_count = 1
@@ -177,8 +177,8 @@ class SampleProcessor(object):
                 if len(mask.shape) == 2:
                     mask = mask[...,np.newaxis]

-                img = np.concatenate( (img, mask ), -1 )
-                return img
+                return img, mask

             img = sample_bgr
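do_transform now hands the mask back as a separate array instead of pre-concatenating it onto the image, so the callers below can warp the two independently. When the mask is finally concatenated, it needs an explicit channel axis; a small sketch of the shapes involved, with illustrative sizes:

import numpy as np

img = np.zeros((256, 256, 3), np.float32)   # BGR image
mask = np.zeros((256, 256), np.float32)     # single-channel mask
if len(mask.shape) == 2:
    mask = mask[..., np.newaxis]            # (H, W) -> (H, W, 1)
print(np.concatenate((img, mask), -1).shape)  # (256, 256, 4): BGR + mask plane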
@@ -222,14 +222,20 @@ class SampleProcessor(object):
                     img = cv2.warpAffine( img, LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], target_ft), (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
+                    mask = cv2.warpAffine( mask, LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], target_ft), (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
                     #then apply transforms
-                    img = do_transform (img, mask)
+                    img, mask = do_transform (img, mask)
                     img = np.concatenate( (img, mask ), -1 )
                     img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )
                 else:
-                    img = do_transform (img, mask)
-                    img = cv2.warpAffine( img, LandmarksProcessor.get_transform_mat (sample.landmarks, resolution, target_ft), (resolution,resolution), borderMode=(cv2.BORDER_REPLICATE if border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC )
+                    img, mask = do_transform (img, mask)
+
+                    mat = LandmarksProcessor.get_transform_mat (sample.landmarks, resolution, target_ft)
+                    img = cv2.warpAffine( img, mat, (resolution,resolution), borderMode=(cv2.BORDER_REPLICATE if border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC )
+                    mask = cv2.warpAffine( mask, mat, (resolution,resolution), borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_CUBIC )
+                    img = np.concatenate( (img, mask[...,None] ), -1 )
             else:
-                img = do_transform (img, mask)
+                img, mask = do_transform (img, mask)
                 img = np.concatenate( (img, mask ), -1 )
                 img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )

             if random_sub_res != 0:
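This hunk is the likely source of the "bleeding mask" fix: previously the mask rode along as a fourth channel through a single warpAffine call with BORDER_REPLICATE, so replicated image borders could smear nonzero values into the mask plane. Now image and mask are warped separately with the same matrix but different border modes. A sketch of that pattern, assuming a 2x3 mat such as get_transform_mat returns:

import cv2

def warp_pair(img, mask, mat, resolution):
    # Same geometry for both; replicate borders only for the image so the
    # network never sees black margins, constant (zero) borders for the mask
    # so edge values cannot bleed into it.
    img = cv2.warpAffine(img, mat, (resolution, resolution),
                         borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC)
    mask = cv2.warpAffine(mask, mat, (resolution, resolution),
                          borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_CUBIC)
    return img, mask

Cubic interpolation can overshoot slightly outside [0, 1] on the mask; clipping afterwards with np.clip is a common guard (an extra precaution here, not something this commit adds).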