diff --git a/core/leras/optimizers/AdaBelief.py b/core/leras/optimizers/AdaBelief.py index dd004f8..b283624 100644 --- a/core/leras/optimizers/AdaBelief.py +++ b/core/leras/optimizers/AdaBelief.py @@ -1,9 +1,11 @@ -from tensorflow.python.ops import control_flow_ops, state_ops +import numpy as np from core.leras import nn +from tensorflow.python.ops import control_flow_ops, state_ops + tf = nn.tf class AdaBelief(nn.OptimizerBase): - def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, lr_dropout=1.0, lr_cos=0, epsilon=1e-7, clipnorm=0.0, name=None, **kwargs): + def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, lr_dropout=1.0, lr_cos=0, clipnorm=0.0, name=None, **kwargs): super().__init__(name=name) if name is None: @@ -15,7 +17,6 @@ class AdaBelief(nn.OptimizerBase): self.lr_dropout = lr_dropout self.lr_cos = lr_cos self.clipnorm = clipnorm - self.epsilon = epsilon with tf.device('/CPU:0') : with tf.variable_scope(self.name): @@ -66,7 +67,7 @@ class AdaBelief(nn.OptimizerBase): if self.lr_cos != 0: lr *= (tf.cos( tf.cast(self.iterations, g.dtype) * (2*3.1415926535/ float(self.lr_cos) ) ) + 1.0) / 2.0 - v_diff = - lr * m_t / (tf.sqrt(v_t) + self.epsilon) + v_diff = - lr * m_t / (tf.sqrt(v_t) + np.finfo( m_t.dtype.as_numpy_dtype ).resolution ) if self.lr_dropout != 1.0: lr_rnd = self.lr_rnds_dict[v.name] v_diff *= lr_rnd @@ -77,4 +78,4 @@ class AdaBelief(nn.OptimizerBase): updates.append (state_ops.assign(v, new_v)) return control_flow_ops.group ( *updates, name=self.name+'_updates') -nn.AdaBelief = AdaBelief \ No newline at end of file +nn.AdaBelief = AdaBelief diff --git a/core/leras/optimizers/RMSprop.py b/core/leras/optimizers/RMSprop.py index d2eb605..2c2ab30 100644 --- a/core/leras/optimizers/RMSprop.py +++ b/core/leras/optimizers/RMSprop.py @@ -1,9 +1,10 @@ +import numpy as np from tensorflow.python.ops import control_flow_ops, state_ops from core.leras import nn tf = nn.tf class RMSprop(nn.OptimizerBase): - def __init__(self, lr=0.001, rho=0.9, 
lr_dropout=1.0, epsilon=1e-7, clipnorm=0.0, name=None, **kwargs): + def __init__(self, lr=0.001, rho=0.9, lr_dropout=1.0, clipnorm=0.0, name=None, **kwargs): super().__init__(name=name) if name is None: @@ -12,7 +13,6 @@ class RMSprop(nn.OptimizerBase): self.lr_dropout = lr_dropout self.lr = lr self.rho = rho - self.epsilon = epsilon self.clipnorm = clipnorm @@ -59,7 +59,7 @@ class RMSprop(nn.OptimizerBase): lr = tf.constant(self.lr, g.dtype) - v_diff = - lr * g / (tf.sqrt(new_a) + self.epsilon) + v_diff = - lr * g / (tf.sqrt(new_a) + np.finfo( new_a.dtype.as_numpy_dtype ).resolution ) if self.lr_dropout != 1.0: lr_rnd = self.lr_rnds_dict[v.name] v_diff *= lr_rnd