I have a custom training loop that can be simplified as follows:
inputs = tf.keras.Input(dtype=tf.float32, shape=(None, None, 3))
model = tf.keras.Model({"inputs": inputs}, {"loss": f(inputs)})
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9, nesterov=True)
for inputs in batches:
    with tf.GradientTape() as tape:
        results = model(inputs, training=True)
    grads = tape.gradient(results["loss"], model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
The TensorFlow documentation for ExponentialMovingAverage is not clear on how it should be used in a from-scratch training loop. Has anyone worked with this?
Additionally, how should the shadow variables be restored into the model if both are still in memory, and how can I check that the training variables were correctly updated?
Create the EMA object before the training loop:
ema = tf.train.ExponentialMovingAverage(decay=0.9999)
And then just apply the EMA after your optimization step. The ema object will keep shadow copies of your model's variables. (You don't need the call to tf.control_dependencies here; see the note in the documentation.)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
ema.apply(model.trainable_variables)
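Putting this together with the loop from the question, a minimal sketch (using the same model, optimizer, and batches as above; the first call to ema.apply creates the shadow variables):
ema = tf.train.ExponentialMovingAverage(decay=0.9999)

for inputs in batches:
    with tf.GradientTape() as tape:
        results = model(inputs, training=True)
    grads = tape.gradient(results["loss"], model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    # Update the shadow copies from the just-updated model variables.
    ema.apply(model.trainable_variables)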
Then, one way to use the shadow variables in your model is to assign each of the model's variables the value of its shadow variable, by calling the average method of the EMA object on it:
for var in model.trainable_variables:
    var.assign(ema.average(var))
model.save("model_with_shadow_variables.h5")
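If you want to evaluate or save with the averaged weights but keep training with the original ones afterwards, you can back up the current values before the swap and restore them later. A minimal sketch (backup is just a local list used for illustration, not part of the EMA API):
# Keep a copy of the current (non-averaged) weights.
backup = [tf.identity(var) for var in model.trainable_variables]

# Swap in the shadow (averaged) values.
for var in model.trainable_variables:
    var.assign(ema.average(var))

# ... evaluate or save the model here ...

# Restore the original training weights and continue training.
for var, saved in zip(model.trainable_variables, backup):
    var.assign(saved)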
Here is a working example of Exponential Moving Average with a customized model.fit (Ref.).
from tensorflow import keras
import tensorflow as tf
class EMACustomModel(keras.Model):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.ema = tf.train.ExponentialMovingAverage(decay=0.999)

    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        gradients = tape.gradient(loss, self.trainable_variables)
        opt_op = self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        '''About tf.control_dependencies:
        Note: In TensorFlow 2 with eager and/or Autograph, you should not
        require this method, as code executes in the expected order. Only use
        tf.control_dependencies when working with v1-style code or in a graph
        context such as inside Dataset.map.
        '''
        with tf.control_dependencies([opt_op]):
            self.ema.apply(self.trainable_variables)
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}
DummyModel
import numpy as np
input = keras.Input(shape=(28, 28))
flat = tf.keras.layers.Flatten()(input)
outputs = keras.layers.Dense(1)(flat)
model = EMACustomModel(input, outputs)
model.compile(optimizer="adam", loss="mse", metrics=["mae"])
DummyData
np.random.seed(101)
x = np.random.randint(0, 256, size=(50, 28, 28)).astype("float32")
y = np.random.random((50, 1))
print(x.shape, y.shape)
# train the model
model.fit(x, y, epochs=50, verbose=2)
...
...
Epoch 49/50
2/2 - 0s - loss: 189.8506 - mae: 10.8830
Epoch 50/50
2/2 - 0s - loss: 170.3690 - mae: 10.1046
model.trainable_weights[:1]
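To check that the trainable variables were actually updated and that the shadow copies track them, you can compare a weight with its EMA average after training; the two should be close but not identical. A quick check, assuming the model trained above:
w = model.trainable_weights[0]
print(w.numpy().ravel()[:5])                     # current trained values
print(model.ema.average(w).numpy().ravel()[:5])  # shadow (averaged) values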