
Custom optimizer for TensorFlow

I'm trying to experiment with custom optimization algorithms for neural networks in TensorFlow, but I'm stuck because of the lack of information on the topic. What I need is code that, at each iteration, gives me a vector x (the current point) and a vector g (the gradient at x); I then update x myself; and finally I need code that writes the updated values back. Here's what I have at the moment:

from tensorflow.python.framework import ops
from tensorflow.python.training import optimizer
import tensorflow as tf
import numpy as np

class TestGD(optimizer.Optimizer):
  def __init__(self, rad=0.01,
               use_locking=False, name="TestGD"):
    super(TestGD, self).__init__(use_locking, name)
    self._radius = rad

  def _create_slots(self, var_list):
    # Note: len(var_list) is the number of variables, not their dimensionality
    num_dims = len(var_list)
    self._beta = (num_dims - 1) / (num_dims + 1)
    self._B_matrix = np.identity(num_dims)

  def _prepare(self):
    self._radn_t = ops.convert_to_tensor(self._call_if_callable(self._radius), name="radius")
    self._beta_t = ops.convert_to_tensor(self._call_if_callable(self._beta), name="beta")
    self._B_matrix_t = ops.convert_to_tensor(self._call_if_callable(self._B_matrix), name="B")

  def _apply_dense(self, grad, var):
    return self._resource_apply_dense(grad, var)

  def _resource_apply_dense(self, grad, var):
    print(grad.shape, "<-----------")
    #I'm planning to implement my algorithm somewhere here
    var_update = tf.compat.v1.assign_sub(var, 0.01 * grad)
    return tf.group(var_update)

  def _apply_sparse(self, grad, var):
    raise NotImplementedError("Sparse gradient updates are not supported.")


# Build LeNet model
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(6, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(16, kernel_size=(5, 5), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(120, activation='relu'),
    tf.keras.layers.Dense(84, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Use the custom optimizer
custom_optimizer = TestGD()

# Compile the model with your custom optimizer
model.compile(optimizer=custom_optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Getting dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # Normalize pixel values to between 0 and 1

x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")

train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=60000).batch(64)

test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset = test_dataset.batch(64)

# training
model.fit(train_dataset, epochs=5)

# evaluation
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc}")

The problem is that I get very strange shapes for grad and var; they're definitely not vectors. What should I do to reduce the problem to the vectors x and g, and how do I correctly write the results back after my minimization step?


1 Answer

def _resource_apply_dense(self, grad, var):
    # Flatten the variable and its gradient into the 1-D vectors x and g
    x = tf.reshape(var, [-1])
    g = tf.reshape(grad, [-1])
    # Apply the update on the flat vectors (plain gradient step shown here)
    x_new = x - 0.01 * g
    # Reshape back to the variable's original shape before assigning
    return var.assign(tf.reshape(x_new, tf.shape(var)))

Flattening the gradient to a 1-D vector: in your original code, grad arrives with the same shape as the variable it belongs to, not as a vector, and the optimizer method is called once per variable. For example, the kernel of the first Conv2D layer yields a gradient of shape (5, 5, 1, 6). That is why the shapes look "strange": they are the natural multi-dimensional parameter tensors. tf.reshape(grad, [-1]) turns any of them into the vector g you want.
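As a standalone illustration of the flatten/restore round trip (a minimal sketch, not tied to the optimizer class; the variable name kernel is made up for the example):

import tensorflow as tf

kernel = tf.Variable(tf.random.normal([5, 5, 1, 6]))  # same shape as the first Conv2D kernel
g = tf.reshape(kernel, [-1])                          # flatten: shape (150,)
kernel.assign(tf.reshape(g, kernel.shape))            # reshape back and write the values in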

Why the reshape back matters: tf.compat.v1.assign_sub and var.assign require the update to have the same shape as the variable, not a 1-D tensor. So after updating the flattened vector you must reshape it back to the variable's original shape before assigning, as done above.
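If your algorithm needs a single global x and g covering all of the network's parameters at once (rather than one vector per variable), a custom training loop is often easier than subclassing Optimizer. Below is a minimal sketch assuming the model and train_dataset defined in the question; the helpers flatten_to_vector and assign_from_vector are names introduced here for illustration:

import tensorflow as tf

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

def flatten_to_vector(tensors):
    # Concatenate every tensor, flattened, into one long 1-D vector
    return tf.concat([tf.reshape(t, [-1]) for t in tensors], axis=0)

def assign_from_vector(variables, vector):
    # Slice the flat vector and write each piece back into its variable
    offset = 0
    for v in variables:
        size = int(tf.size(v))
        v.assign(tf.reshape(vector[offset:offset + size], v.shape))
        offset += size

for x_batch, y_batch in train_dataset:
    with tf.GradientTape() as tape:
        loss = loss_fn(y_batch, model(x_batch, training=True))
    grads = tape.gradient(loss, model.trainable_variables)
    x = flatten_to_vector(model.trainable_variables)  # current point x
    g = flatten_to_vector(grads)                      # gradient g at x
    x = x - 0.01 * g                                  # your custom update goes here
    assign_from_vector(model.trainable_variables, x)

This runs eagerly, so x and g are concrete vectors at every iteration, and the only bookkeeping is the flatten/slice/reshape round trip.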


