
Custom optimizer for TensorFlow

I'm trying to experiment with custom optimization algorithms for neural networks in TensorFlow, but I'm stuck because of the lack of information on the topic. What I need is code that, at each iteration, gives me a vector x (the current point) and a vector g (the gradient at x); I then update x myself; and finally I need code that writes the updated values back. Here's what I have at the moment:

from tensorflow.python.framework import ops
from tensorflow.python.training import optimizer
import tensorflow as tf
import numpy as np

class TestGD(optimizer.Optimizer):
  def __init__(self, rad=0.01,
               use_locking=False, name="TestGD"):
    super(TestGD, self).__init__(use_locking, name)
    self._radius = rad

  def _create_slots(self, var_list):
    # Note: len(var_list) is the number of variables, not their dimensionality
    num_dims = len(var_list)
    self._beta = (num_dims - 1) / (num_dims + 1)
    self._B_matrix = np.identity(num_dims)

  def _prepare(self):
    self._radn_t = ops.convert_to_tensor(self._call_if_callable(self._radius), name="radius")
    self._beta_t = ops.convert_to_tensor(self._call_if_callable(self._beta), name="beta")
    self._B_matrix_t = ops.convert_to_tensor(self._call_if_callable(self._B_matrix), name="B")

  def _apply_dense(self, grad, var):
    return self._resource_apply_dense(grad, var)

  def _resource_apply_dense(self, grad, var):
    print(grad.shape, "<-----------")
    #I'm planning to implement my algorithm somewhere here
    var_update = tf.compat.v1.assign_sub(var, 0.01 * grad)
    return tf.group(var_update)

  def _apply_sparse(self, grad, var):
    raise NotImplementedError("Sparse gradient updates are not supported.")


# Build LeNet model
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(6, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(16, kernel_size=(5, 5), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(120, activation='relu'),
    tf.keras.layers.Dense(84, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Use the custom optimizer
custom_optimizer = TestGD()

# Compile the model with your custom optimizer
model.compile(optimizer=custom_optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Getting dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # Normalize pixel values to between 0 and 1

x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")

train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=60000).batch(64)

test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset = test_dataset.batch(64)

# training
model.fit(train_dataset, epochs=5)

# evaluation
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc}")

The problem is that I get very strange shapes for grad and var; they're definitely not vectors. What should I do to reduce the problem to the vectors x and g, and how do I correctly write the results back after my minimization step?


1 Answer

def _resource_apply_dense(self, grad, var):
    # Flatten the variable and its gradient into the 1-D vectors x and g
    x = tf.reshape(var, [-1])
    g = tf.reshape(grad, [-1])
    # Apply the update on the flat vectors (plain gradient step shown here)
    x_new = x - 0.01 * g
    # Reshape back to the variable's original shape before assigning
    return var.assign(tf.reshape(x_new, tf.shape(var)))

Flattening the gradient to a 1-D vector: in your original code, grad arrives with the same shape as the variable it belongs to, not as a vector, and the optimizer method is called once per variable. For example, the kernel of the first Conv2D layer yields a gradient of shape (5, 5, 1, 6). That is why the shapes look "strange": they are the natural multi-dimensional parameter tensors. tf.reshape(grad, [-1]) turns any of them into the vector g you want.
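As a standalone illustration of the flatten/restore round trip (a minimal sketch, not tied to the optimizer class; the variable name kernel is made up for the example):

import tensorflow as tf

kernel = tf.Variable(tf.random.normal([5, 5, 1, 6]))  # same shape as the first Conv2D kernel
g = tf.reshape(kernel, [-1])                          # flatten: shape (150,)
kernel.assign(tf.reshape(g, kernel.shape))            # reshape back and write the values in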

Why the reshape back matters: tf.compat.v1.assign_sub and var.assign require the update to have the same shape as the variable, not a 1-D tensor. So after updating the flattened vector you must reshape it back to the variable's original shape before assigning, as done above.
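If your algorithm needs a single global x and g covering all of the network's parameters at once (rather than one vector per variable), a custom training loop is often easier than subclassing Optimizer. Below is a minimal sketch assuming the model and train_dataset defined in the question; the helpers flatten_to_vector and assign_from_vector are names introduced here for illustration:

import tensorflow as tf

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

def flatten_to_vector(tensors):
    # Concatenate every tensor, flattened, into one long 1-D vector
    return tf.concat([tf.reshape(t, [-1]) for t in tensors], axis=0)

def assign_from_vector(variables, vector):
    # Slice the flat vector and write each piece back into its variable
    offset = 0
    for v in variables:
        size = int(tf.size(v))
        v.assign(tf.reshape(vector[offset:offset + size], v.shape))
        offset += size

for x_batch, y_batch in train_dataset:
    with tf.GradientTape() as tape:
        loss = loss_fn(y_batch, model(x_batch, training=True))
    grads = tape.gradient(loss, model.trainable_variables)
    x = flatten_to_vector(model.trainable_variables)  # current point x
    g = flatten_to_vector(grads)                      # gradient g at x
    x = x - 0.01 * g                                  # your custom update goes here
    assign_from_vector(model.trainable_variables, x)

This runs eagerly, so x and g are concrete vectors at every iteration, and the only bookkeeping is the flatten/slice/reshape round trip.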


