I'm attempting to leverage TensorFlow 2.0's automatic differentiation to automate the calculation of certain gradients on financial instruments. Generally this involves a piecewise interpolation scheme between various "benchmark points". The simplest example is below:
    import tensorflow as tf
    MATURITIES = tf.constant([1.0, 2.0, 3.0, 5.0, 7.0, 10.0, 12.0, 15.0, 20.0, 25.0])
    CASH_FLOW_TIMES = tf.constant([n * 0.5 for n in range(1, 51)])
    YIELDS = tf.Variable([0.04153733, 0.0425888, 0.04662959, 0.05406879, 0.05728735, 0.0606996, 0.06182699, 0.05854381, 0.05376556, 0.0531946])
    @tf.function
    def linear(knot_y, knot_x, x):
        """Linear interpolation"""
        # Index of the left-hand knot for each x, clamped to [0, len(knot_x) - 2].
        i = tf.maximum(tf.minimum(tf.searchsorted(knot_x, x, side="right") - 1, knot_x.shape[0] - 2), 0)
        y_i = tf.gather(knot_y, i)
        x_i = tf.gather(knot_x, i)
        return y_i + y_i / x_i * (x - x_i)
    with tf.GradientTape() as tape:
        tape.watch(YIELDS)
        y = linear(YIELDS, MATURITIES, CASH_FLOW_TIMES)
    dydx = tape.gradient(y, YIELDS)
    y, dydx
Which outputs the following:
    (<tf.Tensor: id=1249, shape=(50,), dtype=float32, numpy=
     array([0.02076866, 0.04153733, 0.06230599, 0.0425888 , 0.053236  ,
            0.04662959, 0.05440119, 0.06217279, 0.06994438, 0.05406879,
            0.05947567, 0.06488255, 0.07028943, 0.05728735, 0.0613793 ,
            0.06547125, 0.06956321, 0.07365517, 0.07774712, 0.0606996 ,
            0.06373458, 0.06676956, 0.06980454, 0.06182699, 0.06440312,
            0.06697924, 0.06955536, 0.07213148, 0.07470761, 0.05854381,
            0.06049527, 0.06244673, 0.06439819, 0.06634965, 0.06830111,
            0.07025257, 0.07220403, 0.07415549, 0.07610695, 0.05376556,
            0.0551097 , 0.05645384, 0.05779798, 0.05914212, 0.06048626,
            0.06183039, 0.06317453, 0.06451868, 0.06586281, 0.06720695],
           dtype=float32)>,
     <tensorflow.python.framework.indexed_slices.IndexedSlices at 0x203027345c0>)
The issue is that, due (I suspect) to the tf.searchsorted or tf.gather calls, the gradient is an IndexedSlices, not a Tensor. This causes a problem when I need to chain multiple tf.functions together to value a security. For example, say I want to use a transformation of the YIELDS variable within my linear function:
    import tensorflow as tf
    MATURITIES = tf.constant([1.0, 2.0, 3.0, 5.0, 7.0, 10.0, 12.0, 15.0, 20.0, 25.0])
    CASH_FLOW_TIMES = tf.constant([n * 0.5 for n in range(1, 51)])
    YIELDS = tf.Variable([0.04153733, 0.0425888, 0.04662959, 0.05406879, 0.05728735, 0.0606996, 0.06182699, 0.05854381, 0.05376556, 0.0531946])
    @tf.function
    def logdf_from_yields(yields, times):
        """Log discount factors: -yield * time."""
        return tf.math.multiply(yields, times) * -1.0
    @tf.function
    def linear(knot_y, knot_x, x):
        """Linear interpolation"""
        # Index of the left-hand knot for each x, clamped to [0, len(knot_x) - 2].
        i = tf.maximum(tf.minimum(tf.searchsorted(knot_x, x, side="right") - 1, knot_x.shape[0] - 2), 0)
        y_i = tf.gather(knot_y, i)
        x_i = tf.gather(knot_x, i)
        return y_i + y_i / x_i * (x - x_i)
    with tf.GradientTape() as tape:
        tape.watch(YIELDS)
        y = linear(logdf_from_yields(YIELDS, MATURITIES), MATURITIES, CASH_FLOW_TIMES)
    dydx = tape.gradient(y, YIELDS)
    y, dydx
This code raises the following exception:
    ---------------------------------------------------------------------------
    AssertionError                            Traceback (most recent call last)
    <ipython-input-9-2bc3457894ea> in <module>
         20     tape.watch(YIELDS)
         21     y = linear(logdf_from_yields(YIELDS, MATURITIES), MATURITIES, CASH_FLOW_TIMES)
    ---> 22 dydx = tape.gradient(y, YIELDS)
         23 y, dydx
    ~\Anaconda3\lib\site-packages\tensorflow\python\eager\backprop.py in gradient(self, target, sources, output_gradients, unconnected_gradients)
       1000         output_gradients=output_gradients,
       1001         sources_raw=flat_sources_raw,
    -> 1002         unconnected_gradients=unconnected_gradients)
       1003 
       1004     if not self._persistent:
    ~\Anaconda3\lib\site-packages\tensorflow\python\eager\imperative_grad.py in imperative_grad(tape, target, sources, output_gradients, sources_raw, unconnected_gradients)
         74       output_gradients,
         75       sources_raw,
    ---> 76       compat.as_str(unconnected_gradients.value))
    ~\Anaconda3\lib\site-packages\tensorflow\python\eager\function.py in backward_function(*args)
        904               if a is not None and i not in skip_positions]
        905       return self._backward_graph_function._call_flat(  # pylint: disable=protected-access
    --> 906           list(args) + side_outputs)
        907 
        908     tape.record_operation(self._forward_function.signature.name, real_outputs,
    ~\Anaconda3\lib\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args)
        610     if any(isinstance(a, composite_tensor.CompositeTensor) for a in args):
        611       raise AssertionError("Expected all args to be Tensors or Variables; "
    --> 612                            "but got CompositeTensor: %r" % args)
        613 
        614     if (tape.could_possibly_record() or
    AssertionError: Expected all args to be Tensors or Variables; but got CompositeTensor: [<tensorflow.python.framework.indexed_slices.IndexedSlices object at 0x00000203013C2128>, <tf.Tensor: id=1024, shape=(), dtype=float32, numpy=-1.0>, <tf.Tensor: id=1025, shape=(10,), dtype=float32, numpy=
    array([0.04153733, 0.0851776 , 0.13988876, 0.27034396, 0.40101147,
           0.606996  , 0.74192387, 0.87815714, 1.0753112 , 1.329865  ],
          dtype=float32)>, <tf.Tensor: id=1026, shape=(10,), dtype=float32, numpy=array([ 1.,  2.,  3.,  5.,  7., 10., 12., 15., 20., 25.], dtype=float32)>, <tf.Tensor: id=1027, shape=(10,), dtype=float32, numpy=
    array([0.04153733, 0.0425888 , 0.04662959, 0.05406879, 0.05728735,
           0.0606996 , 0.06182699, 0.05854381, 0.05376556, 0.0531946 ],
          dtype=float32)>]
Inspecting the traceback, I see that the only item that is not a Tensor or Variable is the IndexedSlices, which, again, I believe comes from the linear function.
Is there a way for me to rewrite the piecewise linear function (e.g. using different functions) so that it will work with the gradient tape?
What have I tried?
I took a look at this question, but it seemed like the writer was trying to make the index itself a variable. Here, all of my indices are known ahead of time (because MATURITIES and CASH_FLOW_TIMES are fixed for any given instance of the problem; only YIELDS is a tf.Variable), so I wasn't sure how it applied.
This question was also interesting in that it recommended using tf.convert_to_tensor, but I'm not sure how to apply it in my case.
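For what it's worth, densifying after the fact does seem to work in my first example (a minimal sketch, reusing the definitions above); the trouble is that in the second example the exception is raised inside tape.gradient itself, so there is never a gradient for me to convert:

    with tf.GradientTape() as tape:
        tape.watch(YIELDS)
        y = linear(YIELDS, MATURITIES, CASH_FLOW_TIMES)
    dydx = tape.gradient(y, YIELDS)          # tf.IndexedSlices
    dydx_dense = tf.convert_to_tensor(dydx)  # dense tf.Tensor of shape (10,)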
Your gradient seems to be fine. TensorFlow uses tf.IndexedSlices to represent sparse gradients for ops like tf.gather, but you can easily convert it to a regular tensor like this (the example is in graph mode, but the conversion function is the same in eager mode):
    import tensorflow as tf

    def convert_indexed_slices_to_tensor(idx_slices):
        """Scatter the slice values into a dense zero tensor of the full shape."""
        return tf.scatter_nd(tf.expand_dims(idx_slices.indices, 1),
                             idx_slices.values, idx_slices.dense_shape)

    # Test (graph mode; in TF 2.x, tf.Session lives under tf.compat.v1)
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
        a = tf.constant([1., 2., 3., 4.])
        b = tf.gather(a, [0, 2])
        g = tf.gradients(b, a)[0]
        print(g)
        # IndexedSlices(indices=..., values=..., dense_shape=...)
        g_dense = convert_indexed_slices_to_tensor(g)
        print(g_dense)
        # Tensor(...)
        print(sess.run(g_dense))
        # [1. 0. 1. 0.]
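In TF 2.x eager mode the same helper works directly on a tape gradient; a minimal sketch:

    # Eager-mode usage (TF 2.x): the gradient of a gather is an IndexedSlices
    a = tf.Variable([1., 2., 3., 4.])
    with tf.GradientTape() as tape:
        b = tf.gather(a, [0, 2])
    g = tape.gradient(b, a)                     # tf.IndexedSlices
    print(convert_indexed_slices_to_tensor(g))  # tf.Tensor([1. 0. 1. 0.], ...)

(tf.convert_to_tensor(g) would also densify it, since IndexedSlices registers a tensor conversion.)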
If you want to force tf.gather to produce a regular tensor as its gradient, you can wrap it with tf.custom_gradient like this:
    import tensorflow as tf

    @tf.custom_gradient
    def gather_dense_gradient(params, indices, validate_indices=None, name=None):
        def grad(ys):
            # Scatter the incoming gradient back into a dense tensor shaped like
            # params; the indices themselves get no gradient (None).
            return tf.scatter_nd(tf.expand_dims(indices, 1), ys, tf.shape(params)), None
        return tf.gather(params, indices, validate_indices, name), grad

    # Test (graph mode; in TF 2.x, tf.Session lives under tf.compat.v1)
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
        a = tf.constant([1., 2., 3., 4.])
        b = gather_dense_gradient(a, [0, 2])
        g = tf.gradients(b, a)[0]
        print(sess.run(g))
        # [1. 0. 1. 0.]
Note this assumes axis=0 and one-dimensional indices; the same approach would still work for other cases, but it would require a bit more work.
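For the chained example in the question, it should be enough to swap gather_dense_gradient in for the tf.gather on knot_y; here is a sketch, reusing the question's definitions (YIELDS, MATURITIES, CASH_FLOW_TIMES, logdf_from_yields):

    @tf.function
    def linear_dense(knot_y, knot_x, x):
        """The question's linear(), with a dense-gradient gather on knot_y."""
        i = tf.maximum(tf.minimum(tf.searchsorted(knot_x, x, side="right") - 1, knot_x.shape[0] - 2), 0)
        y_i = gather_dense_gradient(knot_y, i)
        x_i = tf.gather(knot_x, i)  # knot_x is a constant, so its sparse gradient is harmless
        return y_i + y_i / x_i * (x - x_i)

    with tf.GradientTape() as tape:
        tape.watch(YIELDS)
        y = linear_dense(logdf_from_yields(YIELDS, MATURITIES), MATURITIES, CASH_FLOW_TIMES)
    dydx = tape.gradient(y, YIELDS)  # now a regular dense tf.Tensor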