Is this a correct gradient implementation for tf.scatter_nd_update ?









Unfortunately, TensorFlow does not provide a gradient for tf.scatter_nd_update, so the backward pass stops there. In essence, this function is just a series of assign operations into a tensor, and in each assignment the gradient at the written position should simply flow back to the value that was assigned.
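In other words, for an assignment out[i] = update[k], the backward pass should hand the incoming gradient at position i to update[k] and block it from the old value of phi at i, while the integer indices receive no gradient at all. If that reasoning is right, here is a minimal sketch of the rule I am contrasting my implementation against (assuming indices of shape [N, 4] with no duplicate rows, matching the shapes in my code below):

import tensorflow as tf

@tf.custom_gradient
def scatter_nd_update_with_grad(ref, indices, updates):
    # Forward pass: identical to the plain op.
    out = tf.scatter_nd_update(ref, indices, updates)

    def grad(dy):
        # The scattered positions got their values from `updates`,
        # so pick up dy there.
        d_updates = tf.gather_nd(dy, indices)
        # The old contents of `ref` were overwritten at those positions,
        # so mask their gradient out; everywhere else dy passes through.
        mask = tf.scatter_nd(indices, tf.ones_like(d_updates), tf.shape(dy))
        d_ref = dy * (1.0 - mask)
        # Integer indices are not differentiable.
        return d_ref, None, d_updates

    return out, grad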



I have implemented my own gradient for tf.scatter_nd_update, but I am not sure it is correct, since I had to set the gradients of the updates and the indices to zero; I found no other way to handle them. Here's my implementation:



import tensorflow as tf
import numpy as np

def reset_graph(seed=4):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

@tf.custom_gradient
def scatter_nd_w_gradient(phi, indices, update):
    phi = tf.scatter_nd_update(phi, indices, update)

    def grad(dy):
        # Pass dy straight through to phi; indices and update get zero
        # gradients because I saw no way to pick them up.
        dz = tf.zeros([2, 4], dtype='float32')  # matches indices' shape
        dt = tf.zeros([2], dtype='float32')     # matches update's shape
        return [dy, dz, dt]

    return phi, grad

def some_operation(x):
    phi = tf.Variable(tf.zeros([1, 10, 10, 1], dtype='float32'), trainable=True)
    phi_prime = tf.zeros([1, 10, 10, 1], dtype='float32')
    phi = tf.assign(phi, phi_prime)

    # Two full indices into the rank-4 variable, stacked to shape [2, 4].
    ind_y = tf.constant([0, 1, 3, 0])
    ind_x = tf.constant([0, 2, 1, 0])
    indices = tf.stack([ind_y, ind_x])

    update = tf.stack([x[0, 4, 4, 0], x[0, 4, 3, 0]])

    phi = scatter_nd_w_gradient(phi, indices, update)
    c3 = tf.nn.sigmoid(phi)
    c4 = tf.reduce_mean(c3)

    return 1 - c4

reset_graph()
a = np.ones((10, 10), dtype=np.float32)
k = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]], dtype=np.float32)
k = k[::-1, ::-1]  # flip the kernel in both spatial dimensions

a_tensor = tf.reshape(a, [1, 10, 10, 1])
k_weight = tf.reshape(np.array(k), [3, 3, 1, 1])

c2 = tf.layers.conv2d(a_tensor, filters=1, kernel_size=3, strides=1,
                      padding="same", activation=tf.nn.relu)

total_loss2 = some_operation(c2)
train_op = tf.train.AdamOptimizer(1e-3).minimize(
    total_loss2, colocate_gradients_with_ops=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, c2_val = sess.run([train_op, c2])
    print('this is the value for c2: {}'.format(c2_val))


The code might look complex, but it is not. I do a simple convolution, then perform some operation (not necessarily meaningful, just to show the concept): I assign values to a variable and update it with the output of the convolution layer via tf.scatter_nd_update.
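Note one consequence of my grad function: since it returns zeros for update, tf.gradients reports a zero gradient at c2, so the convolution kernel gets no learning signal. This is exactly the part I am unsure about. A quick way to inspect it (a sketch reusing total_loss2 and c2 from the code above):

g = tf.gradients(total_loss2, c2)[0]
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(g))  # all zeros with the grad function above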



If my implementation is correct, it would help many people who want to take the output of a neural network and combine it with another unit. Please let me know if it makes sense to you.
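For what it is worth, where an in-place variable update is not strictly required, tf.scatter_nd (which builds a new tensor instead of mutating a variable) already ships with a registered gradient, so no custom gradient is needed at all. A minimal sketch under the same shapes as above (the placeholder is hypothetical, standing in for values taken from c2):

import tensorflow as tf

indices = tf.constant([[0, 1, 3, 0], [0, 2, 1, 0]])
updates = tf.placeholder(tf.float32, shape=[2])  # e.g. values from c2
phi = tf.scatter_nd(indices, updates, shape=[1, 10, 10, 1])
loss = 1 - tf.reduce_mean(tf.nn.sigmoid(phi))
grads = tf.gradients(loss, updates)  # well-defined without a custom gradient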










      python python-3.x tensorflow






asked Nov 11 at 4:22 by Ali1990
