Is this a correct gradient implementation for tf.scatter_nd_update?

Unfortunately, TensorFlow does not provide gradient support for tf.scatter_nd_update, so in the backward pass the gradient stops there. In essence, this function is just a series of assign operations across several array positions, so in each assignment the gradient of the right-hand side should simply propagate to the left-hand side.
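
For intuition, here is a minimal sketch (my own, not part of TensorFlow; the helper name scatter_update_grad is hypothetical) of what that reasoning would mean for a full gradient: positions that get overwritten contribute nothing back to the original tensor, while each update receives the gradient at the position it was written to, assuming the indices are distinct:

import tensorflow as tf

def scatter_update_grad(dy, indices):
    # dy: gradient of the loss w.r.t. the scattered result.
    # Positions that were overwritten pass zero gradient back to the
    # original tensor, since their old values no longer affect the output.
    # (Assumes the rows of `indices` are distinct; duplicates would sum.)
    ones = tf.ones_like(tf.gather_nd(dy, indices))
    mask = tf.scatter_nd(indices, ones, tf.shape(dy))
    d_ref = dy * (1.0 - mask)
    # Each update flows straight to the output position it was written
    # to, so its gradient is just dy gathered at that position.
    d_updates = tf.gather_nd(dy, indices)
    return d_ref, d_updates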



I have implemented my own gradient for tf.scatter_nd_update, but I am not sure it is correct, since I had to set the gradients of the updates and indices to zero, as there was no way for me to pick them up. Here is my implementation:



import tensorflow as tf
import numpy as np

def reset_graph(seed=4):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

@tf.custom_gradient
def scatter_nd_w_gradient(phi, indices, update):
    phi = tf.scatter_nd_update(phi, indices, update)

    def grad(dy):
        # Pass the incoming gradient straight through to phi; give
        # zero gradients (of the matching shapes) to indices and update.
        dz = tf.zeros([2, 4], dtype='float32')  # d(loss)/d(indices)
        dt = tf.zeros([2], dtype='float32')     # d(loss)/d(update)
        return [dy, dz, dt]

    return phi, grad

def some_operation(x):
    phi = tf.Variable(tf.zeros([1, 10, 10, 1], dtype='float32'),
                      trainable=True)
    phi_prime = tf.zeros([1, 10, 10, 1], dtype='float32')
    phi = tf.assign(phi, phi_prime)

    ind_y = tf.constant([0, 1, 3, 0])
    ind_x = tf.constant([0, 2, 1, 0])

    # Stack into a [2, 4] tensor: each row is one full 4-D index into
    # phi (the original tuple form relied on implicit auto-packing).
    indices = tf.stack([ind_y, ind_x])
    update = tf.stack([x[0, 4, 4, 0], x[0, 4, 3, 0]])

    phi = scatter_nd_w_gradient(phi, indices, update)
    c3 = tf.nn.sigmoid(phi)
    c4 = tf.reduce_mean(c3)

    return 1 - c4

reset_graph()
a = np.ones((10, 10), dtype=np.float32)
k = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]], dtype=np.float32)
k = k[::-1, ::-1]  # flip the kernel along both spatial axes

a_tensor = tf.reshape(a, [1, 10, 10, 1])
k_weight = tf.reshape(np.array(k), [3, 3, 1, 1])  # note: unused below

c2 = tf.layers.conv2d(a_tensor, filters=1, kernel_size=3, strides=1,
                      padding="same", activation=tf.nn.relu)

total_loss2 = some_operation(c2)
train_op = tf.train.AdamOptimizer(1e-3).minimize(
    total_loss2, colocate_gradients_with_ops=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, c2_val = sess.run([train_op, c2])
    print('this is the value for c2 {}'.format(c2_val))


The code might look complex, but it is not. I just run a simple convolution, then perform some operation (not necessarily meaningful, but enough to show the concept): I assign a value to a variable and update it from the output of the convolution layer using tf.scatter_nd_update.



If my implementation is correct, it would help a lot of people who want to take the output of a neural network and combine it with another unit. Please let me know if it makes sense to you.
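
For what it's worth, one way to sanity-check a custom gradient numerically is TF 1.x's tf.test.compute_gradient_error. Since the numeric checker does not cope with variable state, the sketch below (an assumption on my part, not taken from the code above) checks a pure stand-in built with tf.scatter_nd instead of the stateful update:

import tensorflow as tf

with tf.Graph().as_default(), tf.Session() as sess:
    update = tf.constant([1.0, 2.0])
    indices = tf.constant([[0, 1, 3, 0], [0, 2, 1, 0]])
    # tf.scatter_nd is differentiable w.r.t. `update`, so it can serve
    # as a functional stand-in with the same index/update shapes.
    y = tf.scatter_nd(indices, update, shape=[1, 10, 10, 1])
    # Returns the max difference between the analytic Jacobian and a
    # finite-difference estimate; expect something tiny, around 1e-4.
    err = tf.test.compute_gradient_error(update, [2], y, [1, 10, 10, 1])
    print('max gradient error:', err)
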
      python python-3.x tensorflow

asked Nov 11 at 4:22 by Ali1990