commit     84501e325f1cf7adf64b10a0b2a8aeb3a0928b32
tree       488ac80d1a7bab87b273026832bd35faca8a5599
parent     9c7fc422356a5a50adb15961aa27929371c2ea01
author     R. Gokberk Cinbis <gokberkcinbis@gmail.com>  2016-08-25 01:11:07 +0300
committer  GitHub <noreply@github.com>                  2016-08-25 01:11:07 +0300
Keep objective values, in case they are references
When opfunc() simply returns the output state variable of an nn model (i.e., when opfunc() directly returns my_net:forward()'s output), the second opfunc() call within the for loop updates not only C2 but also C1, since both refer to the same underlying tensor. In that case, dC_est is incorrectly 0. Avoid this behaviour by unconditionally copying the contents of C1 whenever it is a Tensor/CudaTensor. The overhead should be bearable, as C1 is a scalar.
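To see the failure mode the message describes, consider a minimal sketch (illustrative only; the names my_net and opfunc are assumptions, not code from the repository) of an opfunc that returns the model's output by reference:

    -- A minimal sketch of the aliasing problem this commit fixes:
    -- opfunc returns a reference to my_net.output, so every forward()
    -- call overwrites the tensor that earlier calls returned.
    require 'nn'

    local my_net = nn.Linear(10, 1)  -- toy model with a 1-element objective

    local function opfunc(x)
       local C = my_net:forward(x)                   -- reference to my_net.output
       local dC = my_net:backward(x, torch.ones(1))  -- df/dx, same size as x
       return C, dC
    end

    local x = torch.randn(10)
    local C1 = opfunc(x)      -- C1 points at my_net.output
    local C2 = opfunc(x + 1)  -- second call overwrites my_net.output
    print(C1[1] - C2[1])      -- always 0: C1 and C2 share storage,
                              -- which is why dC_est came out as 0

After the patch, checkgrad copies C1 into a private tensor before the second opfunc() call, so the difference quotient is computed from two distinct values.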
checkgrad.lua | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/checkgrad.lua b/checkgrad.lua
index 402d9fc..908a1a2 100644
--- a/checkgrad.lua
+++ b/checkgrad.lua
@@ -20,9 +20,15 @@ RETURN:
 
 function optim.checkgrad(opfunc, x, eps)
     -- compute true gradient:
-    local _,dC = opfunc(x)
+    local Corg,dC = opfunc(x)
     dC:resize(x:size())
 
+    local Ctmp -- temporary value
+    local isTensor = torch.isTensor(Corg)
+    if isTensor then
+       Ctmp = Corg.new(Corg:size())
+    end
+
     -- compute numeric approximations to gradient:
     local eps = eps or 1e-7
     local dC_est = torch.Tensor():typeAs(dC):resizeAs(dC)
@@ -30,6 +36,10 @@ function optim.checkgrad(opfunc, x, eps)
       local tmp = x[i]
       x[i] = x[i] + eps
       local C1 = opfunc(x)
+      if isTensor then
+         Ctmp:copy(C1)
+         C1 = Ctmp
+      end
       x[i] = x[i] - 2 * eps
       local C2 = opfunc(x)
       x[i] = tmp
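For reference, a hedged usage sketch, reusing the assumed toy opfunc from the sketch above:

    -- Gradient check at a random point; eps defaults to 1e-7.
    require 'optim'

    local x = torch.randn(10)
    local diff, dC, dC_est = optim.checkgrad(opfunc, x, 1e-7)
    print(diff)  -- small relative error after the patch; before it,
                 -- dC_est was all zeros for a Tensor-valued objective,
                 -- so diff = norm(dC - 0) / norm(dC + 0) = 1

The relative-error formula in the comment follows checkgrad's own definition, diff = torch.norm(dC - dC_est) / torch.norm(dC + dC_est), so a spurious all-zero estimate reports exactly 1.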