local Bilinear, parent = torch.class('nn.Bilinear', 'nn.Module')

local function isint(x) return type(x) == 'number' and x == math.floor(x) end

function Bilinear:__assertInput(input)
   assert(input and torch.type(input) == 'table' and #input == 2,
      'input should be a table containing two data Tensors')
   assert(input[1]:nDimension() == 2 and input[2]:nDimension() == 2,
      'input Tensors should be two-dimensional')
   assert(input[1]:size(1) == input[2]:size(1),
      'input Tensors should have the same number of rows (instances)')
   assert(input[1]:size(2) == self.weight:size(2),
      'dimensionality of first input is erroneous')
   assert(input[2]:size(2) == self.weight:size(3),
      'dimensionality of second input is erroneous')
end

function Bilinear:__assertInputGradOutput(input, gradOutput)
   assert(input[1]:size(1) == gradOutput:size(1),
      'number of rows in gradOutput does not match input')
   assert(gradOutput:size(2) == self.weight:size(1),
      'number of columns in gradOutput does not match output size of layer')
end

function Bilinear:__init(inputSize1, inputSize2, outputSize, bias)

   -- assertions:
   assert(self and inputSize1 and inputSize2 and outputSize,
      'should specify inputSize1 and inputSize2 and outputSize')
   assert(isint(inputSize1) and isint(inputSize2) and isint(outputSize),
      'inputSize1 and inputSize2 and outputSize should be integer numbers')
   assert(inputSize1 > 0 and inputSize2 > 0 and outputSize > 0,
      'inputSize1 and inputSize2 and outputSize should be positive numbers')

   -- set up model:
   parent.__init(self)
   local bias = ((bias == nil) and true) or bias
   self.weight     = torch.Tensor(outputSize, inputSize1, inputSize2)
   self.gradWeight = torch.Tensor(outputSize, inputSize1, inputSize2)
   if bias then
      self.bias     = torch.Tensor(outputSize)
      self.gradBias = torch.Tensor(outputSize)
   end
   self.gradInput = {torch.Tensor(), torch.Tensor()}
   self:reset()
end

function Bilinear:reset(stdv)
   assert(self)
   if stdv then
      assert(stdv and type(stdv) == 'number' and stdv > 0,
         'standard deviation should be a positive number')
      stdv = stdv * math.sqrt(3)
   else
      stdv = 1 / math.sqrt(self.weight:size(2))
   end
   self.weight:uniform(-stdv, stdv)
   if self.bias then self.bias:uniform(-stdv, stdv) end
   return self
end

function Bilinear:updateOutput(input)
   assert(self)
   self:__assertInput(input)

   -- set up buffer:
   self.buff2 = self.buff2 or input[1].new()
   self.buff2:resizeAs(input[2])

   -- compute output scores:
   self.output:resize(input[1]:size(1), self.weight:size(1))
   for k = 1, self.weight:size(1) do
      torch.mm(self.buff2, input[1], self.weight[k])
      self.buff2:cmul(input[2])
      torch.sum(self.output:narrow(2, k, 1), self.buff2, 2)
   end
   if self.bias then
      self.output:add(
         self.bias:reshape(1, self.bias:nElement()):expandAs(self.output)
      )
   end
   return self.output
end
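-- Gradient note (a sketch of the math the backward pass below implements):
-- for each output slice k, the forward pass computes
--   output[.][k] = sum_ij input1[.][i] * weight[k][i][j] * input2[.][j] (+ bias[k])
-- so differentiating with respect to each argument gives, per slice k,
--   d output_k / d input1    = input2 * weight[k]:t()
--   d output_k / d input2    = input1 * weight[k]
--   d output_k / d weight[k] = input1:t() * input2 (rows weighted by gradOutput[.][k])
-- updateGradInput and accGradParameters accumulate these slice contributions,
-- each scaled elementwise by the corresponding column of gradOutput.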
function Bilinear:updateGradInput(input, gradOutput)
   assert(self)
   if self.gradInput then
      self:__assertInputGradOutput(input, gradOutput)

      if #self.gradInput == 0 then
         for i = 1, 2 do self.gradInput[i] = input[1].new() end
      end

      -- compute d output / d input:
      self.gradInput[1]:resizeAs(input[1]):fill(0)
      self.gradInput[2]:resizeAs(input[2]):fill(0)

      -- do first slice of weight tensor (k = 1)
      self.gradInput[1]:mm(input[2], self.weight[1]:t())
      self.gradInput[1]:cmul(gradOutput:narrow(2, 1, 1):expand(
         self.gradInput[1]:size(1), self.gradInput[1]:size(2)))
      self.gradInput[2]:addmm(1, input[1], self.weight[1])
      self.gradInput[2]:cmul(gradOutput:narrow(2, 1, 1):expand(
         self.gradInput[2]:size(1), self.gradInput[2]:size(2)))

      -- do remaining slices of weight tensor
      if self.weight:size(1) > 1 then
         self.buff1 = self.buff1 or input[1].new()
         self.buff1:resizeAs(input[1])
         -- buff2 may not exist yet if updateOutput was never called (or was
         -- cleared via clearState), so guard its allocation here as well:
         self.buff2 = self.buff2 or input[1].new()
         self.buff2:resizeAs(input[2])

         for k = 2, self.weight:size(1) do
            self.buff1:mm(input[2], self.weight[k]:t())
            self.buff1:cmul(gradOutput:narrow(2, k, 1):expand(
               self.gradInput[1]:size(1), self.gradInput[1]:size(2)))
            self.gradInput[1]:add(self.buff1)

            self.buff2:mm(input[1], self.weight[k])
            self.buff2:cmul(gradOutput:narrow(2, k, 1):expand(
               self.gradInput[2]:size(1), self.gradInput[2]:size(2)))
            self.gradInput[2]:add(self.buff2)
         end
      end
      return self.gradInput
   end
end

function Bilinear:accGradParameters(input, gradOutput, scale)
   local scale = scale or 1
   self:__assertInputGradOutput(input, gradOutput)
   assert(scale and type(scale) == 'number' and scale >= 0)

   -- make sure we have buffer:
   self.buff1 = self.buff1 or input[1].new()
   self.buff1:resizeAs(input[1])

   -- accumulate parameter gradients:
   for k = 1, self.weight:size(1) do
      torch.cmul(
         self.buff1, input[1], gradOutput:narrow(2, k, 1):expandAs(input[1])
      )
      -- apply scale to the weight gradient, matching the bias update below:
      self.gradWeight[k]:addmm(scale, self.buff1:t(), input[2])
   end
   if self.bias then self.gradBias:add(scale, gradOutput:sum(1)) end
end

function Bilinear:sharedAccUpdateGradParameters(input, gradOutput, lr)
   -- we do not need to accumulate parameters when sharing:
   self:defaultAccUpdateGradParameters(input, gradOutput, lr)
end

function Bilinear:__tostring__()
   return torch.type(self) ..
      string.format(
         '(%dx%d -> %d) %s',
         self.weight:size(2), self.weight:size(3), self.weight:size(1),
         (self.bias == nil and ' without bias' or '')
      )
end

function Bilinear:clearState()
   if self.buff2 then self.buff2:set() end
   if self.buff1 then self.buff1:set() end
   return parent.clearState(self)
end
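-- Minimal usage sketch (illustrative only; assumes the 'nn' package is
-- installed; kept in a block comment so it does not execute on load):
--[[
local nn = require 'nn'
local m = nn.Bilinear(10, 20, 5)       -- y_k = x1^T W_k x2 + b_k
local x1 = torch.randn(32, 10)         -- batch of 32 instances
local x2 = torch.randn(32, 20)
local y  = m:forward({x1, x2})         -- 32x5 output
local gradInput = m:backward({x1, x2}, torch.randn(32, 5))
--]]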