local SpatialPyramid, parent = torch.class('nn.SpatialPyramid', 'nn.Module') local help_desc = [[ Simplified (and more flexible regarding sizes) fovea: From a given image, generates a pyramid of scales, and process each scale with the given list of processors. The result of each module/scale is then upsampled to produce a homogenous list of 3D feature maps (a table of 3D tensors) grouping the different scales. There are two operating modes: focused [mostly training], and global [inference]. In global mode, the entire input is processed. In focused mode, the fovea is first focused on a particular (x,y) point. This function has two additional parameters, w and h, that represent the size of the OUTPUT of the processors. To focus the fovea, simply call fovea:focus(x,y,w,h) before doing a forward. A call to fovea:focus(nil) makes it unfocus (go back to global mode). If prescaled_input is true, then the input has to be a table of pre-downscaled 3D tensors. It does not work in focus mode. ]] function SpatialPyramid:__init(ratios, processors, kW, kH, dW, dH, xDimIn, yDimIn, xDimOut, yDimOut, prescaled_input) parent.__init(self) self.prescaled_input = prescaled_input or false assert(#ratios == #processors) self.ratios = ratios self.kH = kH self.kW = kW self.dH = dH self.dW = dW self.focused = false self.x = 0 self.y = 0 self.wFocus = 0 self.hFocus = 0 self.processors = processors local wPad = kW-dW local hPad = kH-dH local padLeft = math.floor(wPad/2) local padRight = math.ceil (wPad/2) local padTop = math.floor(hPad/2) local padBottom = math.ceil (hPad/2) -- focused self.focused_pipeline = nn.ConcatTable() for i = 1,#self.ratios do local seq = nn.Sequential() seq:add(nn.SpatialPadding(0,0,0,0, yDimIn, xDimIn)) seq:add(nn.SpatialReSamplingEx{rwidth=1.0/self.ratios[i], rheight=1.0/self.ratios[i], xDim = xDimIn, yDim = yDimIn, mode='average'}) seq:add(processors[i]) self.focused_pipeline:add(seq) end -- unfocused if prescaled_input then self.unfocused_pipeline = nn.ParallelTable() else self.unfocused_pipeline = nn.ConcatTable() end for i = 1,#self.ratios do local seq = nn.Sequential() if not prescaled_input then seq:add(nn.SpatialReSamplingEx{rwidth=1.0/self.ratios[i], rheight=1.0/self.ratios[i], xDim = xDimIn, yDim = yDimIn, mode='average'}) seq:add(nn.SpatialPadding(padLeft, padRight, padTop, padBottom, yDimIn, xDimIn)) end seq:add(processors[i]) seq:add(nn.SpatialReSamplingEx{rwidth=self.ratios[i], rheight=self.ratios[i], xDim=xDimOut, yDim=yDimOut, mode='simple'}) self.unfocused_pipeline:add(seq) end end function SpatialPyramid:focus(x, y, w, h) w = w or 1 h = h or 1 if x and y then self.x = x self.y = y self.focused = true self.winWidth = {} self.winHeight = {} for i = 1,#self.ratios do self.winWidth[i] = self.ratios[i] * ((w-1) * self.dW + self.kW) self.winHeight[i] = self.ratios[i] * ((h-1) * self.dH + self.kH) end else self.focused = false end end function SpatialPyramid:configureFocus(wImg, hImg) for i = 1,#self.ratios do local padder = self.focused_pipeline.modules[i].modules[1] padder.pad_l = -self.x + math.ceil (self.winWidth[i] /2) padder.pad_r = self.x + math.floor(self.winWidth[i] /2) - wImg padder.pad_t = -self.y + math.ceil (self.winHeight[i]/2) padder.pad_b = self.y + math.floor(self.winHeight[i]/2) - hImg end end function SpatialPyramid:checkSize(input) for i = 1,#self.ratios do if (math.fmod(input:size(2), self.ratios[i]) ~= 0) or (math.fmod(input:size(3), self.ratios[i]) ~= 0) then error('SpatialPyramid: input sizes must be multiple of ratios') end end end function SpatialPyramid:updateOutput(input) if not self.prescaled_input then self:checkSize(input) end if self.focused then self:configureFocus(input:size(3), input:size(2)) self.output = self.focused_pipeline:updateOutput(input) else self.output = self.unfocused_pipeline:updateOutput(input) end return self.output end function SpatialPyramid:updateGradInput(input, gradOutput) if self.focused then self.gradInput = self.focused_pipeline:updateGradInput(input, gradOutput) else self.gradInput = self.unfocused_pipeline:updateGradInput(input, gradOutput) end return self.gradInput end function SpatialPyramid:zeroGradParameters() self.focused_pipeline:zeroGradParameters() self.unfocused_pipeline:zeroGradParameters() end function SpatialPyramid:accGradParameters(input, gradOutput, scale) if self.focused then self.focused_pipeline:accGradParameters(input, gradOutput, scale) else self.unfocused_pipeline:accGradParameters(input, gradOutput, scale) end end function SpatialPyramid:updateParameters(learningRate) if self.focused then self.focused_pipeline:updateParameters(learningRate) else self.unfocused_pipeline:updateParameters(learningRate) end end function SpatialPyramid:type(type) parent.type(self, type) self.focused_pipeline:type(type) self.unfocused_pipeline:type(type) return self end function SpatialPyramid:parameters() if self.focused then return self.focused_pipeline:parameters() else return self.unfocused_pipeline:parameters() end end function SpatialPyramid:__tostring__() if self.focused then local dscr = tostring(self.focused_pipeline):gsub('\n', '\n | ') return 'SpatialPyramid (focused)\n' .. dscr else local dscr = tostring(self.unfocused_pipeline):gsub('\n', '\n | ') return 'SpatialPyramid (unfocused)\n' .. dscr end end