-- LinearWeightNorm.lua, from the torch/nn repository (github.com/torch/nn.git)
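
-- Fully connected layer with weight normalization (Salimans & Kingma,
-- "Weight Normalization", arXiv:1602.07868): each output row of the weight
-- matrix is reparameterized as weight = g * v / ||v||, where g is a scalar
-- gain per output unit and v carries the direction. g and v are the trained
-- parameters; weight is derived from them.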
local LinearWeightNorm, parent = torch.class('nn.LinearWeightNorm', 'nn.Linear')

function LinearWeightNorm:__init(inputSize, outputSize, bias, eps)
    nn.Module.__init(self) -- Skip nn.Linear constructor

    local bias = ((bias == nil) and true) or bias -- bias defaults to true

    self.eps = eps or 1e-16 -- numerical floor for the row norms of v

    self.outputSize = outputSize
    self.inputSize = inputSize

    -- direction parameter v and its gradient; the effective weight is g * v / ||v||
    self.v = torch.Tensor(outputSize, inputSize)
    self.gradV = torch.Tensor(outputSize, inputSize)

    -- derived weight matrix, recomputed by updateWeightMatrix()
    self.weight = torch.Tensor(outputSize, inputSize)

    -- per-output scalar gain g and its gradient
    self.g = torch.Tensor(outputSize,1)
    self.gradG = torch.Tensor(outputSize,1)

    -- buffers holding the row norms of v and the ratio g / norm
    self.norm = torch.Tensor(outputSize,1)
    self.scale = torch.Tensor(outputSize,1)

    if bias then
        self.bias = torch.Tensor(outputSize)
        self.gradBias = torch.Tensor(outputSize)
    end

    self:reset()
end

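-- Refresh the derived weight before switching to evaluation mode, since
-- updateOutput only recomputes it while training.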
function LinearWeightNorm:evaluate()
    if self.train ~= false then
        self:updateWeightMatrix()
    end

    parent.evaluate(self)
end

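-- Initialize g and v from a given weight matrix (default: self.weight) so that
-- g * v / ||v|| reproduces it exactly.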
function LinearWeightNorm:initFromWeight(weight)
    weight = weight or self.weight

    self.g:norm(weight,2,2):clamp(self.eps,math.huge)
    self.v:copy(weight)

    return self
end

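-- Build an nn.LinearWeightNorm that computes the same function as an existing
-- nn.Linear module.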
function LinearWeightNorm.fromLinear(linear)
    local module = nn.LinearWeightNorm(linear.weight:size(2), linear.weight:size(1), torch.isTensor(linear.bias))
    module.weight:copy(linear.weight)
    module:initFromWeight()

    if linear.bias then
        module.bias:copy(linear.bias)
    end

    return module
end

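-- Collapse back into a plain nn.Linear using the current effective weight.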
function LinearWeightNorm:toLinear()
    self:updateWeightMatrix()

    local module = nn.Linear(self.inputSize, self.outputSize, torch.isTensor(self.bias))

    module.weight:copy(self.weight)
    if self.bias then
        module.bias:copy(self.bias)
    end

    return module
end

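-- The trainable parameters are v, g and (optionally) the bias; weight, norm
-- and scale are derived buffers.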
function LinearWeightNorm:parameters()
    if self.bias then
        return {self.v, self.g, self.bias}, {self.gradV, self.gradG, self.gradBias}
    else
        return {self.v, self.g}, {self.gradV, self.gradG}
    end
end

function LinearWeightNorm:reset(stdv)
    if stdv then
        stdv = stdv * math.sqrt(3)
    else
        stdv = 1 / math.sqrt(self.inputSize)
    end

    self.weight:uniform(-stdv,stdv)
    self:initFromWeight()

    if self.bias then
        self.bias:uniform(-stdv,stdv)
    end
end

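-- Recompute the derived quantities:
--   norm   = ||v|| per row, clamped below by eps
--   scale  = g / norm
--   weight = v .* scale (broadcast across columns)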
function LinearWeightNorm:updateWeightMatrix()
    -- lazily resize buffers that may have been released by clearState()
    if self.norm:dim() == 0 then self.norm:resizeAs(self.g) end
    if self.scale:dim() == 0 then self.scale:resizeAs(self.g) end
    if self.weight:dim() == 0 then self.weight:resizeAs(self.v) end

    self.norm:norm(self.v,2,2):clamp(self.eps,math.huge)
    self.scale:cdiv(self.g,self.norm)
    self.weight:cmul(self.v,self.scale:expandAs(self.v))
end

function LinearWeightNorm:updateOutput(input)
    if self.train ~= false then
        self:updateWeightMatrix()
    end

    return parent.updateOutput(self, input)
end

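-- Accumulate gradients. gradV first receives dL/dWeight exactly as nn.Linear
-- would, then it is mapped into the weight-norm parameterization:
--   gradG = rowSum(dL/dWeight .* v) / ||v||
--   gradV = (g / ||v||) .* dL/dWeight - (g .* gradG / ||v||^2) .* v
-- self.weight is reused as scratch space here, so it is recomputed by
-- updateWeightMatrix() before the next training forward pass.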
function LinearWeightNorm:accGradParameters(input, gradOutput, scale)
    scale = scale or 1
    if input:dim() == 1 then
        self.gradV:addr(scale, gradOutput, input)
        if self.bias then self.gradBias:add(scale, gradOutput) end
    elseif input:dim() == 2 then
        self.gradV:addmm(scale, gradOutput:t(), input)
        if self.bias then
            -- update the size of addBuffer if the input is not the same size as the one we had in last updateGradInput
            self:updateAddBuffer(input)
            self.gradBias:addmv(scale, gradOutput:t(), self.addBuffer)
        end
    end

    local scale = self.scale:expandAs(self.v)
    local norm = self.norm:expandAs(self.v)

    self.weight:cmul(self.gradV,self.v):cdiv(norm)
    self.gradG:sum(self.weight,2)

    self.gradV:cmul(scale)

    self.weight:cmul(self.v,scale):cdiv(norm)
    self.weight:cmul(self.gradG:expandAs(self.weight))

    self.gradV:add(-1,self.weight)
end

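-- In-place parameter update: temporarily alias the gradient tensors to the
-- parameters themselves so that accGradParameters with a scale of -lr writes
-- the update directly into v, g and bias.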
function LinearWeightNorm:defaultAccUpdateGradParameters(input, gradOutput, lr)
    local gradV = self.gradV
    local gradG = self.gradG
    local gradBias = self.gradBias

    self.gradV = self.v
    self.gradG = self.g
    self.gradBias = self.bias

    self:accGradParameters(input, gradOutput, -lr)

    self.gradV = gradV
    self.gradG = gradG
    self.gradBias = gradBias
end

function LinearWeightNorm:clearState()
    nn.utils.clear(self, 'weight', 'norm', 'scale')
    return parent.clearState(self)
end

function LinearWeightNorm:__tostring__()
    return torch.type(self) ..
        string.format('(%d -> %d)', self.inputSize, self.outputSize) ..
        (self.bias == nil and ' without bias' or '')
end