| author | Jan Buethe <jbuethe@amazon.de> | 2024-01-04 16:55:57 +0300 |
| --- | --- | --- |
| committer | Jan Buethe <jbuethe@amazon.de> | 2024-01-04 16:55:57 +0300 |
| commit | 0acae390c42b7a98e1da0fa46aedb2077b91898f (patch) | |
| tree | 5ff871e37e9fde14a4371fc8e45a890ab456d902 | |
| parent | 3a63463b9854f3fbbb3321e38f11d5ea351e3a8c (diff) | |
switched to optimal-scale soft quantization
-rw-r--r-- | dnn/torch/osce/utils/softquant.py | 53 |
1 file changed, 49 insertions(+), 4 deletions(-)
```diff
diff --git a/dnn/torch/osce/utils/softquant.py b/dnn/torch/osce/utils/softquant.py
index f62ee020..5fca5b2a 100644
--- a/dnn/torch/osce/utils/softquant.py
+++ b/dnn/torch/osce/utils/softquant.py
@@ -1,5 +1,47 @@
 import torch
 
+@torch.no_grad()
+def compute_optimal_scale(weight):
+    with torch.no_grad():
+        n_out, n_in = weight.shape
+        assert n_in % 4 == 0
+        if n_out % 8:
+            # add padding
+            pad = n_out - n_out % 8
+            weight = torch.cat((weight, torch.zeros((pad, n_in), dtype=weight.dtype, device=weight.device)), dim=0)
+
+        weight_max_abs, _ = torch.max(torch.abs(weight), dim=1)
+        weight_max_sum, _ = torch.max(torch.abs(weight[:, : n_in : 2] + weight[:, 1 : n_in : 2]), dim=1)
+        scale_max = weight_max_abs / 127
+        scale_sum = weight_max_sum / 129
+
+        scale = torch.maximum(scale_max, scale_sum)
+
+    return scale[:n_out]
+
+@torch.no_grad()
+def q_scaled_noise(module, weight):
+    if isinstance(module, torch.nn.Conv1d):
+        w = weight.permute(0, 2, 1).flatten(1)
+        noise = torch.rand_like(w) - 0.5
+        scale = compute_optimal_scale(w)
+        noise = noise * scale.unsqueeze(-1)
+        noise = noise.reshape(weight.size(0), weight.size(2), weight.size(1)).permute(0, 2, 1)
+    elif isinstance(module, torch.nn.ConvTranspose1d):
+        i, o, k = weight.shape
+        w = weight.permute(2, 1, 0).reshape(k * o, i)
+        noise = torch.rand_like(w) - 0.5
+        scale = compute_optimal_scale(w)
+        noise = noise * scale.unsqueeze(-1)
+        noise = noise.reshape(k, o, i).permute(2, 1, 0)
+    elif len(weight.shape) == 2:
+        noise = torch.rand_like(weight) - 0.5
+        scale = compute_optimal_scale(weight)
+        noise = noise * scale.unsqueeze(-1)
+    else:
+        raise ValueError('unknown quantization setting')
+
+    return noise
 
 class SoftQuant:
     name: str
@@ -16,8 +58,11 @@ class SoftQuant:
             self.quantization_noise = dict()
             for name in self.names:
                 weight = getattr(module, name)
-                self.quantization_noise[name] = \
-                    self.scale * weight.abs().max() * 2 * (torch.rand_like(weight) - 0.5)
+                if self.scale is None:
+                    self.quantization_noise[name] = q_scaled_noise(module, weight)
+                else:
+                    self.quantization_noise[name] = \
+                        self.scale * weight.abs().max() * (torch.rand_like(weight) - 0.5)
                 with torch.no_grad():
                     weight.data[:] = weight + self.quantization_noise[name]
         else:
@@ -27,7 +72,7 @@ class SoftQuant:
                 weight.data[:] = weight - self.quantization_noise[name]
             self.quantization_noise = None
 
-    def apply(module, names=['weight'], scale=0.5/127):
+    def apply(module, names=['weight'], scale=None):
         fn = SoftQuant(names, scale)
 
         for name in names:
@@ -48,7 +93,7 @@ class SoftQuant:
         return fn
 
 
-def soft_quant(module, names=['weight'], scale=0.5/127):
+def soft_quant(module, names=['weight'], scale=None):
     fn = SoftQuant.apply(module, names, scale)
 
     return module
```
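For intuition, here is a hedged sketch (not part of the commit) of what `compute_optimal_scale` guarantees: per output row the scale is the larger of max|w|/127 and max|w_even + w_odd|/129, so after dividing by it every weight fits an int8 and every adjacent-pair sum stays within ±129 (presumably headroom for integer dot-product kernels that accumulate weight pairs; the commit itself doesn't state the motivation). The import path and tensor shapes below are made up for illustration.

```python
import torch

from softquant import compute_optimal_scale  # assumes softquant.py is on the path

# Hypothetical weight matrix: 10 output rows (not a multiple of 8, so the
# function pads internally and slices the result back) and 16 inputs
# (n_in must be a multiple of 4, per the assert).
weight = torch.randn(10, 16)

scale = compute_optimal_scale(weight)   # one scale per output row, shape (10,)
q = weight / scale.unsqueeze(-1)        # scaled weights, before rounding

# Magnitude bound: every entry fits int8 after rounding.
assert torch.all(q.abs() <= 127.0 + 1e-5)

# Pairwise bound: sums of adjacent columns stay within +/-129,
# mirroring the weight_max_sum / 129 term in the diff.
pair_sums = q[:, 0::2] + q[:, 1::2]
assert torch.all(pair_sums.abs() <= 129.0 + 1e-5)
```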
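And a minimal usage sketch, assuming the hook registration in `SoftQuant.apply` (outside this hunk) still wires the noise in around each forward pass. With the new `scale=None` default the noise amplitude comes from `q_scaled_noise` per output channel; passing an explicit scale keeps the old single-amplitude behaviour. Module sizes are arbitrary.

```python
import torch

from softquant import soft_quant  # assumes softquant.py is on the path

# New default (scale=None): per-row optimal scales via q_scaled_noise.
layer = soft_quant(torch.nn.Conv1d(16, 32, kernel_size=3))

# Explicit scale: the previous global-amplitude behaviour.
legacy = soft_quant(torch.nn.Linear(16, 32), scale=0.5 / 127)

# In training mode the hooks perturb the weights with quantization noise
# for the duration of the forward pass, then restore them.
layer.train()
y = layer(torch.randn(1, 16, 100))   # output shape (1, 32, 98)
```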