Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean-Marc Valin <jmvalin@amazon.com>2023-10-01 06:43:51 +0300
committerJean-Marc Valin <jmvalin@amazon.com>2023-10-01 06:43:51 +0300
commit966a2d22eb0999e9319d1003b4b55d9fd051d33d (patch)
tree554190cee058e0942fed1f1f92b545ad31bb79e8
parentf3b86f941408b37b0c0236eb5b8b09605b8a713b (diff)
Code for 2D convolution
Untested
-rw-r--r-- dnn/nnet.c | 52 ++++++++
-rw-r--r-- dnn/nnet.h | 10 +++
2 files changed, 62 insertions(+), 0 deletions(-)
diff --git a/dnn/nnet.c b/dnn/nnet.c
index 73c49fc3..60bde585 100644
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -384,6 +384,58 @@ void compute_conv1d(const Conv1DLayer *layer, float *output, float *mem, const f
compute_generic_conv1d(&matrix, output, mem, input, layer->nb_inputs, layer->activation);
}
+/* Computes non-padded ("valid") 2D convolution for input laid out as
+ [ ksize1 x in_channels x (len2+ksize2-1) ] (one row per channel, see in_stride),
+ kernel [ out_channels x in_channels x ksize1 x ksize2 ],
+ storing the output as [ out_channels x len2 ].
+ We assume that the output dimension along the ksize1 (time) axis is 1,
+ i.e. processing one frame at a time. */
+void conv2d_float(float *out, const float *weights, int in_channels, int out_channels, int ktime, int kheight, const float *in, int len2)
+{
+ int i;
+ int in_stride;
+ /* Producing len2 valid outputs with a kernel of height kheight needs
+ len2+kheight-1 input samples per channel row. */
+ in_stride = len2+kheight-1;
+ OPUS_CLEAR(out, out_channels*len2);
+ for (i=0;i<out_channels;i++) {
+ int m;
+ for (m=0;m<in_channels;m++) {
+ int t;
+ for (t=0;t<ktime;t++) {
+ int h;
+ for (h=0;h<kheight;h++) {
+ int j;
+ for (j=0;j<len2;j++) {
+ /* Accumulate weight[i][m][t][h] * in[t][m][j+h] into out[i][j]. */
+ out[i*len2 + j] += weights[i*in_channels*ktime*kheight + m*ktime*kheight + t*kheight + h] *
+ in[t*in_channels*in_stride + m*in_stride + j + h];
+ }
+ }
+ }
+ }
+ }
+}
+
+#define MAX_CONV2D_INPUTS 2048
+
+/* Applies a 2D convolutional layer to one new time frame of input, carrying
+ the previous ktime-1 frames between calls in "mem".
+ out: [ out_channels x len2 ] result; bias (if any) added, activation applied.
+ mem: [ (ktime-1) x in_channels x (len2+kheight-1) ] history, updated in place.
+ in: [ in_channels x (len2+kheight-1) ] newest input frame.
+ Note: in must not alias out. */
+void compute_conv2d(const Conv2DLayer *conv, float *out, float *mem, const float *in, int len2, int activation)
+{
+ int i;
+ const float *bias;
+ float in_buf[MAX_CONV2D_INPUTS];
+ int time_stride;
+ celt_assert(in != out);
+ /* One time frame holds in_channels rows of len2+kheight-1 samples each,
+ matching the in_stride used by conv2d_float() (was len2+kheight, which
+ misaligned every frame by in_channels samples). */
+ time_stride = conv->in_channels*(len2+conv->kheight-1);
+ celt_assert(conv->ktime*time_stride <= MAX_CONV2D_INPUTS);
+ /* Assemble [ ktime x in_channels x (len2+kheight-1) ]: history then new frame. */
+ OPUS_COPY(in_buf, mem, (conv->ktime-1)*time_stride);
+ OPUS_COPY(&in_buf[(conv->ktime-1)*time_stride], in, time_stride);
+ /* Shift history: drop the oldest frame, keep the most recent ktime-1. */
+ OPUS_COPY(mem, &in_buf[time_stride], (conv->ktime-1)*time_stride);
+ bias = conv->bias;
+ conv2d_float(out, conv->float_weights, conv->in_channels, conv->out_channels, conv->ktime, conv->kheight, in_buf, len2);
+ if (bias != NULL) {
+ /* NOTE(review): bias is indexed per output element (out_channels*len2
+ entries), not per channel — confirm against how weights are exported. */
+ for (i=0;i<conv->out_channels*len2;i++) out[i] += bias[i];
+ }
+ compute_activation(out, out, conv->out_channels*len2, activation);
+}
+
+
void compute_embedding(const EmbeddingLayer *layer, float *output, int input)
{
int i;
diff --git a/dnn/nnet.h b/dnn/nnet.h
index 2b43308a..386d204d 100644
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -75,6 +75,16 @@ typedef struct {
int nb_outputs;
} LinearLayer;
+/* 2D convolutional layer (see compute_conv2d()). */
+typedef struct {
+ const float *bias;          /* optional bias, added to the output; may be NULL */
+ const float *float_weights; /* kernel, [ out_channels x in_channels x ktime x kheight ] */
+ int in_channels;
+ int out_channels;
+ int ktime;                  /* kernel size along the time (frame) axis */
+ int kheight;                /* kernel size along the height (feature) axis */
+} Conv2DLayer;
+
typedef struct {
const float *bias;
const float *input_weights;