diff options
author | Marcin Junczys-Dowmunt <marcinjd@microsoft.com> | 2021-04-20 18:50:53 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <marcinjd@microsoft.com> | 2021-04-20 18:50:53 +0300 |
commit | 3e51ff387232f1096e9560980f0115ac734224f5 (patch) | |
tree | 7f6e97f309119c801a42803082e164822a5e2d64 | |
parent | 8a53b761d5bc922e4ab058a4487ad362d2edefaf (diff) |
fix depth-scaling in FFN
-rw-r--r-- | CHANGELOG.md | 1 | ||||
-rw-r--r-- | src/layers/generic.h | 2 |
2 files changed, 2 insertions, 1 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cbab6b5a..752847e1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - Fix compilation with OMP
 
 ### Fixed
+- Missing depth-scaling in transformer FFN
 - Fixed an issue when loading intgemm16 models from unaligned memory.
 - Fix building marian with gcc 9.3+ and FBGEMM
 - Find MKL installed under Ubuntu 20.04 via apt-get
diff --git a/src/layers/generic.h b/src/layers/generic.h
index 2746bc85..8f390bd7 100644
--- a/src/layers/generic.h
+++ b/src/layers/generic.h
@@ -192,7 +192,7 @@ static inline Expr denseInline(Expr x,
                                float dropProb = 0.0f) {
   auto graph = x->graph();
 
-  auto W = graph->param(prefix + "_W" + suffix, {x->shape()[-1], outDim}, inits::glorotUniform());
+  auto W = graph->param(prefix + "_W" + suffix, {x->shape()[-1], outDim}, initFn);
   auto b = graph->param(prefix + "_b" + suffix, {1, outDim}, inits::zeros());
 
   if(actName == "relu") {