From 8616a7f4e8b3e65edf44f5960ab020d7e7f978fc Mon Sep 17 00:00:00 2001
From: vince62s
Date: Mon, 17 Oct 2022 19:21:59 +0200
Subject: small changes

---
 onmt/decoders/decoder.py                  |  8 --------
 onmt/encoders/cnn_encoder.py              |  2 --
 onmt/encoders/encoder.py                  |  9 ---------
 onmt/encoders/ggnn_encoder.py             |  2 +-
 onmt/encoders/mean_encoder.py             |  1 -
 onmt/encoders/rnn_encoder.py              |  3 +--
 onmt/encoders/transformer.py              |  1 -
 onmt/modules/conv_multi_step_attention.py | 11 -----------
 onmt/modules/copy_generator.py            |  9 ++-------
 onmt/modules/global_attention.py          | 20 +-------------------
 onmt/modules/sparse_losses.py             |  3 ---
 onmt/trainer.py                           | 16 +---------------
 onmt/utils/__init__.py                    |  4 ++--
 onmt/utils/misc.py                        | 10 ----------
 14 files changed, 8 insertions(+), 91 deletions(-)

diff --git a/onmt/decoders/decoder.py b/onmt/decoders/decoder.py
index 8cc8ee08..5481048e 100644
--- a/onmt/decoders/decoder.py
+++ b/onmt/decoders/decoder.py
@@ -5,8 +5,6 @@
 from onmt.models.stacked_rnn import StackedLSTM, StackedGRU
 from onmt.modules import context_gate_factory, GlobalAttention
 from onmt.utils.rnn_factory import rnn_factory
-from onmt.utils.misc import aeq
-
 
 class DecoderBase(nn.Module):
     """Abstract class for decoders.
@@ -290,11 +288,7 @@ class StdRNNDecoder(RNNDecoderBase):
         else:
             rnn_output, dec_state = self.rnn(emb, self.state["hidden"])
 
-        # Check
         tgt_len, tgt_batch, _ = tgt.size()
-        output_len, output_batch, _ = rnn_output.size()
-        aeq(tgt_len, output_len)
-        aeq(tgt_batch, output_batch)
 
         # Calculate the attention.
         if not self.attentional:
@@ -365,8 +359,6 @@ class InputFeedRNNDecoder(RNNDecoderBase):
         input_feed = self.state["input_feed"].squeeze(0)
         input_feed_batch, _ = input_feed.size()
         _, tgt_batch, _ = tgt.size()
-        aeq(tgt_batch, input_feed_batch)
-        # END Additional args check.
 
         dec_outs = []
         attns = {}

diff --git a/onmt/encoders/cnn_encoder.py b/onmt/encoders/cnn_encoder.py
index 5dae039c..a44e6e8b 100644
--- a/onmt/encoders/cnn_encoder.py
+++ b/onmt/encoders/cnn_encoder.py
@@ -36,10 +36,8 @@ class CNNEncoder(EncoderBase):
 
     def forward(self, input, lengths=None, hidden=None):
         """See :class:`onmt.modules.EncoderBase.forward()`"""
-        self._check_args(input, lengths, hidden)
 
         emb = self.embeddings(input)
-        # s_len, batch, emb_dim = emb.size()
 
         emb = emb.transpose(0, 1).contiguous()
         emb_reshape = emb.view(emb.size(0) * emb.size(1), -1)

diff --git a/onmt/encoders/encoder.py b/onmt/encoders/encoder.py
index 78802015..b6175638 100644
--- a/onmt/encoders/encoder.py
+++ b/onmt/encoders/encoder.py
@@ -2,8 +2,6 @@
 
 import torch.nn as nn
 
-from onmt.utils.misc import aeq
-
 
 class EncoderBase(nn.Module):
     """
@@ -34,12 +32,6 @@
     def from_opt(cls, opt, embeddings=None):
         raise NotImplementedError
 
-    def _check_args(self, src, lengths=None, hidden=None):
-        n_batch = src.size(1)
-        if lengths is not None:
-            n_batch_, = lengths.size()
-            aeq(n_batch, n_batch_)
-
     def forward(self, src, lengths=None):
         """
         Args:
             src (LongTensor):
                padded sequences of sparse indices ``(src_len, batch, nfeat)``
             lengths (LongTensor): length of each sequence ``(batch,)``
-
         Returns:
             (FloatTensor, FloatTensor, FloatTensor):

diff --git a/onmt/encoders/ggnn_encoder.py b/onmt/encoders/ggnn_encoder.py
index 1ba32e88..4477ede8 100644
--- a/onmt/encoders/ggnn_encoder.py
+++ b/onmt/encoders/ggnn_encoder.py
@@ -191,7 +191,7 @@ class GGNNEncoder(EncoderBase):
 
     def forward(self, src, lengths=None):
         """See :func:`EncoderBase.forward()`"""
-        self._check_args(src, lengths)
+
         nodes = self.n_node
         batch_size = src.size()[1]
         first_extra = np.zeros(batch_size, dtype=np.int32)

diff --git a/onmt/encoders/mean_encoder.py b/onmt/encoders/mean_encoder.py
index 4ec12ff4..b3ee27ec 100644
--- a/onmt/encoders/mean_encoder.py
+++ b/onmt/encoders/mean_encoder.py
@@ -26,7 +26,6 @@ class MeanEncoder(EncoderBase):
 
     def forward(self, src, lengths=None):
         """See :func:`EncoderBase.forward()`"""
-        self._check_args(src, lengths)
         emb = self.embeddings(src)
         _, batch, emb_dim = emb.size()
 

diff --git a/onmt/encoders/rnn_encoder.py b/onmt/encoders/rnn_encoder.py
index 3570dfe1..ed6a26a2 100644
--- a/onmt/encoders/rnn_encoder.py
+++ b/onmt/encoders/rnn_encoder.py
@@ -62,9 +62,8 @@ class RNNEncoder(EncoderBase):
 
     def forward(self, src, lengths=None):
         """See :func:`EncoderBase.forward()`"""
-        self._check_args(src, lengths)
+
         emb = self.embeddings(src)
-        # s_len, batch, emb_dim = emb.size()
 
         packed_emb = emb
         if lengths is not None and not self.no_pack_padded_seq:

diff --git a/onmt/encoders/transformer.py b/onmt/encoders/transformer.py
index afb796b3..3ddbbc42 100644
--- a/onmt/encoders/transformer.py
+++ b/onmt/encoders/transformer.py
@@ -127,7 +127,6 @@ class TransformerEncoder(EncoderBase):
 
     def forward(self, src, lengths=None):
         """See :func:`EncoderBase.forward()`"""
-        self._check_args(src, lengths)
 
         emb = self.embeddings(src)

diff --git a/onmt/modules/conv_multi_step_attention.py b/onmt/modules/conv_multi_step_attention.py
index 545df1c9..0499f76f 100644
--- a/onmt/modules/conv_multi_step_attention.py
+++ b/onmt/modules/conv_multi_step_attention.py
@@ -2,7 +2,6 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from onmt.utils.misc import aeq
 
 SCALE_WEIGHT = 0.5 ** 0.5
 
@@ -46,22 +45,12 @@ class ConvMultiStepAttention(nn.Module):
                 which is the combination of base emb and top output of
encode """ - # checks - # batch, channel, height, width = base_target_emb.size() batch, _, height, _ = base_target_emb.size() - # batch_, channel_, height_, width_ = input_from_dec.size() batch_, _, height_, _ = input_from_dec.size() - aeq(batch, batch_) - aeq(height, height_) - # enc_batch, enc_channel, enc_height = encoder_out_top.size() enc_batch, _, enc_height = encoder_out_top.size() - # enc_batch_, enc_channel_, enc_height_ = encoder_out_combine.size() enc_batch_, _, enc_height_ = encoder_out_combine.size() - aeq(enc_batch, enc_batch_) - aeq(enc_height, enc_height_) - preatt = seq_linear(self.linear_in, input_from_dec) target = (base_target_emb + preatt) * SCALE_WEIGHT target = torch.squeeze(target, 3) diff --git a/onmt/modules/copy_generator.py b/onmt/modules/copy_generator.py index 7343ab60..d0edbda6 100644 --- a/onmt/modules/copy_generator.py +++ b/onmt/modules/copy_generator.py @@ -1,7 +1,6 @@ import torch import torch.nn as nn -from onmt.utils.misc import aeq from onmt.utils.loss import CommonLossCompute @@ -107,12 +106,8 @@ class CopyGenerator(nn.Module): ``(src_len, batch, extra_words)`` """ - # CHECKS - batch_by_tlen, _ = hidden.size() - batch_by_tlen_, slen = attn.size() - slen_, batch, cvocab = src_map.size() - aeq(batch_by_tlen, batch_by_tlen_) - aeq(slen, slen_) + _, slen = attn.size() + _, batch, cvocab = src_map.size() # Original probabilities. logits = self.linear(hidden) diff --git a/onmt/modules/global_attention.py b/onmt/modules/global_attention.py index 74a085e0..5687b07b 100644 --- a/onmt/modules/global_attention.py +++ b/onmt/modules/global_attention.py @@ -4,7 +4,7 @@ import torch.nn as nn import torch.nn.functional as F from onmt.modules.sparse_activations import sparsemax -from onmt.utils.misc import aeq, sequence_mask +from onmt.utils.misc import sequence_mask # This class is mainly used by decoder.py for RNNs but also # by the CNN / transformer decoder when copy attention is used @@ -108,9 +108,6 @@ class GlobalAttention(nn.Module): # Check input sizes src_batch, src_len, src_dim = h_s.size() tgt_batch, tgt_len, tgt_dim = h_t.size() - aeq(src_batch, tgt_batch) - aeq(src_dim, tgt_dim) - aeq(self.dim, src_dim) if self.attn_type in ["general", "dot"]: if self.attn_type == "general": @@ -161,13 +158,8 @@ class GlobalAttention(nn.Module): batch, source_l, dim = memory_bank.size() batch_, target_l, dim_ = source.size() - aeq(batch, batch_) - aeq(dim, dim_) - aeq(self.dim, dim) if coverage is not None: batch_, source_l_ = coverage.size() - aeq(batch, batch_) - aeq(source_l, source_l_) if coverage is not None: cover = coverage.view(-1).unsqueeze(1) @@ -205,23 +197,13 @@ class GlobalAttention(nn.Module): # Check output sizes batch_, dim_ = attn_h.size() - aeq(batch, batch_) - aeq(dim, dim_) batch_, source_l_ = align_vectors.size() - aeq(batch, batch_) - aeq(source_l, source_l_) else: attn_h = attn_h.transpose(0, 1).contiguous() align_vectors = align_vectors.transpose(0, 1).contiguous() # Check output sizes target_l_, batch_, dim_ = attn_h.size() - aeq(target_l, target_l_) - aeq(batch, batch_) - aeq(dim, dim_) target_l_, batch_, source_l_ = align_vectors.size() - aeq(target_l, target_l_) - aeq(batch, batch_) - aeq(source_l, source_l_) return attn_h, align_vectors diff --git a/onmt/modules/sparse_losses.py b/onmt/modules/sparse_losses.py index 08a24e1d..2a5e0482 100644 --- a/onmt/modules/sparse_losses.py +++ b/onmt/modules/sparse_losses.py @@ -3,7 +3,6 @@ import torch.nn as nn from torch.autograd import Function from torch.cuda.amp import custom_fwd, custom_bwd from 
onmt.modules.sparse_activations import _threshold_and_support -from onmt.utils.misc import aeq class SparsemaxLossFunction(Function): @@ -16,8 +15,6 @@ class SparsemaxLossFunction(Function): target (LongTensor): ``(n,)``, the indices of the target classes """ input_batch, classes = input.size() - target_batch = target.size(0) - aeq(input_batch, target_batch) z_k = input.gather(1, target.unsqueeze(1)).squeeze() tau_z, support_size = _threshold_and_support(input, dim=1) diff --git a/onmt/trainer.py b/onmt/trainer.py index 8c15438d..4da0dc42 100644 --- a/onmt/trainer.py +++ b/onmt/trainer.py @@ -198,8 +198,7 @@ class Trainer(object): for batch in iterator: batches.append(batch) if self.norm_method == "tokens": - num_tokens = batch['tgtlen'].sum() - normalization += num_tokens.item() + normalization += batch['tgtlen'].sum().item() else: normalization += len(batch['indices']) if len(batches) == self.accum_count: @@ -260,13 +259,6 @@ class Trainer(object): # UPDATE DROPOUT self._maybe_update_dropout(step) - if self.gpu_verbose_level > 1: - logger.info("GpuRank %d: index: %d", self.gpu_rank, i) - if self.gpu_verbose_level > 0: - logger.info("GpuRank %d: reduce_counter: %d \ - n_minibatch %d" - % (self.gpu_rank, i + 1, len(batches))) - if self.n_gpu > 1: normalization = sum(onmt.utils.distributed .all_gather_list @@ -286,15 +278,9 @@ class Trainer(object): if (valid_iter is not None and step % valid_steps == 0 and self.gpu_rank == 0): - if self.gpu_verbose_level > 0: - logger.info('GpuRank %d: validate step %d' - % (self.gpu_rank, step)) valid_stats = self.validate( valid_iter, moving_average=self.moving_average) - if self.gpu_verbose_level > 0: - logger.info('GpuRank %d: report stat step %d' - % (self.gpu_rank, step)) self._report_step(self.optim.learning_rate(), step, valid_stats=valid_stats) # Run patience mechanism diff --git a/onmt/utils/__init__.py b/onmt/utils/__init__.py index 6e7873ac..a09b7241 100644 --- a/onmt/utils/__init__.py +++ b/onmt/utils/__init__.py @@ -1,5 +1,5 @@ """Module defining various utilities.""" -from onmt.utils.misc import split_corpus, aeq, use_gpu, set_random_seed +from onmt.utils.misc import split_corpus, use_gpu, set_random_seed from onmt.utils.alignment import make_batch_align_matrix from onmt.utils.report_manager import ReportMgr, build_report_manager from onmt.utils.statistics import Statistics @@ -7,7 +7,7 @@ from onmt.utils.optimizers import MultipleOptimizer, \ Optimizer, AdaFactor from onmt.utils.earlystopping import EarlyStopping, scorers_from_opts -__all__ = ["split_corpus", "aeq", "use_gpu", "set_random_seed", "ReportMgr", +__all__ = ["split_corpus", "use_gpu", "set_random_seed", "ReportMgr", "build_report_manager", "Statistics", "MultipleOptimizer", "Optimizer", "AdaFactor", "EarlyStopping", "scorers_from_opts", "make_batch_align_matrix"] diff --git a/onmt/utils/misc.py b/onmt/utils/misc.py index 8794a4be..14599864 100644 --- a/onmt/utils/misc.py +++ b/onmt/utils/misc.py @@ -74,16 +74,6 @@ def _split_corpus(path, shard_size): yield shard -def aeq(*args): - """ - Assert all arguments have the same value - """ - arguments = (arg for arg in args) - first = next(arguments) - assert all(arg == first for arg in arguments), \ - "Not all arguments have the same value: " + str(args) - - def sequence_mask(lengths, max_len=None): """ Creates a boolean mask from sequence lengths. -- cgit v1.2.3 From 03a1e955303b66702e440014b563af40fca6def8 Mon Sep 17 00:00:00 2001 From: vince62s Date: Mon, 17 Oct 2022 20:43:24 +0200 Subject: fix comments. 
---
 onmt/modules/conv_multi_step_attention.py |  4 ++--
 onmt/modules/global_attention.py          | 10 ----------
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/onmt/modules/conv_multi_step_attention.py b/onmt/modules/conv_multi_step_attention.py
index 0499f76f..917ffabc 100644
--- a/onmt/modules/conv_multi_step_attention.py
+++ b/onmt/modules/conv_multi_step_attention.py
@@ -36,8 +36,8 @@ class ConvMultiStepAttention(nn.Module):
                 encoder_out_combine):
         """
         Args:
-            base_target_emb: target emb tensor
-            input_from_dec: output of decode conv
+            base_target_emb: target emb tensor (batch, channel, height, width)
+            input_from_dec: output of dec conv (batch, channel, height, width)
             encoder_out_top: the key matrix for calculation of attetion
                 weight, which is the top output of encode conv
             encoder_out_combine:

diff --git a/onmt/modules/global_attention.py b/onmt/modules/global_attention.py
index 5687b07b..fcade2d9 100644
--- a/onmt/modules/global_attention.py
+++ b/onmt/modules/global_attention.py
@@ -104,8 +104,6 @@ class GlobalAttention(nn.Module):
           FloatTensor: raw attention scores (unnormalized) for each src index
             ``(batch, tgt_len, src_len)``
         """
-
-        # Check input sizes
         src_batch, src_len, src_dim = h_s.size()
         tgt_batch, tgt_len, tgt_dim = h_t.size()
 
@@ -194,16 +192,8 @@ class GlobalAttention(nn.Module):
         if one_step:
             attn_h = attn_h.squeeze(1)
             align_vectors = align_vectors.squeeze(1)
-
-            # Check output sizes
-            batch_, dim_ = attn_h.size()
-            batch_, source_l_ = align_vectors.size()
-
         else:
             attn_h = attn_h.transpose(0, 1).contiguous()
             align_vectors = align_vectors.transpose(0, 1).contiguous()
-            # Check output sizes
-            target_l_, batch_, dim_ = attn_h.size()
-            target_l_, batch_, source_l_ = align_vectors.size()
 
         return attn_h, align_vectors
-- 
cgit v1.2.3