From 8616a7f4e8b3e65edf44f5960ab020d7e7f978fc Mon Sep 17 00:00:00 2001
From: vince62s
Date: Mon, 17 Oct 2022 19:21:59 +0200
Subject: small changes

---
 onmt/decoders/decoder.py                  |  8 --------
 onmt/encoders/cnn_encoder.py              |  2 --
 onmt/encoders/encoder.py                  |  9 ---------
 onmt/encoders/ggnn_encoder.py             |  2 +-
 onmt/encoders/mean_encoder.py             |  1 -
 onmt/encoders/rnn_encoder.py              |  3 +--
 onmt/encoders/transformer.py              |  1 -
 onmt/modules/conv_multi_step_attention.py | 11 -----------
 onmt/modules/copy_generator.py            |  9 ++-------
 onmt/modules/global_attention.py          | 20 +-------------------
 onmt/modules/sparse_losses.py             |  3 ---
 onmt/trainer.py                           | 16 +---------------
 onmt/utils/__init__.py                    |  4 ++--
 onmt/utils/misc.py                        | 10 ----------
 14 files changed, 8 insertions(+), 91 deletions(-)

diff --git a/onmt/decoders/decoder.py b/onmt/decoders/decoder.py
index 8cc8ee08..5481048e 100644
--- a/onmt/decoders/decoder.py
+++ b/onmt/decoders/decoder.py
@@ -5,8 +5,6 @@
 from onmt.models.stacked_rnn import StackedLSTM, StackedGRU
 from onmt.modules import context_gate_factory, GlobalAttention
 from onmt.utils.rnn_factory import rnn_factory
-from onmt.utils.misc import aeq
-
 
 class DecoderBase(nn.Module):
     """Abstract class for decoders.
@@ -290,11 +288,7 @@ class StdRNNDecoder(RNNDecoderBase):
         else:
             rnn_output, dec_state = self.rnn(emb, self.state["hidden"])
 
-        # Check
         tgt_len, tgt_batch, _ = tgt.size()
-        output_len, output_batch, _ = rnn_output.size()
-        aeq(tgt_len, output_len)
-        aeq(tgt_batch, output_batch)
 
         # Calculate the attention.
         if not self.attentional:
@@ -365,8 +359,6 @@ class InputFeedRNNDecoder(RNNDecoderBase):
         input_feed = self.state["input_feed"].squeeze(0)
         input_feed_batch, _ = input_feed.size()
         _, tgt_batch, _ = tgt.size()
-        aeq(tgt_batch, input_feed_batch)
-        # END Additional args check.
 
         dec_outs = []
         attns = {}

diff --git a/onmt/encoders/cnn_encoder.py b/onmt/encoders/cnn_encoder.py
index 5dae039c..a44e6e8b 100644
--- a/onmt/encoders/cnn_encoder.py
+++ b/onmt/encoders/cnn_encoder.py
@@ -36,10 +36,8 @@ class CNNEncoder(EncoderBase):
 
     def forward(self, input, lengths=None, hidden=None):
         """See :class:`onmt.modules.EncoderBase.forward()`"""
-        self._check_args(input, lengths, hidden)
 
         emb = self.embeddings(input)
-        # s_len, batch, emb_dim = emb.size()
 
         emb = emb.transpose(0, 1).contiguous()
         emb_reshape = emb.view(emb.size(0) * emb.size(1), -1)

diff --git a/onmt/encoders/encoder.py b/onmt/encoders/encoder.py
index 78802015..b6175638 100644
--- a/onmt/encoders/encoder.py
+++ b/onmt/encoders/encoder.py
@@ -2,8 +2,6 @@
 
 import torch.nn as nn
 
-from onmt.utils.misc import aeq
-
 
 class EncoderBase(nn.Module):
     """
@@ -34,12 +32,6 @@
     def from_opt(cls, opt, embeddings=None):
         raise NotImplementedError
 
-    def _check_args(self, src, lengths=None, hidden=None):
-        n_batch = src.size(1)
-        if lengths is not None:
-            n_batch_, = lengths.size()
-            aeq(n_batch, n_batch_)
-
     def forward(self, src, lengths=None):
         """
         Args:
             src (LongTensor):
                padded sequences of sparse indices ``(src_len, batch, nfeat)``
             lengths (LongTensor): length of each sequence ``(batch,)``
-
         Returns:
             (FloatTensor, FloatTensor, FloatTensor):

diff --git a/onmt/encoders/ggnn_encoder.py b/onmt/encoders/ggnn_encoder.py
index 1ba32e88..4477ede8 100644
--- a/onmt/encoders/ggnn_encoder.py
+++ b/onmt/encoders/ggnn_encoder.py
@@ -191,7 +191,7 @@ class GGNNEncoder(EncoderBase):
 
     def forward(self, src, lengths=None):
         """See :func:`EncoderBase.forward()`"""
-        self._check_args(src, lengths)
+
         nodes = self.n_node
         batch_size = src.size()[1]
         first_extra = np.zeros(batch_size, dtype=np.int32)

diff --git a/onmt/encoders/mean_encoder.py b/onmt/encoders/mean_encoder.py
index 4ec12ff4..b3ee27ec 100644
--- a/onmt/encoders/mean_encoder.py
+++ b/onmt/encoders/mean_encoder.py
@@ -26,7 +26,6 @@ class MeanEncoder(EncoderBase):
 
     def forward(self, src, lengths=None):
         """See :func:`EncoderBase.forward()`"""
-        self._check_args(src, lengths)
         emb = self.embeddings(src)
         _, batch, emb_dim = emb.size()
 

diff --git a/onmt/encoders/rnn_encoder.py b/onmt/encoders/rnn_encoder.py
index 3570dfe1..ed6a26a2 100644
--- a/onmt/encoders/rnn_encoder.py
+++ b/onmt/encoders/rnn_encoder.py
@@ -62,9 +62,8 @@ class RNNEncoder(EncoderBase):
 
     def forward(self, src, lengths=None):
         """See :func:`EncoderBase.forward()`"""
-        self._check_args(src, lengths)
+
         emb = self.embeddings(src)
-        # s_len, batch, emb_dim = emb.size()
 
         packed_emb = emb
         if lengths is not None and not self.no_pack_padded_seq:

diff --git a/onmt/encoders/transformer.py b/onmt/encoders/transformer.py
index afb796b3..3ddbbc42 100644
--- a/onmt/encoders/transformer.py
+++ b/onmt/encoders/transformer.py
@@ -127,7 +127,6 @@ class TransformerEncoder(EncoderBase):
 
     def forward(self, src, lengths=None):
         """See :func:`EncoderBase.forward()`"""
-        self._check_args(src, lengths)
 
         emb = self.embeddings(src)

diff --git a/onmt/modules/conv_multi_step_attention.py b/onmt/modules/conv_multi_step_attention.py
index 545df1c9..0499f76f 100644
--- a/onmt/modules/conv_multi_step_attention.py
+++ b/onmt/modules/conv_multi_step_attention.py
@@ -2,7 +2,6 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from onmt.utils.misc import aeq
 
 SCALE_WEIGHT = 0.5 ** 0.5
 
@@ -46,22 +45,12 @@ class ConvMultiStepAttention(nn.Module):
                 which is the combination of base emb and top output of
encode """ - # checks - # batch, channel, height, width = base_target_emb.size() batch, _, height, _ = base_target_emb.size() - # batch_, channel_, height_, width_ = input_from_dec.size() batch_, _, height_, _ = input_from_dec.size() - aeq(batch, batch_) - aeq(height, height_) - # enc_batch, enc_channel, enc_height = encoder_out_top.size() enc_batch, _, enc_height = encoder_out_top.size() - # enc_batch_, enc_channel_, enc_height_ = encoder_out_combine.size() enc_batch_, _, enc_height_ = encoder_out_combine.size() - aeq(enc_batch, enc_batch_) - aeq(enc_height, enc_height_) - preatt = seq_linear(self.linear_in, input_from_dec) target = (base_target_emb + preatt) * SCALE_WEIGHT target = torch.squeeze(target, 3) diff --git a/onmt/modules/copy_generator.py b/onmt/modules/copy_generator.py index 7343ab60..d0edbda6 100644 --- a/onmt/modules/copy_generator.py +++ b/onmt/modules/copy_generator.py @@ -1,7 +1,6 @@ import torch import torch.nn as nn -from onmt.utils.misc import aeq from onmt.utils.loss import CommonLossCompute @@ -107,12 +106,8 @@ class CopyGenerator(nn.Module): ``(src_len, batch, extra_words)`` """ - # CHECKS - batch_by_tlen, _ = hidden.size() - batch_by_tlen_, slen = attn.size() - slen_, batch, cvocab = src_map.size() - aeq(batch_by_tlen, batch_by_tlen_) - aeq(slen, slen_) + _, slen = attn.size() + _, batch, cvocab = src_map.size() # Original probabilities. logits = self.linear(hidden) diff --git a/onmt/modules/global_attention.py b/onmt/modules/global_attention.py index 74a085e0..5687b07b 100644 --- a/onmt/modules/global_attention.py +++ b/onmt/modules/global_attention.py @@ -4,7 +4,7 @@ import torch.nn as nn import torch.nn.functional as F from onmt.modules.sparse_activations import sparsemax -from onmt.utils.misc import aeq, sequence_mask +from onmt.utils.misc import sequence_mask # This class is mainly used by decoder.py for RNNs but also # by the CNN / transformer decoder when copy attention is used @@ -108,9 +108,6 @@ class GlobalAttention(nn.Module): # Check input sizes src_batch, src_len, src_dim = h_s.size() tgt_batch, tgt_len, tgt_dim = h_t.size() - aeq(src_batch, tgt_batch) - aeq(src_dim, tgt_dim) - aeq(self.dim, src_dim) if self.attn_type in ["general", "dot"]: if self.attn_type == "general": @@ -161,13 +158,8 @@ class GlobalAttention(nn.Module): batch, source_l, dim = memory_bank.size() batch_, target_l, dim_ = source.size() - aeq(batch, batch_) - aeq(dim, dim_) - aeq(self.dim, dim) if coverage is not None: batch_, source_l_ = coverage.size() - aeq(batch, batch_) - aeq(source_l, source_l_) if coverage is not None: cover = coverage.view(-1).unsqueeze(1) @@ -205,23 +197,13 @@ class GlobalAttention(nn.Module): # Check output sizes batch_, dim_ = attn_h.size() - aeq(batch, batch_) - aeq(dim, dim_) batch_, source_l_ = align_vectors.size() - aeq(batch, batch_) - aeq(source_l, source_l_) else: attn_h = attn_h.transpose(0, 1).contiguous() align_vectors = align_vectors.transpose(0, 1).contiguous() # Check output sizes target_l_, batch_, dim_ = attn_h.size() - aeq(target_l, target_l_) - aeq(batch, batch_) - aeq(dim, dim_) target_l_, batch_, source_l_ = align_vectors.size() - aeq(target_l, target_l_) - aeq(batch, batch_) - aeq(source_l, source_l_) return attn_h, align_vectors diff --git a/onmt/modules/sparse_losses.py b/onmt/modules/sparse_losses.py index 08a24e1d..2a5e0482 100644 --- a/onmt/modules/sparse_losses.py +++ b/onmt/modules/sparse_losses.py @@ -3,7 +3,6 @@ import torch.nn as nn from torch.autograd import Function from torch.cuda.amp import custom_fwd, custom_bwd from 
onmt.modules.sparse_activations import _threshold_and_support -from onmt.utils.misc import aeq class SparsemaxLossFunction(Function): @@ -16,8 +15,6 @@ class SparsemaxLossFunction(Function): target (LongTensor): ``(n,)``, the indices of the target classes """ input_batch, classes = input.size() - target_batch = target.size(0) - aeq(input_batch, target_batch) z_k = input.gather(1, target.unsqueeze(1)).squeeze() tau_z, support_size = _threshold_and_support(input, dim=1) diff --git a/onmt/trainer.py b/onmt/trainer.py index 8c15438d..4da0dc42 100644 --- a/onmt/trainer.py +++ b/onmt/trainer.py @@ -198,8 +198,7 @@ class Trainer(object): for batch in iterator: batches.append(batch) if self.norm_method == "tokens": - num_tokens = batch['tgtlen'].sum() - normalization += num_tokens.item() + normalization += batch['tgtlen'].sum().item() else: normalization += len(batch['indices']) if len(batches) == self.accum_count: @@ -260,13 +259,6 @@ class Trainer(object): # UPDATE DROPOUT self._maybe_update_dropout(step) - if self.gpu_verbose_level > 1: - logger.info("GpuRank %d: index: %d", self.gpu_rank, i) - if self.gpu_verbose_level > 0: - logger.info("GpuRank %d: reduce_counter: %d \ - n_minibatch %d" - % (self.gpu_rank, i + 1, len(batches))) - if self.n_gpu > 1: normalization = sum(onmt.utils.distributed .all_gather_list @@ -286,15 +278,9 @@ class Trainer(object): if (valid_iter is not None and step % valid_steps == 0 and self.gpu_rank == 0): - if self.gpu_verbose_level > 0: - logger.info('GpuRank %d: validate step %d' - % (self.gpu_rank, step)) valid_stats = self.validate( valid_iter, moving_average=self.moving_average) - if self.gpu_verbose_level > 0: - logger.info('GpuRank %d: report stat step %d' - % (self.gpu_rank, step)) self._report_step(self.optim.learning_rate(), step, valid_stats=valid_stats) # Run patience mechanism diff --git a/onmt/utils/__init__.py b/onmt/utils/__init__.py index 6e7873ac..a09b7241 100644 --- a/onmt/utils/__init__.py +++ b/onmt/utils/__init__.py @@ -1,5 +1,5 @@ """Module defining various utilities.""" -from onmt.utils.misc import split_corpus, aeq, use_gpu, set_random_seed +from onmt.utils.misc import split_corpus, use_gpu, set_random_seed from onmt.utils.alignment import make_batch_align_matrix from onmt.utils.report_manager import ReportMgr, build_report_manager from onmt.utils.statistics import Statistics @@ -7,7 +7,7 @@ from onmt.utils.optimizers import MultipleOptimizer, \ Optimizer, AdaFactor from onmt.utils.earlystopping import EarlyStopping, scorers_from_opts -__all__ = ["split_corpus", "aeq", "use_gpu", "set_random_seed", "ReportMgr", +__all__ = ["split_corpus", "use_gpu", "set_random_seed", "ReportMgr", "build_report_manager", "Statistics", "MultipleOptimizer", "Optimizer", "AdaFactor", "EarlyStopping", "scorers_from_opts", "make_batch_align_matrix"] diff --git a/onmt/utils/misc.py b/onmt/utils/misc.py index 8794a4be..14599864 100644 --- a/onmt/utils/misc.py +++ b/onmt/utils/misc.py @@ -74,16 +74,6 @@ def _split_corpus(path, shard_size): yield shard -def aeq(*args): - """ - Assert all arguments have the same value - """ - arguments = (arg for arg in args) - first = next(arguments) - assert all(arg == first for arg in arguments), \ - "Not all arguments have the same value: " + str(args) - - def sequence_mask(lengths, max_len=None): """ Creates a boolean mask from sequence lengths. -- cgit v1.2.3 From 03a1e955303b66702e440014b563af40fca6def8 Mon Sep 17 00:00:00 2001 From: vince62s Date: Mon, 17 Oct 2022 20:43:24 +0200 Subject: fix comments. 
---
 onmt/modules/conv_multi_step_attention.py |  4 ++--
 onmt/modules/global_attention.py          | 10 ----------
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/onmt/modules/conv_multi_step_attention.py b/onmt/modules/conv_multi_step_attention.py
index 0499f76f..917ffabc 100644
--- a/onmt/modules/conv_multi_step_attention.py
+++ b/onmt/modules/conv_multi_step_attention.py
@@ -36,8 +36,8 @@ class ConvMultiStepAttention(nn.Module):
                 encoder_out_combine):
         """
         Args:
-            base_target_emb: target emb tensor
-            input_from_dec: output of decode conv
+            base_target_emb: target emb tensor (batch, channel, height, width)
+            input_from_dec: output of dec conv (batch, channel, height, width)
             encoder_out_top: the key matrix for calculation of attetion
                 weight, which is the top output of encode conv
             encoder_out_combine:

diff --git a/onmt/modules/global_attention.py b/onmt/modules/global_attention.py
index 5687b07b..fcade2d9 100644
--- a/onmt/modules/global_attention.py
+++ b/onmt/modules/global_attention.py
@@ -104,8 +104,6 @@ class GlobalAttention(nn.Module):
           FloatTensor: raw attention scores (unnormalized) for each src index
             ``(batch, tgt_len, src_len)``
         """
-
-        # Check input sizes
         src_batch, src_len, src_dim = h_s.size()
         tgt_batch, tgt_len, tgt_dim = h_t.size()
 
@@ -194,16 +192,8 @@ class GlobalAttention(nn.Module):
         if one_step:
             attn_h = attn_h.squeeze(1)
             align_vectors = align_vectors.squeeze(1)
-
-            # Check output sizes
-            batch_, dim_ = attn_h.size()
-            batch_, source_l_ = align_vectors.size()
-
         else:
             attn_h = attn_h.transpose(0, 1).contiguous()
             align_vectors = align_vectors.transpose(0, 1).contiguous()
-            # Check output sizes
-            target_l_, batch_, dim_ = attn_h.size()
-            target_l_, batch_, source_l_ = align_vectors.size()
 
         return attn_h, align_vectors
-- 
cgit v1.2.3