diff options
author | Jeroen Vermeulen <jtv@precisiontranslationtools.com> | 2015-05-16 10:58:03 +0300 |
---|---|---|
committer | Jeroen Vermeulen <jtv@precisiontranslationtools.com> | 2015-05-16 10:58:03 +0300 |
commit | 0ffe79579eca183161d86ad38bb34ba8bab3c855 (patch) | |
tree | 426ef93e43acef1ff9ffa1ad5e0c9efeb3142a8d /scripts/training/bilingual-lm/train_nplm.py | |
parent | f1ed14eb33c86611a9d9355caf6439a087d71d03 (diff) |
Fix some python lint.
I used mainly pocketlint, a very good Python linter, but also Syntastic,
a vim plugin. Didn't get anywhere near fixing all of Syntastic's complaints
though.
Once I've cleaned up all (or at least most) of the Python lint, we can
start doing regular automated lint checks and keep the code clean.
Diffstat (limited to 'scripts/training/bilingual-lm/train_nplm.py')
-rwxr-xr-x | scripts/training/bilingual-lm/train_nplm.py | 195 |
1 file changed, 107 insertions, 88 deletions
diff --git a/scripts/training/bilingual-lm/train_nplm.py b/scripts/training/bilingual-lm/train_nplm.py index 356fd798d..7bc74429e 100755 --- a/scripts/training/bilingual-lm/train_nplm.py +++ b/scripts/training/bilingual-lm/train_nplm.py @@ -8,7 +8,9 @@ import subprocess import sys import os -logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.DEBUG) +logging.basicConfig( + format='%(asctime)s %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', level=logging.DEBUG) parser = argparse.ArgumentParser() parser.add_argument("-w", "--working-dir", dest="working_dir") parser.add_argument("-c", "--corpus", dest="corpus_stem") @@ -18,8 +20,10 @@ parser.add_argument("-n", "--ngram-size", dest="ngram_size", type=int) parser.add_argument("-b", "--minibatch-size", dest="minibatch_size", type=int) parser.add_argument("-s", "--noise", dest="noise", type=int) parser.add_argument("-d", "--hidden", dest="hidden", type=int) -parser.add_argument("-i", "--input-embedding", dest="input_embedding", type=int) -parser.add_argument("-o", "--output-embedding", dest="output_embedding", type=int) +parser.add_argument( + "-i", "--input-embedding", dest="input_embedding", type=int) +parser.add_argument( + "-o", "--output-embedding", dest="output_embedding", type=int) parser.add_argument("-t", "--threads", dest="threads", type=int) parser.add_argument("-m", "--output-model", dest="output_model") parser.add_argument("-r", "--output-dir", dest="output_dir") @@ -35,94 +39,109 @@ parser.add_argument("--output_vocab_size", dest="output_vocab_size", type=int) parser.set_defaults( - working_dir = "working" - ,corpus_stem = "train.10k" - ,nplm_home = "/home/bhaddow/tools/nplm" - ,epochs = 10 - ,ngram_size = 14 - ,minibatch_size=1000 - ,noise=100 - ,hidden=750 - ,input_embedding=150 - ,output_embedding=150 - ,threads=1 - ,output_model = "train.10k" - ,output_dir = None - ,config_options_file = "config" - ,log_file = "log" - 
,validation_file = None - ,activation_fn = "rectifier" - ,learning_rate = 1 - ,input_words_file = None - ,output_words_file = None - ,input_vocab_size = 0 - ,output_vocab_size = 0 + working_dir="working", + corpus_stem="train.10k", + nplm_home="/home/bhaddow/tools/nplm", + epochs=10, + ngram_size=14, + minibatch_size=1000, + noise=100, + hidden=750, + input_embedding=150, + output_embedding=150, + threads=1, + output_model="train.10k", + output_dir=None, + config_options_file="config", + log_file="log", + validation_file=None, + activation_fn="rectifier", + learning_rate=1, + input_words_file=None, + output_words_file=None, + input_vocab_size=0, + output_vocab_size=0 ) + def main(options): - vocab_command = [] - if options.input_words_file is not None: - vocab_command += ['--input_words_file', options.input_words_file] - if options.output_words_file is not None: - vocab_command += ['--output_words_file', options.output_words_file] - if options.input_vocab_size: - vocab_command += ['--input_vocab_size', str(options.input_vocab_size)] - if options.output_vocab_size: - vocab_command += ['--output_vocab_size', str(options.output_vocab_size)] - - # Set up validation command variable to use with validation set. 
- validations_command = [] - if options.validation_file is not None: - validations_command =["--validation_file", (options.validation_file + ".numberized")] - - # In order to allow for different models to be trained after the same - # preparation step, we should provide an option for multiple output directories - # If we have not set output_dir, set it to the same thing as the working dir - - if options.output_dir is None: - options.output_dir = options.working_dir - else: - # Create output dir if necessary - if not os.path.exists(options.output_dir): - os.makedirs(options.output_dir) - - config_file = os.path.join(options.output_dir, options.config_options_file + '-' + options.output_model) - log_file = os.path.join(options.output_dir, options.log_file + '-' + options.output_model) - log_file_write = open(log_file, 'w') - config_file_write = open(config_file, 'w') - - config_file_write.write("Called: " + ' '.join(sys.argv) + '\n\n') - - in_file = os.path.join(options.working_dir, os.path.basename(options.corpus_stem) + ".numberized") - - model_prefix = os.path.join(options.output_dir, options.output_model + ".model.nplm") - train_args = [options.nplm_home + "/src/trainNeuralNetwork", - "--train_file", in_file, - "--num_epochs", str(options.epochs), - "--model_prefix", model_prefix, - "--learning_rate", str(options.learning_rate), - "--minibatch_size", str(options.minibatch_size), - "--num_noise_samples", str(options.noise), - "--num_hidden", str(options.hidden), - "--input_embedding_dimension", str(options.input_embedding), - "--output_embedding_dimension", str(options.output_embedding), - "--num_threads", str(options.threads), - "--activation_function", options.activation_fn] + validations_command + vocab_command - print("Train model command: ") - print(', '.join(train_args)) - - config_file_write.write("Training step:\n" + ' '.join(train_args) + '\n') - config_file_write.close() - - log_file_write.write("Training output:\n") - ret = subprocess.call(train_args, 
stdout=log_file_write, stderr=log_file_write) - if ret: - raise Exception("Training failed") - - log_file_write.close() + vocab_command = [] + if options.input_words_file is not None: + vocab_command += ['--input_words_file', options.input_words_file] + if options.output_words_file is not None: + vocab_command += ['--output_words_file', options.output_words_file] + if options.input_vocab_size: + vocab_command += ['--input_vocab_size', str(options.input_vocab_size)] + if options.output_vocab_size: + vocab_command += [ + '--output_vocab_size', str(options.output_vocab_size)] + + # Set up validation command variable to use with validation set. + validations_command = [] + if options.validation_file is not None: + validations_command = [ + "--validation_file", (options.validation_file + ".numberized")] + + # In order to allow for different models to be trained after the same + # preparation step, we should provide an option for multiple output + # directories. + # If we have not set output_dir, set it to the same thing as the working + # dir. 
+ + if options.output_dir is None: + options.output_dir = options.working_dir + else: + # Create output dir if necessary + if not os.path.exists(options.output_dir): + os.makedirs(options.output_dir) + + config_file = os.path.join( + options.output_dir, + options.config_options_file + '-' + options.output_model) + log_file = os.path.join( + options.output_dir, options.log_file + '-' + options.output_model) + log_file_write = open(log_file, 'w') + config_file_write = open(config_file, 'w') + + config_file_write.write("Called: " + ' '.join(sys.argv) + '\n\n') + + in_file = os.path.join( + options.working_dir, + os.path.basename(options.corpus_stem) + ".numberized") + + model_prefix = os.path.join( + options.output_dir, options.output_model + ".model.nplm") + train_args = [ + options.nplm_home + "/src/trainNeuralNetwork", + "--train_file", in_file, + "--num_epochs", str(options.epochs), + "--model_prefix", model_prefix, + "--learning_rate", str(options.learning_rate), + "--minibatch_size", str(options.minibatch_size), + "--num_noise_samples", str(options.noise), + "--num_hidden", str(options.hidden), + "--input_embedding_dimension", str(options.input_embedding), + "--output_embedding_dimension", str(options.output_embedding), + "--num_threads", str(options.threads), + "--activation_function", + options.activation_fn, + ] + validations_command + vocab_command + print("Train model command: ") + print(', '.join(train_args)) + + config_file_write.write("Training step:\n" + ' '.join(train_args) + '\n') + config_file_write.close() + + log_file_write.write("Training output:\n") + ret = subprocess.call( + train_args, stdout=log_file_write, stderr=log_file_write) + if ret: + raise Exception("Training failed") + + log_file_write.close() -if __name__ == "__main__": - options = parser.parse_args() - main(options) +if __name__ == "__main__": + options = parser.parse_args() + main(options) |