diff options
author | ZJaume <jzaragoza@prompsit.com> | 2021-11-05 20:56:10 +0300 |
---|---|---|
committer | ZJaume <jzaragoza@prompsit.com> | 2021-11-05 20:56:10 +0300 |
commit | 11a5468d818d223580536353e8697b3df040438e (patch) | |
tree | 5bd6ab5a84d898bd1709dc0e91a81ddf47682044 /bicleaner_ai | |
parent | bf3732f93a5290f9f72c5df7f211db05fd3f6a7f (diff) |
Update HF Transformers; a single GPU is no longer needed for prediction
Diffstat (limited to 'bicleaner_ai')
-rw-r--r-- | bicleaner_ai/models.py | 14 | ||||
-rw-r--r-- | bicleaner_ai/training.py | 30 |
2 files changed, 15 insertions, 29 deletions
diff --git a/bicleaner_ai/models.py b/bicleaner_ai/models.py index ac08139..0c20d04 100644 --- a/bicleaner_ai/models.py +++ b/bicleaner_ai/models.py @@ -588,20 +588,6 @@ class BCXLMRoberta(BaseModel): self.model.save_pretrained(model_filename) self.tokenizer.save_pretrained(vocab_filename) - # predict returns empty output when using multi-gpu - # so, reloading model in single gpu is needed for prediction - del self.model - strategy = tf.distribute.OneDeviceStrategy('/gpu:0') - with strategy.scope(): - self.model = self.load_model(model_filename) - - # Divide the configured batch_size by the number of GPUs - # to determine batch_size for single GPU - # and reload development set with the new batch_size - batch_size = min(1, self.settings["batch_size"]//num_devices) - dev_generator.batch_size = batch_size - dev_generator.load(dev_set) - y_true = dev_generator.y y_pred = self.model.predict(dev_generator, verbose=1).logits y_pred_probs = self.softmax_pos_prob(y_pred) diff --git a/bicleaner_ai/training.py b/bicleaner_ai/training.py index 6d3a3d4..3bc27b1 100644 --- a/bicleaner_ai/training.py +++ b/bicleaner_ai/training.py @@ -276,30 +276,30 @@ def write_metadata(args, classifier, y_true, y_pred, lm_stats): out.write(f"f1_score: {f1:.3f}\n") out.write(f"matthews_corr_coef: {mcc:.3f}\n") - # Writing it by hand (not using YAML libraries) to preserve the order out.write(f"source_lang: {args.source_lang}\n") out.write(f"target_lang: {args.target_lang}\n") - # Save base names only if directories are relative - if check_relative_path(args.model_dir, args.porn_removal_file): - porn_removal_file = os.path.basename(args.porn_removal_file) - else: - porn_removal_file = args.porn_removal_file - if check_relative_path(args.model_dir, args.lm_file_sl): - lm_file_sl = os.path.basename(args.lm_file_sl) - else: - lm_file_sl = args.lm_file_sl - if check_relative_path(args.model_dir, args.lm_file_tl): - lm_file_tl = os.path.basename(args.lm_file_tl) - else: - lm_file_tl = args.lm_file_tl 
- if args.porn_removal_file is not None and args.porn_removal_train is not None: + # Save base names only if directories are relative + if check_relative_path(args.model_dir, args.porn_removal_file): + porn_removal_file = os.path.basename(args.porn_removal_file) + else: + porn_removal_file = args.porn_removal_file out.write(f"porn_removal_file: {porn_removal_file}\n") out.write(f"porn_removal_side: {args.porn_removal_side}\n") if lm_stats is not None and args.lm_file_sl is not None and args.lm_file_tl is not None: + # Save base names only if directories are relative + if check_relative_path(args.model_dir, args.lm_file_sl): + lm_file_sl = os.path.basename(args.lm_file_sl) + else: + lm_file_sl = args.lm_file_sl + if check_relative_path(args.model_dir, args.lm_file_tl): + lm_file_tl = os.path.basename(args.lm_file_tl) + else: + lm_file_tl = args.lm_file_tl + out.write(f"source_lm: {lm_file_sl}\n") out.write(f"target_lm: {lm_file_tl}\n") out.write(f"lm_type: CHARACTER\n") |