Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/bitextor/bicleaner-ai.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author    ZJaume <jzaragoza@prompsit.com>  2021-11-05 20:56:10 +0300
committer ZJaume <jzaragoza@prompsit.com>  2021-11-05 20:56:10 +0300
commit    11a5468d818d223580536353e8697b3df040438e (patch)
tree      5bd6ab5a84d898bd1709dc0e91a81ddf47682044 /bicleaner_ai
parent    bf3732f93a5290f9f72c5df7f211db05fd3f6a7f (diff)
Update HF Transformers; a single GPU is no longer needed for prediction
Diffstat (limited to 'bicleaner_ai')
-rw-r--r--  bicleaner_ai/models.py    14
-rw-r--r--  bicleaner_ai/training.py  30
2 files changed, 15 insertions(+), 29 deletions(-)
diff --git a/bicleaner_ai/models.py b/bicleaner_ai/models.py
index ac08139..0c20d04 100644
--- a/bicleaner_ai/models.py
+++ b/bicleaner_ai/models.py
@@ -588,20 +588,6 @@ class BCXLMRoberta(BaseModel):
self.model.save_pretrained(model_filename)
self.tokenizer.save_pretrained(vocab_filename)
- # predict returns empty output when using multi-gpu
- # so, reloading model in single gpu is needed for prediction
- del self.model
- strategy = tf.distribute.OneDeviceStrategy('/gpu:0')
- with strategy.scope():
- self.model = self.load_model(model_filename)
-
- # Divide the configured batch_size by the number of GPUs
- # to determine batch_size for single GPU
- # and reload development set with the new batch_size
- batch_size = min(1, self.settings["batch_size"]//num_devices)
- dev_generator.batch_size = batch_size
- dev_generator.load(dev_set)
-
y_true = dev_generator.y
y_pred = self.model.predict(dev_generator, verbose=1).logits
y_pred_probs = self.softmax_pos_prob(y_pred)
diff --git a/bicleaner_ai/training.py b/bicleaner_ai/training.py
index 6d3a3d4..3bc27b1 100644
--- a/bicleaner_ai/training.py
+++ b/bicleaner_ai/training.py
@@ -276,30 +276,30 @@ def write_metadata(args, classifier, y_true, y_pred, lm_stats):
out.write(f"f1_score: {f1:.3f}\n")
out.write(f"matthews_corr_coef: {mcc:.3f}\n")
-
# Writing it by hand (not using YAML libraries) to preserve the order
out.write(f"source_lang: {args.source_lang}\n")
out.write(f"target_lang: {args.target_lang}\n")
- # Save base names only if directories are relative
- if check_relative_path(args.model_dir, args.porn_removal_file):
- porn_removal_file = os.path.basename(args.porn_removal_file)
- else:
- porn_removal_file = args.porn_removal_file
- if check_relative_path(args.model_dir, args.lm_file_sl):
- lm_file_sl = os.path.basename(args.lm_file_sl)
- else:
- lm_file_sl = args.lm_file_sl
- if check_relative_path(args.model_dir, args.lm_file_tl):
- lm_file_tl = os.path.basename(args.lm_file_tl)
- else:
- lm_file_tl = args.lm_file_tl
-
if args.porn_removal_file is not None and args.porn_removal_train is not None:
+ # Save base names only if directories are relative
+ if check_relative_path(args.model_dir, args.porn_removal_file):
+ porn_removal_file = os.path.basename(args.porn_removal_file)
+ else:
+ porn_removal_file = args.porn_removal_file
out.write(f"porn_removal_file: {porn_removal_file}\n")
out.write(f"porn_removal_side: {args.porn_removal_side}\n")
if lm_stats is not None and args.lm_file_sl is not None and args.lm_file_tl is not None:
+ # Save base names only if directories are relative
+ if check_relative_path(args.model_dir, args.lm_file_sl):
+ lm_file_sl = os.path.basename(args.lm_file_sl)
+ else:
+ lm_file_sl = args.lm_file_sl
+ if check_relative_path(args.model_dir, args.lm_file_tl):
+ lm_file_tl = os.path.basename(args.lm_file_tl)
+ else:
+ lm_file_tl = args.lm_file_tl
+
out.write(f"source_lm: {lm_file_sl}\n")
out.write(f"target_lm: {lm_file_tl}\n")
out.write(f"lm_type: CHARACTER\n")