```
author     ZJaume <jzaragoza@prompsit.com>   2022-07-05 17:34:41 +0300
committer  ZJaume <jzaragoza@prompsit.com>   2022-07-05 17:34:41 +0300
commit     fec5c745b3e269b81814c797b47d9b51e55bdb97 (patch)
tree       167463592294e446f36efe111829a9abd0c8d07b
parent     f25b5188fc07bca09096388e1a8ae6fe97675a77 (diff)
```
Redirect all Keras progbars to stderr
It seems the Keras developers won't accept writing progress bars to stderr; see
[here](https://github.com/keras-team/keras/pull/12019).
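For readers unfamiliar with the workaround: `contextlib.redirect_stdout` temporarily rebinds `sys.stdout`, so anything the Keras progress bar prints inside the `with` block goes to stderr instead, leaving stdout free for the classifier's actual output. Below is a minimal standalone sketch of the technique; the toy model and data are hypothetical and not taken from the repo.

```python
import sys
from contextlib import redirect_stdout

import numpy as np
import tensorflow as tf

# Toy model and data, for illustration only.
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer="adam", loss="mse")
x = np.random.rand(32, 4).astype("float32")
y = np.random.rand(32, 1).astype("float32")

# Inside the block, sys.stdout is temporarily replaced by sys.stderr,
# so the progress bar printed by fit() lands on stderr.
with redirect_stdout(sys.stderr):
    model.fit(x, y, epochs=1, verbose=1)
```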
```
-rw-r--r--   bicleaner_ai/classify.py |  7
-rw-r--r--   bicleaner_ai/models.py   | 61

2 files changed, 46 insertions, 22 deletions
```
```diff
diff --git a/bicleaner_ai/classify.py b/bicleaner_ai/classify.py
index 7131448..fbb2a51 100644
--- a/bicleaner_ai/classify.py
+++ b/bicleaner_ai/classify.py
@@ -224,12 +224,17 @@ def classify(args, input, output):
 
 # Score a batch of sentences
 def classify_batch(args, output, buf_sent, buf_sent_sl, buf_sent_tl, buf_score):
+    if logging.getLogger().level <= logging.DEBUG:
+        verbose = 1
+    else:
+        verbose = 0
     # Classify predictions
     if len(buf_sent_tl) > 0 and len(buf_sent_sl) > 0:
         predictions = args.clf.predict(buf_sent_sl, buf_sent_tl,
                                        args.batch_size,
                                        args.calibrated,
-                                       args.raw_output)
+                                       args.raw_output,
+                                       verbose=verbose)
     else:
         predictions = []
     p = iter(predictions)
diff --git a/bicleaner_ai/models.py b/bicleaner_ai/models.py
index cdb522c..ef34308 100644
--- a/bicleaner_ai/models.py
+++ b/bicleaner_ai/models.py
@@ -9,6 +9,7 @@ from tensorflow.keras.metrics import Precision, Recall
 from tensorflow.keras.optimizers import Adam
 from tensorflow.keras.models import load_model
 from tensorflow.keras import layers
+from contextlib import redirect_stdout
 from glove import Corpus, Glove
 from abc import ABC, abstractmethod
 import tensorflow.keras.backend as K
@@ -16,6 +17,7 @@ import sentencepiece as sp
 import tensorflow as tf
 import numpy as np
 import logging
+import sys
 
 try:
     from . import decomposable_attention
@@ -83,13 +85,15 @@ def calibrate_output(y_true, y_pred):
     else:
         verbose = 0
     model.compile(optimizer=Adam(learning_rate=5e-3), loss=loss)
-    model.fit(y_pred_balanced, y_target, epochs=5000, verbose=verbose,
-              batch_size=4096,
-              validation_split=0.1,
-              callbacks=[earlystop])
+    with redirect_stdout(sys.stderr):
+        model.fit(y_pred_balanced, y_target, epochs=5000, verbose=verbose,
+                  batch_size=4096,
+                  validation_split=0.1,
+                  callbacks=[earlystop])
 
     # Check mcc hasn't been affected
-    y_pred_calibrated = model.predict(y_pred)
+    with redirect_stdout(sys.stderr):
+        y_pred_calibrated = model.predict(y_pred, verbose=verbose)
     end_mcc = matthews_corrcoef(y_true, np.where(y_pred_calibrated>=0.5, 1, 0))
     logging.debug(f"MCC with calibrated output: {end_mcc}")
     if (init_mcc - end_mcc) > 0.02:
@@ -349,17 +353,24 @@ class BaseModel(ModelInterface):
         self.model = self.build_model()
         if logging.getLogger().level == logging.DEBUG:
             self.model.summary()
-        self.model.fit(train_generator,
-                       batch_size=settings["batch_size"],
-                       epochs=settings["epochs"],
-                       steps_per_epoch=steps_per_epoch,
-                       validation_data=dev_generator,
-                       callbacks=[earlystop, LRReport()],
-                       verbose=1)
+        if logging.getLogger().level <= logging.INFO:
+            verbose = 1
+        else:
+            verbose = 0
+
+        with redirect_stdout(sys.stderr):
+            self.model.fit(train_generator,
+                           batch_size=settings["batch_size"],
+                           epochs=settings["epochs"],
+                           steps_per_epoch=steps_per_epoch,
+                           validation_data=dev_generator,
+                           callbacks=[earlystop, LRReport()],
+                           verbose=verbose)
         self.model.save(model_filename)
 
         y_true = dev_generator.y
-        y_pred_probs = self.model.predict(dev_generator)
+        with redirect_stdout(sys.stderr):
+            y_pred_probs = self.model.predict(dev_generator, verbose=verbose)
         y_pred = np.where(y_pred_probs >= 0.5, 1, 0)
         logging.info(f"Dev precision: {precision_score(y_true, y_pred):.3f}")
         logging.info(f"Dev recall: {recall_score(y_true, y_pred):.3f}")
@@ -589,20 +600,28 @@ class BCXLMRoberta(BaseModel):
                               from_logits=True),
                           metrics=[FScore(argmax=True), MatthewsCorrCoef(argmax=True)])
+
         if logging.getLogger().level == logging.DEBUG:
             self.model.summary()
 
-        self.model.fit(train_generator,
-                       epochs=self.settings["epochs"],
-                       steps_per_epoch=steps_per_epoch,
-                       validation_data=dev_generator,
-                       batch_size=self.settings["batch_size"],
-                       callbacks=[earlystop],
-                       verbose=1)
+        if logging.getLogger().level <= logging.INFO:
+            verbose = 1
+        else:
+            verbose = 0
+
+        with redirect_stdout(sys.stderr):
+            self.model.fit(train_generator,
+                           epochs=self.settings["epochs"],
+                           steps_per_epoch=steps_per_epoch,
+                           validation_data=dev_generator,
+                           batch_size=self.settings["batch_size"],
+                           callbacks=[earlystop],
+                           verbose=verbose)
         self.model.save_pretrained(model_filename)
         self.tokenizer.save_pretrained(vocab_filename)
 
         y_true = dev_generator.y
-        y_pred = self.model.predict(dev_generator, verbose=1).logits
+        with redirect_stdout(sys.stderr):
+            y_pred = self.model.predict(dev_generator, verbose=verbose).logits
         y_pred_probs = self.softmax_pos_prob(y_pred)
         y_pred = np.argmax(y_pred, axis=-1)
         logging.info(f"Dev precision: {precision_score(y_true, y_pred):.3f}")
```
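Note the two thresholds above: the inference path in `classify.py` only shows a progress bar at DEBUG level, while the training paths in `models.py` show it at INFO and below. The repeated mapping could be factored into a small helper along these lines (a hypothetical sketch, not part of bicleaner_ai):

```python
import logging

def keras_verbose(threshold=logging.INFO):
    # Map the root logger level to a Keras `verbose` flag: 1 (progress
    # bar) when the level is at or below `threshold`, 0 (silent)
    # otherwise. Hypothetical helper; training code would pass
    # logging.INFO, inference code logging.DEBUG.
    return 1 if logging.getLogger().level <= threshold else 0
```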