
github.com/bitextor/bicleaner-ai.git
author     ZJaume <jzaragoza@prompsit.com>    2022-07-05 17:34:41 +0300
committer  ZJaume <jzaragoza@prompsit.com>    2022-07-05 17:34:41 +0300
commit     fec5c745b3e269b81814c797b47d9b51e55bdb97 (patch)
tree       167463592294e446f36efe111829a9abd0c8d07b
parent     f25b5188fc07bca09096388e1a8ae6fe97675a77 (diff)
Redirect all Keras progbars to stderr
It seems the Keras developers won't accept writing progress bars to stderr (see [here](https://github.com/keras-team/keras/pull/12019)), so redirect stdout to stderr around the Keras fit/predict calls instead, and only enable the progress bars at the appropriate log level.
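
The change is the same in both files: pick the Keras verbose flag from the logger level and run fit/predict under contextlib.redirect_stdout(sys.stderr). A minimal self-contained sketch of that pattern (the toy model and data below are illustrative only, not bicleaner-ai code):

import sys
import logging
from contextlib import redirect_stdout

import numpy as np
import tensorflow as tf

# Show Keras progress bars only when the root logger is verbose enough
verbose = 1 if logging.getLogger().level <= logging.INFO else 0

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer="adam", loss="mse")
x, y = np.random.rand(32, 4), np.random.rand(32, 1)

# Keras writes its progress bar to stdout; redirecting stdout keeps it on
# stderr so stdout stays clean for the tool's real output (e.g. scores)
with redirect_stdout(sys.stderr):
    model.fit(x, y, epochs=1, verbose=verbose)
    preds = model.predict(x, verbose=verbose)

redirect_stdout only swaps sys.stdout, so anything Keras prints at call time lands on stderr while stdout stays available for piping.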
-rw-r--r--   bicleaner_ai/classify.py    7
-rw-r--r--   bicleaner_ai/models.py     61
2 files changed, 46 insertions, 22 deletions
diff --git a/bicleaner_ai/classify.py b/bicleaner_ai/classify.py
index 7131448..fbb2a51 100644
--- a/bicleaner_ai/classify.py
+++ b/bicleaner_ai/classify.py
@@ -224,12 +224,17 @@ def classify(args, input, output):
# Score a batch of sentences
def classify_batch(args, output, buf_sent, buf_sent_sl, buf_sent_tl, buf_score):
+    if logging.getLogger().level <= logging.DEBUG:
+        verbose = 1
+    else:
+        verbose = 0
    # Classify predictions
    if len(buf_sent_tl) > 0 and len(buf_sent_sl) > 0:
        predictions = args.clf.predict(buf_sent_sl, buf_sent_tl,
                                       args.batch_size,
                                       args.calibrated,
-                                       args.raw_output)
+                                       args.raw_output,
+                                       verbose=verbose)
    else:
        predictions = []
    p = iter(predictions)
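
classify.py only turns the progress bar on at DEBUG level, so normal scoring runs stay quiet on every batch. The same gating in isolation (keras_verbose is a hypothetical helper name, used here only for illustration):

import logging

def keras_verbose(enable_at=logging.DEBUG):
    # 1 -> progress bar on, 0 -> silent; Keras accepts both values
    return 1 if logging.getLogger().level <= enable_at else 0

logging.getLogger().setLevel(logging.INFO)
print(keras_verbose())   # 0: hidden at INFO
logging.getLogger().setLevel(logging.DEBUG)
print(keras_verbose())   # 1: shown at DEBUG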
diff --git a/bicleaner_ai/models.py b/bicleaner_ai/models.py
index cdb522c..ef34308 100644
--- a/bicleaner_ai/models.py
+++ b/bicleaner_ai/models.py
@@ -9,6 +9,7 @@ from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from tensorflow.keras import layers
+from contextlib import redirect_stdout
from glove import Corpus, Glove
from abc import ABC, abstractmethod
import tensorflow.keras.backend as K
@@ -16,6 +17,7 @@ import sentencepiece as sp
import tensorflow as tf
import numpy as np
import logging
+import sys
try:
    from . import decomposable_attention
@@ -83,13 +85,15 @@ def calibrate_output(y_true, y_pred):
    else:
        verbose = 0
    model.compile(optimizer=Adam(learning_rate=5e-3), loss=loss)
-    model.fit(y_pred_balanced, y_target, epochs=5000, verbose=verbose,
-              batch_size=4096,
-              validation_split=0.1,
-              callbacks=[earlystop])
+    with redirect_stdout(sys.stderr):
+        model.fit(y_pred_balanced, y_target, epochs=5000, verbose=verbose,
+                  batch_size=4096,
+                  validation_split=0.1,
+                  callbacks=[earlystop])
    # Check mcc hasn't been affected
-    y_pred_calibrated = model.predict(y_pred)
+    with redirect_stdout(sys.stderr):
+        y_pred_calibrated = model.predict(y_pred, verbose=verbose)
    end_mcc = matthews_corrcoef(y_true, np.where(y_pred_calibrated>=0.5, 1, 0))
    logging.debug(f"MCC with calibrated output: {end_mcc}")
    if (init_mcc - end_mcc) > 0.02:
@@ -349,17 +353,24 @@ class BaseModel(ModelInterface):
        self.model = self.build_model()
        if logging.getLogger().level == logging.DEBUG:
            self.model.summary()
-        self.model.fit(train_generator,
-                       batch_size=settings["batch_size"],
-                       epochs=settings["epochs"],
-                       steps_per_epoch=steps_per_epoch,
-                       validation_data=dev_generator,
-                       callbacks=[earlystop, LRReport()],
-                       verbose=1)
+        if logging.getLogger().level <= logging.INFO:
+            verbose = 1
+        else:
+            verbose = 0
+
+        with redirect_stdout(sys.stderr):
+            self.model.fit(train_generator,
+                           batch_size=settings["batch_size"],
+                           epochs=settings["epochs"],
+                           steps_per_epoch=steps_per_epoch,
+                           validation_data=dev_generator,
+                           callbacks=[earlystop, LRReport()],
+                           verbose=verbose)
        self.model.save(model_filename)
        y_true = dev_generator.y
-        y_pred_probs = self.model.predict(dev_generator)
+        with redirect_stdout(sys.stderr):
+            y_pred_probs = self.model.predict(dev_generator, verbose=verbose)
        y_pred = np.where(y_pred_probs >= 0.5, 1, 0)
        logging.info(f"Dev precision: {precision_score(y_true, y_pred):.3f}")
        logging.info(f"Dev recall: {recall_score(y_true, y_pred):.3f}")
@@ -589,20 +600,28 @@ class BCXLMRoberta(BaseModel):
                               from_logits=True),
                           metrics=[FScore(argmax=True),
                                    MatthewsCorrCoef(argmax=True)])
+
        if logging.getLogger().level == logging.DEBUG:
            self.model.summary()
-        self.model.fit(train_generator,
-                       epochs=self.settings["epochs"],
-                       steps_per_epoch=steps_per_epoch,
-                       validation_data=dev_generator,
-                       batch_size=self.settings["batch_size"],
-                       callbacks=[earlystop],
-                       verbose=1)
+        if logging.getLogger().level <= logging.INFO:
+            verbose = 1
+        else:
+            verbose = 0
+
+        with redirect_stdout(sys.stderr):
+            self.model.fit(train_generator,
+                           epochs=self.settings["epochs"],
+                           steps_per_epoch=steps_per_epoch,
+                           validation_data=dev_generator,
+                           batch_size=self.settings["batch_size"],
+                           callbacks=[earlystop],
+                           verbose=verbose)
        self.model.save_pretrained(model_filename)
        self.tokenizer.save_pretrained(vocab_filename)
        y_true = dev_generator.y
-        y_pred = self.model.predict(dev_generator, verbose=1).logits
+        with redirect_stdout(sys.stderr):
+            y_pred = self.model.predict(dev_generator, verbose=verbose).logits
        y_pred_probs = self.softmax_pos_prob(y_pred)
        y_pred = np.argmax(y_pred, axis=-1)
        logging.info(f"Dev precision: {precision_score(y_true, y_pred):.3f}")