Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarcin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com>2019-02-05 07:26:46 +0300
committerMarcin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com>2019-02-05 07:26:46 +0300
commitd121ba4726e843ab50bf35b81150586d5a581c7f (patch)
tree4aefbd84cb4b62f63a5261281db51a92aa1e93d3 /scripts
parent9f129279b9441d342368ecd3785d04bd618b92da (diff)
address code review comments
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/bert/bert4marian.py18
1 files changed, 18 insertions, 0 deletions
diff --git a/scripts/bert/bert4marian.py b/scripts/bert/bert4marian.py
index 5ccc4659..8070c0fe 100755
--- a/scripts/bert/bert4marian.py
+++ b/scripts/bert/bert4marian.py
@@ -1,4 +1,22 @@
#!/usr/bin/env python3
+"""
+This script takes a TensorFlow BERT checkpoint and a model description in a JSON file and converts
+it to a Marian weight file with NumPy weights and an internal YAML description.
+
+This works with checkpoints from https://github.com/google-research/bert
+
+Assuming a BERT checkpoint like this:
+drwxr-xr-x 2 marcinjd marcinjd 4.0K Nov 23 16:39 .
+-rw-r--r-- 1 marcinjd marcinjd 521 Nov 23 16:38 bert_config.json
+-rw-r--r-- 1 marcinjd marcinjd 682M Nov 23 16:39 bert_model.ckpt.data-00000-of-00001
+-rw-r--r-- 1 marcinjd marcinjd 8.5K Nov 23 16:39 bert_model.ckpt.index
+-rw-r--r-- 1 marcinjd marcinjd 888K Nov 23 16:39 bert_model.ckpt.meta
+-rw-r--r-- 1 marcinjd marcinjd 973K Nov 23 16:37 vocab.txt
+
+usage:
+
+./bert4marian.py --bert_prefix bert_model.ckpt --bert_config bert_config.json --marian bert.npz
+"""
import tensorflow as tf
import numpy as np