author | Rico Sennrich <rico.sennrich@gmx.ch> | 2015-04-08 12:04:01 +0300
committer | Rico Sennrich <rico.sennrich@gmx.ch> | 2015-04-08 12:04:01 +0300
commit | 3dc380d71ab1355ff45de1dad63c3ed00cbf9f0b (patch)
tree | 5bd61793d059e85969af33b2252cf316c7364b8e
parent | 6a0d522ba44838c925985c9b6cf620cf5b42ebee (diff)
options for premultiplication and unnormalized networks in testNeuralNetwork
-rw-r--r-- | src/param.h | 3
-rw-r--r-- | src/testNeuralNetwork.cpp | 52
2 files changed, 40 insertions, 15 deletions
```diff
diff --git a/src/param.h b/src/param.h
index 8502312..fa27e2f 100644
--- a/src/param.h
+++ b/src/param.h
@@ -55,7 +55,8 @@ struct param
     int num_threads;
 
     int debug;
-
+    bool premultiply;
+    bool share_embeddings;
 };
diff --git a/src/testNeuralNetwork.cpp b/src/testNeuralNetwork.cpp
index 72d8a81..58b9165 100644
--- a/src/testNeuralNetwork.cpp
+++ b/src/testNeuralNetwork.cpp
@@ -27,6 +27,8 @@ int main (int argc, char *argv[])
     ValueArg<int> debug("", "debug", "Debug level. Higher debug levels print log-probabilities of each n-gram (level 1), and n-gram itself (level 2). Default: 0.", false, 0, "int", cmd);
 
     ValueArg<int> num_threads("", "num_threads", "Number of threads. Default: maximum.", false, 0, "int", cmd);
+    SwitchArg premultiply("", "premultiply", "premultiply hidden layer.", cmd, false);
+    SwitchArg unnormalized("", "unnormalized", "do not normalize output.", cmd, false);
 
     ValueArg<int> minibatch_size("", "minibatch_size", "Minibatch size. Default: 64.", false, 64, "int", cmd);
     ValueArg<string> arg_test_file("", "test_file", "Test file (one numberized example per line).", true, "", "string", cmd);
@@ -39,6 +41,8 @@ int main (int argc, char *argv[])
     myParam.test_file = arg_test_file.getValue();
     myParam.num_threads = num_threads.getValue();
+    myParam.premultiply = premultiply.getValue();
+    myParam.normalization = !unnormalized.getValue();
 
     myParam.minibatch_size = minibatch_size.getValue();
     myParam.debug = debug.getValue();
@@ -75,6 +79,11 @@ int main (int argc, char *argv[])
     myParam.input_embedding_dimension = nn.input_embedding_dimension;
     myParam.output_embedding_dimension = nn.output_embedding_dimension;
 
+    if (myParam.premultiply) {
+        cerr << "Premultiplying hidden layer" << endl;
+        nn.premultiply();
+    }
+
     ///// Read test data
 
     vector<int> test_data_flat;
@@ -103,20 +112,35 @@ int main (int argc, char *argv[])
 
         prop.fProp(minibatch.topRows(myParam.ngram_size-1));
 
-        // Do full forward prop through output word embedding layer
-        if (prop.skip_hidden)
-            prop.output_layer_node.param->fProp(prop.first_hidden_activation_node.fProp_matrix, scores);
-        else
-            prop.output_layer_node.param->fProp(prop.second_hidden_activation_node.fProp_matrix, scores);
-
-        // And softmax and loss
-        double minibatch_log_likelihood;
-        SoftmaxLogLoss().fProp(scores.leftCols(current_minibatch_size),
-                               minibatch.row(myParam.ngram_size-1),
-                               output_probs,
-                               minibatch_log_likelihood);
-        log_likelihood += minibatch_log_likelihood;
+        if (myParam.normalization)
+        {
+            // Do full forward prop through output word embedding layer
+            if (prop.skip_hidden)
+                prop.output_layer_node.param->fProp(prop.first_hidden_activation_node.fProp_matrix, scores);
+            else
+                prop.output_layer_node.param->fProp(prop.second_hidden_activation_node.fProp_matrix, scores);
+
+            // And softmax and loss
+            double minibatch_log_likelihood;
+            SoftmaxLogLoss().fProp(scores.leftCols(current_minibatch_size),
+                                   minibatch.row(myParam.ngram_size-1),
+                                   output_probs,
+                                   minibatch_log_likelihood);
+            log_likelihood += minibatch_log_likelihood;
+        }
+        else
+        {
+            for (int j=0; j<current_minibatch_size; j++)
+            {
+                int output = minibatch(nn.ngram_size-1, j);
+                if (prop.skip_hidden)
+                    output_probs(output, j) = prop.output_layer_node.param->fProp(prop.first_hidden_activation_node.fProp_matrix, output, j);
+                else
+                    output_probs(output, j) = prop.output_layer_node.param->fProp(prop.second_hidden_activation_node.fProp_matrix, output, j);
+                log_likelihood += output_probs(output, j);
+            }
+        }
 
         if (myParam.debug > 0)
         {
             for (int i=0; i<current_minibatch_size; i++)
             {
```
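The `--premultiply` flag calls `nn.premultiply()` before scoring. The idea behind premultiplication is that an embedding lookup followed by a linear layer can be fused: each context position selects one column of the input embedding matrix, so the product of the first hidden layer's weights with the embedding table can be computed once up front, after which every query reduces to column lookups and a sum over context positions. Below is a minimal sketch of that idea in Eigen; the identifiers (`D`, `W1`, `P`) and dimensions are illustrative assumptions, not nplm's actual members.

```cpp
// Hypothetical sketch of premultiplying a first hidden layer into the
// input embedding table. Names and shapes are illustrative only.
#include <Eigen/Dense>
#include <iostream>
#include <vector>

using Eigen::MatrixXd;
using Eigen::VectorXd;

int main()
{
    const int vocab = 5, emb = 3, hidden = 4, context = 2;

    MatrixXd D = MatrixXd::Random(emb, vocab);   // input embeddings, one column per word
    std::vector<MatrixXd> W1(context);           // per-position blocks of the first hidden layer
    for (int p = 0; p < context; ++p)
        W1[p] = MatrixXd::Random(hidden, emb);

    // Normal forward pass: look up each context word's embedding, then multiply.
    std::vector<int> ctx = {1, 3};
    VectorXd h_slow = VectorXd::Zero(hidden);
    for (int p = 0; p < context; ++p)
        h_slow += W1[p] * D.col(ctx[p]);

    // Premultiplied: fold W1 into the embedding table once, ahead of time.
    // Afterwards the per-query matrix product becomes a plain column lookup.
    std::vector<MatrixXd> P(context);
    for (int p = 0; p < context; ++p)
        P[p] = W1[p] * D;                        // hidden x vocab

    VectorXd h_fast = VectorXd::Zero(hidden);
    for (int p = 0; p < context; ++p)
        h_fast += P[p].col(ctx[p]);

    std::cout << "max diff: " << (h_slow - h_fast).cwiseAbs().maxCoeff() << std::endl;
    return 0;
}
```

The trade-off is memory: the precomputed table is hidden_dim x vocab per context position, which plausibly explains why this is an opt-in test-time switch rather than the default.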
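The `--unnormalized` switch takes the `else` branch above: it treats the raw output-layer score of the target word as its log-probability, skipping the softmax sum over the whole vocabulary. That is only a sound approximation when the model was trained to self-normalize, i.e. when log Z(h) is close to 0 for typical contexts (as noise-contrastive estimation encourages). A small self-contained sketch of the difference, with made-up scores:

```cpp
// Hypothetical sketch of normalized vs. unnormalized scoring; 'scores'
// stands in for the output-layer activations of one test example.
#include <Eigen/Dense>
#include <cmath>
#include <iostream>

int main()
{
    Eigen::VectorXd scores(4);
    scores << 0.2, -1.1, 0.9, 0.05;   // raw output-layer scores s(w|h)
    const int target = 2;

    // Normalized: log P(w|h) = s(w|h) - log sum_w' exp(s(w'|h)).
    double logZ = std::log(scores.array().exp().sum());
    double normalized = scores(target) - logZ;

    // Unnormalized: take the raw score as the log-probability directly.
    // Only sensible if training pushed log Z(h) toward 0 (self-normalization,
    // e.g. via NCE); otherwise the "log-likelihood" is off by log Z(h).
    double unnormalized = scores(target);

    std::cout << "normalized:   " << normalized   << "\n"
              << "unnormalized: " << unnormalized << "\n"
              << "log Z(h):     " << logZ         << std::endl;
    return 0;
}
```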