Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/bench
diff options
context:
space:
mode:
author: Jongsoo Park <jongsoo@fb.com> 2019-02-14 07:35:32 +0300
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2019-02-14 07:44:53 +0300
commit: 05ce78e3a5735217cb9154a2c1572dc956ffe6fc (patch)
tree: 6d2486304b84ef15887385ade7ea16b7b62a571e /bench
parent: 7813a2f2233fa48199b18aa8c03bb439b1fe9ff5 (diff)
clean up depthwise conv interface (#72)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/72
Depthwise conv without requantization is not really useful and was generating more template parameter options.
Reviewed By: jianyuh
Differential Revision: D14021514
fbshipit-source-id: 61f646373fcd902fdb2854a96d003a548f29f8eb
Diffstat (limited to 'bench')
-rw-r--r-- bench/Depthwise3DBenchmark.cc | 65
-rw-r--r-- bench/DepthwiseBenchmark.cc | 53
2 files changed, 0 insertions, 118 deletions
diff --git a/bench/Depthwise3DBenchmark.cc b/bench/Depthwise3DBenchmark.cc
index 596fa15..c5f8ed9 100644
--- a/bench/Depthwise3DBenchmark.cc
+++ b/bench/Depthwise3DBenchmark.cc
@@ -131,72 +131,7 @@ int main() {
double ops =
double(NITER) * N * T_OUT * H_OUT * W_OUT * K * K_T * K_H * K_W * 2;
chrono::time_point<chrono::system_clock> t_begin, t_end;
- for (int i = 0; i < NWARMUP + NITER; ++i) {
- llc_flush();
-
- t_begin = chrono::system_clock::now();
-#pragma omp parallel
- {
- int num_threads = fbgemm_get_num_threads();
- int tid = fbgemm_get_thread_num();
- depthwise_3x3x3_pad_1(
- N,
- T,
- H,
- W,
- K,
- stride_t,
- stride_h,
- stride_w,
- A_zero_point,
- A.data(),
- Bp,
- C.data(),
- tid,
- num_threads);
- }
- t_end = chrono::system_clock::now();
- if (i >= NWARMUP) {
- double dt = chrono::duration<double>(t_end - t_begin).count();
- ttot += dt;
- }
- }
-
- // correctness check
- for (int n = 0; n < N; ++n) {
- for (int t = 0; t < T_OUT; ++t) {
- for (int h = 0; h < H_OUT; ++h) {
- for (int w = 0; w < W_OUT; ++w) {
- for (int g = 0; g < K; ++g) {
- int32_t expected =
- C_ref[(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + g];
- int32_t actual =
- C[(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + g];
- if (expected != actual) {
- cerr << "Depthwise 3x3 results differ at (" << n << ", " << t
- << ", " << h << ", " << w << ", " << g << "). expected "
- << expected << " actual " << actual << endl;
- return -1;
- }
- assert(expected == actual);
- }
- } // w
- } // h
- } // t
- } // n
-
- // Report performance
- printf(
- "N = %d K = %d T = %d H = %d W = %d stride = %d\n",
- N,
- K,
- T,
- H,
- W,
- stride_h);
- printf("GB/s = %f Gops/s = %f\n", bytes / ttot / 1e9, ops / ttot / 1e9);
- ttot = 0;
for (int i = 0; i < NWARMUP + NITER; ++i) {
llc_flush();
diff --git a/bench/DepthwiseBenchmark.cc b/bench/DepthwiseBenchmark.cc
index 6500e29..780d83c 100644
--- a/bench/DepthwiseBenchmark.cc
+++ b/bench/DepthwiseBenchmark.cc
@@ -240,59 +240,6 @@ int main() {
stride_w,
A_zero_point,
A.data(),
- Bp,
- C.data(),
- tid,
- num_threads);
- }
- t_end = chrono::system_clock::now();
- if (i >= NWARMUP) {
- double dt = chrono::duration<double>(t_end - t_begin).count();
- ttot += dt;
- }
- }
-
- // correctness check
- for (int n = 0; n < N; ++n) {
- for (int h = 0; h < H_OUT; ++h) {
- for (int w = 0; w < W_OUT; ++w) {
- for (int g = 0; g < G; ++g) {
- int32_t expected = C_ref[((n * H_OUT + h) * W_OUT + w) * G + g];
- int32_t actual = C[((n * H_OUT + h) * W_OUT + w) * G + g];
- if (expected != actual) {
- cerr << "Depthwise 3x3 results differ at (" << n << ", " << h
- << ", " << w << ", " << g << "). expected " << expected
- << " actual " << actual << endl;
- return -1;
- }
- assert(expected == actual);
- }
- }
- }
- }
-
- // Report performance
- printf("N = %d G = %d H = %d W = %d stride = %d\n", N, G, H, W, stride_h);
- printf("GB/s = %f Gops/s = %f\n", bytes / ttot / 1e9, ops / ttot / 1e9);
-
- ttot = 0;
- for (int i = 0; i < NWARMUP + NITER; ++i) {
- llc_flush();
-
- t_begin = chrono::system_clock::now();
-#pragma omp parallel
- {
- int num_threads = fbgemm_get_num_threads();
- int tid = fbgemm_get_thread_num();
- depthwise_3x3_pad_1(
- N,
- H,
- W,
- G,
- stride_h,
- stride_w,
- A_zero_point,
- A.data(),
B_zero_point,
Bp,
C_multiplier,