clean up depthwise conv interface (#72)

Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/72

Depthwise conv without requantization is not really useful and was generating more template parameter options.

Reviewed By: jianyuh

Differential Revision: D14021514

fbshipit-source-id: 61f646373fcd902fdb2854a96d003a548f29f8eb
author: Jongsoo Park <jongsoo@fb.com> 2019-02-14 07:35:32 +0300
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2019-02-14 07:44:53 +0300
commit: 05ce78e3a5735217cb9154a2c1572dc956ffe6fc (patch)
tree: 6d2486304b84ef15887385ade7ea16b7b62a571e /bench
parent: 7813a2f2233fa48199b18aa8c03bb439b1fe9ff5 (diff)
2 files changed, 0 insertions, 118 deletions
diff --git a/bench/Depthwise3DBenchmark.cc b/bench/Depthwise3DBenchmark.cc
index 596fa15..c5f8ed9 100644
--- a/bench/Depthwise3DBenchmark.cc
+++ b/bench/Depthwise3DBenchmark.cc
@@ -131,72 +131,7 @@ int main() {
     double ops =
         double(NITER) * N * T_OUT * H_OUT * W_OUT * K * K_T * K_H * K_W * 2;
     chrono::time_point<chrono::system_clock> t_begin, t_end;
-    for (int i = 0; i < NWARMUP + NITER; ++i) {
-      llc_flush();
-
-      t_begin = chrono::system_clock::now();
-#pragma omp parallel
-      {
-        int num_threads = fbgemm_get_num_threads();
-        int tid = fbgemm_get_thread_num();
-        depthwise_3x3x3_pad_1(
-            N,
-            T,
-            H,
-            W,
-            K,
-            stride_t,
-            stride_h,
-            stride_w,
-            A_zero_point,
-            A.data(),
-            Bp,
-            C.data(),
-            tid,
-            num_threads);
-      }
-      t_end = chrono::system_clock::now();
-      if (i >= NWARMUP) {
-        double dt = chrono::duration<double>(t_end - t_begin).count();
-        ttot += dt;
-      }
-    }
-
-    // correctness check
-    for (int n = 0; n < N; ++n) {
-      for (int t = 0; t < T_OUT; ++t) {
-        for (int h = 0; h < H_OUT; ++h) {
-          for (int w = 0; w < W_OUT; ++w) {
-            for (int g = 0; g < K; ++g) {
-              int32_t expected =
-                  C_ref[(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + g];
-              int32_t actual =
-                  C[(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + g];
-              if (expected != actual) {
-                cerr << "Depthwise 3x3 results differ at (" << n << ", " << t
-                     << ", " << h << ", " << w << ", " << g << "). expected "
-                     << expected << " actual " << actual << endl;
-                return -1;
-              }
-              assert(expected == actual);
-            }
-          } // w
-        } // h
-      } // t
-    } // n
-
-    // Report performance
-    printf(
-        "N = %d K = %d T = %d H = %d W = %d stride = %d\n",
-        N,
-        K,
-        T,
-        H,
-        W,
-        stride_h);
-    printf("GB/s = %f Gops/s = %f\n", bytes / ttot / 1e9, ops / ttot / 1e9);
 
-    ttot = 0;
     for (int i = 0; i < NWARMUP + NITER; ++i) {
       llc_flush();
 
diff --git a/bench/DepthwiseBenchmark.cc b/bench/DepthwiseBenchmark.cc
index 6500e29..780d83c 100644
--- a/bench/DepthwiseBenchmark.cc
+++ b/bench/DepthwiseBenchmark.cc
@@ -240,59 +240,6 @@ int main() {
             stride_w,
             A_zero_point,
             A.data(),
-            Bp,
-            C.data(),
-            tid,
-            num_threads);
-      }
-      t_end = chrono::system_clock::now();
-      if (i >= NWARMUP) {
-        double dt = chrono::duration<double>(t_end - t_begin).count();
-        ttot += dt;
-      }
-    }
-
-    // correctness check
-    for (int n = 0; n < N; ++n) {
-      for (int h = 0; h < H_OUT; ++h) {
-        for (int w = 0; w < W_OUT; ++w) {
-          for (int g = 0; g < G; ++g) {
-            int32_t expected = C_ref[((n * H_OUT + h) * W_OUT + w) * G + g];
-            int32_t actual = C[((n * H_OUT + h) * W_OUT + w) * G + g];
-            if (expected != actual) {
-              cerr << "Depthwise 3x3 results differ at (" << n << ", " << h
-                   << ", " << w << ", " << g << "). expected " << expected
-                   << " actual " << actual << endl;
-              return -1;
-            }
-            assert(expected == actual);
-          }
-        }
-      }
-    }
-
-    // Report performance
-    printf("N = %d G = %d H = %d W = %d stride = %d\n", N, G, H, W, stride_h);
-    printf("GB/s = %f Gops/s = %f\n", bytes / ttot / 1e9, ops / ttot / 1e9);
-
-    ttot = 0;
-    for (int i = 0; i < NWARMUP + NITER; ++i) {
-      llc_flush();
-
-      t_begin = chrono::system_clock::now();
-#pragma omp parallel
-      {
-        int num_threads = fbgemm_get_num_threads();
-        int tid = fbgemm_get_thread_num();
-        depthwise_3x3_pad_1(
-            N,
-            H,
-            W,
-            G,
-            stride_h,
-            stride_w,
-            A_zero_point,
-            A.data(),
             B_zero_point,
             Bp,
             C_multiplier,
author	Jongsoo Park <jongsoo@fb.com>	2019-02-14 07:35:32 +0300
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>	2019-02-14 07:44:53 +0300
commit	05ce78e3a5735217cb9154a2c1572dc956ffe6fc (patch)
tree	6d2486304b84ef15887385ade7ea16b7b62a571e /bench
parent	7813a2f2233fa48199b18aa8c03bb439b1fe9ff5 (diff)