Welcome to mirror list, hosted at ThFree Co, Russian Federation.

postprocess_pipeline.h - github.com/marian-nmt/intgemm/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: ad26ac5bcf584f4db14d7573df30b16c9ec79a88 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#pragma once

#include "intrinsics.h"
#include "types.h"
#include "utils.h"

#include <tuple>

namespace intgemm {

// A postprocess pipeline is represented as a std::tuple holding one object
// per stage, in the order the stages appear in the template argument list.
template <typename... Stages>
using PostprocessPipeline = std::tuple<Stages...>;

// Bundles the given stage objects into a pipeline.
//
// Each argument is copied into the resulting tuple, preserving argument order.
// Calling it with no arguments yields an empty std::tuple<>.
template <typename... Stages>
constexpr std::tuple<Stages...> CreatePostprocessPipeline(const Stages&... stages) {
  return std::tuple<Stages...>(stages...);
}

// Forward declaration only; no definition is provided in this header.
// The code below uses PostprocessImpl<Stage, CpuType> as follows:
//   - it is constructed from a Stage object,
//   - it exposes the nested types InputRegister and OutputRegister,
//   - it provides run(input, offset).
template <typename Postprocess, CPUType CpuType>
class PostprocessImpl;

namespace { // anonymous namespace

// The register type consumed by a pipeline: the InputRegister member type of
// the first stage in the pack.
template <typename... Stages>
using input_register_type =
    typename std::tuple_element<0, std::tuple<Stages...>>::type::InputRegister;

// The register type produced by a pipeline: the OutputRegister member type of
// the last stage in the pack.
template <typename... Stages>
using output_register_type =
    typename std::tuple_element<sizeof...(Stages) - 1, std::tuple<Stages...>>::type::OutputRegister;

template <typename FirstStage, typename... RestStages>
constexpr std::tuple<RestStages...> DropFirstStage(const std::tuple<FirstStage, RestStages...>& pipeline) {
  return make_subtuple(pipeline, sequence_popfront<make_sequence<sizeof...(RestStages) + 1>>());
}

// Base case of the pipeline initialisation recursion: an empty pipeline of
// stage descriptions maps to an empty tuple of implementations.
//
// The argument carries no data and exists only for overload selection, so it
// is left unnamed to avoid unused-parameter warnings in every includer.
template <CPUType CpuType>
constexpr std::tuple<> InitPostprocessPipelineImpl(std::tuple<> /*pipeline*/) {
  return std::tuple<>();
}

// Recursive case: converts a tuple of stage descriptions into a tuple of
// PostprocessImpl<Stage, CpuType> objects, one per stage and in the same order.
//
// The first stage is wrapped in its implementation type, the remaining stages
// are converted by recursing on the pipeline with its head dropped, and the
// two results are concatenated.
template <CPUType CpuType, typename FirstStage, typename... RestStages>
constexpr std::tuple<PostprocessImpl<FirstStage, CpuType>, PostprocessImpl<RestStages, CpuType>...>
InitPostprocessPipelineImpl(std::tuple<FirstStage, RestStages...> pipeline) {
  using HeadImpl = PostprocessImpl<FirstStage, CpuType>;
  return std::tuple_cat(
    std::tuple<HeadImpl>(HeadImpl(std::get<0>(pipeline))),
    InitPostprocessPipelineImpl<CpuType, RestStages...>(DropFirstStage(pipeline)));
}

// Executes an initialised pipeline for a given CPU type.
// The primary template is declared but never defined; one explicit
// specialization per supported CPUType is generated by the
// RUN_POSTPROCESS_PIPELINE_IMPL_INSERT_IMPL macro below.
template <CPUType CpuType>
struct RunPostprocessPipelineImpl;

// Generates the explicit specialization RunPostprocessPipelineImpl<cpu_type>.
//
// Parameters:
//   attribute - token sequence placed in front of each `run` declaration.
//   cpu_type  - the CPUType value the specialization is generated for.
//
// The specialization has two static `run` overloads:
//   * single-stage tuple: calls run(input, offset) on the only stage and
//     returns its result;
//   * multi-stage tuple: calls run(input, offset) on the first stage, then
//     recurses on the remaining stages (DropFirstStage) with the first stage's
//     result as the new input.
// The same `offset` value is forwarded unchanged to every stage.
// The pipeline tuple is taken by value at every level of the recursion.
#define RUN_POSTPROCESS_PIPELINE_IMPL_INSERT_IMPL(attribute, cpu_type)                          \
  template <>                                                                                   \
  struct RunPostprocessPipelineImpl<cpu_type> {                                                 \
    template <typename Stage>                                                                   \
    attribute static constexpr output_register_type<Stage>                                      \
    run(std::tuple<Stage> pipeline, input_register_type<Stage> input, Index offset) {           \
      return std::get<0>(pipeline).run(input, offset);                                          \
    }                                                                                           \
    template <typename... Stages>                                                               \
    attribute static constexpr output_register_type<Stages...>                                  \
    run(std::tuple<Stages...> pipeline, input_register_type<Stages...> input, Index offset) {   \
      return run(                                                                               \
        DropFirstStage(pipeline),                                                               \
        std::get<0>(pipeline).run(input, offset), offset);                                      \
    }                                                                                           \
  };

// One specialization per supported CPU type, each paired with an INTGEMM_*
// attribute macro defined outside this header.
// NOTE(review): presumably these expand to per-ISA function target
// attributes - confirm in intrinsics.h / types.h.
RUN_POSTPROCESS_PIPELINE_IMPL_INSERT_IMPL(INTGEMM_SSE2, CPUType::SSE2)
RUN_POSTPROCESS_PIPELINE_IMPL_INSERT_IMPL(INTGEMM_SSSE3, CPUType::SSSE3)
RUN_POSTPROCESS_PIPELINE_IMPL_INSERT_IMPL(INTGEMM_AVX2, CPUType::AVX2)
RUN_POSTPROCESS_PIPELINE_IMPL_INSERT_IMPL(INTGEMM_AVX512BW, CPUType::AVX512BW)

} // anonymous namespace

// A pipeline whose stages have been converted to their CPU-specific
// implementations. The primary template is intentionally empty: usable
// definitions exist only as the partial specializations generated by
// INITED_POSTPROCESS_PIPELINE_INSERT_IMPL below.
template <CPUType CpuType, typename... Stages>
class InitedPostprocessPipeline {};

// Factory for InitedPostprocessPipeline: wraps a tuple of stage descriptions
// in the initialised pipeline type for the requested CPU type.
// CpuType must be given explicitly; Stages are deduced from the argument.
template <CPUType CpuType, typename... Stages>
constexpr InitedPostprocessPipeline<CpuType, Stages...> InitPostprocessPipeline(std::tuple<Stages...> pipeline) {
  using Inited = InitedPostprocessPipeline<CpuType, Stages...>;
  return Inited(pipeline);
}

// Generates the partial specialization InitedPostprocessPipeline<cpu_type, Stages...>.
//
// Parameters:
//   attribute - token sequence placed in front of each `run` member.
//   cpu_type  - the CPUType value the specialization is generated for.
//
// Members of the generated class:
//   InputRegister / OutputRegister - input type of the first stage
//       implementation and output type of the last one.
//   constructor - converts the tuple of stage descriptions into a tuple of
//       PostprocessImpl<Stage, cpu_type> via InitPostprocessPipelineImpl.
//   run(input, offset) - passes one register through every stage and returns
//       the final result.
//   run(input, length, output) - processes `length` registers from `input`
//       into `output`; element i is run with offset i * sizeof(InputRegister).
// NOTE(review): the array overload derives its offset from a byte size, while
// the single-register overload takes a caller-supplied offset - confirm that
// both use the units the stages expect.
#define INITED_POSTPROCESS_PIPELINE_INSERT_IMPL(attribute, cpu_type)                                                 \
  template <typename... Stages>                                                                                      \
  class InitedPostprocessPipeline<cpu_type, Stages...> {                                                             \
  public:                                                                                                            \
    using InputRegister = input_register_type<PostprocessImpl<Stages, cpu_type>...>;                                 \
    using OutputRegister = output_register_type<PostprocessImpl<Stages, cpu_type>...>;                               \
    InitedPostprocessPipeline(std::tuple<Stages...> pipeline)                                                        \
        : inited_pipeline(InitPostprocessPipelineImpl<cpu_type, Stages...>(pipeline)) {}                             \
    attribute inline OutputRegister run(InputRegister input, Index offset) {                                         \
      return RunPostprocessPipelineImpl<cpu_type>::run(inited_pipeline, input, offset);                              \
    }                                                                                                                \
    attribute inline void run(const InputRegister* input, unsigned length, OutputRegister* output) {                 \
      for (unsigned i = 0; i < length; ++i)                                                                          \
        output[i] = RunPostprocessPipelineImpl<cpu_type>::run(inited_pipeline, input[i], i * sizeof(InputRegister)); \
    }                                                                                                                \
  private:                                                                                                           \
    const std::tuple<PostprocessImpl<Stages, cpu_type>...> inited_pipeline;                                          \
  };

// One partial specialization per supported CPU type, mirroring the set of
// RunPostprocessPipelineImpl specializations defined earlier in this header.
INITED_POSTPROCESS_PIPELINE_INSERT_IMPL(INTGEMM_SSE2, CPUType::SSE2)
INITED_POSTPROCESS_PIPELINE_INSERT_IMPL(INTGEMM_SSSE3, CPUType::SSSE3)
INITED_POSTPROCESS_PIPELINE_INSERT_IMPL(INTGEMM_AVX2, CPUType::AVX2)
INITED_POSTPROCESS_PIPELINE_INSERT_IMPL(INTGEMM_AVX512BW, CPUType::AVX512BW)

}