Welcome to mirror list, hosted at ThFree Co, Russian Federation.

postprocess_pipeline.h - github.com/marian-nmt/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: f0e22144ab33d6dbc4a80dc022815dee4add3d25 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#pragma once

#include "intrinsics.h"
#include "types.h"

#include <tuple>

namespace intgemm {

// A postprocess pipeline is a plain std::tuple that stores one object per
// stage, in the order in which the stage types are listed.
template <typename... StageTypes>
using PostprocessPipeline = std::tuple<StageTypes...>;

// Builds a pipeline tuple holding a copy of every stage object passed in,
// preserving the argument order. With no arguments the result is std::tuple<>.
template <typename... Stages>
constexpr std::tuple<Stages...> CreatePostprocessPipeline(const Stages&... stages) {
  using Pipeline = std::tuple<Stages...>;
  return Pipeline(stages...);
}

// CPU-specific implementation of the postprocess stage `Postprocess`.
// Only declared here; the definitions are not part of this header.
// This header relies on the following from each PostprocessImpl<Stage, Cpu>:
//   - it can be constructed from a `Stage` object,
//   - it has member types `InputRegister` and `OutputRegister`,
//   - it has a member `run(input, offset)`.
template <typename Postprocess, CPUType CpuType>
class PostprocessImpl;

namespace { // anonymous namespace

// Empty tag type that carries a compile-time list of indices.
template <std::size_t... Indices>
struct integer_seq {};

// Recursive builder: integer_seq_from_one_s<N> derives (through a chain of
// intermediate instantiations) from integer_seq<1, 2, ..., N - 1>.
// Each step prepends `Remaining - 1` to the accumulated tail and counts down.
// The recursion only stops at 1, so N == 0 is not handled.
template <std::size_t Remaining, std::size_t... Tail>
struct integer_seq_from_one_s
    : integer_seq_from_one_s<Remaining - 1, Remaining - 1, Tail...> {};

// Recursion end: the accumulated indices become the integer_seq base class.
template <std::size_t... Indices>
struct integer_seq_from_one_s<1, Indices...> : integer_seq<Indices...> {};

// Index list 1 .. sizeof...(Types); one index per type in the pack, and an
// empty list for an empty pack.
template <typename... Types>
using integer_seq_from_one = integer_seq_from_one_s<1 + sizeof...(Types)>;

// Fallback: anything that is not a std::tuple with at least one element
// (including std::tuple<>) maps to the empty tuple.
template <typename Pipeline>
struct remove_first_stage_type_s {
  using type = std::tuple<>;
};

// A tuple with a leading element maps to the tuple of the remaining elements.
template <typename Head, typename... Tail>
struct remove_first_stage_type_s<std::tuple<Head, Tail...>> {
  using type = std::tuple<Tail...>;
};

// Convenience alias; meant to be used with a single tuple type argument.
template <typename... Stages>
using remove_first_stage_type = typename remove_first_stage_type_s<Stages...>::type;

// Selects the first type of a non-empty type list.
template <typename Head, typename... Tail>
struct first_stage_type_s {
  using type = Head;
};

// Explicit case for a list with exactly one type.
template <typename Only>
struct first_stage_type_s<Only> {
  using type = Only;
};

// Convenience alias: first_stage_type<A, B, C> is A.
template <typename... Stages>
using first_stage_type = typename first_stage_type_s<Stages...>::type;

// Selects the last type of a non-empty type list by dropping the leading type
// until a single one is left; `type` is inherited from that final step.
template <typename Head, typename... Tail>
struct last_stage_type_s : last_stage_type_s<Tail...> {};

// Recursion end: a single remaining type is the answer.
template <typename Only>
struct last_stage_type_s<Only> {
  using type = Only;
};

// Convenience alias: last_stage_type<A, B, C> is C.
template <typename... Stages>
using last_stage_type = typename last_stage_type_s<Stages...>::type;

// Register type a pipeline consumes: the `InputRegister` member type of the
// first type in the list.
template <typename... StageImpls>
using input_register_type = typename first_stage_type<StageImpls...>::InputRegister;

// Register type a pipeline produces: the `OutputRegister` member type of the
// last type in the list.
template <typename... StageImpls>
using output_register_type = typename last_stage_type<StageImpls...>::OutputRegister;

// Builds a new pipeline from the elements of `pipeline` found at the positions
// listed in the integer_seq argument (used only for deducing the indices).
// The declared result is `Tuple` with its first element type removed.
template <typename Tuple, std::size_t... Indices>
constexpr remove_first_stage_type<Tuple> ShiftPostprocessPipelineImpl(const Tuple& pipeline, integer_seq<Indices...>) {
  return CreatePostprocessPipeline(std::get<Indices>(pipeline)...);
}

template <typename FirstStage, typename... RestStages>
constexpr std::tuple<RestStages...> ShiftPostprocessPipeline(const std::tuple<FirstStage, RestStages...>& pipeline) {
  return ShiftPostprocessPipelineImpl(pipeline, integer_seq_from_one<std::tuple<FirstStage, RestStages...>>());
}

// Single-stage case: wraps the only stage of `pipeline` into its
// PostprocessImpl for `CpuType` and returns it as a one-element tuple.
template <CPUType CpuType, typename Stage>
constexpr std::tuple<PostprocessImpl<Stage, CpuType>> InitPostprocessPipelineImpl(std::tuple<Stage> pipeline) {
  using InitedStage = PostprocessImpl<Stage, CpuType>;
  return std::tuple<InitedStage>(InitedStage(std::get<0>(pipeline)));
}

// Multi-stage case: converts the first stage into its PostprocessImpl for
// `CpuType`, recurses on the pipeline with the first stage removed, and joins
// both results so the stage order is kept.
template <CPUType CpuType, typename FirstStage, typename... RestStages>
constexpr std::tuple<PostprocessImpl<FirstStage, CpuType>, PostprocessImpl<RestStages, CpuType>...> InitPostprocessPipelineImpl(std::tuple<FirstStage, RestStages...> pipeline) {
  using InitedFirst = PostprocessImpl<FirstStage, CpuType>;
  return std::tuple_cat(
    std::tuple<InitedFirst>(InitedFirst(std::get<0>(pipeline))),
    InitPostprocessPipelineImpl<CpuType, RestStages...>(ShiftPostprocessPipeline(pipeline)));
}

// Executes an initialized pipeline for a given CPU type. The primary template
// is only declared; the specializations are generated by
// RUN_POSTPROCESS_PIPELINE_IMPL_INSERT_IMPL below.
template <CPUType CpuType>
struct RunPostprocessPipelineImpl;

// Defines RunPostprocessPipelineImpl<cpu_type> with two static `run` overloads:
//   - one-stage tuple: calls `run(input, offset)` on that stage and returns
//     its result;
//   - general tuple: calls `run(input, offset)` on the first stage, then
//     recurses on ShiftPostprocessPipeline(pipeline) with that result as the
//     new input. The same `offset` is passed to every stage.
// `attribute` is placed in front of both overloads (presumably a per-CPU
// function attribute macro; it is defined outside this file).
// The pipeline tuple is taken by value in each call.
#define RUN_POSTPROCESS_PIPELINE_IMPL_INSERT_IMPL(attribute, cpu_type) \
  template <>                                                          \
  struct RunPostprocessPipelineImpl<cpu_type> {                        \
    template <typename LastStage>                                      \
    attribute static constexpr output_register_type<LastStage> run(    \
        std::tuple<LastStage> pipeline,                                \
        input_register_type<LastStage> input,                          \
        Index offset) {                                                \
      return std::get<0>(pipeline).run(input, offset);                 \
    }                                                                  \
    template <typename... Stages>                                      \
    attribute static constexpr output_register_type<Stages...> run(    \
        std::tuple<Stages...> pipeline,                                \
        input_register_type<Stages...> input,                          \
        Index offset) {                                                \
      return run(ShiftPostprocessPipeline(pipeline),                   \
                 std::get<0>(pipeline).run(input, offset),             \
                 offset);                                              \
    }                                                                  \
  };

// Generate the RunPostprocessPipelineImpl specializations for the SSE2, SSSE3,
// AVX2 and AVX512BW CPU types, each with its matching attribute macro.
RUN_POSTPROCESS_PIPELINE_IMPL_INSERT_IMPL(INTGEMM_SSE2, CPUType::CPU_SSE2)
RUN_POSTPROCESS_PIPELINE_IMPL_INSERT_IMPL(INTGEMM_SSSE3, CPUType::CPU_SSSE3)
RUN_POSTPROCESS_PIPELINE_IMPL_INSERT_IMPL(INTGEMM_AVX2, CPUType::CPU_AVX2)
RUN_POSTPROCESS_PIPELINE_IMPL_INSERT_IMPL(INTGEMM_AVX512BW, CPUType::CPU_AVX512BW)

} // anonymous namespace

// Pipeline whose stages have been converted to their CPU-specific
// PostprocessImpl objects. The primary template is intentionally empty; the
// usable partial specializations (one per CPU type) are generated by
// INITED_POSTPROCESS_PIPELINE_INSERT_IMPL below.
template <CPUType CpuType, typename... Stages>
class InitedPostprocessPipeline {};

// Factory: constructs the InitedPostprocessPipeline for `CpuType` from a
// pipeline tuple, so callers only have to name the CPU type explicitly.
template <CPUType CpuType, typename... Stages>
constexpr InitedPostprocessPipeline<CpuType, Stages...> InitPostprocessPipeline(std::tuple<Stages...> pipeline) {
  using Inited = InitedPostprocessPipeline<CpuType, Stages...>;
  return Inited(pipeline);
}

// Defines the InitedPostprocessPipeline<cpu_type, Stages...> specialization.
//   - InputRegister / OutputRegister: input type of the first and output type
//     of the last PostprocessImpl<Stage, cpu_type> in the pipeline.
//   - Constructor: converts every stage of `pipeline` into its PostprocessImpl
//     via InitPostprocessPipelineImpl and stores the resulting tuple.
//   - run(input, offset): forwards to RunPostprocessPipelineImpl<cpu_type>::run
//     with the stored tuple. It is const-qualified because it never modifies
//     the object (the stored tuple is itself const), which lets callers invoke
//     it through const objects and references as well.
// `attribute` is placed in front of `run` (presumably a per-CPU function
// attribute macro; it is defined outside this file).
#define INITED_POSTPROCESS_PIPELINE_INSERT_IMPL(attribute, cpu_type) \
  template <typename... Stages>                                                            \
  class InitedPostprocessPipeline<cpu_type, Stages...> {                                   \
  public:                                                                                  \
    using InputRegister = input_register_type<PostprocessImpl<Stages, cpu_type>...>;       \
    using OutputRegister = output_register_type<PostprocessImpl<Stages, cpu_type>...>;     \
    InitedPostprocessPipeline(std::tuple<Stages...> pipeline)                              \
        : inited_pipeline(InitPostprocessPipelineImpl<cpu_type, Stages...>(pipeline)) {}   \
    attribute inline OutputRegister run(InputRegister input, Index offset) const {         \
      return RunPostprocessPipelineImpl<cpu_type>::run(inited_pipeline, input, offset);    \
    }                                                                                      \
  private:                                                                                 \
    const std::tuple<PostprocessImpl<Stages, cpu_type>...> inited_pipeline;                \
  };

// Generate the InitedPostprocessPipeline specializations for the SSE2, SSSE3,
// AVX2 and AVX512BW CPU types, each with its matching attribute macro.
INITED_POSTPROCESS_PIPELINE_INSERT_IMPL(INTGEMM_SSE2, CPUType::CPU_SSE2)
INITED_POSTPROCESS_PIPELINE_INSERT_IMPL(INTGEMM_SSSE3, CPUType::CPU_SSSE3)
INITED_POSTPROCESS_PIPELINE_INSERT_IMPL(INTGEMM_AVX2, CPUType::CPU_AVX2)
INITED_POSTPROCESS_PIPELINE_INSERT_IMPL(INTGEMM_AVX512BW, CPUType::CPU_AVX512BW)

}