Welcome to mirror list, hosted at ThFree Co, Russian Federation.

OptimizedKernelsAvx2.h « src - github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 36d7b2b2d0ddd98b0d8c42f47a6909247f83a1e0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 * All rights reserved.
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
#pragma once

#include <cstdint> // for std::int32_t
#include "fbgemm/FbgemmBuild.h"

namespace fbgemm {

/**
 * @brief Sum the elements of a vector of unsigned 8-bit values.
 *
 * Only the declaration lives in this header; the definition is provided
 * elsewhere. The function is exported through FBGEMM_API.
 *
 * @param A   Pointer to the first byte of the input vector (read-only).
 * @param len Presumably the number of elements of A to sum -- confirm
 *            against the implementation.
 * @return The sum as a signed 32-bit integer.
 *
 * NOTE(review): behaviour when the sum exceeds the int32 range or when
 * len <= 0 is not visible from this header -- verify in the definition.
 */
FBGEMM_API std::int32_t reduceAvx2(const std::uint8_t* A, int len);

/**
 * @brief Transpose 8 rows from source matrix.
 *
 * Only the declaration lives in this header; the definition is provided
 * elsewhere.
 *
 * The fixed-width integer types are spelled with the std:: prefix because
 * <cstdint> is only guaranteed to declare them in namespace std; the
 * global-namespace aliases are optional. This also matches reduceAvx2.
 *
 * @param N      Presumably the number of columns in each of the 8 source
 *               rows -- confirm against the implementation.
 * @param src    Pointer to the source matrix data (read-only).
 * @param ld_src Presumably the leading dimension (row stride, in elements)
 *               of src -- confirm against the implementation.
 * @param dst    Pointer to the destination buffer that receives the
 *               transposed data.
 * @param ld_dst Presumably the leading dimension (row stride, in elements)
 *               of dst -- confirm against the implementation.
 */
void transpose_8rows(
    int N,
    const std::uint8_t* src,
    int ld_src,
    std::uint8_t* dst,
    int ld_dst);

/**
 * @brief avx2 part of the spmdm code.
 *
 * Only the declaration lives in this header; the definition is provided
 * elsewhere.
 *
 * The fixed-width integer types are spelled with the std:: prefix because
 * <cstdint> is only guaranteed to declare them in namespace std; the
 * global-namespace aliases are optional. This also matches reduceAvx2.
 *
 * NOTE(review): the colptr / values / rowidx parameter names suggest a
 * compressed-sparse-column layout for the sparse operand -- confirm against
 * the implementation and its callers.
 *
 * @param N        Presumably a matrix dimension processed by the kernel --
 *                 confirm which one against the implementation.
 * @param A_buffer Pointer to unsigned 8-bit input data (read-only).
 * @param colptr   Pointer to 32-bit column pointers (read-only).
 * @param values   Pointer to signed 8-bit values (read-only).
 * @param rowidx   Pointer to 16-bit row indices (read-only).
 * @param C_buffer Pointer to the 32-bit output buffer written by the kernel.
 */
void spmdmKernelAvx2(
    int N,
    const std::uint8_t* A_buffer,
    const std::int32_t* colptr,
    const std::int8_t* values,
    const std::int16_t* rowidx,
    std::int32_t* C_buffer);

} // namespace fbgemm