/*
Single instruction, multiple data

Copyright 2018 Ahmet Inan
*/

#ifndef SIMD_HH
#define SIMD_HH

#include <cstdint>
#include <cstdlib>
#include <cmath>
#include <algorithm>

namespace ldpctool {

// NOTE(review): this file had lost every template parameter list, template
// argument list and #include target. They were reconstructed from the
// typedefs, literals (0.f / 0.) and INTn_MIN / INTn_MAX macros used in each
// body. Two spots could not be derived that way and should be confirmed
// against callers: the element type of the single vabs overload (assumed
// float) and the element types of the bitwise families vnot / vorr / vand /
// veor / vbic / vbsl (assumed uint8_t .. uint64_t).

// Fixed-width vector of WIDTH lanes of TYPE. Only the specializations below
// are defined. Each one exposes:
//   SIZE        number of lanes
//   value_type  lane type used for arithmetic (member v)
//   uint_type   unsigned integer of the same size (member u), used for
//               bit-level access to the lanes
template <typename TYPE, int WIDTH>
union SIMD;

template <int WIDTH>
union SIMD<float, WIDTH>
{
	static const int SIZE = WIDTH;
	typedef float value_type;
	typedef uint32_t uint_type;
	value_type v[SIZE];
	uint_type u[SIZE];
};

template <int WIDTH>
union SIMD<double, WIDTH>
{
	static const int SIZE = WIDTH;
	typedef double value_type;
	typedef uint64_t uint_type;
	value_type v[SIZE];
	uint_type u[SIZE];
};

template <int WIDTH>
union SIMD<int8_t, WIDTH>
{
	static const int SIZE = WIDTH;
	typedef int8_t value_type;
	typedef uint8_t uint_type;
	value_type v[SIZE];
	uint_type u[SIZE];
};

template <int WIDTH>
union SIMD<int16_t, WIDTH>
{
	static const int SIZE = WIDTH;
	typedef int16_t value_type;
	typedef uint16_t uint_type;
	value_type v[SIZE];
	uint_type u[SIZE];
};

template <int WIDTH>
union SIMD<int32_t, WIDTH>
{
	static const int SIZE = WIDTH;
	typedef int32_t value_type;
	typedef uint32_t uint_type;
	value_type v[SIZE];
	uint_type u[SIZE];
};

template <int WIDTH>
union SIMD<int64_t, WIDTH>
{
	static const int SIZE = WIDTH;
	typedef int64_t value_type;
	typedef uint64_t uint_type;
	value_type v[SIZE];
	uint_type u[SIZE];
};

template <int WIDTH>
union SIMD<uint8_t, WIDTH>
{
	static const int SIZE = WIDTH;
	typedef uint8_t value_type;
	typedef uint8_t uint_type;
	value_type v[SIZE];
	uint_type u[SIZE];
};

template <int WIDTH>
union SIMD<uint16_t, WIDTH>
{
	static const int SIZE = WIDTH;
	typedef uint16_t value_type;
	typedef uint16_t uint_type;
	value_type v[SIZE];
	uint_type u[SIZE];
};

template <int WIDTH>
union SIMD<uint32_t, WIDTH>
{
	static const int SIZE = WIDTH;
	typedef uint32_t value_type;
	typedef uint32_t uint_type;
	value_type v[SIZE];
	uint_type u[SIZE];
};

template <int WIDTH>
union SIMD<uint64_t, WIDTH>
{
	static const int SIZE = WIDTH;
	typedef uint64_t value_type;
	typedef uint64_t uint_type;
	value_type v[SIZE];
	uint_type u[SIZE];
};

// vdup: returns a vector with every lane set to a.
template <typename TYPE>
static inline TYPE vdup(typename TYPE::value_type a)
{
	TYPE tmp;
	for (int i = 0; i < TYPE::SIZE; ++i)
		tmp.v[i] = a;
	return tmp;
}

// vzero: returns a vector with all bits of every lane cleared.
template <typename TYPE>
static inline TYPE vzero()
{
	TYPE tmp;
	// Store zero directly: xor-ing the lane with itself would first read
	// the still uninitialized storage of tmp.
	for (int i = 0; i < TYPE::SIZE; ++i)
		tmp.u[i] = 0;
	return tmp;
}

// vreinterpret: copies the lanes of a bit for bit (through member u) into a
// vector of type DST. Lane count and lane size must match.
template <typename DST, typename SRC>
static inline DST vreinterpret(SRC a)
{
	static_assert(SRC::SIZE == DST::SIZE, "source and destination width must be same");
	static_assert(sizeof(typename SRC::value_type) == sizeof(typename DST::value_type), "source and destination value type sizes must be same");
	DST tmp;
	for (int i = 0; i < DST::SIZE; ++i)
		tmp.u[i] = a.u[i];
	return tmp;
}

// vmask: reinterprets a float or signed integer vector as the unsigned
// integer vector of the same lane size.
template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vmask(SIMD<float, WIDTH> a)
{
	return vreinterpret<SIMD<uint32_t, WIDTH>>(a);
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vmask(SIMD<double, WIDTH> a)
{
	return vreinterpret<SIMD<uint64_t, WIDTH>>(a);
}

template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vmask(SIMD<int8_t, WIDTH> a)
{
	return vreinterpret<SIMD<uint8_t, WIDTH>>(a);
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vmask(SIMD<int16_t, WIDTH> a)
{
	return vreinterpret<SIMD<uint16_t, WIDTH>>(a);
}

template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vmask(SIMD<int32_t, WIDTH> a)
{
	return vreinterpret<SIMD<uint32_t, WIDTH>>(a);
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vmask(SIMD<int64_t, WIDTH> a)
{
	return vreinterpret<SIMD<uint64_t, WIDTH>>(a);
}

// vunsigned: reinterprets a signed integer vector as unsigned.
template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vunsigned(SIMD<int8_t, WIDTH> a)
{
	return vreinterpret<SIMD<uint8_t, WIDTH>>(a);
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vunsigned(SIMD<int16_t, WIDTH> a)
{
	return vreinterpret<SIMD<uint16_t, WIDTH>>(a);
}

template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vunsigned(SIMD<int32_t, WIDTH> a)
{
	return vreinterpret<SIMD<uint32_t, WIDTH>>(a);
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vunsigned(SIMD<int64_t, WIDTH> a)
{
	return vreinterpret<SIMD<uint64_t, WIDTH>>(a);
}

// vsigned: reinterprets an unsigned integer vector as signed.
template <int WIDTH>
static inline SIMD<int8_t, WIDTH> vsigned(SIMD<uint8_t, WIDTH> a)
{
	return vreinterpret<SIMD<int8_t, WIDTH>>(a);
}

template <int WIDTH>
static inline SIMD<int16_t, WIDTH> vsigned(SIMD<uint16_t, WIDTH> a)
{
	return vreinterpret<SIMD<int16_t, WIDTH>>(a);
}

template <int WIDTH>
static inline SIMD<int32_t, WIDTH> vsigned(SIMD<uint32_t, WIDTH> a)
{
	return vreinterpret<SIMD<int32_t, WIDTH>>(a);
}

template <int WIDTH>
static inline SIMD<int64_t, WIDTH> vsigned(SIMD<uint64_t, WIDTH> a)
{
	return vreinterpret<SIMD<int64_t, WIDTH>>(a);
}

// vneg: lane-wise negation. Not saturating.
template <int WIDTH>
static inline SIMD<float, WIDTH> vneg(SIMD<float, WIDTH> a)
{
	SIMD<float, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<double, WIDTH> vneg(SIMD<double, WIDTH> a)
{
	SIMD<double, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int8_t, WIDTH> vneg(SIMD<int8_t, WIDTH> a)
{
	SIMD<int8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int16_t, WIDTH> vneg(SIMD<int16_t, WIDTH> a)
{
	SIMD<int16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int32_t, WIDTH> vneg(SIMD<int32_t, WIDTH> a)
{
	SIMD<int32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int64_t, WIDTH> vneg(SIMD<int64_t, WIDTH> a)
{
	SIMD<int64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -a.v[i];
	return tmp;
}

// vabs: lane-wise absolute value.
// NOTE(review): element type assumed to be float -- confirm.
template <int WIDTH>
static inline SIMD<float, WIDTH> vabs(SIMD<float, WIDTH> a)
{
	SIMD<float, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::abs(a.v[i]);
	return tmp;
}

// vqabs: lane-wise saturating absolute value. The input is first clamped to
// -INTn_MAX so that the most negative value maps to INTn_MAX.
template <int WIDTH>
static inline SIMD<int8_t, WIDTH> vqabs(SIMD<int8_t, WIDTH> a)
{
	SIMD<int8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::abs(std::max<int8_t>(a.v[i], -INT8_MAX));
	return tmp;
}

template <int WIDTH>
static inline SIMD<int16_t, WIDTH> vqabs(SIMD<int16_t, WIDTH> a)
{
	SIMD<int16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::abs(std::max<int16_t>(a.v[i], -INT16_MAX));
	return tmp;
}

template <int WIDTH>
static inline SIMD<int32_t, WIDTH> vqabs(SIMD<int32_t, WIDTH> a)
{
	SIMD<int32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::abs(std::max<int32_t>(a.v[i], -INT32_MAX));
	return tmp;
}

template <int WIDTH>
static inline SIMD<int64_t, WIDTH> vqabs(SIMD<int64_t, WIDTH> a)
{
	SIMD<int64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::abs(std::max<int64_t>(a.v[i], -INT64_MAX));
	return tmp;
}

// vnot: lane-wise bitwise complement.
// NOTE(review): element types of the bitwise families (vnot, vorr, vand,
// veor, vbic, vbsl) assumed to be the unsigned integers -- confirm.
template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vnot(SIMD<uint8_t, WIDTH> a)
{
	SIMD<uint8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = ~a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vnot(SIMD<uint16_t, WIDTH> a)
{
	SIMD<uint16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = ~a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vnot(SIMD<uint32_t, WIDTH> a)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = ~a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vnot(SIMD<uint64_t, WIDTH> a)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = ~a.v[i];
	return tmp;
}

// vorr: lane-wise bitwise OR.
template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vorr(SIMD<uint8_t, WIDTH> a, SIMD<uint8_t, WIDTH> b)
{
	SIMD<uint8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] | b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vorr(SIMD<uint16_t, WIDTH> a, SIMD<uint16_t, WIDTH> b)
{
	SIMD<uint16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] | b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vorr(SIMD<uint32_t, WIDTH> a, SIMD<uint32_t, WIDTH> b)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] | b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vorr(SIMD<uint64_t, WIDTH> a, SIMD<uint64_t, WIDTH> b)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] | b.v[i];
	return tmp;
}

// vand: lane-wise bitwise AND.
template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vand(SIMD<uint8_t, WIDTH> a, SIMD<uint8_t, WIDTH> b)
{
	SIMD<uint8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] & b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vand(SIMD<uint16_t, WIDTH> a, SIMD<uint16_t, WIDTH> b)
{
	SIMD<uint16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] & b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vand(SIMD<uint32_t, WIDTH> a, SIMD<uint32_t, WIDTH> b)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] & b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vand(SIMD<uint64_t, WIDTH> a, SIMD<uint64_t, WIDTH> b)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] & b.v[i];
	return tmp;
}

// veor: lane-wise bitwise exclusive OR.
template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> veor(SIMD<uint8_t, WIDTH> a, SIMD<uint8_t, WIDTH> b)
{
	SIMD<uint8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] ^ b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> veor(SIMD<uint16_t, WIDTH> a, SIMD<uint16_t, WIDTH> b)
{
	SIMD<uint16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] ^ b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> veor(SIMD<uint32_t, WIDTH> a, SIMD<uint32_t, WIDTH> b)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] ^ b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> veor(SIMD<uint64_t, WIDTH> a, SIMD<uint64_t, WIDTH> b)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] ^ b.v[i];
	return tmp;
}

// vbic: lane-wise bit clear, a AND NOT b.
template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vbic(SIMD<uint8_t, WIDTH> a, SIMD<uint8_t, WIDTH> b)
{
	SIMD<uint8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] & ~b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vbic(SIMD<uint16_t, WIDTH> a, SIMD<uint16_t, WIDTH> b)
{
	SIMD<uint16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] & ~b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vbic(SIMD<uint32_t, WIDTH> a, SIMD<uint32_t, WIDTH> b)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] & ~b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vbic(SIMD<uint64_t, WIDTH> a, SIMD<uint64_t, WIDTH> b)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] & ~b.v[i];
	return tmp;
}

// vbsl: lane-wise bit select. Each result bit comes from b where the
// corresponding bit of a is set and from c where it is clear.
template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vbsl(SIMD<uint8_t, WIDTH> a, SIMD<uint8_t, WIDTH> b, SIMD<uint8_t, WIDTH> c)
{
	SIMD<uint8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = (a.v[i] & b.v[i]) | (~a.v[i] & c.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vbsl(SIMD<uint16_t, WIDTH> a, SIMD<uint16_t, WIDTH> b, SIMD<uint16_t, WIDTH> c)
{
	SIMD<uint16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = (a.v[i] & b.v[i]) | (~a.v[i] & c.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vbsl(SIMD<uint32_t, WIDTH> a, SIMD<uint32_t, WIDTH> b, SIMD<uint32_t, WIDTH> c)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = (a.v[i] & b.v[i]) | (~a.v[i] & c.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vbsl(SIMD<uint64_t, WIDTH> a, SIMD<uint64_t, WIDTH> b, SIMD<uint64_t, WIDTH> c)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = (a.v[i] & b.v[i]) | (~a.v[i] & c.v[i]);
	return tmp;
}

// vcgtz: lane-wise "greater than zero". A true lane becomes all ones
// (the comparison result 1 is negated and stored as unsigned), a false
// lane becomes zero. The other comparisons below use the same encoding.
template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vcgtz(SIMD<float, WIDTH> a)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > 0.f);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vcgtz(SIMD<double, WIDTH> a)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > 0.);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vcgtz(SIMD<int8_t, WIDTH> a)
{
	SIMD<uint8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > 0);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vcgtz(SIMD<int16_t, WIDTH> a)
{
	SIMD<uint16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > 0);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vcgtz(SIMD<int32_t, WIDTH> a)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > 0);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vcgtz(SIMD<int64_t, WIDTH> a)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > 0);
	return tmp;
}

// vceqz: lane-wise "equal to zero" mask.
template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vceqz(SIMD<float, WIDTH> a)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] == 0.f);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vceqz(SIMD<double, WIDTH> a)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] == 0.);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vceqz(SIMD<int8_t, WIDTH> a)
{
	SIMD<uint8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -!a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vceqz(SIMD<int16_t, WIDTH> a)
{
	SIMD<uint16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -!a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vceqz(SIMD<int32_t, WIDTH> a)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -!a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vceqz(SIMD<int64_t, WIDTH> a)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -!a.v[i];
	return tmp;
}

// vcltz: lane-wise "less than zero" mask.
template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vcltz(SIMD<float, WIDTH> a)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] < 0.f);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vcltz(SIMD<double, WIDTH> a)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] < 0.);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vcltz(SIMD<int8_t, WIDTH> a)
{
	SIMD<uint8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] < 0);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vcltz(SIMD<int16_t, WIDTH> a)
{
	SIMD<uint16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] < 0);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vcltz(SIMD<int32_t, WIDTH> a)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] < 0);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vcltz(SIMD<int64_t, WIDTH> a)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] < 0);
	return tmp;
}

// vcgt: lane-wise "a greater than b" mask.
template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vcgt(SIMD<float, WIDTH> a, SIMD<float, WIDTH> b)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vcgt(SIMD<double, WIDTH> a, SIMD<double, WIDTH> b)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vcgt(SIMD<int8_t, WIDTH> a, SIMD<int8_t, WIDTH> b)
{
	SIMD<uint8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vcgt(SIMD<int16_t, WIDTH> a, SIMD<int16_t, WIDTH> b)
{
	SIMD<uint16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vcgt(SIMD<int32_t, WIDTH> a, SIMD<int32_t, WIDTH> b)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vcgt(SIMD<int64_t, WIDTH> a, SIMD<int64_t, WIDTH> b)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vcgt(SIMD<uint8_t, WIDTH> a, SIMD<uint8_t, WIDTH> b)
{
	SIMD<uint8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vcgt(SIMD<uint16_t, WIDTH> a, SIMD<uint16_t, WIDTH> b)
{
	SIMD<uint16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vcgt(SIMD<uint32_t, WIDTH> a, SIMD<uint32_t, WIDTH> b)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vcgt(SIMD<uint64_t, WIDTH> a, SIMD<uint64_t, WIDTH> b)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] > b.v[i]);
	return tmp;
}

// vceq: lane-wise "a equal to b" mask.
template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vceq(SIMD<float, WIDTH> a, SIMD<float, WIDTH> b)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] == b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vceq(SIMD<double, WIDTH> a, SIMD<double, WIDTH> b)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] == b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vceq(SIMD<int8_t, WIDTH> a, SIMD<int8_t, WIDTH> b)
{
	SIMD<uint8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] == b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vceq(SIMD<int16_t, WIDTH> a, SIMD<int16_t, WIDTH> b)
{
	SIMD<uint16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] == b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vceq(SIMD<int32_t, WIDTH> a, SIMD<int32_t, WIDTH> b)
{
	SIMD<uint32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] == b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vceq(SIMD<int64_t, WIDTH> a, SIMD<int64_t, WIDTH> b)
{
	SIMD<uint64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] == b.v[i]);
	return tmp;
}

// vmin: lane-wise minimum.
template <int WIDTH>
static inline SIMD<float, WIDTH> vmin(SIMD<float, WIDTH> a, SIMD<float, WIDTH> b)
{
	SIMD<float, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::min(a.v[i], b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<double, WIDTH> vmin(SIMD<double, WIDTH> a, SIMD<double, WIDTH> b)
{
	SIMD<double, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::min(a.v[i], b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<int8_t, WIDTH> vmin(SIMD<int8_t, WIDTH> a, SIMD<int8_t, WIDTH> b)
{
	SIMD<int8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::min(a.v[i], b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<int16_t, WIDTH> vmin(SIMD<int16_t, WIDTH> a, SIMD<int16_t, WIDTH> b)
{
	SIMD<int16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::min(a.v[i], b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<int32_t, WIDTH> vmin(SIMD<int32_t, WIDTH> a, SIMD<int32_t, WIDTH> b)
{
	SIMD<int32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::min(a.v[i], b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<int64_t, WIDTH> vmin(SIMD<int64_t, WIDTH> a, SIMD<int64_t, WIDTH> b)
{
	SIMD<int64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::min(a.v[i], b.v[i]);
	return tmp;
}

// vmax: lane-wise maximum.
template <int WIDTH>
static inline SIMD<float, WIDTH> vmax(SIMD<float, WIDTH> a, SIMD<float, WIDTH> b)
{
	SIMD<float, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::max(a.v[i], b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<double, WIDTH> vmax(SIMD<double, WIDTH> a, SIMD<double, WIDTH> b)
{
	SIMD<double, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::max(a.v[i], b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<int8_t, WIDTH> vmax(SIMD<int8_t, WIDTH> a, SIMD<int8_t, WIDTH> b)
{
	SIMD<int8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::max(a.v[i], b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<int16_t, WIDTH> vmax(SIMD<int16_t, WIDTH> a, SIMD<int16_t, WIDTH> b)
{
	SIMD<int16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::max(a.v[i], b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<int32_t, WIDTH> vmax(SIMD<int32_t, WIDTH> a, SIMD<int32_t, WIDTH> b)
{
	SIMD<int32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::max(a.v[i], b.v[i]);
	return tmp;
}

template <int WIDTH>
static inline SIMD<int64_t, WIDTH> vmax(SIMD<int64_t, WIDTH> a, SIMD<int64_t, WIDTH> b)
{
	SIMD<int64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::max(a.v[i], b.v[i]);
	return tmp;
}

// vadd: lane-wise addition. Not saturating.
template <int WIDTH>
static inline SIMD<float, WIDTH> vadd(SIMD<float, WIDTH> a, SIMD<float, WIDTH> b)
{
	SIMD<float, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] + b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<double, WIDTH> vadd(SIMD<double, WIDTH> a, SIMD<double, WIDTH> b)
{
	SIMD<double, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] + b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int8_t, WIDTH> vadd(SIMD<int8_t, WIDTH> a, SIMD<int8_t, WIDTH> b)
{
	SIMD<int8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] + b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int16_t, WIDTH> vadd(SIMD<int16_t, WIDTH> a, SIMD<int16_t, WIDTH> b)
{
	SIMD<int16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] + b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int32_t, WIDTH> vadd(SIMD<int32_t, WIDTH> a, SIMD<int32_t, WIDTH> b)
{
	SIMD<int32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] + b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int64_t, WIDTH> vadd(SIMD<int64_t, WIDTH> a, SIMD<int64_t, WIDTH> b)
{
	SIMD<int64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] + b.v[i];
	return tmp;
}

// vqadd: lane-wise saturating addition. The operands are widened so the
// sum is exact, then clamped to the range of the lane type.
template <int WIDTH>
static inline SIMD<int8_t, WIDTH> vqadd(SIMD<int8_t, WIDTH> a, SIMD<int8_t, WIDTH> b)
{
	SIMD<int8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::min<int16_t>(std::max<int16_t>(int16_t(a.v[i]) + int16_t(b.v[i]), INT8_MIN), INT8_MAX);
	return tmp;
}

template <int WIDTH>
static inline SIMD<int16_t, WIDTH> vqadd(SIMD<int16_t, WIDTH> a, SIMD<int16_t, WIDTH> b)
{
	SIMD<int16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::min<int32_t>(std::max<int32_t>(int32_t(a.v[i]) + int32_t(b.v[i]), INT16_MIN), INT16_MAX);
	return tmp;
}

// vsub: lane-wise subtraction. Not saturating.
template <int WIDTH>
static inline SIMD<float, WIDTH> vsub(SIMD<float, WIDTH> a, SIMD<float, WIDTH> b)
{
	SIMD<float, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] - b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<double, WIDTH> vsub(SIMD<double, WIDTH> a, SIMD<double, WIDTH> b)
{
	SIMD<double, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] - b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int8_t, WIDTH> vsub(SIMD<int8_t, WIDTH> a, SIMD<int8_t, WIDTH> b)
{
	SIMD<int8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] - b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int16_t, WIDTH> vsub(SIMD<int16_t, WIDTH> a, SIMD<int16_t, WIDTH> b)
{
	SIMD<int16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] - b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int32_t, WIDTH> vsub(SIMD<int32_t, WIDTH> a, SIMD<int32_t, WIDTH> b)
{
	SIMD<int32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] - b.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int64_t, WIDTH> vsub(SIMD<int64_t, WIDTH> a, SIMD<int64_t, WIDTH> b)
{
	SIMD<int64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = a.v[i] - b.v[i];
	return tmp;
}

// vqsub: lane-wise saturating subtraction. Signed lanes clamp to
// [INTn_MIN, INTn_MAX]; unsigned lanes clamp at zero.
template <int WIDTH>
static inline SIMD<int8_t, WIDTH> vqsub(SIMD<int8_t, WIDTH> a, SIMD<int8_t, WIDTH> b)
{
	SIMD<int8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::min<int16_t>(std::max<int16_t>(int16_t(a.v[i]) - int16_t(b.v[i]), INT8_MIN), INT8_MAX);
	return tmp;
}

template <int WIDTH>
static inline SIMD<int16_t, WIDTH> vqsub(SIMD<int16_t, WIDTH> a, SIMD<int16_t, WIDTH> b)
{
	SIMD<int16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::min<int32_t>(std::max<int32_t>(int32_t(a.v[i]) - int32_t(b.v[i]), INT16_MIN), INT16_MAX);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vqsub(SIMD<uint8_t, WIDTH> a, SIMD<uint8_t, WIDTH> b)
{
	SIMD<uint8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::max<int16_t>(int16_t(a.v[i]) - int16_t(b.v[i]), 0);
	return tmp;
}

template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vqsub(SIMD<uint16_t, WIDTH> a, SIMD<uint16_t, WIDTH> b)
{
	SIMD<uint16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = std::max<int32_t>(int32_t(a.v[i]) - int32_t(b.v[i]), 0);
	return tmp;
}

// vsign: lane-wise a multiplied by the sign of b, where the sign is
// +1 for b > 0, -1 for b < 0 and 0 otherwise.
template <int WIDTH>
static inline SIMD<float, WIDTH> vsign(SIMD<float, WIDTH> a, SIMD<float, WIDTH> b)
{
	SIMD<float, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = ((b.v[i] > 0.f) - (b.v[i] < 0.f)) * a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<double, WIDTH> vsign(SIMD<double, WIDTH> a, SIMD<double, WIDTH> b)
{
	SIMD<double, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = ((b.v[i] > 0.) - (b.v[i] < 0.)) * a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int8_t, WIDTH> vsign(SIMD<int8_t, WIDTH> a, SIMD<int8_t, WIDTH> b)
{
	SIMD<int8_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = ((b.v[i] > 0) - (b.v[i] < 0)) * a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int16_t, WIDTH> vsign(SIMD<int16_t, WIDTH> a, SIMD<int16_t, WIDTH> b)
{
	SIMD<int16_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = ((b.v[i] > 0) - (b.v[i] < 0)) * a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int32_t, WIDTH> vsign(SIMD<int32_t, WIDTH> a, SIMD<int32_t, WIDTH> b)
{
	SIMD<int32_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = ((b.v[i] > 0) - (b.v[i] < 0)) * a.v[i];
	return tmp;
}

template <int WIDTH>
static inline SIMD<int64_t, WIDTH> vsign(SIMD<int64_t, WIDTH> a, SIMD<int64_t, WIDTH> b)
{
	SIMD<int64_t, WIDTH> tmp;
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = ((b.v[i] > 0) - (b.v[i] < 0)) * a.v[i];
	return tmp;
}

// Architecture specific headers; currently compiled out by the #if 0.
#if 0
#ifdef __AVX2__
#include "avx2.h"
#else
#ifdef __SSE4_1__
#include "sse4_1.h"
#endif
#endif

#ifdef __ARM_NEON__
#include "neon.h"
#endif
#endif

} // namespace ldpctool

#endif