Go to the documentation of this file.
/* Flag macro defined ahead of the intrinsics includes that follow.
 * Its consumer is not visible in this chunk.
 * NOTE(review): presumably tells a project header to skip identifier
 * poisoning while the intrinsic headers are pulled in -- confirm against
 * the place where GRK_SKIP_POISON is tested. */
24 #define GRK_SKIP_POISON
26 #include <xmmintrin.h>
29 #include <emmintrin.h>
32 #include <tmmintrin.h>
35 #include <immintrin.h>
/* VREG_INT_COUNT is given two alternative values (8 and 4); the conditional
 * that selects between them is not visible in this listing.
 * NOTE(review): presumably the number of 32-bit integers held by one vector
 * register (8 for a 256-bit register, 4 for a 128-bit one) -- confirm. */
41 #define VREG_INT_COUNT 8
44 #define VREG_INT_COUNT 4
/* Everything below is only compiled when the compiler advertises SSE2 or
 * AVX2 support. The matching #endif, and the inner conditional choosing
 * between the 256-bit and 128-bit macro sets, are not visible in this chunk. */
49 #if (defined(__SSE2__) || defined(__AVX2__))
/* 32-bit integer helpers mapped onto the 256-bit `_mm256_*` intrinsics.
 *   LOAD_CST(x)   - vector with every 32-bit lane set to x
 *   LOAD / STORE  - load/store through the `load`/`store` intrinsics
 *                   (the aligned-access forms per Intel's naming)
 *   LOADU/STOREU  - load/store through the `loadu`/`storeu` (unaligned) forms
 *   ADD / SUB     - lane-wise 32-bit add / subtract
 *   SAR(x,y)      - `srai`: arithmetic right shift of each 32-bit lane by y
 *   MUL           - `mullo`: lane-wise multiply keeping the low 32 bits
 * VREG is the vector register type; its definition is not visible here. */
54 #define LOAD_CST(x) _mm256_set1_epi32(x)
55 #define LOAD(x) _mm256_load_si256((const VREG*)(x))
56 #define LOADU(x) _mm256_loadu_si256((const VREG*)(x))
57 #define STORE(x,y) _mm256_store_si256((VREG*)(x),(y))
58 #define STOREU(x,y) _mm256_storeu_si256((VREG*)(x),(y))
59 #define ADD(x,y) _mm256_add_epi32((x),(y))
60 #define SUB(x,y) _mm256_sub_epi32((x),(y))
61 #define SAR(x,y) _mm256_srai_epi32((x),(y))
62 #define MUL(x,y) _mm256_mullo_epi32((x),(y))
/* Single-precision float helpers mapped onto the 256-bit `_mm256_*_ps`
 * intrinsics.
 *   LOADF / STOREF - load/store through the `load_ps`/`store_ps` forms
 *                    (the aligned-access forms per Intel's naming)
 *   LOAD_CST_F(x)  - vector with every float lane set to x
 *   ADDF/SUBF/MULF - lane-wise float add / subtract / multiply
 * LOAD_CST_F now has whitespace between its parameter list and its
 * replacement list, matching every other macro in this group. */
64 #define LOADF(x) _mm256_load_ps((float const*)(x))
65 #define LOAD_CST_F(x) _mm256_set1_ps(x)
66 #define ADDF(x,y) _mm256_add_ps((x),(y))
67 #define MULF(x,y) _mm256_mul_ps((x),(y))
68 #define SUBF(x,y) _mm256_sub_ps((x),(y))
69 #define STOREF(x,y) _mm256_store_ps((float*)(x),(y))
/* 32-bit integer helpers mapped onto the 128-bit `_mm_*` intrinsics; same
 * macro names as the 256-bit set, so only one of the two sets can be active
 * (the selecting conditional is not visible in this chunk).
 *   LOAD_CST(x)   - vector with every 32-bit lane set to x
 *   LOAD / STORE  - load/store through the `load`/`store` intrinsics
 *                   (the aligned-access forms per Intel's naming)
 *   LOADU/STOREU  - load/store through the `loadu`/`storeu` (unaligned) forms
 *   ADD / SUB     - lane-wise 32-bit add / subtract
 *   MUL           - `mullo`: lane-wise multiply keeping the low 32 bits
 *   SAR(x,y)      - `srai`: arithmetic right shift of each 32-bit lane by y
 * VREG is the vector register type; its definition is not visible here.
 * NOTE(review): _mm_mullo_epi32 is an SSE4.1 intrinsic, while the visible
 * guard only tests __SSE2__/__AVX2__ -- confirm that code expanding MUL is
 * only built with SSE4.1 enabled. */
72 #define LOAD_CST(x) _mm_set1_epi32(x)
73 #define LOAD(x) _mm_load_si128((const VREG*)(x))
74 #define LOADU(x) _mm_loadu_si128((const VREG*)(x))
75 #define STORE(x,y) _mm_store_si128((VREG*)(x),(y))
76 #define STOREU(x,y) _mm_storeu_si128((VREG*)(x),(y))
77 #define ADD(x,y) _mm_add_epi32((x),(y))
78 #define SUB(x,y) _mm_sub_epi32((x),(y))
80 #define MUL(x,y) _mm_mullo_epi32((x),(y))
81 #define SAR(x,y) _mm_srai_epi32((x),(y))
/* Single-precision float helpers mapped onto the 128-bit `_mm_*_ps`
 * intrinsics.
 *   LOADF / STOREF - load/store through the `load_ps`/`store_ps` forms
 *                    (the aligned-access forms per Intel's naming)
 *   LOAD_CST_F(x)  - vector with every float lane set to x
 *   ADDF/SUBF/MULF - lane-wise float add / subtract / multiply */
83 #define LOADF(x) _mm_load_ps((float const*)(x))
84 #define LOAD_CST_F(x) _mm_set1_ps(x)
85 #define ADDF(x,y) _mm_add_ps((x),(y))
86 #define MULF(x,y) _mm_mul_ps((x),(y))
87 #define SUBF(x,y) _mm_sub_ps((x),(y))
88 #define STOREF(x,y) _mm_store_ps((float*)(x),(y))
/* Three-operand integer sum: the first two operands are added, then the
 * third is added to that result, using whichever ADD definition is active. */
91 #define ADD3(a,b,c) ADD(ADD((a),(b)),(c))