1 /*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ---------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 #ifndef __IMMINTRIN_H 24 #error "Never use <avx512vlcdintrin.h> directly; include <immintrin.h> instead." 25 #endif 26 27 #ifndef __AVX512VLCDINTRIN_H 28 #define __AVX512VLCDINTRIN_H 29 30 /* Define the default attributes for the functions in this file. */ 31 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"))) 32 33 34 static __inline__ __m128i __DEFAULT_FN_ATTRS 35 _mm_broadcastmb_epi64 (__mmask8 __A) 36 { 37 return (__m128i) __builtin_ia32_broadcastmb128 (__A); 38 } 39 40 static __inline__ __m256i __DEFAULT_FN_ATTRS 41 _mm256_broadcastmb_epi64 (__mmask8 __A) 42 { 43 return (__m256i) __builtin_ia32_broadcastmb256 (__A); 44 } 45 46 static __inline__ __m128i __DEFAULT_FN_ATTRS 47 _mm_broadcastmw_epi32 (__mmask16 __A) 48 { 49 return (__m128i) __builtin_ia32_broadcastmw128 (__A); 50 } 51 52 static __inline__ __m256i __DEFAULT_FN_ATTRS 53 _mm256_broadcastmw_epi32 (__mmask16 __A) 54 { 55 return (__m256i) __builtin_ia32_broadcastmw256 (__A); 56 } 57 58 59 static __inline__ __m128i __DEFAULT_FN_ATTRS 60 _mm_conflict_epi64 (__m128i __A) 61 { 62 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, 63 (__v2di) _mm_undefined_si128 (), 64 (__mmask8) -1); 65 } 66 67 static __inline__ __m128i __DEFAULT_FN_ATTRS 68 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 69 { 70 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, 71 (__v2di) __W, 72 (__mmask8) __U); 73 } 74 75 static __inline__ __m128i __DEFAULT_FN_ATTRS 76 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) 77 { 78 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, 79 (__v2di) 80 _mm_setzero_di (), 81 (__mmask8) __U); 82 } 83 84 static __inline__ __m256i __DEFAULT_FN_ATTRS 85 _mm256_conflict_epi64 (__m256i __A) 86 { 87 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, 88 (__v4di) _mm256_undefined_si256 (), 89 (__mmask8) -1); 90 } 91 92 static __inline__ __m256i __DEFAULT_FN_ATTRS 93 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 94 { 95 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, 96 (__v4di) __W, 97 (__mmask8) __U); 98 } 99 100 static __inline__ __m256i __DEFAULT_FN_ATTRS 101 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) 102 { 103 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, 104 (__v4di) _mm256_setzero_si256 (), 105 (__mmask8) __U); 106 } 107 108 static __inline__ __m128i __DEFAULT_FN_ATTRS 109 _mm_conflict_epi32 (__m128i __A) 110 { 111 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, 112 (__v4si) _mm_undefined_si128 (), 113 (__mmask8) -1); 114 } 115 116 static __inline__ __m128i __DEFAULT_FN_ATTRS 117 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 118 { 119 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, 120 (__v4si) __W, 121 (__mmask8) __U); 122 } 123 124 static __inline__ __m128i __DEFAULT_FN_ATTRS 125 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) 126 { 127 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, 128 (__v4si) _mm_setzero_si128 (), 129 (__mmask8) __U); 130 } 131 132 static __inline__ __m256i __DEFAULT_FN_ATTRS 133 _mm256_conflict_epi32 (__m256i __A) 134 { 135 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, 136 (__v8si) _mm256_undefined_si256 (), 137 (__mmask8) -1); 138 } 139 140 static __inline__ __m256i __DEFAULT_FN_ATTRS 141 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 142 { 143 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, 144 (__v8si) __W, 145 (__mmask8) __U); 146 } 147 148 static __inline__ __m256i __DEFAULT_FN_ATTRS 149 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) 150 { 151 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, 152 (__v8si) 153 _mm256_setzero_si256 (), 154 (__mmask8) __U); 155 } 156 157 static __inline__ __m128i __DEFAULT_FN_ATTRS 158 _mm_lzcnt_epi32 (__m128i __A) 159 { 160 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A, 161 (__v4si) 162 _mm_setzero_si128 (), 163 (__mmask8) -1); 164 } 165 166 static __inline__ __m128i __DEFAULT_FN_ATTRS 167 _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 168 { 169 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A, 170 (__v4si) __W, 171 (__mmask8) __U); 172 } 173 174 static __inline__ __m128i __DEFAULT_FN_ATTRS 175 _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A) 176 { 177 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A, 178 (__v4si) 179 _mm_setzero_si128 (), 180 (__mmask8) __U); 181 } 182 183 static __inline__ __m256i __DEFAULT_FN_ATTRS 184 _mm256_lzcnt_epi32 (__m256i __A) 185 { 186 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A, 187 (__v8si) 188 _mm256_setzero_si256 (), 189 (__mmask8) -1); 190 } 191 192 static __inline__ __m256i __DEFAULT_FN_ATTRS 193 _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 194 { 195 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A, 196 (__v8si) __W, 197 (__mmask8) __U); 198 } 199 200 static __inline__ __m256i __DEFAULT_FN_ATTRS 201 _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A) 202 { 203 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A, 204 (__v8si) 205 _mm256_setzero_si256 (), 206 (__mmask8) __U); 207 } 208 209 static __inline__ __m128i __DEFAULT_FN_ATTRS 210 _mm_lzcnt_epi64 (__m128i __A) 211 { 212 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A, 213 (__v2di) 214 _mm_setzero_di (), 215 (__mmask8) -1); 216 } 217 218 static __inline__ __m128i __DEFAULT_FN_ATTRS 219 _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 220 { 221 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A, 222 (__v2di) __W, 223 (__mmask8) __U); 224 } 225 226 static __inline__ __m128i __DEFAULT_FN_ATTRS 227 _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A) 228 { 229 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A, 230 (__v2di) 231 _mm_setzero_di (), 232 (__mmask8) __U); 233 } 234 235 static __inline__ __m256i __DEFAULT_FN_ATTRS 236 _mm256_lzcnt_epi64 (__m256i __A) 237 { 238 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A, 239 (__v4di) 240 _mm256_setzero_si256 (), 241 (__mmask8) -1); 242 } 243 244 static __inline__ __m256i __DEFAULT_FN_ATTRS 245 _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 246 { 247 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A, 248 (__v4di) __W, 249 (__mmask8) __U); 250 } 251 252 static __inline__ __m256i __DEFAULT_FN_ATTRS 253 _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A) 254 { 255 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A, 256 (__v4di) 257 _mm256_setzero_si256 (), 258 (__mmask8) __U); 259 } 260 261 #undef __DEFAULT_FN_ATTRS 262 263 #endif /* __AVX512VLCDINTRIN_H */ 264