/* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}.  Does not
   check the core arithmetic in any detail.  This file checks the 16-bit
   character versions (w is for wide) */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef  unsigned char           V128[16];
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;
typedef  UChar                   Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* Byte offsets of the fields in the 80-byte scratch block shared between
   the C code and the inline asm.  The asm template in RUN_CASE hard-codes
   the same numbers, so the two must be kept in step. */
enum {
   OFF_ARGL    = 0,    /* in:    16 bytes, loaded into xmm2          */
   OFF_ARGR    = 16,   /* in:    16 bytes, loaded into xmm13         */
   OFF_RDX     = 32,   /* in:    8 bytes, loaded into rdx            */
   OFF_RAX     = 40,   /* in:    8 bytes, loaded into rax            */
   OFF_XMM0    = 48,   /* inout: 16 bytes, xmm0 before/after         */
   OFF_RCX     = 64,   /* inout: 8 bytes, only the low 16 bits used  */
   OFF_RFLAGS  = 72,   /* out:   8 bytes, rflags after the insn      */
   BLOCK_BYTES = 80,

   /* Bits of rflags that get printed: CF(0) PF(2) AF(4) ZF(6) SF(7)
      OF(11). */
   FLAGS_MASK  = 0x8D5
};

/* Print the 16 bytes of *vec as 32 hex digits, highest-indexed byte
   first. */
void show_V128 ( V128* vec )
{
   Int i;
   for (i = 15; i >= 0; i--)
      printf("%02x", (UInt)( (*vec)[i] ));
}

/* Build a 16-byte vector from a 16-character hex summary string.  Each
   hex digit d becomes the byte 0xdd; the leftmost character of the
   string ends up in byte 15, the rightmost in byte 0.  Asserts if the
   string is not exactly 16 characters or contains a non-hex digit. */
void expand ( V128* dst, char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar xx = 0;
      UChar x  = summary[15-i];
      if      (x >= '0' && x <= '9') { xx = x - '0'; }
      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
      else assert(0);

      assert(xx < 16);
      /* Replicate the nibble into both halves of the byte. */
      xx = (xx << 4) | xx;
      (*dst)[i] = xx;
   }
}

/* Initialise the scratch block for one instruction run.  Everything is
   first set to 0x55 so the xmm0, rcx and rflags slots hold a known
   pattern, then the four inputs are copied to their slots.  rdxIN and
   raxIN go to OFF_RDX and OFF_RAX respectively: these are the slots the
   asm loads rdx and rax from, and they lie beyond argR's 16 bytes, so
   argR is passed to the instruction intact. */
static void fill_block ( UChar* blockC, V128* argL, V128* argR,
                         ULong rdxIN, ULong raxIN )
{
   memset(blockC, 0x55, BLOCK_BYTES);
   memcpy(blockC + OFF_ARGL, argL,   16);
   memcpy(blockC + OFF_ARGR, argR,   16);
   memcpy(blockC + OFF_RDX,  &rdxIN, 8);
   memcpy(blockC + OFF_RAX,  &raxIN, 8);
}

/* Print one result line: the label, then the xmm0, rcx and masked
   rflags slots of the scratch block. */
static void show_result ( const char* label, UChar* blockC )
{
   ULong rcx, rflags;
   memcpy(&rcx,    blockC + OFF_RCX,    sizeof(rcx));
   memcpy(&rflags, blockC + OFF_RFLAGS, sizeof(rflags));

   printf("%s", label);
   printf(" xmm0 ");
   show_V128( (V128*)(blockC + OFF_XMM0) );
   printf(" rcx %016llx flags %08llx\n", rcx, rflags & FLAGS_MASK);
}

/* Run "pcmp<name> $<imm>, %xmm2, %xmm13" once and print the outcome.
   'name' and 'imm' must be string literals (e.g. "istri", "0x4B") since
   they are pasted into both the asm template and the output label.
   Expands inside one_test and uses its locals blockC, argL, argR, rdxIN
   and raxIN.

   Only the low 16 bits of rcx are loaded and stored (movw), so the
   upper 48 bits of the rcx slot keep their 0x55 fill.

   NOTE(review): pushfq writes below %rsp from inside inline asm; this
   relies on the enclosing function not keeping live data in the stack
   red zone -- confirm for the compiler/ABI in use. */
#define RUN_CASE(name, imm)                                             \
   do {                                                                 \
      fill_block(blockC, &argL, &argR, rdxIN, raxIN);                   \
      __asm__ __volatile__(                                             \
         "movupd    0(%0), %%xmm2"                      "\n\t"          \
         "movupd    16(%0), %%xmm13"                    "\n\t"          \
         "movq      32(%0), %%rdx"                      "\n\t"          \
         "movq      40(%0), %%rax"                      "\n\t"          \
         "movupd    48(%0), %%xmm0"                     "\n\t"          \
         "movw      64(%0), %%cx"                       "\n\t"          \
         "pcmp" name " $" imm ", %%xmm2, %%xmm13"       "\n\t"          \
         "movupd    %%xmm0, 48(%0)"                     "\n\t"          \
         "movw      %%cx, 64(%0)"                       "\n\t"          \
         "pushfq"                                       "\n\t"          \
         "popq      %%r15"                              "\n\t"          \
         "movq      %%r15, 72(%0)"                      "\n\t"          \
         : /*out*/                                                      \
         : /*in*/"r"(blockC)                                            \
         : /*trash*/"memory","cc","xmm2","xmm13","xmm0",                \
                    "rdx","rax","rcx","r15"                             \
      );                                                                \
      show_result(" " name " $" imm ": ", blockC);                      \
   } while (0)

/* Expand the two summaries into vectors, print the inputs, then run all
   eight instruction variants (istri/istrm/estri/estrm, each with
   immediates 0x4B and 0x0B) on them and print one result line per
   variant.

   summL, rdxIN: left argument (goes to xmm2) and the value given to rdx.
   summR, raxIN: right argument (goes to xmm13) and the value given to
                 rax. */
void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
{
   V128 argL, argR;
   expand( &argL, summL );
   expand( &argR, summR );
   printf("\n");
   printf("rdx %016llx argL ", rdxIN);
   show_V128(&argL);
   printf(" rax %016llx argR ", raxIN);
   show_V128(&argR);
   printf("\n");

   ULong block[ 2/*in:argL*/          // 0  0
              + 2/*in:argR*/          // 2  16
              + 1/*in:rdx*/           // 4  32
              + 1/*in:rax*/           // 5  40
              + 2/*inout:xmm0*/       // 6  48
              + 1/*inout:rcx*/        // 8  64
              + 1/*out:rflags*/ ];    // 9  72
   assert(sizeof(block) == BLOCK_BYTES);

   UChar* blockC = (UChar*)&block[0];

   RUN_CASE("istri", "0x4B");
   RUN_CASE("istri", "0x0B");

   RUN_CASE("istrm", "0x4B");
   RUN_CASE("istrm", "0x0B");

   RUN_CASE("estri", "0x4B");
   RUN_CASE("estri", "0x0B");

   RUN_CASE("estrm", "0x4B");
   RUN_CASE("estrm", "0x0B");
}

#undef RUN_CASE

int main ( void )
{
   /* A zero element in the right argument, against all-'a' and
      all-zero left arguments. */
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa00aaaaaa", 0 );
   one_test("0000000000000000", 0, "aaaaaaaa00aaaaaa", 0 );

   /* Identical vectors; only rdx and/or rax vary from here on. */
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );

   /* rdx fixed at 5, positive rax values. */
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );

   /* rdx fixed at 5, negative rax values. */
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );

   /* rax fixed at 6, positive rdx values. */
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );

   /* rax fixed at 6, negative rdx values. */
   one_test("aaaaaaaaaaaaaaaa", -5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );

   return 0;
}