/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Syntax: GNU as, AT&T operand order, run through the C preprocessor.
   Target: 32-bit x86 with SSE2; all arguments are read from the stack.  */

#include "cache.h"

#ifndef MEMSET
# define MEMSET android_memset16
#endif

#ifndef L
# define L(label)	.L##label
#endif

#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
# define ENTRY(name)			\
	.type name,  @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* Unwind-info bookkeeping that accompanies a 4-byte push/pop of REG.  */
#define CFI_PUSH(REG)						\
  cfi_adjust_cfa_offset (4);					\
  cfi_rel_offset (REG, 0)

#define CFI_POP(REG)						\
  cfi_adjust_cfa_offset (-4);					\
  cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Stack offsets of the arguments relative to ESP after ENTRANCE.
   The bzero16 flavour has no value argument and sets no return value.  */
#ifdef USE_AS_BZERO16
# define DEST		PARMS
# define LEN		DEST+4
# define SETRTNVAL
#else
# define DEST		PARMS
# define CHR		DEST+4
# define LEN		CHR+4
# define SETRTNVAL	movl DEST(%esp), %eax
#endif

#if (defined SHARED || defined __PIC__)
/* PIC build: EBX is saved on entry because the jump-table dispatch and
   the GOT-relative loads below use it as a scratch register.  */
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
/* The trailing CFI_PUSH re-establishes the "EBX is on the stack" unwind
   state for the code that textually follows the ret.  */
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  ECX is the table index.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__x86.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    /* EBX now holds the absolute target address.  Go.  */	\
    jmp		*%ebx

	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN (4)
	.type	__x86.get_pc_thunk.bx,@function
/* Returns the caller's return address (i.e. the address of the
   instruction following the call) in EBX.  */
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  ECX is the table index.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    jmp		*TABLE(,%ecx,4)
#endif

/*-----------------------------------------------------------------------
   MEMSET (default name: android_memset16)

   Fills a buffer with a repeated 16-bit value (or with zero when built
   with USE_AS_BZERO16).

   In (stack):  DEST(%esp) = destination pointer
                CHR(%esp)  = fill value, low 16 bits used (not present
                             for USE_AS_BZERO16)
                LEN(%esp)  = length in BYTES; it is halved to a count of
                             16-bit words, so an odd trailing byte is
                             not written
   Out:         EAX = DEST, except for USE_AS_BZERO16
   Clobbers:    EAX, ECX, EDX, XMM0, flags.  EBX is used as scratch but
                is saved and restored.

   Register roles inside the routine:
     EAX  = fill value replicated into both 16-bit halves
     ECX  = remaining count (words on the short path, bytes on the
            long path; converted back to words before each dispatch)
     EDX  = write pointer; before a jump-table dispatch it is advanced
            to one past the END of the region, and the table targets
            store at negative offsets from it
     XMM0 = fill value replicated across all 16 bytes
  -----------------------------------------------------------------------*/
	.section .text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY (MEMSET)
	ENTRANCE

	movl	LEN(%esp), %ecx
	shr	$1, %ecx		/* bytes -> 16-bit words */
#ifdef USE_AS_BZERO16
	xor	%eax, %eax
#else
	movzwl	CHR(%esp), %eax
	mov	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax		/* eax = value:value */
#endif
	movl	DEST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32wordsormore)

/* Fewer than 32 words: point EDX at the end of the buffer and jump to
   the straight-line store sequence for exactly ECX words.  */
L(write_less32words):
	lea	(%edx, %ecx, 2), %edx
	BRANCH_TO_JMPTBL_ENTRY (L(table_less32words))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less32words):
	.int	JMPTBL (L(write_0words), L(table_less32words))
	.int	JMPTBL (L(write_1words), L(table_less32words))
	.int	JMPTBL (L(write_2words), L(table_less32words))
	.int	JMPTBL (L(write_3words), L(table_less32words))
	.int	JMPTBL (L(write_4words), L(table_less32words))
	.int	JMPTBL (L(write_5words), L(table_less32words))
	.int	JMPTBL (L(write_6words), L(table_less32words))
	.int	JMPTBL (L(write_7words), L(table_less32words))
	.int	JMPTBL (L(write_8words), L(table_less32words))
	.int	JMPTBL (L(write_9words), L(table_less32words))
	.int	JMPTBL (L(write_10words), L(table_less32words))
	.int	JMPTBL (L(write_11words), L(table_less32words))
	.int	JMPTBL (L(write_12words), L(table_less32words))
	.int	JMPTBL (L(write_13words), L(table_less32words))
	.int	JMPTBL (L(write_14words), L(table_less32words))
	.int	JMPTBL (L(write_15words), L(table_less32words))
	.int	JMPTBL (L(write_16words), L(table_less32words))
	.int	JMPTBL (L(write_17words), L(table_less32words))
	.int	JMPTBL (L(write_18words), L(table_less32words))
	.int	JMPTBL (L(write_19words), L(table_less32words))
	.int	JMPTBL (L(write_20words), L(table_less32words))
	.int	JMPTBL (L(write_21words), L(table_less32words))
	.int	JMPTBL (L(write_22words), L(table_less32words))
	.int	JMPTBL (L(write_23words), L(table_less32words))
	.int	JMPTBL (L(write_24words), L(table_less32words))
	.int	JMPTBL (L(write_25words), L(table_less32words))
	.int	JMPTBL (L(write_26words), L(table_less32words))
	.int	JMPTBL (L(write_27words), L(table_less32words))
	.int	JMPTBL (L(write_28words), L(table_less32words))
	.int	JMPTBL (L(write_29words), L(table_less32words))
	.int	JMPTBL (L(write_30words), L(table_less32words))
	.int	JMPTBL (L(write_31words), L(table_less32words))
	.popsection

/* Short-path store chains.  Each entry point falls through the ones
   below it, so entering at write_Nwords stores exactly N words ending
   at EDX.  The four chains cover N mod 4 == 0, 1, 2 and 3.  */
	ALIGN (4)
L(write_28words):
	movl	%eax, -56(%edx)
	movl	%eax, -52(%edx)
L(write_24words):
	movl	%eax, -48(%edx)
	movl	%eax, -44(%edx)
L(write_20words):
	movl	%eax, -40(%edx)
	movl	%eax, -36(%edx)
L(write_16words):
	movl	%eax, -32(%edx)
	movl	%eax, -28(%edx)
L(write_12words):
	movl	%eax, -24(%edx)
	movl	%eax, -20(%edx)
L(write_8words):
	movl	%eax, -16(%edx)
	movl	%eax, -12(%edx)
L(write_4words):
	movl	%eax, -8(%edx)
	movl	%eax, -4(%edx)
L(write_0words):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_29words):
	movl	%eax, -58(%edx)
	movl	%eax, -54(%edx)
L(write_25words):
	movl	%eax, -50(%edx)
	movl	%eax, -46(%edx)
L(write_21words):
	movl	%eax, -42(%edx)
	movl	%eax, -38(%edx)
L(write_17words):
	movl	%eax, -34(%edx)
	movl	%eax, -30(%edx)
L(write_13words):
	movl	%eax, -26(%edx)
	movl	%eax, -22(%edx)
L(write_9words):
	movl	%eax, -18(%edx)
	movl	%eax, -14(%edx)
L(write_5words):
	movl	%eax, -10(%edx)
	movl	%eax, -6(%edx)
L(write_1words):
	mov	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_30words):
	movl	%eax, -60(%edx)
	movl	%eax, -56(%edx)
L(write_26words):
	movl	%eax, -52(%edx)
	movl	%eax, -48(%edx)
L(write_22words):
	movl	%eax, -44(%edx)
	movl	%eax, -40(%edx)
L(write_18words):
	movl	%eax, -36(%edx)
	movl	%eax, -32(%edx)
L(write_14words):
	movl	%eax, -28(%edx)
	movl	%eax, -24(%edx)
L(write_10words):
	movl	%eax, -20(%edx)
	movl	%eax, -16(%edx)
L(write_6words):
	movl	%eax, -12(%edx)
	movl	%eax, -8(%edx)
L(write_2words):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_31words):
	movl	%eax, -62(%edx)
	movl	%eax, -58(%edx)
L(write_27words):
	movl	%eax, -54(%edx)
	movl	%eax, -50(%edx)
L(write_23words):
	movl	%eax, -46(%edx)
	movl	%eax, -42(%edx)
L(write_19words):
	movl	%eax, -38(%edx)
	movl	%eax, -34(%edx)
L(write_15words):
	movl	%eax, -30(%edx)
	movl	%eax, -26(%edx)
L(write_11words):
	movl	%eax, -22(%edx)
	movl	%eax, -18(%edx)
L(write_7words):
	movl	%eax, -14(%edx)
	movl	%eax, -10(%edx)
L(write_3words):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)

/* Long path: 32 words (64 bytes) or more.  From here ECX counts bytes.  */
L(32wordsormore):
	shl	$1, %ecx		/* words -> bytes (always even) */
	test	$0x01, %edx
	jz	L(aligned2bytes)
	/* Odd destination address: store the first and last four bytes
	   now, then skip one byte at each end and rotate the pattern by
	   one byte so later stores stay in phase with the ones just
	   written.  */
	mov	%eax, (%edx)
	mov	%eax, -4(%edx, %ecx)
	sub	$2, %ecx
	add	$1, %edx
	rol	$8, %eax
L(aligned2bytes):
#ifdef USE_AS_BZERO16
	pxor	%xmm0, %xmm0
#else
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0	/* broadcast eax to all 4 dwords */
#endif
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX > 32 and EDX is not 16 byte aligned.  */
L(not_aligned_16):
	/* Cover the unaligned head with one unaligned 16-byte store, then
	   round EDX up to the next 16-byte boundary and shrink ECX by
	   the number of bytes skipped.  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	sub	%edx, %eax		/* eax = -(bytes skipped) */
	add	%eax, %ecx
	movd	%xmm0, %eax		/* EAX was scratch; reload pattern */

	ALIGN (4)
/* Invariant from here on: EDX is 16-byte aligned.  */
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	add	%ecx, %edx		/* edx = end of region */
	shr	$1, %ecx		/* table is indexed by word count */
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* 128 bytes or more: pick a store strategy by comparing the length
   with the shared and data cache sizes.  EBX is used as scratch for the
   shared-cache threshold.  When that requires an extra push of EBX
   (constant threshold, or non-PIC build where EBX has not been saved
   by ENTRANCE), MEMSET16_EBX_PUSHED records it so that every matching
   pop below is keyed on the same condition and the stack stays
   balanced whichever of SHARED_CACHE_SIZE / DATA_CACHE_SIZE is
   defined.  */
L(128bytesormore):
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
# define MEMSET16_EBX_PUSHED 1
#endif
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_shared_cache_size, %ebx
# endif
#endif
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)

	/* Not taking the non-temporal path: EBX's threshold is no longer
	   needed, so undo the extra push (if any).  popl leaves the
	   flags alone, and they are recomputed by the cmp below anyway.
	   RESTORE_EBX_STATE re-declares the pushed state for the unwind
	   info of the nt path, which is entered from before this pop.  */
#ifdef MEMSET16_EBX_PUSHED
	POP (%ebx)
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
#else
# define RESTORE_EBX_STATE
#endif
#ifdef DATA_CACHE_SIZE
	cmp	$DATA_CACHE_SIZE, %ecx
#else
# if (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
	cmp	__x86_data_cache_size, %ecx
# endif
#endif

	jae	L(128bytes_L2_normal)
	/* Length below the data-cache size.  Bias ECX by -128 so that the
	   borrow from each "sub $128" in the loop tells whether another
	   full 128-byte block remains after the one being stored.  The
	   movdqa/lea instructions between sub and jb/jae do not modify
	   the flags.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	lea	128(%ecx), %ecx		/* undo the bias: ecx = bytes left (< 128) */
	add	%ecx, %edx
	shr	$1, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* Length at or above the data-cache size but below the shared-cache
   size: aligned stores with prefetch ahead of the write pointer.  */
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	add	%ecx, %edx
	shr	$1, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	RESTORE_EBX_STATE
/* Length at or above the shared-cache size (EBX).  The first
   EBX rounded down to a multiple of 128 bytes are written with regular
   stores, the rest with non-temporal stores.  ECX is set to the byte
   count left over for the second phase.  */
L(128bytesormore_nt_start):
	sub	%ebx, %ecx
	mov	%ebx, %eax
	and	$0x7f, %eax
	add	%eax, %ecx		/* ecx = len - (ebx rounded down to 128) */
	movd	%xmm0, %eax		/* EAX was scratch; reload pattern */
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	sfence				/* order the non-temporal stores */
L(shared_cache_loop_end):
	/* Undo the extra push made at L(128bytesormore), if there was one.  */
#ifdef MEMSET16_EBX_PUSHED
	POP (%ebx)
#endif
	add	%ecx, %edx
	shr	$1, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.popsection


/* Tail store chains for a 16-byte-aligned start and fewer than 128
   bytes remaining.  EDX points one past the end, so EDX minus the
   remaining byte count is the aligned start and every movdqa below hits
   a 16-byte boundary.  Each chain covers one value of (bytes mod 16)
   and finishes the last 0..14 bytes with movq/movl/movw; the movl/movw
   stores take the pattern from EAX.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

END (MEMSET)