/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Contributed by: Intel Corporation
 */

/*
 * sse2_memset16_atom -- fill a buffer with a 16-bit pattern using SSE2.
 *
 * Syntax: GNU as, AT&T operand order, run through the C preprocessor.
 * Target: 32-bit x86; all arguments are read from the stack.
 *
 * Stack arguments (offsets are relative to ESP after ENTRANCE):
 *   DEST(%esp)  destination pointer
 *   CHR(%esp)   16-bit fill value (absent when USE_AS_BZERO16 is defined,
 *               in which case the pattern is zero)
 *   LEN(%esp)   length; halved on entry when USE_AS_ANDROID is defined,
 *               after which it is used as a count of 16-bit words
 *
 * Register use:
 *   EAX   fill value replicated into both 16-bit halves
 *   ECX   remaining count (words on the short path, bytes on the long one)
 *   EDX   destination cursor; the tail handlers expect it to point one
 *         past the last byte to store
 *   XMM0  fill value broadcast to all 16-bit lanes
 *   EBX   scratch: PC/GOT base when SHARED, cache size on the long path;
 *         saved and restored around every use
 *
 * SETRTNVAL is defined empty below, so no return value is loaded into EAX.
 */

#ifndef L
# define L(label)	.L##label
#endif

#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
# define ENTRY(name)			\
	.type name,  @function; 	\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* Unwind bookkeeping for a 4-byte push/pop of REG. */
#define CFI_PUSH(REG)						\
  cfi_adjust_cfa_offset (4);					\
  cfi_rel_offset (REG, 0)

#define CFI_POP(REG)						\
  cfi_adjust_cfa_offset (-4);					\
  cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Stack offsets of the arguments; PARMS accounts for the return address
   and, when SHARED, for the EBX saved by ENTRANCE.  */
#ifdef USE_AS_BZERO16
# define DEST		PARMS
# define LEN		DEST+4
#else
# define DEST		PARMS
# define CHR		DEST+4
# define LEN		CHR+4
#endif

/* The first branch is hard-wired, so SETRTNVAL expands to nothing and the
   destination pointer is not reloaded into EAX before returning.  */
#if 1
# define SETRTNVAL
#else
# define SETRTNVAL	movl DEST(%esp), %eax
#endif

#ifdef SHARED
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
/* RETURN is used in the middle of the function: after the ret, re-assert
   the "EBX is pushed" unwind state for the code that follows.  */
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.   */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__i686.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    /* EBX now holds the absolute address of the handler.  Go.  */ \
    jmp		*%ebx

	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
	.globl	__i686.get_pc_thunk.bx
	.hidden	__i686.get_pc_thunk.bx
	ALIGN (4)
	.type	__i686.get_pc_thunk.bx,@function
/* Returns the caller's return address (i.e. its PC) in EBX.  */
__i686.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    jmp		*TABLE(,%ecx,4)
#endif

	.section .text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY (sse2_memset16_atom)
	ENTRANCE

	movl	LEN(%esp), %ecx
#ifdef USE_AS_ANDROID
	shr	$1, %ecx
#endif
#ifdef USE_AS_BZERO16
	xor	%eax, %eax
#else
	/* Replicate the 16-bit value into both halves of EAX.  */
	movzwl	CHR(%esp), %eax
	mov	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
#endif
	movl	DEST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32wordsormore)

/* Fewer than 32 words: point EDX one past the end and jump straight to
   the handler for exactly ECX words.  */
L(write_less32words):
	lea	(%edx, %ecx, 2), %edx
	BRANCH_TO_JMPTBL_ENTRY (L(table_less32words))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less32words):
	.int	JMPTBL (L(write_0words), L(table_less32words))
	.int	JMPTBL (L(write_1words), L(table_less32words))
	.int	JMPTBL (L(write_2words), L(table_less32words))
	.int	JMPTBL (L(write_3words), L(table_less32words))
	.int	JMPTBL (L(write_4words), L(table_less32words))
	.int	JMPTBL (L(write_5words), L(table_less32words))
	.int	JMPTBL (L(write_6words), L(table_less32words))
	.int	JMPTBL (L(write_7words), L(table_less32words))
	.int	JMPTBL (L(write_8words), L(table_less32words))
	.int	JMPTBL (L(write_9words), L(table_less32words))
	.int	JMPTBL (L(write_10words), L(table_less32words))
	.int	JMPTBL (L(write_11words), L(table_less32words))
	.int	JMPTBL (L(write_12words), L(table_less32words))
	.int	JMPTBL (L(write_13words), L(table_less32words))
	.int	JMPTBL (L(write_14words), L(table_less32words))
	.int	JMPTBL (L(write_15words), L(table_less32words))
	.int	JMPTBL (L(write_16words), L(table_less32words))
	.int	JMPTBL (L(write_17words), L(table_less32words))
	.int	JMPTBL (L(write_18words), L(table_less32words))
	.int	JMPTBL (L(write_19words), L(table_less32words))
	.int	JMPTBL (L(write_20words), L(table_less32words))
	.int	JMPTBL (L(write_21words), L(table_less32words))
	.int	JMPTBL (L(write_22words), L(table_less32words))
	.int	JMPTBL (L(write_23words), L(table_less32words))
	.int	JMPTBL (L(write_24words), L(table_less32words))
	.int	JMPTBL (L(write_25words), L(table_less32words))
	.int	JMPTBL (L(write_26words), L(table_less32words))
	.int	JMPTBL (L(write_27words), L(table_less32words))
	.int	JMPTBL (L(write_28words), L(table_less32words))
	.int	JMPTBL (L(write_29words), L(table_less32words))
	.int	JMPTBL (L(write_30words), L(table_less32words))
	.int	JMPTBL (L(write_31words), L(table_less32words))
	.popsection

/* Short-path handlers.  Each chain is entered at the label matching the
   word count and falls through, storing backwards-addressed dwords
   relative to EDX (one past the end).  Counts congruent to 0 mod 4.  */
	ALIGN (4)
L(write_28words):
	movl	%eax, -56(%edx)
	movl	%eax, -52(%edx)
L(write_24words):
	movl	%eax, -48(%edx)
	movl	%eax, -44(%edx)
L(write_20words):
	movl	%eax, -40(%edx)
	movl	%eax, -36(%edx)
L(write_16words):
	movl	%eax, -32(%edx)
	movl	%eax, -28(%edx)
L(write_12words):
	movl	%eax, -24(%edx)
	movl	%eax, -20(%edx)
L(write_8words):
	movl	%eax, -16(%edx)
	movl	%eax, -12(%edx)
L(write_4words):
	movl	%eax, -8(%edx)
	movl	%eax, -4(%edx)
L(write_0words):
	SETRTNVAL
	RETURN

/* Counts congruent to 1 mod 4: the chain ends with a single word store.  */
	ALIGN (4)
L(write_29words):
	movl	%eax, -58(%edx)
	movl	%eax, -54(%edx)
L(write_25words):
	movl	%eax, -50(%edx)
	movl	%eax, -46(%edx)
L(write_21words):
	movl	%eax, -42(%edx)
	movl	%eax, -38(%edx)
L(write_17words):
	movl	%eax, -34(%edx)
	movl	%eax, -30(%edx)
L(write_13words):
	movl	%eax, -26(%edx)
	movl	%eax, -22(%edx)
L(write_9words):
	movl	%eax, -18(%edx)
	movl	%eax, -14(%edx)
L(write_5words):
	movl	%eax, -10(%edx)
	movl	%eax, -6(%edx)
L(write_1words):
	mov	%ax, -2(%edx)
	SETRTNVAL
	RETURN

/* Counts congruent to 2 mod 4: the chain ends with a single dword store.  */
	ALIGN (4)
L(write_30words):
	movl	%eax, -60(%edx)
	movl	%eax, -56(%edx)
L(write_26words):
	movl	%eax, -52(%edx)
	movl	%eax, -48(%edx)
L(write_22words):
	movl	%eax, -44(%edx)
	movl	%eax, -40(%edx)
L(write_18words):
	movl	%eax, -36(%edx)
	movl	%eax, -32(%edx)
L(write_14words):
	movl	%eax, -28(%edx)
	movl	%eax, -24(%edx)
L(write_10words):
	movl	%eax, -20(%edx)
	movl	%eax, -16(%edx)
L(write_6words):
	movl	%eax, -12(%edx)
	movl	%eax, -8(%edx)
L(write_2words):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

/* Counts congruent to 3 mod 4: the chain ends with a dword plus a word.  */
	ALIGN (4)
L(write_31words):
	movl	%eax, -62(%edx)
	movl	%eax, -58(%edx)
L(write_27words):
	movl	%eax, -54(%edx)
	movl	%eax, -50(%edx)
L(write_23words):
	movl	%eax, -46(%edx)
	movl	%eax, -42(%edx)
L(write_19words):
	movl	%eax, -38(%edx)
	movl	%eax, -34(%edx)
L(write_15words):
	movl	%eax, -30(%edx)
	movl	%eax, -26(%edx)
L(write_11words):
	movl	%eax, -22(%edx)
	movl	%eax, -18(%edx)
L(write_7words):
	movl	%eax, -14(%edx)
	movl	%eax, -10(%edx)
L(write_3words):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)

/* Long path.  From here on ECX counts bytes, not words.  */
L(32wordsormore):
	shl	$1, %ecx
	test	$0x01, %edx
	jz	L(aligned2bytes)
	/* Odd destination: store the first and the last four bytes with
	   the original pattern, drop one byte at each end, and rotate the
	   pattern by one byte so it stays in phase at the now even
	   address.  */
	mov	%eax, (%edx)
	mov	%eax, -4(%edx, %ecx)
	sub	$2, %ecx
	add	$1, %edx
	rol	$8, %eax
L(aligned2bytes):
#ifdef USE_AS_BZERO16
	pxor	%xmm0, %xmm0
#else
	/* Broadcast EAX to all four dwords of XMM0.  */
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
#endif
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX > 32 and EDX is not 16 byte aligned.  */
L(not_aligned_16):
	/* Cover the unaligned head with one unaligned 16-byte store, then
	   round EDX up to the next 16-byte boundary and shrink ECX by the
	   number of bytes skipped (EAX = old EDX - new EDX, negative).  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	sub	%edx, %eax
	add	%eax, %ecx
	/* EAX was used as scratch; reload the pattern from XMM0.  */
	movd	%xmm0, %eax

	ALIGN (4)
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

/* Fewer than 128 bytes left: EDX -> one past the end, ECX -> table index
   (one entry per two bytes).  */
L(aligned_16_less128bytes):
	add	%ecx, %edx
	shr	$1, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
L(128bytesormore):
	/* Load the shared cache size into EBX.  EBX is pushed here exactly
	   when "defined SHARED_CACHE_SIZE || !defined SHARED"; the pops
	   further down are guarded by the same condition so the stack stays
	   balanced whichever cache-size constants are defined.  */
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#else
# ifdef SHARED
	call	__i686.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_shared_cache_size, %ebx
# endif
#endif
	/* At least as large as the shared cache: take the path that ends
	   with non-temporal stores.  */
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)


	/* Undo the push above (if any).  RESTORE_EBX_STATE re-asserts the
	   pushed unwind state for the non-temporal path, which is reached
	   with EBX still on the stack.  popl does not modify flags, so it
	   may precede the compare in every configuration.  */
#if defined SHARED_CACHE_SIZE || !defined SHARED
	POP (%ebx)
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
#else
# define RESTORE_EBX_STATE
#endif
#ifdef DATA_CACHE_SIZE
	cmp	$DATA_CACHE_SIZE, %ecx
#else
# ifdef SHARED
	call	__i686.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
	cmp	__x86_data_cache_size, %ecx
# endif
#endif

	jae	L(128bytes_L2_normal)
	/* Bias ECX by -128 so that the borrow from the "sub" inside the loop
	   signals that fewer than 128 bytes remain after the current
	   block.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	/* The stores and the lea below leave the flags from "sub" intact.  */
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Remove the -128 bias to get the true remaining byte count.  */
	lea	128(%ecx), %ecx
	add	%ecx, %edx
	shr	$1, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* Size is at least the data cache size: same 128-byte block loop, with
   prefetches ahead of the store cursor.  */
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	add	%ecx, %edx
	shr	$1, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	RESTORE_EBX_STATE
/* EBX = shared cache size, ECX = total bytes (>= EBX).  The first loop
   writes EBX rounded down to a multiple of 128 with ordinary stores; ECX
   is set to the bytes left over after that (ECX - EBX + EBX % 128).  */
L(128bytesormore_nt_start):
	sub	%ebx, %ecx
	mov	%ebx, %eax
	and	$0x7f, %eax
	add	%eax, %ecx
	/* EAX was used as scratch; reload the pattern from XMM0.  */
	movd	%xmm0, %eax
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
/* Remaining full 128-byte blocks are written with non-temporal stores,
   followed by a store fence.  */
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	sfence
L(shared_cache_loop_end):
	/* Same condition as the push in L(128bytesormore).  */
#if defined SHARED_CACHE_SIZE || !defined SHARED
	POP (%ebx)
#endif
	add	%ecx, %edx
	shr	$1, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.popsection

/* Tail handlers for 0..126 remaining bytes.  EDX points one past the end
   and the tail starts on a 16-byte boundary, so "EDX - N" is aligned at
   the entry label for N bytes and every movdqa in a chain is aligned.
   Each chain ends with the stores for the final N % 16 bytes.  */

	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN


	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

END (sse2_memset16_atom)