/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Contributed by: Intel Corporation
 */

/*
 * Syntax: GNU as, AT&T operand order (source first, destination last).
 * The file is run through the C preprocessor before assembly.
 */

#include "cache.h"

/* Local (non-exported) label helper: L(foo) expands to .Lfoo. */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n) aligns to a 2^n byte boundary. */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

/* Open a global function symbol, aligned to 16 bytes, with CFI started. */
#ifndef ENTRY
# define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
name: \
	cfi_startproc
#endif

/* Close the function opened by ENTRY and record its size. */
#ifndef END
# define END(name) \
	cfi_endproc; \
	.size name, .-name
#endif

/* A jump table entry is the offset of target I from the table base B. */
#define JMPTBL(I, B)	I - B

/* Branch to an entry in a jump table.  TABLE is a jump table with
   relative offsets.  INDEX is a register that contains the index into
   the jump table.  SCALE is the scale of INDEX.
   Clobbers %r11 (table base) and overwrites INDEX with the target
   address.  */
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
	lea	TABLE(%rip), %r11; \
	movslq	(%r11, INDEX, SCALE), INDEX; \
	lea	(%r11, INDEX), INDEX; \
	jmp	*INDEX

/*
 * android_memset16: fill a buffer with a repeating 16-bit pattern.
 *
 * Register use on entry, as read by the code below:
 *   rdi = destination address
 *   si  = 16-bit pattern (the upper bits of esi are discarded)
 *   rdx = length in bytes; it is halved on entry to obtain the number
 *         of 16-bit words, so an odd trailing byte is never written
 * NOTE(review): this presumably corresponds to a C prototype along the
 * lines of "void android_memset16(uint16_t *dst, uint16_t value,
 * size_t size)" under the System V AMD64 ABI -- confirm against the
 * declaring header.
 *
 * Clobbers: rcx, rdx, rsi, rdi, r11, xmm0, flags.  rax is not written.
 *
 * Strategy:
 *   - Fewer than 32 words: point rdi at the end of the buffer and jump
 *     into a chain of unrolled 4-byte stores that falls through to the
 *     end (table_less32words).
 *   - 32 words or more: fix up an odd destination address, broadcast
 *     the pattern into xmm0, align rdi to 16 bytes, store 128-byte
 *     blocks (non-temporal stores when the remaining length exceeds
 *     SHARED_CACHE_SIZE), then finish the last 0..126 bytes through
 *     table_16_128bytes.
 *
 * All length comparisons use unsigned condition codes because rdx is a
 * byte count, matching the unsigned check made at function entry.
 */
	.section .text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY (android_memset16)		// Address in rdi
	shr	$1, %rdx		// Count in rdx
	movzwl	%si, %ecx
	/* Fill the whole ECX with pattern.  */
	shl	$16, %esi
	or	%esi, %ecx		// Pattern in ecx

	cmp	$32, %rdx
	jae	L(32wordsormore)

L(write_less32words):
	/* rdi = one past the last word; the stores below use negative
	   offsets from the end.  rdx (0..31) selects the entry point.  */
	lea	(%rdi, %rdx, 2), %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(table_less32words), %rdx, 4)

	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less32words):
	.int	JMPTBL (L(write_0words), L(table_less32words))
	.int	JMPTBL (L(write_1words), L(table_less32words))
	.int	JMPTBL (L(write_2words), L(table_less32words))
	.int	JMPTBL (L(write_3words), L(table_less32words))
	.int	JMPTBL (L(write_4words), L(table_less32words))
	.int	JMPTBL (L(write_5words), L(table_less32words))
	.int	JMPTBL (L(write_6words), L(table_less32words))
	.int	JMPTBL (L(write_7words), L(table_less32words))
	.int	JMPTBL (L(write_8words), L(table_less32words))
	.int	JMPTBL (L(write_9words), L(table_less32words))
	.int	JMPTBL (L(write_10words), L(table_less32words))
	.int	JMPTBL (L(write_11words), L(table_less32words))
	.int	JMPTBL (L(write_12words), L(table_less32words))
	.int	JMPTBL (L(write_13words), L(table_less32words))
	.int	JMPTBL (L(write_14words), L(table_less32words))
	.int	JMPTBL (L(write_15words), L(table_less32words))
	.int	JMPTBL (L(write_16words), L(table_less32words))
	.int	JMPTBL (L(write_17words), L(table_less32words))
	.int	JMPTBL (L(write_18words), L(table_less32words))
	.int	JMPTBL (L(write_19words), L(table_less32words))
	.int	JMPTBL (L(write_20words), L(table_less32words))
	.int	JMPTBL (L(write_21words), L(table_less32words))
	.int	JMPTBL (L(write_22words), L(table_less32words))
	.int	JMPTBL (L(write_23words), L(table_less32words))
	.int	JMPTBL (L(write_24words), L(table_less32words))
	.int	JMPTBL (L(write_25words), L(table_less32words))
	.int	JMPTBL (L(write_26words), L(table_less32words))
	.int	JMPTBL (L(write_27words), L(table_less32words))
	.int	JMPTBL (L(write_28words), L(table_less32words))
	.int	JMPTBL (L(write_29words), L(table_less32words))
	.int	JMPTBL (L(write_30words), L(table_less32words))
	.int	JMPTBL (L(write_31words), L(table_less32words))
	.popsection

	/* Word counts that are multiples of 4: 8-byte steps, no remainder.  */
	ALIGN (4)
L(write_28words):
	movl	%ecx, -56(%rdi)
	movl	%ecx, -52(%rdi)
L(write_24words):
	movl	%ecx, -48(%rdi)
	movl	%ecx, -44(%rdi)
L(write_20words):
	movl	%ecx, -40(%rdi)
	movl	%ecx, -36(%rdi)
L(write_16words):
	movl	%ecx, -32(%rdi)
	movl	%ecx, -28(%rdi)
L(write_12words):
	movl	%ecx, -24(%rdi)
	movl	%ecx, -20(%rdi)
L(write_8words):
	movl	%ecx, -16(%rdi)
	movl	%ecx, -12(%rdi)
L(write_4words):
	movl	%ecx, -8(%rdi)
	movl	%ecx, -4(%rdi)
L(write_0words):
	ret

	/* Word counts of the form 4k+1: finish with one 16-bit store.  */
	ALIGN (4)
L(write_29words):
	movl	%ecx, -58(%rdi)
	movl	%ecx, -54(%rdi)
L(write_25words):
	movl	%ecx, -50(%rdi)
	movl	%ecx, -46(%rdi)
L(write_21words):
	movl	%ecx, -42(%rdi)
	movl	%ecx, -38(%rdi)
L(write_17words):
	movl	%ecx, -34(%rdi)
	movl	%ecx, -30(%rdi)
L(write_13words):
	movl	%ecx, -26(%rdi)
	movl	%ecx, -22(%rdi)
L(write_9words):
	movl	%ecx, -18(%rdi)
	movl	%ecx, -14(%rdi)
L(write_5words):
	movl	%ecx, -10(%rdi)
	movl	%ecx, -6(%rdi)
L(write_1words):
	mov	%cx, -2(%rdi)
	ret

	/* Word counts of the form 4k+2: finish with one 32-bit store.  */
	ALIGN (4)
L(write_30words):
	movl	%ecx, -60(%rdi)
	movl	%ecx, -56(%rdi)
L(write_26words):
	movl	%ecx, -52(%rdi)
	movl	%ecx, -48(%rdi)
L(write_22words):
	movl	%ecx, -44(%rdi)
	movl	%ecx, -40(%rdi)
L(write_18words):
	movl	%ecx, -36(%rdi)
	movl	%ecx, -32(%rdi)
L(write_14words):
	movl	%ecx, -28(%rdi)
	movl	%ecx, -24(%rdi)
L(write_10words):
	movl	%ecx, -20(%rdi)
	movl	%ecx, -16(%rdi)
L(write_6words):
	movl	%ecx, -12(%rdi)
	movl	%ecx, -8(%rdi)
L(write_2words):
	movl	%ecx, -4(%rdi)
	ret

	/* Word counts of the form 4k+3: finish with a 32-bit and a 16-bit
	   store.  */
	ALIGN (4)
L(write_31words):
	movl	%ecx, -62(%rdi)
	movl	%ecx, -58(%rdi)
L(write_27words):
	movl	%ecx, -54(%rdi)
	movl	%ecx, -50(%rdi)
L(write_23words):
	movl	%ecx, -46(%rdi)
	movl	%ecx, -42(%rdi)
L(write_19words):
	movl	%ecx, -38(%rdi)
	movl	%ecx, -34(%rdi)
L(write_15words):
	movl	%ecx, -30(%rdi)
	movl	%ecx, -26(%rdi)
L(write_11words):
	movl	%ecx, -22(%rdi)
	movl	%ecx, -18(%rdi)
L(write_7words):
	movl	%ecx, -14(%rdi)
	movl	%ecx, -10(%rdi)
L(write_3words):
	movl	%ecx, -6(%rdi)
	movw	%cx, -2(%rdi)
	ret

	ALIGN (4)
L(32wordsormore):
	shl	$1, %rdx		// Back to a byte count (>= 64, even)
	test	$0x01, %edi
	jz	L(aligned2bytes)
	/* Odd destination address: store the first and the last four
	   bytes with the unrotated pattern, then step one byte forward
	   and rotate the pattern by one byte so that every later store,
	   now at an even address, continues the same byte sequence.
	   The last byte is already covered by the store above, hence
	   the length shrinks by two.  */
	mov	%ecx, (%rdi)
	mov	%ecx, -4(%rdi, %rdx)
	sub	$2, %rdx
	add	$1, %rdi
	rol	$8, %ecx
L(aligned2bytes):
	/* Fill xmm0 with the pattern.  */
	movd	%ecx, %xmm0
	pshufd	$0, %xmm0, %xmm0

	testl	$0xf, %edi
	jz	L(aligned_16)
	/* RDX >= 62 bytes and RDI is not 16 byte aligned: cover the head
	   with one unaligned 16-byte store, round RDI up to the next
	   16-byte boundary and shorten RDX by the bytes skipped.  */
	movdqu	%xmm0, (%rdi)
	mov	%rdi, %rsi
	and	$-16, %rdi
	add	$16, %rdi
	sub	%rdi, %rsi		// rsi = old rdi - new rdi (negative)
	add	%rsi, %rdx

	ALIGN (4)
L(aligned_16):
	/* RDI is 16-byte aligned, RDX = bytes remaining (even).  */
	cmp	$128, %rdx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	/* rdi = end of buffer; rdx/2 (0..63) indexes the tail table.  */
	add	%rdx, %rdi
	shr	$1, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)

	ALIGN (4)
L(128bytesormore):
	/* Use non-temporal stores when the remaining length exceeds
	   SHARED_CACHE_SIZE (defined outside this file).  */
	cmp	$SHARED_CACHE_SIZE, %rdx
	ja	L(128bytesormore_nt)

	/* Main loop, unrolled four times: each step stores 128 aligned
	   bytes and leaves to the tail code as soon as fewer than 128
	   bytes remain.  */
L(128bytesormore_normal):
	sub	$128, %rdx
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 0x10(%rdi)
	movdqa	%xmm0, 0x20(%rdi)
	movdqa	%xmm0, 0x30(%rdi)
	movdqa	%xmm0, 0x40(%rdi)
	movdqa	%xmm0, 0x50(%rdi)
	movdqa	%xmm0, 0x60(%rdi)
	movdqa	%xmm0, 0x70(%rdi)
	lea	128(%rdi), %rdi
	cmp	$128, %rdx
	jb	L(128bytesless_normal)

	sub	$128, %rdx
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 0x10(%rdi)
	movdqa	%xmm0, 0x20(%rdi)
	movdqa	%xmm0, 0x30(%rdi)
	movdqa	%xmm0, 0x40(%rdi)
	movdqa	%xmm0, 0x50(%rdi)
	movdqa	%xmm0, 0x60(%rdi)
	movdqa	%xmm0, 0x70(%rdi)
	lea	128(%rdi), %rdi
	cmp	$128, %rdx
	jb	L(128bytesless_normal)

	sub	$128, %rdx
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 0x10(%rdi)
	movdqa	%xmm0, 0x20(%rdi)
	movdqa	%xmm0, 0x30(%rdi)
	movdqa	%xmm0, 0x40(%rdi)
	movdqa	%xmm0, 0x50(%rdi)
	movdqa	%xmm0, 0x60(%rdi)
	movdqa	%xmm0, 0x70(%rdi)
	lea	128(%rdi), %rdi
	cmp	$128, %rdx
	jb	L(128bytesless_normal)

	sub	$128, %rdx
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 0x10(%rdi)
	movdqa	%xmm0, 0x20(%rdi)
	movdqa	%xmm0, 0x30(%rdi)
	movdqa	%xmm0, 0x40(%rdi)
	movdqa	%xmm0, 0x50(%rdi)
	movdqa	%xmm0, 0x60(%rdi)
	movdqa	%xmm0, 0x70(%rdi)
	lea	128(%rdi), %rdi
	cmp	$128, %rdx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Fewer than 128 bytes left: finish through the tail table.  */
	add	%rdx, %rdi
	shr	$1, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)

	ALIGN (4)
L(128bytesormore_nt):
	/* Same 128-byte step, using non-temporal stores.  */
	sub	$128, %rdx
	movntdq	%xmm0, (%rdi)
	movntdq	%xmm0, 0x10(%rdi)
	movntdq	%xmm0, 0x20(%rdi)
	movntdq	%xmm0, 0x30(%rdi)
	movntdq	%xmm0, 0x40(%rdi)
	movntdq	%xmm0, 0x50(%rdi)
	movntdq	%xmm0, 0x60(%rdi)
	movntdq	%xmm0, 0x70(%rdi)
	lea	128(%rdi), %rdi
	cmp	$128, %rdx
	jae	L(128bytesormore_nt)

	/* Fence the non-temporal stores before the ordinary tail stores
	   and the return.  */
	sfence
	add	%rdx, %rdi
	shr	$1, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)

	/* Tail table: entry i handles 2*i remaining bytes (0..126), with
	   rdi pointing one past the end of the buffer.  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.popsection

	/* Tail chains.  Each chain stores 16-byte chunks from the aligned
	   start of the remainder and falls through to a fixed epilogue for
	   the last (length mod 16) bytes.  movdqa is valid because rdi
	   minus the remaining length is the 16-byte aligned start.  */

	/* Remainder 0 mod 16.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%rdi)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%rdi)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%rdi)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%rdi)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%rdi)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%rdi)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%rdi)
L(aligned_16_0bytes):
	ret

	/* Remainder 2 mod 16.  */
	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%rdi)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%rdi)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%rdi)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%rdi)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%rdi)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%rdi)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%rdi)
L(aligned_16_2bytes):
	movw	%cx, -2(%rdi)
	ret

	/* Remainder 4 mod 16.  */
	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%rdi)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%rdi)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%rdi)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%rdi)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%rdi)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%rdi)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%rdi)
L(aligned_16_4bytes):
	movl	%ecx, -4(%rdi)
	ret

	/* Remainder 6 mod 16.  */
	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%rdi)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%rdi)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%rdi)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%rdi)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%rdi)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%rdi)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%rdi)
L(aligned_16_6bytes):
	movl	%ecx, -6(%rdi)
	movw	%cx, -2(%rdi)
	ret

	/* Remainder 8 mod 16.  */
	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%rdi)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%rdi)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%rdi)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%rdi)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%rdi)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%rdi)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%rdi)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%rdi)
	ret

	/* Remainder 10 mod 16.  */
	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%rdi)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%rdi)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%rdi)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%rdi)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%rdi)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%rdi)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%rdi)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%rdi)
	movw	%cx, -2(%rdi)
	ret

	/* Remainder 12 mod 16.  */
	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%rdi)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%rdi)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%rdi)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%rdi)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%rdi)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%rdi)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%rdi)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%rdi)
	movl	%ecx, -4(%rdi)
	ret

	/* Remainder 14 mod 16.  */
	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%rdi)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%rdi)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%rdi)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%rdi)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%rdi)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%rdi)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%rdi)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%rdi)
	movl	%ecx, -6(%rdi)
	movw	%cx, -2(%rdi)
	ret

END (android_memset16)