;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"


%define BLOCK_HEIGHT_WIDTH 4
%define vp8_filter_weight 128
%define VP8_FILTER_SHIFT  7

; Byte offset of tap n inside a six-tap coefficient set.  The routines below
; read the taps at 16-byte intervals, which matches the layout of
; vp8_six_tap_mmx at the end of this file (every tap replicated into 8 words).
%define TAP_OFS(n)        ((n) * 16)


;-----------------------------------------------------------------------------
;void vp8_filter_block1d_h6_mmx
;(
;    unsigned char  *src_ptr,
;    unsigned short *output_ptr,
;    unsigned int    src_pixels_per_line,
;    unsigned int    pixel_step,            (not read by this routine)
;    unsigned int    output_height,
;    unsigned int    output_width,
;    short          *vp8_filter
;)
;
; Horizontal six-tap pass.  Each iteration filters four neighbouring pixels
; of one source row and stores them as four 16-bit words at output_ptr:
;
;   out[i] = clamp8((sum(k = 0..5) tap[k] * src[i - 2 + k] + 64) >> 7)
;
; Partial sums are accumulated with signed saturation (paddsw).
;
; Register roles inside the loop:
;   rsi = current source row          rdi = current output row
;   rcx = rows remaining              rax = output_width (added to rdi per row)
;   rdx = coefficient set             mm0 = zero
;   mm1 = tap 1, mm2 = tap 2, mm6 = tap 3, mm7 = tap 4
;   taps 0 and 5 are used straight from memory
;-----------------------------------------------------------------------------
global sym(vp8_filter_block1d_h6_mmx)
sym(vp8_filter_block1d_h6_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdx, arg(6)                     ; rdx -> vp8_filter

    movq        mm1, [rdx + TAP_OFS(1)]         ; keep the four inner taps
    movq        mm2, [rdx + TAP_OFS(2)]         ; resident in registers
    movq        mm6, [rdx + TAP_OFS(3)]
    movq        mm7, [rdx + TAP_OFS(4)]

    mov         rdi, arg(1)                     ; rdi -> output_ptr
    mov         rsi, arg(0)                     ; rsi -> src_ptr
    movsxd      rcx, dword ptr arg(4)           ; rcx = output_height
    movsxd      rax, dword ptr arg(5)           ; rax = output_width
    pxor        mm0, mm0                        ; zero, used for unpacking

.nextrow:
    movq        mm3, [rsi - 2]                  ; bytes p[-2] .. p[5]
    movq        mm4, mm3                        ; second copy of the 8 bytes
    psrlq       mm3, 8                          ; drop p[-2]
    punpcklbw   mm3, mm0                        ; words p[-1] .. p[2]
    pmullw      mm3, mm1                        ; * tap 1, starts the sum

    movq        mm5, mm4                        ; third copy of the 8 bytes
    punpckhbw   mm4, mm0                        ; words p[2] .. p[5]
    pmullw      mm4, mm7                        ; * tap 4
    paddsw      mm3, mm4

    movq        mm4, mm5                        ; refresh the copy in mm4
    psrlq       mm5, 16                         ; drop p[-2], p[-1]
    punpcklbw   mm5, mm0                        ; words p[0] .. p[3]
    pmullw      mm5, mm2                        ; * tap 2
    paddsw      mm3, mm5

    movq        mm5, mm4                        ; refresh the copy in mm5
    psrlq       mm4, 24                         ; drop p[-2] .. p[0]
    punpcklbw   mm4, mm0                        ; words p[1] .. p[4]
    pmullw      mm4, mm6                        ; * tap 3
    paddsw      mm3, mm4

    ; outer taps, multiplied directly from the coefficient table
    movd        mm4, [rsi + 3]                  ; bytes p[3] .. p[6]
    punpcklbw   mm4, mm0                        ; words p[3] .. p[6]
    pmullw      mm4, [rdx + TAP_OFS(5)]         ; * tap 5
    paddsw      mm3, mm4

    punpcklbw   mm5, mm0                        ; words p[-2] .. p[1]
    pmullw      mm5, [rdx]                      ; * tap 0
    paddsw      mm3, mm5

    paddsw      mm3, [GLOBAL(rd)]               ; + 64 (rounding)
    psraw       mm3, VP8_FILTER_SHIFT           ; >> 7
    packuswb    mm3, mm0                        ; saturate to 0..255 ...
    punpcklbw   mm3, mm0                        ; ... and widen back to words

    movq        [rdi], mm3                      ; four 16-bit results

%if ABI_IS_32BIT
    add         rsi, dword ptr arg(2)           ; src += src_pixels_per_line
    add         rdi, rax                        ; dst += output_width
%else
    movsxd      r8, dword ptr arg(2)            ; r8 = src_pixels_per_line
    add         rdi, rax                        ; dst += output_width

    add         rsi, r8                         ; src += src_pixels_per_line
%endif

    dec         rcx                             ; one row done
    jnz         .nextrow

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------------
;void vp8_filter_block1dc_v6_mmx
;(
;    short          *src_ptr,
;    unsigned char  *output_ptr,
;    int             output_pitch,
;    unsigned int    pixels_per_line,
;    unsigned int    pixel_step,            (not read by this routine)
;    unsigned int    output_height,
;    unsigned int    output_width,          (not read by this routine)
;    short          *vp8_filter
;)
;
; Vertical six-tap pass over rows of four 16-bit samples.  pixels_per_line
; is added directly to the byte address in rsi, so it acts as the byte
; distance between consecutive source rows.  For every output row the six
; source rows -2 .. +3 are weighted, rounded (+64), shifted right by 7,
; saturated to bytes and four bytes are stored at output_ptr.
;
; Register roles inside the loop:
;   rsi = address of source row -2    rdx = source row stride
;   rdi = current output row          rax = output_pitch
;   rcx = rows remaining              rbx = coefficient set
;   mm0 = zero                        mm5 = rounding constant
;   mm1 = tap 1, mm2 = tap 2, mm6 = tap 3, mm7 = tap 4
;-----------------------------------------------------------------------------
global sym(vp8_filter_block1dc_v6_mmx)
sym(vp8_filter_block1dc_v6_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ; The rounding constant is fetched before rbx is reused as the filter
    ; pointer.  NOTE(review): presumably GLOBAL() relies on the register
    ; given to GET_GOT in PIC builds - confirm against x86_abi_support.asm.
    movq        mm5, [GLOBAL(rd)]
    push        rbx
    mov         rbx, arg(7)                     ; rbx -> vp8_filter
    movq        mm1, [rbx + TAP_OFS(1)]         ; keep the four inner taps
    movq        mm2, [rbx + TAP_OFS(2)]         ; resident in registers
    movq        mm6, [rbx + TAP_OFS(3)]
    movq        mm7, [rbx + TAP_OFS(4)]

    movsxd      rdx, dword ptr arg(3)           ; rdx = pixels_per_line
    mov         rdi, arg(1)                     ; rdi -> output_ptr
    mov         rsi, arg(0)                     ; rsi -> src_ptr
    sub         rsi, rdx                        ; step back two rows so that
    sub         rsi, rdx                        ; rsi addresses row -2
    movsxd      rcx, dword ptr arg(5)           ; rcx = output_height
    movsxd      rax, dword ptr arg(2)           ; rax = output_pitch
    pxor        mm0, mm0                        ; zero, used when packing


.nextrow_cv:
    movq        mm3, [rsi + rdx]                ; row -1
    pmullw      mm3, mm1                        ; * tap 1, starts the sum


    movq        mm4, [rsi + 4*rdx]              ; row +2
    pmullw      mm4, mm7                        ; * tap 4
    paddsw      mm3, mm4

    movq        mm4, [rsi + 2*rdx]              ; row 0
    pmullw      mm4, mm2                        ; * tap 2
    paddsw      mm3, mm4

    movq        mm4, [rsi]                      ; row -2
    pmullw      mm4, [rbx]                      ; * tap 0
    paddsw      mm3, mm4


    add         rsi, rdx                        ; advance one row; this also
                                                ; avoids needing 3 * stride
    movq        mm4, [rsi + 2*rdx]              ; row +1
    pmullw      mm4, mm6                        ; * tap 3
    paddsw      mm3, mm4

    movq        mm4, [rsi + 4*rdx]              ; row +3
    pmullw      mm4, [rbx + TAP_OFS(5)]         ; * tap 5
    paddsw      mm3, mm4


    paddsw      mm3, mm5                        ; + 64 (rounding)
    psraw       mm3, VP8_FILTER_SHIFT           ; >> 7
    packuswb    mm3, mm0                        ; saturate to 0..255

    movd        [rdi], mm3                      ; four result bytes
    ; Each iteration re-reads five of the six rows used by the previous one.
    ; The original authors note the data should already be in cache, so the
    ; redundant loads are expected to be cheap, though they are avoidable.
    lea         rdi, [rdi + rax]                ; dst += output_pitch
    dec         rcx                             ; one row done
    jnz         .nextrow_cv

    pop         rbx

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------------
;void bilinear_predict8x8_mmx
;(
;    unsigned char  *src_ptr,
;    int             src_pixels_per_line,
;    int             xoffset,
;    int             yoffset,
;    unsigned char  *dst_ptr,
;    int             dst_pitch
;)
;
; Two-pass bilinear prediction, eight pixels per row.  The horizontal pass
; blends src[x] and src[x + 1] with the filter pair selected by xoffset; the
; vertical pass blends the previous and the current horizontally filtered
; rows with the pair selected by yoffset.  Both passes add 64 and shift
; right by 7.  The loop ends when rdi reaches dst_ptr + 8 * dst_pitch.
;
; Register roles inside the loop:
;   rsi = current source row          rdx = src_pixels_per_line
;   rdi = current destination row     rcx = destination end address
;   rax = vertical filter pair        mm0 = zero
;   mm1 / mm2 = horizontal weights for src[x] / src[x + 1]
;   mm7 = previous horizontally filtered row, packed to bytes
;-----------------------------------------------------------------------------
global sym(vp8_bilinear_predict8x8_mmx)
sym(vp8_bilinear_predict8x8_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ;const short *HFilter = bilinear_filters_mmx[xoffset];
    ;const short *VFilter = bilinear_filters_mmx[yoffset];

    movsxd      rax, dword ptr arg(2)           ; rax = xoffset
    mov         rdi, arg(4)                     ; rdi -> dst_ptr

    shl         rax, 5                          ; 32 bytes per filter pair
    lea         rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]

    add         rax, rcx                        ; rax -> HFilter
    mov         rsi, arg(0)                     ; rsi -> src_ptr

    movsxd      rdx, dword ptr arg(5)           ; rdx = dst_pitch
    movq        mm1, [rax]                      ; HFilter[0]

    movq        mm2, [rax + 16]                 ; HFilter[1]
    movsxd      rax, dword ptr arg(3)           ; rax = yoffset

    pxor        mm0, mm0                        ; zero, used for unpacking

    shl         rax, 5                          ; 32 bytes per filter pair
    add         rax, rcx                        ; rax -> VFilter

    lea         rcx, [rdi + rdx*8]              ; end = dst_ptr + 8 * dst_pitch
    movsxd      rdx, dword ptr arg(1)           ; rdx = src_pixels_per_line



    ; horizontal pass over the first source row
    movq        mm3, [rsi]                      ; bytes src[0] .. src[7]
    movq        mm4, mm3                        ; copy for the upper half

    punpcklbw   mm3, mm0                        ; words src[0] .. src[3]
    punpckhbw   mm4, mm0                        ; words src[4] .. src[7]

    pmullw      mm3, mm1                        ; * HFilter[0]
    pmullw      mm4, mm1

    movq        mm5, [rsi + 1]                  ; bytes src[1] .. src[8]
    movq        mm6, mm5                        ; copy for the upper half

    punpcklbw   mm5, mm0                        ; words src[1] .. src[4]
    punpckhbw   mm6, mm0                        ; words src[5] .. src[8]

    pmullw      mm5, mm2                        ; * HFilter[1]
    pmullw      mm6, mm2

    paddw       mm3, mm5                        ; horizontal sums
    paddw       mm4, mm6

    paddw       mm3, [GLOBAL(rd)]               ; + 64 (rounding)
    psraw       mm3, VP8_FILTER_SHIFT           ; >> 7

    paddw       mm4, [GLOBAL(rd)]
    psraw       mm4, VP8_FILTER_SHIFT

    movq        mm7, mm3                        ; remember this row as bytes
    packuswb    mm7, mm4                        ; for the vertical pass

    add         rsi, rdx                        ; next source row
.next_row_8x8:
    movq        mm3, [rsi]                      ; bytes src[0] .. src[7]
    movq        mm4, mm3                        ; copy for the upper half

    punpcklbw   mm3, mm0                        ; words src[0] .. src[3]
    punpckhbw   mm4, mm0                        ; words src[4] .. src[7]

    pmullw      mm3, mm1                        ; * HFilter[0]
    pmullw      mm4, mm1

    movq        mm5, [rsi + 1]                  ; bytes src[1] .. src[8]
    movq        mm6, mm5                        ; copy for the upper half

    punpcklbw   mm5, mm0                        ; words src[1] .. src[4]
    punpckhbw   mm6, mm0                        ; words src[5] .. src[8]

    pmullw      mm5, mm2                        ; * HFilter[1]
    pmullw      mm6, mm2

    paddw       mm3, mm5                        ; horizontal sums
    paddw       mm4, mm6

    movq        mm5, mm7                        ; previous filtered row
    movq        mm6, mm7

    punpcklbw   mm5, mm0                        ; previous row, pixels 0..3
    punpckhbw   mm6, mm0                        ; previous row, pixels 4..7

    pmullw      mm5, [rax]                      ; * VFilter[0]
    pmullw      mm6, [rax]

    paddw       mm3, [GLOBAL(rd)]               ; + 64 (rounding)
    psraw       mm3, VP8_FILTER_SHIFT           ; >> 7

    paddw       mm4, [GLOBAL(rd)]
    psraw       mm4, VP8_FILTER_SHIFT

    movq        mm7, mm3                        ; current row becomes the
    packuswb    mm7, mm4                        ; "previous" row next time


    pmullw      mm3, [rax + 16]                 ; * VFilter[1]
    pmullw      mm4, [rax + 16]

    paddw       mm3, mm5                        ; vertical sums
    paddw       mm4, mm6


    paddw       mm3, [GLOBAL(rd)]               ; + 64 (rounding)
    psraw       mm3, VP8_FILTER_SHIFT           ; >> 7

    paddw       mm4, [GLOBAL(rd)]
    psraw       mm4, VP8_FILTER_SHIFT

    packuswb    mm3, mm4                        ; eight result bytes

    movq        [rdi], mm3                      ; write one destination row

%if ABI_IS_32BIT
    add         rsi, rdx                        ; next source row
    add         rdi, dword ptr arg(5)           ; dst += dst_pitch
%else
    movsxd      r8, dword ptr arg(5)            ; r8 = dst_pitch
    add         rsi, rdx                        ; next source row
    add         rdi, r8                         ; dst += dst_pitch
%endif
    cmp         rdi, rcx                        ; reached the end address?
    jne         .next_row_8x8

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------------
;void bilinear_predict8x4_mmx
;(
;    unsigned char  *src_ptr,
;    int             src_pixels_per_line,
;    int             xoffset,
;    int             yoffset,
;    unsigned char  *dst_ptr,
;    int             dst_pitch
;)
;
; Same two-pass bilinear scheme as the 8x8 routine, eight pixels per row,
; but the loop ends when rdi reaches dst_ptr + 4 * dst_pitch.
;
; Register roles inside the loop:
;   rsi = current source row          rdx = src_pixels_per_line
;   rdi = current destination row     rcx = destination end address
;   rax = vertical filter pair        mm0 = zero
;   mm1 / mm2 = horizontal weights for src[x] / src[x + 1]
;   mm7 = previous horizontally filtered row, packed to bytes
;-----------------------------------------------------------------------------
global sym(vp8_bilinear_predict8x4_mmx)
sym(vp8_bilinear_predict8x4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ;const short *HFilter = bilinear_filters_mmx[xoffset];
    ;const short *VFilter = bilinear_filters_mmx[yoffset];

    movsxd      rax, dword ptr arg(2)           ; rax = xoffset
    mov         rdi, arg(4)                     ; rdi -> dst_ptr

    lea         rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
    shl         rax, 5                          ; 32 bytes per filter pair

    mov         rsi, arg(0)                     ; rsi -> src_ptr
    add         rax, rcx                        ; rax -> HFilter

    movsxd      rdx, dword ptr arg(5)           ; rdx = dst_pitch
    movq        mm1, [rax]                      ; HFilter[0]

    movq        mm2, [rax + 16]                 ; HFilter[1]
    movsxd      rax, dword ptr arg(3)           ; rax = yoffset

    pxor        mm0, mm0                        ; zero, used for unpacking
    shl         rax, 5                          ; 32 bytes per filter pair

    add         rax, rcx                        ; rax -> VFilter
    lea         rcx, [rdi + rdx*4]              ; end = dst_ptr + 4 * dst_pitch

    movsxd      rdx, dword ptr arg(1)           ; rdx = src_pixels_per_line

    ; horizontal pass over the first source row
    movq        mm3, [rsi]                      ; bytes src[0] .. src[7]
    movq        mm4, mm3                        ; copy for the upper half

    punpcklbw   mm3, mm0                        ; words src[0] .. src[3]
    punpckhbw   mm4, mm0                        ; words src[4] .. src[7]

    pmullw      mm3, mm1                        ; * HFilter[0]
    pmullw      mm4, mm1

    movq        mm5, [rsi + 1]                  ; bytes src[1] .. src[8]
    movq        mm6, mm5                        ; copy for the upper half

    punpcklbw   mm5, mm0                        ; words src[1] .. src[4]
    punpckhbw   mm6, mm0                        ; words src[5] .. src[8]

    pmullw      mm5, mm2                        ; * HFilter[1]
    pmullw      mm6, mm2

    paddw       mm3, mm5                        ; horizontal sums
    paddw       mm4, mm6

    paddw       mm3, [GLOBAL(rd)]               ; + 64 (rounding)
    psraw       mm3, VP8_FILTER_SHIFT           ; >> 7

    paddw       mm4, [GLOBAL(rd)]
    psraw       mm4, VP8_FILTER_SHIFT

    movq        mm7, mm3                        ; remember this row as bytes
    packuswb    mm7, mm4                        ; for the vertical pass

    add         rsi, rdx                        ; next source row
.next_row_8x4:
    movq        mm3, [rsi]                      ; bytes src[0] .. src[7]
    movq        mm4, mm3                        ; copy for the upper half

    punpcklbw   mm3, mm0                        ; words src[0] .. src[3]
    punpckhbw   mm4, mm0                        ; words src[4] .. src[7]

    pmullw      mm3, mm1                        ; * HFilter[0]
    pmullw      mm4, mm1

    movq        mm5, [rsi + 1]                  ; bytes src[1] .. src[8]
    movq        mm6, mm5                        ; copy for the upper half

    punpcklbw   mm5, mm0                        ; words src[1] .. src[4]
    punpckhbw   mm6, mm0                        ; words src[5] .. src[8]

    pmullw      mm5, mm2                        ; * HFilter[1]
    pmullw      mm6, mm2

    paddw       mm3, mm5                        ; horizontal sums
    paddw       mm4, mm6

    movq        mm5, mm7                        ; previous filtered row
    movq        mm6, mm7

    punpcklbw   mm5, mm0                        ; previous row, pixels 0..3
    punpckhbw   mm6, mm0                        ; previous row, pixels 4..7

    pmullw      mm5, [rax]                      ; * VFilter[0]
    pmullw      mm6, [rax]

    paddw       mm3, [GLOBAL(rd)]               ; + 64 (rounding)
    psraw       mm3, VP8_FILTER_SHIFT           ; >> 7

    paddw       mm4, [GLOBAL(rd)]
    psraw       mm4, VP8_FILTER_SHIFT

    movq        mm7, mm3                        ; current row becomes the
    packuswb    mm7, mm4                        ; "previous" row next time


    pmullw      mm3, [rax + 16]                 ; * VFilter[1]
    pmullw      mm4, [rax + 16]

    paddw       mm3, mm5                        ; vertical sums
    paddw       mm4, mm6


    paddw       mm3, [GLOBAL(rd)]               ; + 64 (rounding)
    psraw       mm3, VP8_FILTER_SHIFT           ; >> 7

    paddw       mm4, [GLOBAL(rd)]
    psraw       mm4, VP8_FILTER_SHIFT

    packuswb    mm3, mm4                        ; eight result bytes

    movq        [rdi], mm3                      ; write one destination row

%if ABI_IS_32BIT
    add         rsi, rdx                        ; next source row
    add         rdi, dword ptr arg(5)           ; dst += dst_pitch
%else
    movsxd      r8, dword ptr arg(5)            ; r8 = dst_pitch
    add         rsi, rdx                        ; next source row
    add         rdi, r8                         ; dst += dst_pitch
%endif
    cmp         rdi, rcx                        ; reached the end address?
    jne         .next_row_8x4

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------------
;void bilinear_predict4x4_mmx
;(
;    unsigned char  *src_ptr,
;    int             src_pixels_per_line,
;    int             xoffset,
;    int             yoffset,
;    unsigned char  *dst_ptr,
;    int             dst_pitch
;)
;
; Two-pass bilinear prediction, four pixels per row (one MMX register of
; words per row).  The loop ends when rdi reaches dst_ptr + 4 * dst_pitch.
;
; Register roles inside the loop:
;   rsi = current source row          rdx = src_pixels_per_line
;   rdi = current destination row     rcx = destination end address
;   rax = vertical filter pair        mm0 = zero
;   mm1 / mm2 = horizontal weights for src[x] / src[x + 1]
;   mm7 = previous horizontally filtered row, packed to bytes
;-----------------------------------------------------------------------------
global sym(vp8_bilinear_predict4x4_mmx)
sym(vp8_bilinear_predict4x4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ;const short *HFilter = bilinear_filters_mmx[xoffset];
    ;const short *VFilter = bilinear_filters_mmx[yoffset];

    movsxd      rax, dword ptr arg(2)           ; rax = xoffset
    mov         rdi, arg(4)                     ; rdi -> dst_ptr

    lea         rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
    shl         rax, 5                          ; 32 bytes per filter pair

    add         rax, rcx                        ; rax -> HFilter
    mov         rsi, arg(0)                     ; rsi -> src_ptr

    movsxd      rdx, dword ptr arg(5)           ; rdx = dst_pitch
    movq        mm1, [rax]                      ; HFilter[0]

    movq        mm2, [rax + 16]                 ; HFilter[1]
    movsxd      rax, dword ptr arg(3)           ; rax = yoffset

    pxor        mm0, mm0                        ; zero, used for unpacking
    shl         rax, 5                          ; 32 bytes per filter pair

    add         rax, rcx                        ; rax -> VFilter
    lea         rcx, [rdi + rdx*4]              ; end = dst_ptr + 4 * dst_pitch

    movsxd      rdx, dword ptr arg(1)           ; rdx = src_pixels_per_line

    ; horizontal pass over the first source row
    movd        mm3, [rsi]                      ; bytes src[0] .. src[3]
    punpcklbw   mm3, mm0                        ; words src[0] .. src[3]

    pmullw      mm3, mm1                        ; * HFilter[0]
    movd        mm5, [rsi + 1]                  ; bytes src[1] .. src[4]

    punpcklbw   mm5, mm0                        ; words src[1] .. src[4]
    pmullw      mm5, mm2                        ; * HFilter[1]

    paddw       mm3, mm5                        ; horizontal sum
    paddw       mm3, [GLOBAL(rd)]               ; + 64 (rounding)

    psraw       mm3, VP8_FILTER_SHIFT           ; >> 7

    movq        mm7, mm3                        ; remember this row as bytes
    packuswb    mm7, mm0                        ; for the vertical pass

    add         rsi, rdx                        ; next source row
.next_row_4x4:
    movd        mm3, [rsi]                      ; bytes src[0] .. src[3]
    punpcklbw   mm3, mm0                        ; words src[0] .. src[3]

    pmullw      mm3, mm1                        ; * HFilter[0]
    movd        mm5, [rsi + 1]                  ; bytes src[1] .. src[4]

    punpcklbw   mm5, mm0                        ; words src[1] .. src[4]
    pmullw      mm5, mm2                        ; * HFilter[1]

    paddw       mm3, mm5                        ; horizontal sum

    movq        mm5, mm7                        ; previous filtered row
    punpcklbw   mm5, mm0                        ; widened back to words

    pmullw      mm5, [rax]                      ; * VFilter[0]
    paddw       mm3, [GLOBAL(rd)]               ; + 64 (rounding)

    psraw       mm3, VP8_FILTER_SHIFT           ; >> 7
    movq        mm7, mm3                        ; current row becomes the

    packuswb    mm7, mm0                        ; "previous" row next time

    pmullw      mm3, [rax + 16]                 ; * VFilter[1]
    paddw       mm3, mm5                        ; vertical sum


    paddw       mm3, [GLOBAL(rd)]               ; + 64 (rounding)
    psraw       mm3, VP8_FILTER_SHIFT           ; >> 7

    packuswb    mm3, mm0                        ; four result bytes
    movd        [rdi], mm3                      ; write one destination row

%if ABI_IS_32BIT
    add         rsi, rdx                        ; next source row
    add         rdi, dword ptr arg(5)           ; dst += dst_pitch
%else
    movsxd      r8, dword ptr arg(5)            ; r8 = dst_pitch
    add         rsi, rdx                        ; next source row
    add         rdi, r8                         ; dst += dst_pitch
%endif

    cmp         rdi, rcx                        ; reached the end address?
    jne         .next_row_4x4

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret



; Emit one six-tap coefficient set: each of the six taps is replicated into
; eight consecutive words (16 bytes per tap, 96 bytes per set).
%macro SIX_TAP_SET 6
    times 8 dw %1
    times 8 dw %2
    times 8 dw %3
    times 8 dw %4
    times 8 dw %5
    times 8 dw %6
%endmacro

; Emit one bilinear weight pair: each weight is replicated into eight
; consecutive words (16 bytes per weight, 32 bytes per pair).
%macro BILINEAR_PAIR 2
    times 8 dw %1
    times 8 dw %2
%endmacro


SECTION_RODATA
; Rounding constant: 64, i.e. half of 1 << VP8_FILTER_SHIFT, in four words.
align 16
rd:
    times 4 dw 0x40

; Six-tap coefficient sets, one per line, taps 0 .. 5 from left to right.
align 16
global HIDDEN_DATA(sym(vp8_six_tap_mmx))
sym(vp8_six_tap_mmx):
    SIX_TAP_SET     0,   0, 128,   0,   0,   0

    SIX_TAP_SET     0,  -6, 123,  12,  -1,   0

    SIX_TAP_SET     2, -11, 108,  36,  -8,   1

    SIX_TAP_SET     0,  -9,  93,  50,  -6,   0

    SIX_TAP_SET     3, -16,  77,  77, -16,   3

    SIX_TAP_SET     0,  -6,  50,  93,  -9,   0

    SIX_TAP_SET     1,  -8,  36, 108, -11,   2

    SIX_TAP_SET     0,  -1,  12, 123,  -6,   0


; Bilinear weight pairs, indexed by xoffset / yoffset in the routines above;
; the two weights of every pair sum to 128.
align 16
global HIDDEN_DATA(sym(vp8_bilinear_filters_mmx))
sym(vp8_bilinear_filters_mmx):
    BILINEAR_PAIR   128,   0

    BILINEAR_PAIR   112,  16

    BILINEAR_PAIR    96,  32

    BILINEAR_PAIR    80,  48

    BILINEAR_PAIR    64,  64

    BILINEAR_PAIR    48,  80

    BILINEAR_PAIR    32,  96

    BILINEAR_PAIR    16, 112