Home | History | Annotate | Download | only in armv6
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11     EXPORT |vp8_short_walsh4x4_armv6|    ; make the entry point visible to the linker
     12 
     13     ARM                                  ; assemble the following code as ARM (not Thumb) instructions
     14     REQUIRE8                             ; declare that this code requires 8-byte stack alignment
     15     PRESERVE8                            ; declare that this code preserves 8-byte stack alignment
     16 
     17     AREA    |.text|, CODE, READONLY  ; place what follows in a read-only code section named .text
     18 
     19 ;short vp8_short_walsh4x4_armv6(short *input, short *output, int pitch)
        ;
        ; Two-pass 4x4 add/subtract (Walsh-style, per the routine's name)
        ; transform over 16 signed halfwords, followed by a rounding halving
        ; of every result.
        ;
        ; In:   r0 = input   four rows of four halfwords; consecutive rows are
        ;                    `pitch` bytes apart (pitch is added to r0 as a
        ;                    byte offset between row loads)
        ;       r1 = output  receives 16 halfwords, stored contiguously
        ;       r2 = pitch   row stride in bytes
        ; Out:  no result is placed in r0.
        ;       NOTE(review): the prototype comment above says `short`, but r0
        ;       only ever holds the advancing input pointer -- presumably the
        ;       C prototype is void; confirm against the caller's declaration.
        ; Clobbers: r0-r3, r12, flags.  r4-r11 and lr are saved and restored.
        ;           r0 ends at input + 3*pitch, r1 at output + 28.
        ;
        ; Lane notation used in the comments: [hi | lo] = [bits 31:16 | bits 15:0];
        ; plain numbers are element indices (row-major, 0..15).  Element 0
        ; landing in the low half of the first word assumes little-endian data.
        ;
        ; All packed arithmetic uses the saturating q* forms, so intermediate
        ; halfwords clamp instead of wrapping.
     20 |vp8_short_walsh4x4_armv6| PROC
     21 
        ; Nine words (36 bytes) are pushed.
        ; NOTE(review): 36 is not a multiple of 8 although the file declares
        ; PRESERVE8; no call is made from this routine, so this looks harmless,
        ; but confirm it is intended.
     22     stmdb       sp!, {r4 - r11, lr}
     23 
        ; Load the 4x4 block, two halfwords per word, one row per register pair.
        ; The writeback loads ("!") advance r0 by pitch to the next row.
     24     mov         r12, r2              ; r12 = pitch, freeing r2 to hold data
     25     ldr         r2, [r0]             ; [1  |  0]
     26     ldr         r3, [r0, #4]         ; [3  |  2]
     27     ldr         r4, [r0, r12]!       ; [5  |  4]   r0 -> row 1
     28     ldr         r5, [r0, #4]         ; [7  |  6]
     29     ldr         r6, [r0, r12]!       ; [9  |  8]   r0 -> row 2
     30     ldr         r7, [r0, #4]         ; [11 | 10]
     31     ldr         r8, [r0, r12]!       ; [13 | 12]   r0 -> row 3
     32     ldr         r9, [r0, #4]         ; [15 | 14]
     33 
        ; First pass (within each row).  The exchanging add/sub forms pair the
        ; high half of the first operand with the low half of the second and
        ; vice versa, which yields a1 = x0+x3, b1 = x1+x2, c1 = x1-x2,
        ; d1 = x0-x3 from the two words of a row.  Rows 0 and 1 first.
     34     qsubaddx    r10, r2, r3          ; [c1|a1] [1-2   |   0+3]
     35     qaddsubx    r11, r2, r3          ; [b1|d1] [1+2   |   0-3]
     36     qsubaddx    r12, r4, r5          ; [c1|a1] [5-6   |   4+7]
     37     qaddsubx    lr, r4, r5           ; [b1|d1] [5+6   |   4-7]
     38 
        ; Combine into row outputs.  Note the resulting lane layout: one
        ; register holds columns [1 | 2], the other columns [0 | 3].
     39     qaddsubx    r2, r10, r11         ; [1 | 2] [c1+d1 | a1-b1]
     40     qaddsubx    r3, r11, r10         ; [0 | 3] [b1+a1 | d1-c1]
     41     qaddsubx    r4, r12, lr          ; [5 | 6] [c1+d1 | a1-b1]
     42     qaddsubx    r5, lr, r12          ; [4 | 7] [b1+a1 | d1-c1]
     43 
        ; Same first-pass steps for rows 2 and 3.
     44     qsubaddx    r10, r6, r7          ; [c1|a1] [9-10  |  8+11]
     45     qaddsubx    r11, r6, r7          ; [b1|d1] [9+10  |  8-11]
     46     qsubaddx    r12, r8, r9          ; [c1|a1] [13-14 | 12+15]
     47     qaddsubx    lr, r8, r9           ; [b1|d1] [13+14 | 12-15]
     48 
     49     qaddsubx    r6, r10, r11         ; [9 |10] [c1+d1 | a1-b1]
     50     qaddsubx    r7, r11, r10         ; [8 |11] [b1+a1 | d1-c1]
     51     qaddsubx    r8, r12, lr          ; [13|14] [c1+d1 | a1-b1]
     52     qaddsubx    r9, lr, r12          ; [12|15] [b1+a1 | d1-c1]
     53 
     54     ; first transform complete
        ; State here: r3/r5/r7/r9 hold columns 0 (hi) and 3 (lo) of rows 0..3,
        ;             r2/r4/r6/r8 hold columns 1 (hi) and 2 (lo) of rows 0..3.
     55 
        ; Second pass (down the columns).  Because each register already holds
        ; two columns in matching lanes, plain lane-wise qadd16/qsub16 process
        ; two columns at once.  Columns 0 and 3 first.
     56     qadd16      r10, r3, r9          ; a1 [0+12  |  3+15]
     57     qadd16      r11, r5, r7          ; b1 [4+8   |  7+11]
     58     qsub16      r12, r5, r7          ; c1 [4-8   |  7-11]
     59     qsub16      lr, r3, r9           ; d1 [0-12  |  3-15]
     60 
     61     qadd16      r3, r10, r11         ; a2 [a1+b1] [0 | 3]
     62     qadd16      r5, r12, lr          ; b2 [c1+d1] [4 | 7]
     63     qsub16      r7, r10, r11         ; c2 [a1-b1] [8 |11]
     64     qsub16      r9, lr, r12          ; d2 [d1-c1] [12|15]
     65 
        ; Columns 1 and 2.
     66     qadd16      r10, r2, r8          ; a1 [1+13  |  2+14]
     67     qadd16      r11, r4, r6          ; b1 [5+9   |  6+10]
     68     qsub16      r12, r4, r6          ; c1 [5-9   |  6-10]
     69     qsub16      lr, r2, r8           ; d1 [1-13  |  2-14]
     70 
     71     qadd16      r2, r10, r11         ; a2 [a1+b1] [1 | 2]
     72     qadd16      r4, r12, lr          ; b2 [c1+d1] [5 | 6]
     73     qsub16      r6, r10, r11         ; c2 [a1-b1] [9 |10]
     74     qsub16      r8, lr, r12          ; d2 [d1-c1] [13|14]
     75 
        ; Rounding and packing.  Each second-pass result x is stored as
        ; (x + 1) >> 1 when x >= 0 and as x >> 1 when x < 0.  addpl also fires
        ; for x == 0, unlike the "> 0" in the formula below, but
        ; (0 + 1) >> 1 == 0, so the stored value is the same.
        ;
        ; Elements held in a high halfword are extracted with asrs #16
        ; (sign-extended) and biased with +1; elements held in a low halfword
        ; are moved to the top with lsls #16 and biased with +0x10000.  The
        ; shifts folded into lsl/asr/pkhtb perform the halving and place each
        ; value in its output lane.  "[~n]" marks element n biased but not yet
        ; halved.
        ; NOTE(review): in the lsls path a halfword of 0x7FFF becomes
        ; 0x80000000 after the +0x10000, which the following asr treats as
        ; negative.  Confirm results can never reach 0x7FFF here.
     76     ; [a-d]2 += ([a-d]2 > 0)
     77 
     78     asrs        r10, r3, #16         ; r10 = element 0, sign-extended; N set if negative
     79     addpl       r10, r10, #1         ; [~0]
     80     asrs        r11, r2, #16         ; r11 = element 1, sign-extended
     81     addpl       r11, r11, #1         ; [~1]
     82     lsl         r11, r11, #15        ; [1  |  x]
     83     pkhtb       r10, r11, r10, asr #1; [1  |  0]
     84     str         r10, [r1], #4        ; store outputs 0,1; r1 += 4
     85 
     86     lsls        r11, r2, #16         ; element 2 moved into the high half
     87     addpl       r11, r11, #0x10000   ; [~2]
     88     lsls        r12, r3, #16         ; element 3 moved into the high half
     89     addpl       r12, r12, #0x10000   ; [~3]
     90     asr         r12, r12, #1         ; [3  |  x]
     91     pkhtb       r11, r12, r11, asr #17; [3  |  2]
     92     str         r11, [r1], #4        ; store outputs 2,3
     93 
        ; The remaining six groups repeat the two patterns above using r2/r3
        ; as scratch; their second-pass values were consumed by the first two
        ; groups, so they are free to overwrite.
     94     asrs        r2, r5, #16          ; element 4
     95     addpl       r2, r2, #1           ; [~4]
     96     asrs        r3, r4, #16          ; element 5
     97     addpl       r3, r3, #1           ; [~5]
     98     lsl         r3, r3, #15          ; [5  |  x]
     99     pkhtb       r2, r3, r2, asr #1   ; [5  |  4]
    100     str         r2, [r1], #4         ; store outputs 4,5
    101 
    102     lsls        r2, r4, #16          ; element 6
    103     addpl       r2, r2, #0x10000     ; [~6]
    104     lsls        r3, r5, #16          ; element 7
    105     addpl       r3, r3, #0x10000     ; [~7]
    106     asr         r3, r3, #1           ; [7  |  x]
    107     pkhtb       r2, r3, r2, asr #17  ; [7  |  6]
    108     str         r2, [r1], #4         ; store outputs 6,7
    109 
    110     asrs        r2, r7, #16          ; element 8
    111     addpl       r2, r2, #1           ; [~8]
    112     asrs        r3, r6, #16          ; element 9
    113     addpl       r3, r3, #1           ; [~9]
    114     lsl         r3, r3, #15          ; [9  |  x]
    115     pkhtb       r2, r3, r2, asr #1   ; [9  |  8]
    116     str         r2, [r1], #4         ; store outputs 8,9
    117 
    118     lsls        r2, r6, #16          ; element 10
    119     addpl       r2, r2, #0x10000     ; [~10]
    120     lsls        r3, r7, #16          ; element 11
    121     addpl       r3, r3, #0x10000     ; [~11]
    122     asr         r3, r3, #1           ; [11 |  x]
    123     pkhtb       r2, r3, r2, asr #17  ; [11 | 10]
    124     str         r2, [r1], #4         ; store outputs 10,11
    125 
    126     asrs        r2, r9, #16          ; element 12
    127     addpl       r2, r2, #1           ; [~12]
    128     asrs        r3, r8, #16          ; element 13
    129     addpl       r3, r3, #1           ; [~13]
    130     lsl         r3, r3, #15          ; [13 |  x]
    131     pkhtb       r2, r3, r2, asr #1   ; [13 | 12]
    132     str         r2, [r1], #4         ; store outputs 12,13
    133 
    134     lsls        r2, r8, #16          ; element 14
    135     addpl       r2, r2, #0x10000     ; [~14]
    136     lsls        r3, r9, #16          ; element 15
    137     addpl       r3, r3, #0x10000     ; [~15]
    138     asr         r3, r3, #1           ; [15 |  x]
    139     pkhtb       r2, r3, r2, asr #17  ; [15 | 14]
    140     str         r2, [r1]             ; store outputs 14,15 (no post-increment on the last word)
    141 
    142     ldmia       sp!, {r4 - r11, pc}  ; restore saved registers and return (saved lr is popped into pc)
    143     ENDP        ; |vp8_short_walsh4x4_armv6|
    144 
    145     END                              ; end of the assembly source
    146