Home | History | Annotate | Download | only in armv6
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
    ; Entry points made visible to the linker.
    EXPORT |vp8_short_inv_walsh4x4_v6|
    EXPORT |vp8_short_inv_walsh4x4_1_v6|

    ARM                                  ; assemble what follows as ARM (not Thumb) instructions
    REQUIRE8                             ; declares that this code needs an 8-byte aligned stack
    PRESERVE8                            ; declares that this code keeps the stack 8-byte aligned

    AREA    |.text|, CODE, READONLY  ; name this block of code
     19 
     20 ;short vp8_short_inv_walsh4x4_v6(short *input, short *output)
     21 |vp8_short_inv_walsh4x4_v6| PROC
     22 
     23     stmdb       sp!, {r4 - r11, lr}
     24 
     25     ldr         r2, [r0], #4         ; [1  |  0]
     26     ldr         r3, [r0], #4         ; [3  |  2]
     27     ldr         r4, [r0], #4         ; [5  |  4]
     28     ldr         r5, [r0], #4         ; [7  |  6]
     29     ldr         r6, [r0], #4         ; [9  |  8]
     30     ldr         r7, [r0], #4         ; [11 | 10]
     31     ldr         r8, [r0], #4         ; [13 | 12]
     32     ldr         r9, [r0]             ; [15 | 14]
     33 
     34     qadd16      r10, r2, r8          ; a1 [1+13  |  0+12]
     35     qadd16      r11, r4, r6          ; b1 [5+9   |  4+8]
     36     qsub16      r12, r4, r6          ; c1 [5-9   |  4-8]
     37     qsub16      lr, r2, r8           ; d1 [1-13  |  0-12]
     38 
     39     qadd16      r2, r10, r11         ; a1 + b1 [1  |  0]
     40     qadd16      r4, r12, lr          ; c1 + d1 [5  |  4]
     41     qsub16      r6, r10, r11         ; a1 - b1 [9  |  8]
     42     qsub16      r8, lr, r12          ; d1 - c1 [13 | 12]
     43 
     44     qadd16      r10, r3, r9          ; a1 [3+15  |  2+14]
     45     qadd16      r11, r5, r7          ; b1 [7+11  |  6+10]
     46     qsub16      r12, r5, r7          ; c1 [7-11  |  6-10]
     47     qsub16      lr, r3, r9           ; d1 [3-15  |  2-14]
     48 
     49     qadd16      r3, r10, r11         ; a1 + b1 [3  |  2]
     50     qadd16      r5, r12, lr          ; c1 + d1 [7  |  6]
     51     qsub16      r7, r10, r11         ; a1 - b1 [11 | 10]
     52     qsub16      r9, lr, r12          ; d1 - c1 [15 | 14]
     53 
     54     ; first transform complete
     55 
     56     qsubaddx    r10, r2, r3          ; [c1|a1] [1-2   |   0+3]
     57     qaddsubx    r11, r2, r3          ; [b1|d1] [1+2   |   0-3]
     58     qsubaddx    r12, r4, r5          ; [c1|a1] [5-6   |   4+7]
     59     qaddsubx    lr, r4, r5           ; [b1|d1] [5+6   |   4-7]
     60 
     61     qaddsubx    r2, r10, r11         ; [b2|c2] [c1+d1 | a1-b1]
     62     qaddsubx    r3, r11, r10         ; [a2|d2] [b1+a1 | d1-c1]
     63     ldr         r10, c0x00030003
     64     qaddsubx    r4, r12, lr          ; [b2|c2] [c1+d1 | a1-b1]
     65     qaddsubx    r5, lr, r12          ; [a2|d2] [b1+a1 | d1-c1]
     66 
     67     qadd16      r2, r2, r10          ; [b2+3|c2+3]
     68     qadd16      r3, r3, r10          ; [a2+3|d2+3]
     69     qadd16      r4, r4, r10          ; [b2+3|c2+3]
     70     qadd16      r5, r5, r10          ; [a2+3|d2+3]
     71 
     72     asr         r12, r2, #3          ; [1  |  x]
     73     pkhtb       r12, r12, r3, asr #19; [1  |  0]
     74     lsl         lr, r3, #16          ; [~3 |  x]
     75     lsl         r2, r2, #16          ; [~2 |  x]
     76     asr         lr, lr, #3           ; [3  |  x]
     77     pkhtb       lr, lr, r2, asr #19  ; [3  |  2]
     78 
     79     asr         r2, r4, #3           ; [5  |  x]
     80     pkhtb       r2, r2, r5, asr #19  ; [5  |  4]
     81     lsl         r3, r5, #16          ; [~7 |  x]
     82     lsl         r4, r4, #16          ; [~6 |  x]
     83     asr         r3, r3, #3           ; [7  |  x]
     84     pkhtb       r3, r3, r4, asr #19  ; [7  |  6]
     85 
     86     str         r12, [r1], #4
     87     str         lr, [r1], #4
     88     str         r2, [r1], #4
     89     str         r3, [r1], #4
     90 
     91     qsubaddx    r2, r6, r7           ; [c1|a1] [9-10  |  8+11]
     92     qaddsubx    r3, r6, r7           ; [b1|d1] [9+10  |  8-11]
     93     qsubaddx    r4, r8, r9           ; [c1|a1] [13-14 | 12+15]
     94     qaddsubx    r5, r8, r9           ; [b1|d1] [13+14 | 12-15]
     95 
     96     qaddsubx    r6, r2, r3           ; [b2|c2] [c1+d1 | a1-b1]
     97     qaddsubx    r7, r3, r2           ; [a2|d2] [b1+a1 | d1-c1]
     98     qaddsubx    r8, r4, r5           ; [b2|c2] [c1+d1 | a1-b1]
     99     qaddsubx    r9, r5, r4           ; [a2|d2] [b1+a1 | d1-c1]
    100 
    101     qadd16      r6, r6, r10          ; [b2+3|c2+3]
    102     qadd16      r7, r7, r10          ; [a2+3|d2+3]
    103     qadd16      r8, r8, r10          ; [b2+3|c2+3]
    104     qadd16      r9, r9, r10          ; [a2+3|d2+3]
    105 
    106     asr         r2, r6, #3           ; [9  |  x]
    107     pkhtb       r2, r2, r7, asr #19  ; [9  |  8]
    108     lsl         r3, r7, #16          ; [~11|  x]
    109     lsl         r4, r6, #16          ; [~10|  x]
    110     asr         r3, r3, #3           ; [11 |  x]
    111     pkhtb       r3, r3, r4, asr #19  ; [11 | 10]
    112 
    113     asr         r4, r8, #3           ; [13 |  x]
    114     pkhtb       r4, r4, r9, asr #19  ; [13 | 12]
    115     lsl         r5, r9, #16          ; [~15|  x]
    116     lsl         r6, r8, #16          ; [~14|  x]
    117     asr         r5, r5, #3           ; [15 |  x]
    118     pkhtb       r5, r5, r6, asr #19  ; [15 | 14]
    119 
    120     str         r2, [r1], #4
    121     str         r3, [r1], #4
    122     str         r4, [r1], #4
    123     str         r5, [r1]
    124 
    125     ldmia       sp!, {r4 - r11, pc}
    126     ENDP        ; |vp8_short_inv_walsh4x4_v6|
    127 
    128 
    129 ;short vp8_short_inv_walsh4x4_1_v6(short *input, short *output)
    130 |vp8_short_inv_walsh4x4_1_v6| PROC
    131 
    132     ldrsh       r2, [r0]             ; [0]
    133     add         r2, r2, #3           ; [0] + 3
    134     asr         r2, r2, #3           ; a1 ([0]+3) >> 3
    135     lsl         r2, r2, #16          ; [a1 |  x]
    136     orr         r2, r2, r2, lsr #16  ; [a1 | a1]
    137 
    138     str         r2, [r1], #4
    139     str         r2, [r1], #4
    140     str         r2, [r1], #4
    141     str         r2, [r1], #4
    142     str         r2, [r1], #4
    143     str         r2, [r1], #4
    144     str         r2, [r1], #4
    145     str         r2, [r1]
    146 
    147     bx          lr
    148     ENDP        ; |vp8_short_inv_walsh4x4_1_v6|
    149 
; Constant Pool
; 0x0003 in each halfword: the rounding bias that vp8_short_inv_walsh4x4_v6
; loads into r10 and adds to both packed 16-bit lanes (qadd16) before the
; final shift right by 3.
c0x00030003 DCD 0x00030003
    END
    153