;# Home | History | Annotate | Download | only in ppc
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     .globl vp8_subtract_mbuv_ppc
     13     .globl vp8_subtract_mby_ppc
     14 
     15 ;# r3 short *diff
     16 ;# r4 unsigned char *usrc
     17 ;# r5 unsigned char *vsrc
     18 ;# r6 unsigned char *pred
     19 ;# r7 int stride
     20 vp8_subtract_mbuv_ppc:
     21     mfspr   r11, 256            ;# get old VRSAVE
     22     oris    r12, r11, 0xf000
     23     mtspr   256, r12            ;# set VRSAVE
     24 
     25     li      r9, 256
     26     add     r3, r3, r9
     27     add     r3, r3, r9
     28     add     r6, r6, r9
     29 
     30     li      r10, 16
     31     li      r9,  4
     32     mtctr   r9
     33 
     34     vspltisw v0, 0
     35 
     36 mbu_loop:
     37     lvsl    v5, 0, r4           ;# permutate value for alignment
     38     lvx     v1, 0, r4           ;# src
     39     lvx     v2, 0, r6           ;# pred
     40 
     41     add     r4, r4, r7
     42     addi    r6, r6, 16
     43 
     44     vperm   v1, v1, v0, v5
     45 
     46     vmrghb  v3, v0, v1          ;# unpack high src  to short
     47     vmrghb  v4, v0, v2          ;# unpack high pred to short
     48 
     49     lvsl    v5, 0, r4           ;# permutate value for alignment
     50     lvx     v1, 0, r4           ;# src
     51 
     52     add     r4, r4, r7
     53 
     54     vsubshs v3, v3, v4
     55 
     56     stvx    v3, 0, r3           ;# store out diff
     57 
     58     vperm   v1, v1, v0, v5
     59 
     60     vmrghb  v3, v0, v1          ;# unpack high src  to short
     61     vmrglb  v4, v0, v2          ;# unpack high pred to short
     62 
     63     vsubshs v3, v3, v4
     64 
     65     stvx    v3, r10, r3         ;# store out diff
     66 
     67     addi    r3, r3, 32
     68 
     69     bdnz    mbu_loop
     70 
     71     mtctr   r9
     72 
     73 mbv_loop:
     74     lvsl    v5, 0, r5           ;# permutate value for alignment
     75     lvx     v1, 0, r5           ;# src
     76     lvx     v2, 0, r6           ;# pred
     77 
     78     add     r5, r5, r7
     79     addi    r6, r6, 16
     80 
     81     vperm   v1, v1, v0, v5
     82 
     83     vmrghb  v3, v0, v1          ;# unpack high src  to short
     84     vmrghb  v4, v0, v2          ;# unpack high pred to short
     85 
     86     lvsl    v5, 0, r5           ;# permutate value for alignment
     87     lvx     v1, 0, r5           ;# src
     88 
     89     add     r5, r5, r7
     90 
     91     vsubshs v3, v3, v4
     92 
     93     stvx    v3, 0, r3           ;# store out diff
     94 
     95     vperm   v1, v1, v0, v5
     96 
     97     vmrghb  v3, v0, v1          ;# unpack high src  to short
     98     vmrglb  v4, v0, v2          ;# unpack high pred to short
     99 
    100     vsubshs v3, v3, v4
    101 
    102     stvx    v3, r10, r3         ;# store out diff
    103 
    104     addi    r3, r3, 32
    105 
    106     bdnz    mbv_loop
    107 
    108     mtspr   256, r11            ;# reset old VRSAVE
    109 
    110     blr
    111 
;#-----------------------------------------------------------------------
;# void vp8_subtract_mby_ppc(short *diff, unsigned char *src,
;#                           unsigned char *pred, int stride)
;#
;# r3 short *diff          16x16 luma diff output (one short per pixel)
;# r4 unsigned char *src   source luma, advanced by stride per row
;# r5 unsigned char *pred  prediction, contiguous 16-byte rows
;# r6 int stride           source row stride in bytes
;#
;# Computes diff = src - pred for the 16x16 Y block, one 16-pixel row
;# per loop pass (high 8 bytes, then low 8 bytes, each widened to
;# shorts before the saturating subtract).
;# NOTE(review): src is loaded with a plain lvx and no lvsl/vperm
;# fixup, unlike the chroma routine above, so it appears to assume a
;# 16-byte-aligned src pointer -- confirm with callers.
;# Clobbers: r10, r12, v0-v4, ctr (r4/r5 advanced past input).
;#-----------------------------------------------------------------------
vp8_subtract_mby_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xf800    ;# flag v0-v4 as in use in VRSAVE
    mtspr   256, r12            ;# set VRSAVE

    li      r10, 16             ;# byte offset of the second store; also
    mtctr   r10                 ;#  reused as the loop count: 16 rows

    vspltisw v0, 0              ;# zero vector for byte->short unpacking

mby_loop:
    lvx     v1, 0, r4           ;# src
    lvx     v2, 0, r5           ;# pred

    add     r4, r4, r6          ;# next src row (strided)
    addi    r5, r5, 16          ;# next pred row (contiguous)

    vmrghb  v3, v0, v1          ;# unpack high src  to short
    vmrghb  v4, v0, v2          ;# unpack high pred to short

    vsubshs v3, v3, v4          ;# src - pred, saturating

    stvx    v3, 0, r3           ;# store out diff

    vmrglb  v3, v0, v1          ;# unpack low src  to short
    vmrglb  v4, v0, v2          ;# unpack low pred to short

    vsubshs v3, v3, v4          ;# src - pred, saturating

    stvx    v3, r10, r3         ;# store out diff

    addi    r3, r3, 32          ;# one full row of 16 shorts written

    bdnz    mby_loop

    mtspr   256, r11            ;# reset old VRSAVE

    blr
    154