Home | History | Annotate | Download | only in x86
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12 %include "vpx_ports/x86_abi_support.asm"
     13 
     14 ;int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
     15 ;                           short *qcoeff_ptr,short *dequant_ptr,
     16 ;                           short *scan_mask, short *round_ptr,
     17 ;                           short *quant_ptr, short *dqcoeff_ptr);
     18 global sym(vp8_fast_quantize_b_impl_mmx) PRIVATE
     19 sym(vp8_fast_quantize_b_impl_mmx):
     20     push        rbp
     21     mov         rbp, rsp
     22     SHADOW_ARGS_TO_STACK 8
     23     push rsi
     24     push rdi
     25     ; end prolog
     26 
     27 
     28         mov             rsi,        arg(0) ;coeff_ptr
     29         movq            mm0,        [rsi]
     30 
     31         mov             rax,        arg(1) ;zbin_ptr
     32         movq            mm1,        [rax]
     33 
     34         movq            mm3,        mm0
     35         psraw           mm0,        15
     36 
     37         pxor            mm3,        mm0
     38         psubw           mm3,        mm0         ; abs
     39 
     40         movq            mm2,        mm3
     41         pcmpgtw         mm1,        mm2
     42 
     43         pandn           mm1,        mm2
     44         movq            mm3,        mm1
     45 
     46         mov             rdx,        arg(6) ;quant_ptr
     47         movq            mm1,        [rdx]
     48 
     49         mov             rcx,        arg(5) ;round_ptr
     50         movq            mm2,        [rcx]
     51 
     52         paddw           mm3,        mm2
     53         pmulhuw         mm3,        mm1
     54 
     55         pxor            mm3,        mm0
     56         psubw           mm3,        mm0     ;gain the sign back
     57 
     58         mov             rdi,        arg(2) ;qcoeff_ptr
     59         movq            mm0,        mm3
     60 
     61         movq            [rdi],      mm3
     62 
     63         mov             rax,        arg(3) ;dequant_ptr
     64         movq            mm2,        [rax]
     65 
     66         pmullw          mm3,        mm2
     67         mov             rax,        arg(7) ;dqcoeff_ptr
     68 
     69         movq            [rax],      mm3
     70 
     71         ; next 8
     72         movq            mm4,        [rsi+8]
     73 
     74         mov             rax,        arg(1) ;zbin_ptr
     75         movq            mm5,        [rax+8]
     76 
     77         movq            mm7,        mm4
     78         psraw           mm4,        15
     79 
     80         pxor            mm7,        mm4
     81         psubw           mm7,        mm4         ; abs
     82 
     83         movq            mm6,        mm7
     84         pcmpgtw         mm5,        mm6
     85 
     86         pandn           mm5,        mm6
     87         movq            mm7,        mm5
     88 
     89         movq            mm5,        [rdx+8]
     90         movq            mm6,        [rcx+8]
     91 
     92         paddw           mm7,        mm6
     93         pmulhuw         mm7,        mm5
     94 
     95         pxor            mm7,        mm4
     96         psubw           mm7,        mm4;gain the sign back
     97 
     98         mov             rdi,        arg(2) ;qcoeff_ptr
     99 
    100         movq            mm1,        mm7
    101         movq            [rdi+8],    mm7
    102 
    103         mov             rax,        arg(3) ;dequant_ptr
    104         movq            mm6,        [rax+8]
    105 
    106         pmullw          mm7,        mm6
    107         mov             rax,        arg(7) ;dqcoeff_ptr
    108 
    109         movq            [rax+8],    mm7
    110 
    111 
    112                 ; next 8
    113         movq            mm4,        [rsi+16]
    114 
    115         mov             rax,        arg(1) ;zbin_ptr
    116         movq            mm5,        [rax+16]
    117 
    118         movq            mm7,        mm4
    119         psraw           mm4,        15
    120 
    121         pxor            mm7,        mm4
    122         psubw           mm7,        mm4         ; abs
    123 
    124         movq            mm6,        mm7
    125         pcmpgtw         mm5,        mm6
    126 
    127         pandn           mm5,        mm6
    128         movq            mm7,        mm5
    129 
    130         movq            mm5,        [rdx+16]
    131         movq            mm6,        [rcx+16]
    132 
    133         paddw           mm7,        mm6
    134         pmulhuw         mm7,        mm5
    135 
    136         pxor            mm7,        mm4
    137         psubw           mm7,        mm4;gain the sign back
    138 
    139         mov             rdi,        arg(2) ;qcoeff_ptr
    140 
    141         movq            mm1,        mm7
    142         movq            [rdi+16],   mm7
    143 
    144         mov             rax,        arg(3) ;dequant_ptr
    145         movq            mm6,        [rax+16]
    146 
    147         pmullw          mm7,        mm6
    148         mov             rax,        arg(7) ;dqcoeff_ptr
    149 
    150         movq            [rax+16],   mm7
    151 
    152 
    153                 ; next 8
    154         movq            mm4,        [rsi+24]
    155 
    156         mov             rax,        arg(1) ;zbin_ptr
    157         movq            mm5,        [rax+24]
    158 
    159         movq            mm7,        mm4
    160         psraw           mm4,        15
    161 
    162         pxor            mm7,        mm4
    163         psubw           mm7,        mm4         ; abs
    164 
    165         movq            mm6,        mm7
    166         pcmpgtw         mm5,        mm6
    167 
    168         pandn           mm5,        mm6
    169         movq            mm7,        mm5
    170 
    171         movq            mm5,        [rdx+24]
    172         movq            mm6,        [rcx+24]
    173 
    174         paddw           mm7,        mm6
    175         pmulhuw         mm7,        mm5
    176 
    177         pxor            mm7,        mm4
    178         psubw           mm7,        mm4;gain the sign back
    179 
    180         mov             rdi,        arg(2) ;qcoeff_ptr
    181 
    182         movq            mm1,        mm7
    183         movq            [rdi+24],   mm7
    184 
    185         mov             rax,        arg(3) ;dequant_ptr
    186         movq            mm6,        [rax+24]
    187 
    188         pmullw          mm7,        mm6
    189         mov             rax,        arg(7) ;dqcoeff_ptr
    190 
    191         movq            [rax+24],   mm7
    192 
    193 
    194 
    195         mov             rdi,        arg(4) ;scan_mask
    196         mov             rsi,        arg(2) ;qcoeff_ptr
    197 
    198         pxor            mm5,        mm5
    199         pxor            mm7,        mm7
    200 
    201         movq            mm0,        [rsi]
    202         movq            mm1,        [rsi+8]
    203 
    204         movq            mm2,        [rdi]
    205         movq            mm3,        [rdi+8];
    206 
    207         pcmpeqw         mm0,        mm7
    208         pcmpeqw         mm1,        mm7
    209 
    210         pcmpeqw         mm6,        mm6
    211         pxor            mm0,        mm6
    212 
    213         pxor            mm1,        mm6
    214         psrlw           mm0,        15
    215 
    216         psrlw           mm1,        15
    217         pmaddwd         mm0,        mm2
    218 
    219         pmaddwd         mm1,        mm3
    220         movq            mm5,        mm0
    221 
    222         paddd           mm5,        mm1
    223 
    224         movq            mm0,        [rsi+16]
    225         movq            mm1,        [rsi+24]
    226 
    227         movq            mm2,        [rdi+16]
    228         movq            mm3,        [rdi+24];
    229 
    230         pcmpeqw         mm0,        mm7
    231         pcmpeqw         mm1,        mm7
    232 
    233         pcmpeqw         mm6,        mm6
    234         pxor            mm0,        mm6
    235 
    236         pxor            mm1,        mm6
    237         psrlw           mm0,        15
    238 
    239         psrlw           mm1,        15
    240         pmaddwd         mm0,        mm2
    241 
    242         pmaddwd         mm1,        mm3
    243         paddd           mm5,        mm0
    244 
    245         paddd           mm5,        mm1
    246         movq            mm0,        mm5
    247 
    248         psrlq           mm5,        32
    249         paddd           mm0,        mm5
    250 
    251         ; eob adjustment begins here
    252         movq            rcx,        mm0
    253         and             rcx,        0xffff
    254 
    255         xor             rdx,        rdx
    256         sub             rdx,        rcx ; rdx=-rcx
    257 
    258         bsr             rax,        rcx
    259         inc             rax
    260 
    261         sar             rdx,        31
    262         and             rax,        rdx
    263         ; Substitute the sse assembly for the old mmx mixed assembly/C. The
    264         ; following is kept as reference
    265         ;    movq            rcx,        mm0
    266         ;    bsr             rax,        rcx
    267         ;
    268         ;    mov             eob,        rax
    269         ;    mov             eee,        rcx
    270         ;
    271         ;if(eee==0)
    272         ;{
    273         ;    eob=-1;
    274         ;}
    275         ;else if(eee<0)
    276         ;{
    277         ;    eob=15;
    278         ;}
    279         ;d->eob = eob+1;
    280 
    281     ; begin epilog
    282     pop rdi
    283     pop rsi
    284     UNSHADOW_ARGS
    285     pop         rbp
    286     ret
    287