Home | History | Annotate | Download | only in vpx_ports
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12 %include "vpx_config.asm"
     13 
     14 ; 32/64 bit compatibility macros
     15 ;
     16 ; In general, we make the source use 64 bit syntax, then twiddle with it using
     17 ; the preprocessor to get the 32 bit syntax on 32 bit platforms.
     18 ;
     19 %ifidn __OUTPUT_FORMAT__,elf32
     20 %define ABI_IS_32BIT 1
     21 %elifidn __OUTPUT_FORMAT__,macho32
     22 %define ABI_IS_32BIT 1
     23 %elifidn __OUTPUT_FORMAT__,win32
     24 %define ABI_IS_32BIT 1
     25 %elifidn __OUTPUT_FORMAT__,aout
     26 %define ABI_IS_32BIT 1
     27 %else
     28 %define ABI_IS_32BIT 0
     29 %endif
     30 
     31 %if ABI_IS_32BIT
     32 %define rax eax
     33 %define rbx ebx
     34 %define rcx ecx
     35 %define rdx edx
     36 %define rsi esi
     37 %define rdi edi
     38 %define rsp esp
     39 %define rbp ebp
     40 %define movsxd mov
     41 %macro movq 2
     42   %ifidn %1,eax
     43     movd %1,%2
     44   %elifidn %2,eax
     45     movd %1,%2
     46   %elifidn %1,ebx
     47     movd %1,%2
     48   %elifidn %2,ebx
     49     movd %1,%2
     50   %elifidn %1,ecx
     51     movd %1,%2
     52   %elifidn %2,ecx
     53     movd %1,%2
     54   %elifidn %1,edx
     55     movd %1,%2
     56   %elifidn %2,edx
     57     movd %1,%2
     58   %elifidn %1,esi
     59     movd %1,%2
     60   %elifidn %2,esi
     61     movd %1,%2
     62   %elifidn %1,edi
     63     movd %1,%2
     64   %elifidn %2,edi
     65     movd %1,%2
     66   %elifidn %1,esp
     67     movd %1,%2
     68   %elifidn %2,esp
     69     movd %1,%2
     70   %elifidn %1,ebp
     71     movd %1,%2
     72   %elifidn %2,ebp
     73     movd %1,%2
     74   %else
     75     movq %1,%2
     76   %endif
     77 %endmacro
     78 %endif
     79 
     80 
     81 ; LIBVPX_YASM_WIN64
     82 ; Set LIBVPX_YASM_WIN64 if output is Windows 64bit so the code will work if x64
     83 ; or win64 is defined on the Yasm command line.
     84 %ifidn __OUTPUT_FORMAT__,win64
     85 %define LIBVPX_YASM_WIN64 1
     86 %elifidn __OUTPUT_FORMAT__,x64
     87 %define LIBVPX_YASM_WIN64 1
     88 %else
     89 %define LIBVPX_YASM_WIN64 0
     90 %endif
     91 
     92 ; sym()
     93 ; Return the proper symbol name for the target ABI.
     94 ;
     95 ; Certain ABIs, notably MS COFF and Darwin MACH-O, require that symbols
     96 ; with C linkage be prefixed with an underscore.
     97 ;
     98 %ifidn   __OUTPUT_FORMAT__,elf32
     99 %define sym(x) x
    100 %elifidn __OUTPUT_FORMAT__,elf64
    101 %define sym(x) x
    102 %elifidn __OUTPUT_FORMAT__,elfx32
    103 %define sym(x) x
    104 %elif LIBVPX_YASM_WIN64
    105 %define sym(x) x
    106 %else
    107 %define sym(x) _ %+ x
    108 %endif
    109 
    110 ;  PRIVATE
    111 ;  Macro for the attribute to hide a global symbol for the target ABI.
    112 ;  This is only active if CHROMIUM is defined.
    113 ;
    114 ;  Chromium doesn't like exported global symbols due to symbol clashing with
    115 ;  plugins among other things.
    116 ;
    117 ;  Requires Chromium's patched copy of yasm:
    118 ;    http://src.chromium.org/viewvc/chrome?view=rev&revision=73761
    119 ;    http://www.tortall.net/projects/yasm/ticket/236
    120 ;
    121 %ifdef CHROMIUM
    122   %ifidn   __OUTPUT_FORMAT__,elf32
    123     %define PRIVATE :hidden
    124   %elifidn __OUTPUT_FORMAT__,elf64
    125     %define PRIVATE :hidden
    126   %elifidn __OUTPUT_FORMAT__,elfx32
    127     %define PRIVATE :hidden
    128   %elif LIBVPX_YASM_WIN64
    129     %define PRIVATE
    130   %else
    131     %define PRIVATE :private_extern
    132   %endif
    133 %else
    134   %define PRIVATE
    135 %endif
    136 
    137 ; arg()
    138 ; Return the address specification of the given argument
    139 ;
    140 %if ABI_IS_32BIT
    141   %define arg(x) [ebp+8+4*x]
    142 %else
    143   ; 64 bit ABI passes arguments in registers. This is a workaround to get up
    144   ; and running quickly. Relies on SHADOW_ARGS_TO_STACK
    145   %if LIBVPX_YASM_WIN64
    146     %define arg(x) [rbp+16+8*x]
    147   %else
    148     %define arg(x) [rbp-8-8*x]
    149   %endif
    150 %endif
    151 
    152 ; REG_SZ_BYTES, REG_SZ_BITS
    153 ; Size of a register
    154 %if ABI_IS_32BIT
    155 %define REG_SZ_BYTES 4
    156 %define REG_SZ_BITS  32
    157 %else
    158 %define REG_SZ_BYTES 8
    159 %define REG_SZ_BITS  64
    160 %endif
    161 
    162 
    163 ; ALIGN_STACK <alignment> <register>
    164 ; This macro aligns the stack to the given alignment (in bytes). The stack
    165 ; is left such that the previous value of the stack pointer is the first
    166 ; argument on the stack (ie, the inverse of this macro is 'pop rsp.')
    167 ; This macro uses one temporary register, which is not preserved, and thus
    168 ; must be specified as an argument.
    169 %macro ALIGN_STACK 2
    170     mov         %2, rsp
    171     and         rsp, -%1
    172     lea         rsp, [rsp - (%1 - REG_SZ_BYTES)]
    173     push        %2
    174 %endmacro
    175 
    176 
    177 ;
    178 ; The Microsoft assembler tries to impose a certain amount of type safety in
    179 ; its register usage. YASM doesn't recognize these directives, so we just
    180 ; %define them away to maintain as much compatibility as possible with the
    181 ; original inline assembler we're porting from.
    182 ;
    183 %idefine PTR
    184 %idefine XMMWORD
    185 %idefine MMWORD
    186 
    187 ; PIC macros
    188 ;
    189 %if ABI_IS_32BIT
    190   %if CONFIG_PIC=1
    191   %ifidn __OUTPUT_FORMAT__,elf32
    192     %define WRT_PLT wrt ..plt
    193     %macro GET_GOT 1
    194       extern _GLOBAL_OFFSET_TABLE_
    195       push %1
    196       call %%get_got
    197       %%sub_offset:
    198       jmp %%exitGG
    199       %%get_got:
    200       mov %1, [esp]
    201       add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
    202       ret
    203       %%exitGG:
    204       %undef GLOBAL
    205       %define GLOBAL(x) x + %1 wrt ..gotoff
    206       %undef RESTORE_GOT
    207       %define RESTORE_GOT pop %1
    208     %endmacro
    209   %elifidn __OUTPUT_FORMAT__,macho32
    210     %macro GET_GOT 1
    211       push %1
    212       call %%get_got
    213       %%get_got:
    214       pop  %1
    215       %undef GLOBAL
    216       %define GLOBAL(x) x + %1 - %%get_got
    217       %undef RESTORE_GOT
    218       %define RESTORE_GOT pop %1
    219     %endmacro
    220   %endif
    221   %endif
    222 
    223   %ifdef CHROMIUM
    224     %ifidn __OUTPUT_FORMAT__,macho32
    225       %define HIDDEN_DATA(x) x:private_extern
    226     %else
    227       %define HIDDEN_DATA(x) x
    228     %endif
    229   %else
    230     %define HIDDEN_DATA(x) x
    231   %endif
    232 %else
    233   %macro GET_GOT 1
    234   %endmacro
    235   %define GLOBAL(x) rel x
    236   %ifidn __OUTPUT_FORMAT__,elf64
    237     %define WRT_PLT wrt ..plt
    238     %define HIDDEN_DATA(x) x:data hidden
    239   %elifidn __OUTPUT_FORMAT__,elfx32
    240     %define WRT_PLT wrt ..plt
    241     %define HIDDEN_DATA(x) x:data hidden
    242   %elifidn __OUTPUT_FORMAT__,macho64
    243     %ifdef CHROMIUM
    244       %define HIDDEN_DATA(x) x:private_extern
    245     %else
    246       %define HIDDEN_DATA(x) x
    247     %endif
    248   %else
    249     %define HIDDEN_DATA(x) x
    250   %endif
    251 %endif
    252 %ifnmacro GET_GOT
    253     %macro GET_GOT 1
    254     %endmacro
    255     %define GLOBAL(x) x
    256 %endif
    257 %ifndef RESTORE_GOT
    258 %define RESTORE_GOT
    259 %endif
    260 %ifndef WRT_PLT
    261 %define WRT_PLT
    262 %endif
    263 
    264 %if ABI_IS_32BIT
    265   %macro SHADOW_ARGS_TO_STACK 1
    266   %endm
    267   %define UNSHADOW_ARGS
    268 %else
    269 %if LIBVPX_YASM_WIN64
    270   %macro SHADOW_ARGS_TO_STACK 1 ; argc
    271     %if %1 > 0
    272         mov arg(0),rcx
    273     %endif
    274     %if %1 > 1
    275         mov arg(1),rdx
    276     %endif
    277     %if %1 > 2
    278         mov arg(2),r8
    279     %endif
    280     %if %1 > 3
    281         mov arg(3),r9
    282     %endif
    283   %endm
    284 %else
    285   %macro SHADOW_ARGS_TO_STACK 1 ; argc
    286     %if %1 > 0
    287         push rdi
    288     %endif
    289     %if %1 > 1
    290         push rsi
    291     %endif
    292     %if %1 > 2
    293         push rdx
    294     %endif
    295     %if %1 > 3
    296         push rcx
    297     %endif
    298     %if %1 > 4
    299         push r8
    300     %endif
    301     %if %1 > 5
    302         push r9
    303     %endif
    304     %if %1 > 6
    305       %assign i %1-6
    306       %assign off 16
    307       %rep i
    308         mov rax,[rbp+off]
    309         push rax
    310         %assign off off+8
    311       %endrep
    312     %endif
    313   %endm
    314 %endif
    315   %define UNSHADOW_ARGS mov rsp, rbp
    316 %endif
    317 
    318 ; Win64 ABI requires that XMM6:XMM15 are callee saved
    319 ; SAVE_XMM n, [u]
    320 ; store registers 6-n on the stack
    321 ; if u is specified, use unaligned movs.
    322 ; Win64 ABI requires 16 byte stack alignment, but then pushes an 8 byte return
    323 ; value. Typically we follow this up with 'push rbp' - re-aligning the stack -
    324 ; but in some cases this is not done and unaligned movs must be used.
    325 %if LIBVPX_YASM_WIN64
    326 %macro SAVE_XMM 1-2 a
    327   %if %1 < 6
    328     %error Only xmm registers 6-15 must be preserved
    329   %else
    330     %assign last_xmm %1
    331     %define movxmm movdq %+ %2
    332     %assign xmm_stack_space ((last_xmm - 5) * 16)
    333     sub rsp, xmm_stack_space
    334     %assign i 6
    335     %rep (last_xmm - 5)
    336       movxmm [rsp + ((i - 6) * 16)], xmm %+ i
    337       %assign i i+1
    338     %endrep
    339   %endif
    340 %endmacro
    341 %macro RESTORE_XMM 0
    342   %ifndef last_xmm
    343     %error RESTORE_XMM must be paired with SAVE_XMM n
    344   %else
    345     %assign i last_xmm
    346     %rep (last_xmm - 5)
    347       movxmm xmm %+ i, [rsp +((i - 6) * 16)]
    348       %assign i i-1
    349     %endrep
    350     add rsp, xmm_stack_space
    351     ; there are a couple functions which return from multiple places.
    352     ; otherwise, we could uncomment these:
    353     ; %undef last_xmm
    354     ; %undef xmm_stack_space
    355     ; %undef movxmm
    356   %endif
    357 %endmacro
    358 %else
    359 %macro SAVE_XMM 1-2
    360 %endmacro
    361 %macro RESTORE_XMM 0
    362 %endmacro
    363 %endif
    364 
    365 ; Name of the rodata section
    366 ;
    367 ; .rodata seems to be an elf-ism, as it doesn't work on OSX.
    368 ;
    369 %ifidn __OUTPUT_FORMAT__,macho64
    370 %define SECTION_RODATA section .text
    371 %elifidn __OUTPUT_FORMAT__,macho32
    372 %macro SECTION_RODATA 0
    373 section .text
    374 %endmacro
    375 %elifidn __OUTPUT_FORMAT__,aout
    376 %define SECTION_RODATA section .data
    377 %else
    378 %define SECTION_RODATA section .rodata
    379 %endif
    380 
    381 
    382 ; Tell GNU ld that we don't require an executable stack.
    383 %ifidn __OUTPUT_FORMAT__,elf32
    384 section .note.GNU-stack noalloc noexec nowrite progbits
    385 section .text
    386 %elifidn __OUTPUT_FORMAT__,elf64
    387 section .note.GNU-stack noalloc noexec nowrite progbits
    388 section .text
    389 %elifidn __OUTPUT_FORMAT__,elfx32
    390 section .note.GNU-stack noalloc noexec nowrite progbits
    391 section .text
    392 %endif
    393 
    394 ; On Android platforms use lrand48 when building postproc routines. Prior to L
    395 ; rand() was not available.
    396 %if CONFIG_POSTPROC=1 || CONFIG_VP9_POSTPROC=1
    397 %ifdef __ANDROID__
    398 extern sym(lrand48)
    399 %define LIBVPX_RAND lrand48
    400 %else
    401 extern sym(rand)
    402 %define LIBVPX_RAND rand
    403 %endif
    404 %endif ; CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
    405