Home | History | Annotate | Download | only in ia32
      1 ;  vim:filetype=nasm ts=8
      2 
      3 ;  libFLAC - Free Lossless Audio Codec library
      4 ;  Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
      5 ;
      6 ;  Redistribution and use in source and binary forms, with or without
      7 ;  modification, are permitted provided that the following conditions
      8 ;  are met:
      9 ;
     10 ;  - Redistributions of source code must retain the above copyright
     11 ;  notice, this list of conditions and the following disclaimer.
     12 ;
     13 ;  - Redistributions in binary form must reproduce the above copyright
     14 ;  notice, this list of conditions and the following disclaimer in the
     15 ;  documentation and/or other materials provided with the distribution.
     16 ;
     17 ;  - Neither the name of the Xiph.org Foundation nor the names of its
     18 ;  contributors may be used to endorse or promote products derived from
     19 ;  this software without specific prior written permission.
     20 ;
     21 ;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     22 ;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     23 ;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     24 ;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
     25 ;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     26 ;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     27 ;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     28 ;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     29 ;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     30 ;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     31 ;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     32 
     33 %include "nasm.h"
     34 
     35 	data_section
        	; NOTE(review): data_section, cglobal and code_section are not NASM built-ins;
        	; presumably they are macros supplied by nasm.h (included above) -- confirm there.
        	; Nothing is emitted between data_section and code_section in this file.
     36 
     37 cglobal precompute_partition_info_sums_32bit_asm_ia32_
        	; presumably makes the routine visible to the linker -- verify in nasm.h
     38 
     39 	code_section
     40 
     41 
     42 ; **********************************************************************
     43 ;
     44 ; C prototype of the routine implemented below:
     45 ; void precompute_partition_info_sums_32bit_(
     46 ; 	const FLAC__int32 residual[],
     47 ; 	FLAC__uint64 abs_residual_partition_sums[],
     48 ; 	unsigned blocksize,
     49 ; 	unsigned predictor_order,
     50 ; 	unsigned min_partition_order,
     51 ; 	unsigned max_partition_order
     52 ; )
     53 ;
     54 	ALIGN 16
     55 cident precompute_partition_info_sums_32bit_asm_ia32_
     56 
        	;; Pass 1 (.loop0/.loop1): for each of the (1 << max_partition_order) partitions, sum
        	;; abs(residual[i]) over that partition's samples and store it as a 64-bit entry of
        	;; abs_residual_partition_sums[] (low dword = sum, high dword = 0).  Partition k ends at
        	;; sample index (k + 1) * default_partition_samples - predictor_order, so partition 0 is
        	;; shorter than the others by predictor_order samples.
        	;; Pass 2 (.loop2/.loop3): for each partition_order from max_partition_order - 1 down to
        	;; min_partition_order, append (1 << partition_order) entries to the same array, each the
        	;; 64-bit sum of two adjacent entries of the previous order.
        	;;
        	;; The pass-1 accumulator is the 32-bit register ebx and no carry is propagated, so a
        	;; partition sum that does not fit in 32 bits wraps.
        	;; Both pass-1 loops test their condition at the bottom: at least one residual is read
        	;; for every partition, and at least one partition is processed.
        	;; Arguments are read from the stack; ebp, ebx, esi and edi are saved and restored;
        	;; eax, ecx and edx are overwritten.  `ret` removes no arguments from the stack.
        	;;
     57 	;; peppered throughout the code at major checkpoints are keys like this as to where things are at that point in time
     58 	;; [esp + 4]	const FLAC__int32 residual[]
     59 	;; [esp + 8]	FLAC__uint64 abs_residual_partition_sums[]
     60 	;; [esp + 12]	unsigned blocksize
     61 	;; [esp + 16]	unsigned predictor_order
     62 	;; [esp + 20]	unsigned min_partition_order
     63 	;; [esp + 24]	unsigned max_partition_order
     64 	push	ebp			; ebp, ebx, esi and edi are popped again before ret
     65 	push	ebx
     66 	push	esi
     67 	push	edi
     68 	sub	esp, 8			; room for two dword locals: [esp] and [esp + 4]
        	;; (4 pushes + 8 bytes of locals: every argument is now 24 bytes further from esp)
     69 	;; [esp + 28]	const FLAC__int32 residual[]
     70 	;; [esp + 32]	FLAC__uint64 abs_residual_partition_sums[]
     71 	;; [esp + 36]	unsigned blocksize
     72 	;; [esp + 40]	unsigned predictor_order
     73 	;; [esp + 44]	unsigned min_partition_order
     74 	;; [esp + 48]	unsigned max_partition_order
     75 	;; [esp]	partitions
     76 	;; [esp + 4]	default_partition_samples
     77 
     78 	mov	ecx, [esp + 48]		; cl <- max_partition_order, the shift count for both shifts below
     79 	mov	eax, 1
     80 	shl	eax, cl
     81 	mov	[esp], eax		; [esp] <- partitions = 1u << max_partition_order;
     82 	mov	eax, [esp + 36]		; eax <- blocksize
     83 	shr	eax, cl
     84 	mov	[esp + 4], eax		; [esp + 4] <- default_partition_samples = blocksize >> max_partition_order;
     85 
     86 	;
     87 	; first do max_partition_order
     88 	;
     89 	mov	edi, [esp + 4]		; edi <- default_partition_samples
     90 	sub	edi, [esp + 40]		; edi <- end = (unsigned)(-(int)predictor_order) + default_partition_samples
     91 	xor	esi, esi		; esi <- residual_sample = 0
     92 	xor	ecx, ecx		; ecx <- partition = 0
     93 	mov	ebp, [esp + 28]		; ebp <- residual[]
     94 	xor	ebx, ebx		; ebx <- abs_residual_partition_sum = 0;
     95 	; note we put the updates to 'end' and 'abs_residual_partition_sum' at the end of loop0 and in the initialization above so we could align loop0 and loop1
     96 	ALIGN	16
     97 .loop0:					; for(partition = residual_sample = 0; partition < partitions; partition++) {
     98 .loop1:					;   for( ; residual_sample < end; residual_sample++)
     99 	mov	eax, [ebp + esi * 4]	;     eax <- residual[residual_sample]
    100 	cdq				;     edx <- sign mask of eax (0 or 0xFFFFFFFF)
    101 	xor	eax, edx		;     branch-free abs: (x ^ mask) - mask
    102 	sub	eax, edx
    103 	add	ebx, eax		;     abs_residual_partition_sum += abs(residual[residual_sample]);
    104 	;@@@@@@ check overflow flag and abort here? (nothing below detects a wrapped 32-bit sum)
    105 	add	esi, byte 1		;     residual_sample++
    106 	cmp	esi, edi		;   /* since the loop will always run at least once, we can put the loop check down here */
    107 	jb	.loop1			;   unsigned compare of residual_sample against end
    108 .next1:					;   (never jumped to in this routine; reached by fall-through)
    109 	add	edi, [esp + 4]		;   end += default_partition_samples;
    110 	mov	eax, [esp + 32]		;   eax <- abs_residual_partition_sums
    111 	mov	[eax + ecx * 8], ebx	;   abs_residual_partition_sums[partition] = abs_residual_partition_sum;
    112 	mov	[eax + ecx * 8 + 4], dword 0	;   high dword of the 64-bit entry is always written as 0
    113 	xor	ebx, ebx		;   abs_residual_partition_sum = 0;
    114 	add	ecx, byte 1		;   partition++
    115 	cmp	ecx, [esp]		; /* since the loop will always run at least once, we can put the loop check down here */
    116 	jb	.loop0			; unsigned compare of partition against partitions
    117 .next0:					; }  (never jumped to in this routine; reached by fall-through)
    118 	;
    119 	; now merge partitions for lower orders
    120 	;
    121 	mov	esi, [esp + 32]		; esi <- &abs_residual_partition_sums[from_partition==0];
    122 	mov	eax, [esp]		; eax <- partitions
    123 	lea	edi, [esi + eax * 8]	; edi <- &abs_residual_partition_sums[to_partition==partitions];
    124 	mov	ecx, [esp + 48]
    125 	sub	ecx, byte 1		; ecx <- partition_order = (int)max_partition_order - 1;
    126 	ALIGN 16
    127 .loop2:					; for(; partition_order >= (int)min_partition_order; partition_order--) {
    128 	cmp	ecx, [esp + 44]
    129 	jl	.next2			;   signed compare, matching the (int) casts in the C loop condition
    130 	mov	edx, 1
    131 	shl	edx, cl			;   const unsigned partitions = 1u << partition_order;
        					;   edx doubles as the countdown for .loop3
    132 	ALIGN 16
    133 .loop3:					;   for(i = 0; i < partitions; i++) {
    134 	mov	eax, [esi]		;     eax <- low dword of a_r_p_s[from_partition]
    135 	mov	ebx, [esi + 4]		;     ebx <- high dword of a_r_p_s[from_partition]
    136 	add	eax, [esi + 8]		;     + low dword of a_r_p_s[from_partition+1]
    137 	adc	ebx, [esi + 12]		;     + high dword of a_r_p_s[from_partition+1], plus carry from the low add
    138 	mov	[edi], eax
    139 	mov	[edi + 4], ebx		;     a_r_p_s[to_partition] = a_r_p_s[from_partition] + a_r_p_s[from_partition+1];
    140 	add	esi, byte 16		;     from_partition += 2 (two 8-byte entries consumed)
    141 	add	edi, byte 8		;     to_partition++ (one 8-byte entry produced)
    142 	sub	edx, byte 1		;     one fewer entry left to produce at this order
    143 	jnz	.loop3			;   }
    144 	sub	ecx, byte 1		;   partition_order--
    145 	jmp	.loop2			; }
    146 .next2:
    147 
    148 	add	esp, 8			; release the two dword locals
    149 	pop	edi			; restore saved registers in reverse push order
    150 	pop	esi
    151 	pop	ebx
    152 	pop	ebp
    153 	ret
    154 
    155 end
        	; NOTE(review): the bare `end` above is not a NASM directive; unless nasm.h gives it a
        	; meaning, NASM will treat it as a label named `end` with no colon -- confirm.
    156 
        	; For ELF output only: emit an empty .note.GNU-stack section.  By GNU toolchain
        	; convention this marks the object as not needing an executable stack.
    157 %ifdef OBJ_FORMAT_elf
    158 	section .note.GNU-stack noalloc
    159 %endif
    160