; vim:filetype=nasm ts=8

; libFLAC - Free Lossless Audio Codec library
; Copyright (C) 2001,2002,2003,2004,2005,2006,2007 Josh Coalson
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; - Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
;
; - Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; - Neither the name of the Xiph.org Foundation nor the names of its
; contributors may be used to endorse or promote products derived from
; this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%include "nasm.h"

	data_section

cglobal precompute_partition_info_sums_32bit_asm_ia32_

	code_section

; **********************************************************************
;
; void precompute_partition_info_sums_32bit_asm_ia32_(
;	const FLAC__int32 residual[],
;	FLAC__uint64 abs_residual_partition_sums[],
;	unsigned blocksize,
;	unsigned predictor_order,
;	unsigned min_partition_order,
;	unsigned max_partition_order
; )
;
; Syntax: NASM (Intel operand order), 32-bit x86.
;
; All six arguments are read from the stack and the routine returns with a
; plain 'ret', so the caller is the one that removes them.
;
; Pass 1 walks residual[] once and, for each of the
; (1 << max_partition_order) partitions, stores the sum of |residual[i]| as
; a 64-bit entry whose upper dword is written as zero (the running sum is
; kept in a single 32-bit register; no overflow check is made).
;
; Pass 2 then, for every partition order from max_partition_order - 1 down
; to min_partition_order, appends entries that are the 64-bit sum of each
; adjacent pair of entries belonging to the previous order.  The entries of
; each lower order are written directly after those of the order above it,
; so abs_residual_partition_sums[] must have room for all of them.
;
; The per-sample loop executes its body before testing its bound, so every
; partition is assumed to hold at least one sample.
;
; Preserved:  ebp, ebx, esi, edi (pushed on entry, popped on exit)
; Clobbered:  eax, ecx, edx, flags
; Stack use:  4 saved registers + 8 bytes of locals
;
; Offsets from esp once the prologue has run (4 pushes + 8 bytes of locals
; + return address put the first argument at esp + 28):
%define PPIS32_LOCALS_SIZE	8
%define PPIS32_PARTITIONS	0	; local: 1u << max_partition_order
%define PPIS32_PART_SAMPLES	4	; local: blocksize >> max_partition_order
%define PPIS32_RESIDUAL		28	; arg:   const FLAC__int32 residual[]
%define PPIS32_SUMS		32	; arg:   FLAC__uint64 abs_residual_partition_sums[]
%define PPIS32_BLOCKSIZE	36	; arg:   unsigned blocksize
%define PPIS32_PREDICTOR_ORDER	40	; arg:   unsigned predictor_order
%define PPIS32_MIN_ORDER	44	; arg:   unsigned min_partition_order
%define PPIS32_MAX_ORDER	48	; arg:   unsigned max_partition_order

	ALIGN 16
cident precompute_partition_info_sums_32bit_asm_ia32_

	push	ebp
	push	ebx
	push	esi
	push	edi
	sub	esp, PPIS32_LOCALS_SIZE

	; Derive the two locals from max_partition_order.
	mov	ecx, [esp + PPIS32_MAX_ORDER]
	mov	eax, 1
	shl	eax, cl
	mov	[esp + PPIS32_PARTITIONS], eax		; partitions = 1u << max_partition_order
	mov	eax, [esp + PPIS32_BLOCKSIZE]
	shr	eax, cl
	mov	[esp + PPIS32_PART_SAMPLES], eax	; default_partition_samples = blocksize >> max_partition_order

	;
	; Pass 1: absolute sums at max_partition_order.
	;
	;   ebp = residual[]            esi = sample index
	;   edi = end index (exclusive) of the current partition
	;   ecx = partition index       ebx = running 32-bit sum
	;
	; 'end' and the running sum are initialised here and updated at the
	; bottom of the partition loop, which lets both loop heads share one
	; aligned address.
	;
	mov	edi, eax				; eax still holds default_partition_samples
	sub	edi, [esp + PPIS32_PREDICTOR_ORDER]	; the first partition is short by predictor_order samples
	mov	ebp, [esp + PPIS32_RESIDUAL]
	xor	esi, esi
	xor	ecx, ecx
	xor	ebx, ebx
	ALIGN 16
.partition_loop:
.sample_loop:
	mov	eax, [ebp + esi * 4]
	cdq						; edx = 0 or -1 according to the sign of eax
	xor	eax, edx
	sub	eax, edx				; eax = abs(residual[esi])
	add	ebx, eax
	add	esi, byte 1
	cmp	esi, edi				; unsigned bound check, done after the body
	jb	.sample_loop

	mov	eax, [esp + PPIS32_SUMS]
	mov	[eax + ecx * 8], ebx			; low dword  = partition sum
	mov	dword [eax + ecx * 8 + 4], 0		; high dword = 0
	add	edi, [esp + PPIS32_PART_SAMPLES]	; move 'end' to the close of the next partition
	xor	ebx, ebx				; restart the running sum
	add	ecx, byte 1
	cmp	ecx, [esp + PPIS32_PARTITIONS]
	jb	.partition_loop

	;
	; Pass 2: build each lower order by adding adjacent pairs.
	;
	;   esi = read cursor  (start of the order just completed)
	;   edi = write cursor (first free entry after it)
	;   ecx = partition_order being produced (signed)
	;   edx = entries still to write for this order
	;
	mov	esi, [esp + PPIS32_SUMS]
	mov	eax, [esp + PPIS32_PARTITIONS]
	lea	edi, [esi + eax * 8]
	mov	ecx, [esp + PPIS32_MAX_ORDER]
	sub	ecx, byte 1				; partition_order = (int)max_partition_order - 1
	cmp	ecx, [esp + PPIS32_MIN_ORDER]
	jl	.done					; signed compare: nothing to merge
	ALIGN 16
.order_loop:
	mov	edx, 1
	shl	edx, cl					; entries at this order = 1u << partition_order
	ALIGN 16
.merge_loop:
	mov	eax, [esi]
	mov	ebx, [esi + 4]
	add	eax, [esi + 8]
	adc	ebx, [esi + 12]				; 64-bit add; carry comes from the low-dword add
	mov	[edi], eax
	mov	[edi + 4], ebx
	add	esi, byte 16				; two source entries consumed
	add	edi, byte 8				; one destination entry produced
	sub	edx, byte 1
	jnz	.merge_loop

	sub	ecx, byte 1
	cmp	ecx, [esp + PPIS32_MIN_ORDER]
	jge	.order_loop				; continue while partition_order >= (int)min_partition_order
.done:

	add	esp, PPIS32_LOCALS_SIZE
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret

end

%ifdef OBJ_FORMAT_elf
	section .note.GNU-stack noalloc
%endif