1 /* 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.h" 12 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h" 13 14 // MIPS optimization of the function WebRtcIsacfix_MatrixProduct1. 15 // Bit-exact with the function WebRtcIsacfix_MatrixProduct1C from 16 // entropy_coding.c file. 17 void WebRtcIsacfix_MatrixProduct1MIPS(const int16_t matrix0[], 18 const int32_t matrix1[], 19 int32_t matrix_product[], 20 const int matrix1_index_factor1, 21 const int matrix0_index_factor1, 22 const int matrix1_index_init_case, 23 const int matrix1_index_step, 24 const int matrix0_index_step, 25 const int inner_loop_count, 26 const int mid_loop_count, 27 const int shift) { 28 if (matrix1_index_init_case != 0) { 29 int j = SUBFRAMES, k = 0, n = 0; 30 int32_t r0, r1, r2, sum32; 31 int32_t* product_start = matrix_product; 32 int32_t* product_ptr; 33 const uint32_t product_step = 4 * mid_loop_count; 34 const uint32_t matrix0_step = 2 * matrix0_index_step; 35 const uint32_t matrix1_step = 4 * matrix1_index_step; 36 const uint32_t matrix0_step2 = 2 * matrix0_index_factor1; 37 const uint32_t matrix1_step2 = 4 * matrix1_index_factor1; 38 const int16_t* matrix0_start = matrix0; 39 const int32_t* matrix1_start = matrix1; 40 int16_t* matrix0_ptr; 41 int32_t* matrix1_ptr; 42 43 __asm __volatile ( 44 ".set push \n\t" 45 ".set noreorder \n\t" 46 "1: \n\t" 47 "addu %[product_ptr], %[product_start], $0 \n\t" 48 "addu %[k], %[product_step], $0 \n\t" 49 "addiu %[j], %[j], -1 \n\t" 50 "addu %[matrix1_start], %[matrix1], $0 \n\t" 51 "2: \n\t" 52 "addu %[matrix1_ptr], %[matrix1_start], $0 \n\t" 53 "addu %[matrix0_ptr], %[matrix0_start], $0 \n\t" 54 "addu %[n], %[inner_loop_count], $0 \n\t" 55 "mul %[sum32], $0, $0 \n\t" 56 "3: \n\t" 57 "lw %[r0], 0(%[matrix1_ptr]) \n\t" 58 "lh %[r1], 0(%[matrix0_ptr]) \n\t" 59 "addu %[matrix1_ptr], %[matrix1_ptr], %[matrix1_step] \n\t" 60 "sllv %[r0], %[r0], %[shift] \n\t" 61 "andi %[r2], %[r0], 0xffff \n\t" 62 "sra %[r2], %[r2], 1 \n\t" 63 "mul %[r2], %[r2], %[r1] \n\t" 64 "sra %[r0], %[r0], 16 \n\t" 65 "mul %[r0], %[r0], %[r1] \n\t" 66 "addu %[matrix0_ptr], %[matrix0_ptr], %[matrix0_step] \n\t" 67 "addiu %[n], %[n], -1 \n\t" 68 #if defined(MIPS_DSP_R1_LE) 69 "shra_r.w %[r2], %[r2], 15 \n\t" 70 #else 71 "addiu %[r2], %[r2], 0x4000 \n\t" 72 "sra %[r2], %[r2], 15 \n\t" 73 #endif 74 "addu %[sum32], %[sum32], %[r2] \n\t" 75 "bgtz %[n], 3b \n\t" 76 " addu %[sum32], %[sum32], %[r0] \n\t" 77 "addiu %[k], %[k], -4 \n\t" 78 "addu %[matrix1_start], %[matrix1_start], %[matrix1_step2] \n\t" 79 "sw %[sum32], 0(%[product_ptr]) \n\t" 80 "bgtz %[k], 2b \n\t" 81 " addiu %[product_ptr], %[product_ptr], 4 \n\t" 82 "addu %[matrix0_start], %[matrix0_start], %[matrix0_step2] \n\t" 83 "bgtz %[j], 1b \n\t" 84 " addu %[product_start], %[product_start], %[product_step] \n\t" 85 ".set pop \n\t" 86 : [product_ptr] "=&r" (product_ptr), [product_start] "+r" (product_start), 87 [k] "=&r" (k), [j] "+r" (j), [matrix1_start] "=&r"(matrix1_start), 88 [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr), 89 [matrix0_start] "+r" (matrix0_start), [n] "=&r" (n), [r0] "=&r" (r0), 90 [sum32] "=&r" (sum32), [r1] "=&r" (r1),[r2] "=&r" (r2) 91 : [product_step] "r" (product_step), [matrix1] "r" (matrix1), 92 [inner_loop_count] "r" (inner_loop_count), 93 [matrix1_step] "r" (matrix1_step), [shift] "r" (shift), 94 [matrix0_step] "r" (matrix0_step), [matrix1_step2] "r" (matrix1_step2), 95 [matrix0_step2] "r" (matrix0_step2) 96 : "hi", "lo", "memory" 97 ); 98 } else { 99 int j = SUBFRAMES, k = 0, n = 0; 100 int32_t r0, r1, r2, sum32; 101 int32_t* product_start = matrix_product; 102 int32_t* product_ptr; 103 const uint32_t product_step = 4 * mid_loop_count; 104 const uint32_t matrix0_step = 2 * matrix0_index_step; 105 const uint32_t matrix1_step = 4 * matrix1_index_step; 106 const uint32_t matrix0_step2 = 2 * matrix0_index_factor1; 107 const uint32_t matrix1_step2 = 4 * matrix1_index_factor1; 108 const int16_t* matrix0_start = matrix0; 109 const int32_t* matrix1_start = matrix1; 110 int16_t* matrix0_ptr; 111 int32_t* matrix1_ptr; 112 113 __asm __volatile ( 114 ".set push \n\t" 115 ".set noreorder \n\t" 116 "1: \n\t" 117 "addu %[product_ptr], %[product_start], $0 \n\t" 118 "addu %[k], %[product_step], $0 \n\t" 119 "addiu %[j], %[j], -1 \n\t" 120 "addu %[matrix0_start], %[matrix0], $0 \n\t" 121 "2: \n\t" 122 "addu %[matrix1_ptr], %[matrix1_start], $0 \n\t" 123 "addu %[matrix0_ptr], %[matrix0_start], $0 \n\t" 124 "addu %[n], %[inner_loop_count], $0 \n\t" 125 "mul %[sum32], $0, $0 \n\t" 126 "3: \n\t" 127 "lw %[r0], 0(%[matrix1_ptr]) \n\t" 128 "lh %[r1], 0(%[matrix0_ptr]) \n\t" 129 "addu %[matrix1_ptr], %[matrix1_ptr], %[matrix1_step] \n\t" 130 "sllv %[r0], %[r0], %[shift] \n\t" 131 "andi %[r2], %[r0], 0xffff \n\t" 132 "sra %[r2], %[r2], 1 \n\t" 133 "mul %[r2], %[r2], %[r1] \n\t" 134 "sra %[r0], %[r0], 16 \n\t" 135 "mul %[r0], %[r0], %[r1] \n\t" 136 "addu %[matrix0_ptr], %[matrix0_ptr], %[matrix0_step] \n\t" 137 "addiu %[n], %[n], -1 \n\t" 138 #if defined(MIPS_DSP_R1_LE) 139 "shra_r.w %[r2], %[r2], 15 \n\t" 140 #else 141 "addiu %[r2], %[r2], 0x4000 \n\t" 142 "sra %[r2], %[r2], 15 \n\t" 143 #endif 144 "addu %[sum32], %[sum32], %[r2] \n\t" 145 "bgtz %[n], 3b \n\t" 146 " addu %[sum32], %[sum32], %[r0] \n\t" 147 "addiu %[k], %[k], -4 \n\t" 148 "addu %[matrix0_start], %[matrix0_start], %[matrix0_step2] \n\t" 149 "sw %[sum32], 0(%[product_ptr]) \n\t" 150 "bgtz %[k], 2b \n\t" 151 " addiu %[product_ptr], %[product_ptr], 4 \n\t" 152 "addu %[matrix1_start], %[matrix1_start], %[matrix1_step2] \n\t" 153 "bgtz %[j], 1b \n\t" 154 " addu %[product_start], %[product_start], %[product_step] \n\t" 155 ".set pop \n\t" 156 : [product_ptr] "=&r" (product_ptr), [product_start] "+r" (product_start), 157 [k] "=&r" (k), [j] "+r" (j), [matrix1_start] "+r"(matrix1_start), 158 [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr), 159 [matrix0_start] "=&r" (matrix0_start), [n] "=&r" (n), [r0] "=&r" (r0), 160 [sum32] "=&r" (sum32), [r1] "=&r" (r1),[r2] "=&r" (r2) 161 : [product_step] "r" (product_step), [matrix0] "r" (matrix0), 162 [inner_loop_count] "r" (inner_loop_count), 163 [matrix1_step] "r" (matrix1_step), [shift] "r" (shift), 164 [matrix0_step] "r" (matrix0_step), [matrix1_step2] "r" (matrix1_step2), 165 [matrix0_step2] "r" (matrix0_step2) 166 : "hi", "lo", "memory" 167 ); 168 } 169 } 170 171 // MIPS optimization of the function WebRtcIsacfix_MatrixProduct2. 172 // Bit-exact with the function WebRtcIsacfix_MatrixProduct2C from 173 // entropy_coding.c file. 174 void WebRtcIsacfix_MatrixProduct2MIPS(const int16_t matrix0[], 175 const int32_t matrix1[], 176 int32_t matrix_product[], 177 const int matrix0_index_factor, 178 const int matrix0_index_step) { 179 int j = 0, n = 0; 180 int loop_count = SUBFRAMES; 181 const int16_t* matrix0_ptr; 182 const int32_t* matrix1_ptr; 183 const int16_t* matrix0_start = matrix0; 184 const int matrix0_step = 2 * matrix0_index_step; 185 const int matrix0_step2 = 2 * matrix0_index_factor; 186 int32_t r0, r1, r2, r3, r4, sum32, sum32_2; 187 188 __asm __volatile ( 189 ".set push \n\t" 190 ".set noreorder \n\t" 191 "addu %[j], %[loop_count], $0 \n\t" 192 "addu %[matrix0_start], %[matrix0], $0 \n\t" 193 "1: \n\t" 194 "addu %[matrix1_ptr], %[matrix1], $0 \n\t" 195 "addu %[matrix0_ptr], %[matrix0_start], $0 \n\t" 196 "addu %[n], %[loop_count], $0 \n\t" 197 "mul %[sum32], $0, $0 \n\t" 198 "mul %[sum32_2], $0, $0 \n\t" 199 "2: \n\t" 200 "lw %[r0], 0(%[matrix1_ptr]) \n\t" 201 "lw %[r1], 4(%[matrix1_ptr]) \n\t" 202 "lh %[r2], 0(%[matrix0_ptr]) \n\t" 203 "andi %[r3], %[r0], 0xffff \n\t" 204 "sra %[r3], %[r3], 1 \n\t" 205 "mul %[r3], %[r3], %[r2] \n\t" 206 "andi %[r4], %[r1], 0xffff \n\t" 207 "sra %[r4], %[r4], 1 \n\t" 208 "mul %[r4], %[r4], %[r2] \n\t" 209 "sra %[r0], %[r0], 16 \n\t" 210 "mul %[r0], %[r0], %[r2] \n\t" 211 "sra %[r1], %[r1], 16 \n\t" 212 "mul %[r1], %[r1], %[r2] \n\t" 213 #if defined(MIPS_DSP_R1_LE) 214 "shra_r.w %[r3], %[r3], 15 \n\t" 215 "shra_r.w %[r4], %[r4], 15 \n\t" 216 #else 217 "addiu %[r3], %[r3], 0x4000 \n\t" 218 "sra %[r3], %[r3], 15 \n\t" 219 "addiu %[r4], %[r4], 0x4000 \n\t" 220 "sra %[r4], %[r4], 15 \n\t" 221 #endif 222 "addiu %[matrix1_ptr], %[matrix1_ptr], 8 \n\t" 223 "addu %[matrix0_ptr], %[matrix0_ptr], %[matrix0_step] \n\t" 224 "addiu %[n], %[n], -1 \n\t" 225 "addu %[sum32], %[sum32], %[r3] \n\t" 226 "addu %[sum32_2], %[sum32_2], %[r4] \n\t" 227 "addu %[sum32], %[sum32], %[r0] \n\t" 228 "bgtz %[n], 2b \n\t" 229 " addu %[sum32_2], %[sum32_2], %[r1] \n\t" 230 "sra %[sum32], %[sum32], 3 \n\t" 231 "sra %[sum32_2], %[sum32_2], 3 \n\t" 232 "addiu %[j], %[j], -1 \n\t" 233 "addu %[matrix0_start], %[matrix0_start], %[matrix0_step2] \n\t" 234 "sw %[sum32], 0(%[matrix_product]) \n\t" 235 "sw %[sum32_2], 4(%[matrix_product]) \n\t" 236 "bgtz %[j], 1b \n\t" 237 " addiu %[matrix_product], %[matrix_product], 8 \n\t" 238 ".set pop \n\t" 239 : [j] "=&r" (j), [matrix0_start] "=&r" (matrix0_start), 240 [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr), 241 [n] "=&r" (n), [sum32] "=&r" (sum32), [sum32_2] "=&r" (sum32_2), 242 [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3), 243 [r4] "=&r" (r4), [matrix_product] "+r" (matrix_product) 244 : [loop_count] "r" (loop_count), [matrix0] "r" (matrix0), 245 [matrix1] "r" (matrix1), [matrix0_step] "r" (matrix0_step), 246 [matrix0_step2] "r" (matrix0_step2) 247 : "hi", "lo", "memory" 248 ); 249 } 250