1 // Copyright 2014 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // MIPS version of rescaling functions 11 // 12 // Author(s): Djordje Pesut (djordje.pesut (at) imgtec.com) 13 14 #include "src/dsp/dsp.h" 15 16 #if defined(WEBP_USE_MIPS32) && !defined(WEBP_REDUCE_SIZE) 17 18 #include <assert.h> 19 #include "src/utils/rescaler_utils.h" 20 21 //------------------------------------------------------------------------------ 22 // Row import 23 24 static void ImportRowShrink_MIPS32(WebPRescaler* const wrk, 25 const uint8_t* src) { 26 const int x_stride = wrk->num_channels; 27 const int x_out_max = wrk->dst_width * wrk->num_channels; 28 const int fx_scale = wrk->fx_scale; 29 const int x_add = wrk->x_add; 30 const int x_sub = wrk->x_sub; 31 const int x_stride1 = x_stride << 2; 32 int channel; 33 assert(!wrk->x_expand); 34 assert(!WebPRescalerInputDone(wrk)); 35 36 for (channel = 0; channel < x_stride; ++channel) { 37 const uint8_t* src1 = src + channel; 38 rescaler_t* frow = wrk->frow + channel; 39 int temp1, temp2, temp3; 40 int base, frac, sum; 41 int accum, accum1; 42 int loop_c = x_out_max - channel; 43 44 __asm__ volatile ( 45 "li %[temp1], 0x8000 \n\t" 46 "li %[temp2], 0x10000 \n\t" 47 "li %[sum], 0 \n\t" 48 "li %[accum], 0 \n\t" 49 "1: \n\t" 50 "addu %[accum], %[accum], %[x_add] \n\t" 51 "li %[base], 0 \n\t" 52 "blez %[accum], 3f \n\t" 53 "2: \n\t" 54 "lbu %[base], 0(%[src1]) \n\t" 55 "subu %[accum], %[accum], %[x_sub] \n\t" 56 "addu %[src1], %[src1], %[x_stride] \n\t" 57 "addu %[sum], %[sum], %[base] \n\t" 58 "bgtz %[accum], 2b \n\t" 59 "3: \n\t" 60 "negu %[accum1], %[accum] \n\t" 61 "mul %[frac], %[base], %[accum1] \n\t" 62 "mul %[temp3], %[sum], %[x_sub] \n\t" 63 "subu %[loop_c], %[loop_c], %[x_stride] \n\t" 64 "mult %[temp1], %[temp2] \n\t" 65 "maddu %[frac], %[fx_scale] \n\t" 66 "mfhi %[sum] \n\t" 67 "subu %[temp3], %[temp3], %[frac] \n\t" 68 "sw %[temp3], 0(%[frow]) \n\t" 69 "addu %[frow], %[frow], %[x_stride1] \n\t" 70 "bgtz %[loop_c], 1b \n\t" 71 : [accum]"=&r"(accum), [src1]"+r"(src1), [temp3]"=&r"(temp3), 72 [sum]"=&r"(sum), [base]"=&r"(base), [frac]"=&r"(frac), 73 [frow]"+r"(frow), [accum1]"=&r"(accum1), 74 [temp2]"=&r"(temp2), [temp1]"=&r"(temp1) 75 : [x_stride]"r"(x_stride), [fx_scale]"r"(fx_scale), 76 [x_sub]"r"(x_sub), [x_add]"r"(x_add), 77 [loop_c]"r"(loop_c), [x_stride1]"r"(x_stride1) 78 : "memory", "hi", "lo" 79 ); 80 assert(accum == 0); 81 } 82 } 83 84 static void ImportRowExpand_MIPS32(WebPRescaler* const wrk, 85 const uint8_t* src) { 86 const int x_stride = wrk->num_channels; 87 const int x_out_max = wrk->dst_width * wrk->num_channels; 88 const int x_add = wrk->x_add; 89 const int x_sub = wrk->x_sub; 90 const int src_width = wrk->src_width; 91 const int x_stride1 = x_stride << 2; 92 int channel; 93 assert(wrk->x_expand); 94 assert(!WebPRescalerInputDone(wrk)); 95 96 for (channel = 0; channel < x_stride; ++channel) { 97 const uint8_t* src1 = src + channel; 98 rescaler_t* frow = wrk->frow + channel; 99 int temp1, temp2, temp3, temp4; 100 int frac; 101 int accum; 102 int x_out = channel; 103 104 __asm__ volatile ( 105 "addiu %[temp3], %[src_width], -1 \n\t" 106 "lbu %[temp2], 0(%[src1]) \n\t" 107 "addu %[src1], %[src1], %[x_stride] \n\t" 108 "bgtz %[temp3], 0f \n\t" 109 "addiu %[temp1], %[temp2], 0 \n\t" 110 "b 3f \n\t" 111 "0: \n\t" 112 "lbu %[temp1], 0(%[src1]) \n\t" 113 "3: \n\t" 114 "addiu %[accum], %[x_add], 0 \n\t" 115 "1: \n\t" 116 "subu %[temp3], %[temp2], %[temp1] \n\t" 117 "mul %[temp3], %[temp3], %[accum] \n\t" 118 "mul %[temp4], %[temp1], %[x_add] \n\t" 119 "addu %[temp3], %[temp4], %[temp3] \n\t" 120 "sw %[temp3], 0(%[frow]) \n\t" 121 "addu %[frow], %[frow], %[x_stride1] \n\t" 122 "addu %[x_out], %[x_out], %[x_stride] \n\t" 123 "subu %[temp3], %[x_out], %[x_out_max] \n\t" 124 "bgez %[temp3], 2f \n\t" 125 "subu %[accum], %[accum], %[x_sub] \n\t" 126 "bgez %[accum], 4f \n\t" 127 "addiu %[temp2], %[temp1], 0 \n\t" 128 "addu %[src1], %[src1], %[x_stride] \n\t" 129 "lbu %[temp1], 0(%[src1]) \n\t" 130 "addu %[accum], %[accum], %[x_add] \n\t" 131 "4: \n\t" 132 "b 1b \n\t" 133 "2: \n\t" 134 : [src1]"+r"(src1), [accum]"=&r"(accum), [temp1]"=&r"(temp1), 135 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), 136 [x_out]"+r"(x_out), [frac]"=&r"(frac), [frow]"+r"(frow) 137 : [x_stride]"r"(x_stride), [x_add]"r"(x_add), [x_sub]"r"(x_sub), 138 [x_stride1]"r"(x_stride1), [src_width]"r"(src_width), 139 [x_out_max]"r"(x_out_max) 140 : "memory", "hi", "lo" 141 ); 142 assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0); 143 } 144 } 145 146 //------------------------------------------------------------------------------ 147 // Row export 148 149 static void ExportRowExpand_MIPS32(WebPRescaler* const wrk) { 150 uint8_t* dst = wrk->dst; 151 rescaler_t* irow = wrk->irow; 152 const int x_out_max = wrk->dst_width * wrk->num_channels; 153 const rescaler_t* frow = wrk->frow; 154 int temp0, temp1, temp3, temp4, temp5, loop_end; 155 const int temp2 = (int)wrk->fy_scale; 156 const int temp6 = x_out_max << 2; 157 assert(!WebPRescalerOutputDone(wrk)); 158 assert(wrk->y_accum <= 0); 159 assert(wrk->y_expand); 160 assert(wrk->y_sub != 0); 161 if (wrk->y_accum == 0) { 162 __asm__ volatile ( 163 "li %[temp3], 0x10000 \n\t" 164 "li %[temp4], 0x8000 \n\t" 165 "addu %[loop_end], %[frow], %[temp6] \n\t" 166 "1: \n\t" 167 "lw %[temp0], 0(%[frow]) \n\t" 168 "addiu %[dst], %[dst], 1 \n\t" 169 "addiu %[frow], %[frow], 4 \n\t" 170 "mult %[temp3], %[temp4] \n\t" 171 "maddu %[temp0], %[temp2] \n\t" 172 "mfhi %[temp5] \n\t" 173 "sb %[temp5], -1(%[dst]) \n\t" 174 "bne %[frow], %[loop_end], 1b \n\t" 175 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), 176 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow), 177 [dst]"+r"(dst), [loop_end]"=&r"(loop_end) 178 : [temp2]"r"(temp2), [temp6]"r"(temp6) 179 : "memory", "hi", "lo" 180 ); 181 } else { 182 const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); 183 const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B); 184 __asm__ volatile ( 185 "li %[temp3], 0x10000 \n\t" 186 "li %[temp4], 0x8000 \n\t" 187 "addu %[loop_end], %[frow], %[temp6] \n\t" 188 "1: \n\t" 189 "lw %[temp0], 0(%[frow]) \n\t" 190 "lw %[temp1], 0(%[irow]) \n\t" 191 "addiu %[dst], %[dst], 1 \n\t" 192 "mult %[temp3], %[temp4] \n\t" 193 "maddu %[A], %[temp0] \n\t" 194 "maddu %[B], %[temp1] \n\t" 195 "addiu %[frow], %[frow], 4 \n\t" 196 "addiu %[irow], %[irow], 4 \n\t" 197 "mfhi %[temp5] \n\t" 198 "mult %[temp3], %[temp4] \n\t" 199 "maddu %[temp5], %[temp2] \n\t" 200 "mfhi %[temp5] \n\t" 201 "sb %[temp5], -1(%[dst]) \n\t" 202 "bne %[frow], %[loop_end], 1b \n\t" 203 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), 204 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow), 205 [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end) 206 : [temp2]"r"(temp2), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B) 207 : "memory", "hi", "lo" 208 ); 209 } 210 } 211 212 #if 0 // disabled for now. TODO(skal): make match the C-code 213 static void ExportRowShrink_MIPS32(WebPRescaler* const wrk) { 214 const int x_out_max = wrk->dst_width * wrk->num_channels; 215 uint8_t* dst = wrk->dst; 216 rescaler_t* irow = wrk->irow; 217 const rescaler_t* frow = wrk->frow; 218 const int yscale = wrk->fy_scale * (-wrk->y_accum); 219 int temp0, temp1, temp3, temp4, temp5, loop_end; 220 const int temp2 = (int)wrk->fxy_scale; 221 const int temp6 = x_out_max << 2; 222 223 assert(!WebPRescalerOutputDone(wrk)); 224 assert(wrk->y_accum <= 0); 225 assert(!wrk->y_expand); 226 assert(wrk->fxy_scale != 0); 227 if (yscale) { 228 __asm__ volatile ( 229 "li %[temp3], 0x10000 \n\t" 230 "li %[temp4], 0x8000 \n\t" 231 "addu %[loop_end], %[frow], %[temp6] \n\t" 232 "1: \n\t" 233 "lw %[temp0], 0(%[frow]) \n\t" 234 "mult %[temp3], %[temp4] \n\t" 235 "addiu %[frow], %[frow], 4 \n\t" 236 "maddu %[temp0], %[yscale] \n\t" 237 "mfhi %[temp1] \n\t" 238 "lw %[temp0], 0(%[irow]) \n\t" 239 "addiu %[dst], %[dst], 1 \n\t" 240 "addiu %[irow], %[irow], 4 \n\t" 241 "subu %[temp0], %[temp0], %[temp1] \n\t" 242 "mult %[temp3], %[temp4] \n\t" 243 "maddu %[temp0], %[temp2] \n\t" 244 "mfhi %[temp5] \n\t" 245 "sw %[temp1], -4(%[irow]) \n\t" 246 "sb %[temp5], -1(%[dst]) \n\t" 247 "bne %[frow], %[loop_end], 1b \n\t" 248 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), 249 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow), 250 [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end) 251 : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp6]"r"(temp6) 252 : "memory", "hi", "lo" 253 ); 254 } else { 255 __asm__ volatile ( 256 "li %[temp3], 0x10000 \n\t" 257 "li %[temp4], 0x8000 \n\t" 258 "addu %[loop_end], %[irow], %[temp6] \n\t" 259 "1: \n\t" 260 "lw %[temp0], 0(%[irow]) \n\t" 261 "addiu %[dst], %[dst], 1 \n\t" 262 "addiu %[irow], %[irow], 4 \n\t" 263 "mult %[temp3], %[temp4] \n\t" 264 "maddu %[temp0], %[temp2] \n\t" 265 "mfhi %[temp5] \n\t" 266 "sw $zero, -4(%[irow]) \n\t" 267 "sb %[temp5], -1(%[dst]) \n\t" 268 "bne %[irow], %[loop_end], 1b \n\t" 269 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), 270 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow), 271 [dst]"+r"(dst), [loop_end]"=&r"(loop_end) 272 : [temp2]"r"(temp2), [temp6]"r"(temp6) 273 : "memory", "hi", "lo" 274 ); 275 } 276 } 277 #endif // 0 278 279 //------------------------------------------------------------------------------ 280 // Entry point 281 282 extern void WebPRescalerDspInitMIPS32(void); 283 284 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPS32(void) { 285 WebPRescalerImportRowExpand = ImportRowExpand_MIPS32; 286 WebPRescalerImportRowShrink = ImportRowShrink_MIPS32; 287 WebPRescalerExportRowExpand = ExportRowExpand_MIPS32; 288 // WebPRescalerExportRowShrink = ExportRowShrink_MIPS32; 289 } 290 291 #else // !WEBP_USE_MIPS32 292 293 WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPS32) 294 295 #endif // WEBP_USE_MIPS32 296