1 // Copyright 2014 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // MIPS version of rescaling functions 11 // 12 // Author(s): Djordje Pesut (djordje.pesut (at) imgtec.com) 13 14 #include "./dsp.h" 15 16 #if defined(WEBP_USE_MIPS32) 17 18 #include <assert.h> 19 #include "../utils/rescaler.h" 20 21 //------------------------------------------------------------------------------ 22 // Row import 23 24 static void ImportRowShrink(WebPRescaler* const wrk, const uint8_t* src) { 25 const int x_stride = wrk->num_channels; 26 const int x_out_max = wrk->dst_width * wrk->num_channels; 27 const int fx_scale = wrk->fx_scale; 28 const int x_add = wrk->x_add; 29 const int x_sub = wrk->x_sub; 30 const int x_stride1 = x_stride << 2; 31 int channel; 32 assert(!wrk->x_expand); 33 assert(!WebPRescalerInputDone(wrk)); 34 35 for (channel = 0; channel < x_stride; ++channel) { 36 const uint8_t* src1 = src + channel; 37 rescaler_t* frow = wrk->frow + channel; 38 int temp1, temp2, temp3; 39 int base, frac, sum; 40 int accum, accum1; 41 int loop_c = x_out_max - channel; 42 43 __asm__ volatile ( 44 "li %[temp1], 0x8000 \n\t" 45 "li %[temp2], 0x10000 \n\t" 46 "li %[sum], 0 \n\t" 47 "li %[accum], 0 \n\t" 48 "1: \n\t" 49 "addu %[accum], %[accum], %[x_add] \n\t" 50 "li %[base], 0 \n\t" 51 "blez %[accum], 3f \n\t" 52 "2: \n\t" 53 "lbu %[base], 0(%[src1]) \n\t" 54 "subu %[accum], %[accum], %[x_sub] \n\t" 55 "addu %[src1], %[src1], %[x_stride] \n\t" 56 "addu %[sum], %[sum], %[base] \n\t" 57 "bgtz %[accum], 2b \n\t" 58 "3: \n\t" 59 "negu %[accum1], %[accum] \n\t" 60 "mul %[frac], %[base], %[accum1] \n\t" 61 "mul %[temp3], %[sum], %[x_sub] \n\t" 62 "subu %[loop_c], %[loop_c], %[x_stride] \n\t" 63 "mult %[temp1], %[temp2] \n\t" 64 "maddu %[frac], %[fx_scale] \n\t" 65 "mfhi %[sum] \n\t" 66 "subu %[temp3], %[temp3], %[frac] \n\t" 67 "sw %[temp3], 0(%[frow]) \n\t" 68 "addu %[frow], %[frow], %[x_stride1] \n\t" 69 "bgtz %[loop_c], 1b \n\t" 70 : [accum]"=&r"(accum), [src1]"+r"(src1), [temp3]"=&r"(temp3), 71 [sum]"=&r"(sum), [base]"=&r"(base), [frac]"=&r"(frac), 72 [frow]"+r"(frow), [accum1]"=&r"(accum1), 73 [temp2]"=&r"(temp2), [temp1]"=&r"(temp1) 74 : [x_stride]"r"(x_stride), [fx_scale]"r"(fx_scale), 75 [x_sub]"r"(x_sub), [x_add]"r"(x_add), 76 [loop_c]"r"(loop_c), [x_stride1]"r"(x_stride1) 77 : "memory", "hi", "lo" 78 ); 79 assert(accum == 0); 80 } 81 } 82 83 static void ImportRowExpand(WebPRescaler* const wrk, const uint8_t* src) { 84 const int x_stride = wrk->num_channels; 85 const int x_out_max = wrk->dst_width * wrk->num_channels; 86 const int x_add = wrk->x_add; 87 const int x_sub = wrk->x_sub; 88 const int src_width = wrk->src_width; 89 const int x_stride1 = x_stride << 2; 90 int channel; 91 assert(wrk->x_expand); 92 assert(!WebPRescalerInputDone(wrk)); 93 94 for (channel = 0; channel < x_stride; ++channel) { 95 const uint8_t* src1 = src + channel; 96 rescaler_t* frow = wrk->frow + channel; 97 int temp1, temp2, temp3, temp4; 98 int frac; 99 int accum; 100 int x_out = channel; 101 102 __asm__ volatile ( 103 "addiu %[temp3], %[src_width], -1 \n\t" 104 "lbu %[temp2], 0(%[src1]) \n\t" 105 "addu %[src1], %[src1], %[x_stride] \n\t" 106 "bgtz %[temp3], 0f \n\t" 107 "addiu %[temp1], %[temp2], 0 \n\t" 108 "b 3f \n\t" 109 "0: \n\t" 110 "lbu %[temp1], 0(%[src1]) \n\t" 111 "3: \n\t" 112 "addiu %[accum], %[x_add], 0 \n\t" 113 "1: \n\t" 114 "subu %[temp3], %[temp2], %[temp1] \n\t" 115 "mul %[temp3], %[temp3], %[accum] \n\t" 116 "mul %[temp4], %[temp1], %[x_add] \n\t" 117 "addu %[temp3], %[temp4], %[temp3] \n\t" 118 "sw %[temp3], 0(%[frow]) \n\t" 119 "addu %[frow], %[frow], %[x_stride1] \n\t" 120 "addu %[x_out], %[x_out], %[x_stride] \n\t" 121 "subu %[temp3], %[x_out], %[x_out_max] \n\t" 122 "bgez %[temp3], 2f \n\t" 123 "subu %[accum], %[accum], %[x_sub] \n\t" 124 "bgez %[accum], 4f \n\t" 125 "addiu %[temp2], %[temp1], 0 \n\t" 126 "addu %[src1], %[src1], %[x_stride] \n\t" 127 "lbu %[temp1], 0(%[src1]) \n\t" 128 "addu %[accum], %[accum], %[x_add] \n\t" 129 "4: \n\t" 130 "b 1b \n\t" 131 "2: \n\t" 132 : [src1]"+r"(src1), [accum]"=&r"(accum), [temp1]"=&r"(temp1), 133 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), 134 [x_out]"+r"(x_out), [frac]"=&r"(frac), [frow]"+r"(frow) 135 : [x_stride]"r"(x_stride), [x_add]"r"(x_add), [x_sub]"r"(x_sub), 136 [x_stride1]"r"(x_stride1), [src_width]"r"(src_width), 137 [x_out_max]"r"(x_out_max) 138 : "memory", "hi", "lo" 139 ); 140 assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0); 141 } 142 } 143 144 //------------------------------------------------------------------------------ 145 // Row export 146 147 static void ExportRowExpand(WebPRescaler* const wrk) { 148 uint8_t* dst = wrk->dst; 149 rescaler_t* irow = wrk->irow; 150 const int x_out_max = wrk->dst_width * wrk->num_channels; 151 const rescaler_t* frow = wrk->frow; 152 int temp0, temp1, temp3, temp4, temp5, loop_end; 153 const int temp2 = (int)wrk->fy_scale; 154 const int temp6 = x_out_max << 2; 155 assert(!WebPRescalerOutputDone(wrk)); 156 assert(wrk->y_accum <= 0); 157 assert(wrk->y_expand); 158 assert(wrk->y_sub != 0); 159 if (wrk->y_accum == 0) { 160 __asm__ volatile ( 161 "li %[temp3], 0x10000 \n\t" 162 "li %[temp4], 0x8000 \n\t" 163 "addu %[loop_end], %[frow], %[temp6] \n\t" 164 "1: \n\t" 165 "lw %[temp0], 0(%[frow]) \n\t" 166 "addiu %[dst], %[dst], 1 \n\t" 167 "addiu %[frow], %[frow], 4 \n\t" 168 "mult %[temp3], %[temp4] \n\t" 169 "maddu %[temp0], %[temp2] \n\t" 170 "mfhi %[temp5] \n\t" 171 "sb %[temp5], -1(%[dst]) \n\t" 172 "bne %[frow], %[loop_end], 1b \n\t" 173 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), 174 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow), 175 [dst]"+r"(dst), [loop_end]"=&r"(loop_end) 176 : [temp2]"r"(temp2), [temp6]"r"(temp6) 177 : "memory", "hi", "lo" 178 ); 179 } else { 180 const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); 181 const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B); 182 __asm__ volatile ( 183 "li %[temp3], 0x10000 \n\t" 184 "li %[temp4], 0x8000 \n\t" 185 "addu %[loop_end], %[frow], %[temp6] \n\t" 186 "1: \n\t" 187 "lw %[temp0], 0(%[frow]) \n\t" 188 "lw %[temp1], 0(%[irow]) \n\t" 189 "addiu %[dst], %[dst], 1 \n\t" 190 "mult %[temp3], %[temp4] \n\t" 191 "maddu %[A], %[temp0] \n\t" 192 "maddu %[B], %[temp1] \n\t" 193 "addiu %[frow], %[frow], 4 \n\t" 194 "addiu %[irow], %[irow], 4 \n\t" 195 "mfhi %[temp5] \n\t" 196 "mult %[temp3], %[temp4] \n\t" 197 "maddu %[temp5], %[temp2] \n\t" 198 "mfhi %[temp5] \n\t" 199 "sb %[temp5], -1(%[dst]) \n\t" 200 "bne %[frow], %[loop_end], 1b \n\t" 201 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), 202 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow), 203 [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end) 204 : [temp2]"r"(temp2), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B) 205 : "memory", "hi", "lo" 206 ); 207 } 208 } 209 210 static void ExportRowShrink(WebPRescaler* const wrk) { 211 const int x_out_max = wrk->dst_width * wrk->num_channels; 212 uint8_t* dst = wrk->dst; 213 rescaler_t* irow = wrk->irow; 214 const rescaler_t* frow = wrk->frow; 215 const int yscale = wrk->fy_scale * (-wrk->y_accum); 216 int temp0, temp1, temp3, temp4, temp5, loop_end; 217 const int temp2 = (int)wrk->fxy_scale; 218 const int temp6 = x_out_max << 2; 219 220 assert(!WebPRescalerOutputDone(wrk)); 221 assert(wrk->y_accum <= 0); 222 assert(!wrk->y_expand); 223 assert(wrk->fxy_scale != 0); 224 if (yscale) { 225 __asm__ volatile ( 226 "li %[temp3], 0x10000 \n\t" 227 "li %[temp4], 0x8000 \n\t" 228 "addu %[loop_end], %[frow], %[temp6] \n\t" 229 "1: \n\t" 230 "lw %[temp0], 0(%[frow]) \n\t" 231 "mult %[temp3], %[temp4] \n\t" 232 "addiu %[frow], %[frow], 4 \n\t" 233 "maddu %[temp0], %[yscale] \n\t" 234 "mfhi %[temp1] \n\t" 235 "lw %[temp0], 0(%[irow]) \n\t" 236 "addiu %[dst], %[dst], 1 \n\t" 237 "addiu %[irow], %[irow], 4 \n\t" 238 "subu %[temp0], %[temp0], %[temp1] \n\t" 239 "mult %[temp3], %[temp4] \n\t" 240 "maddu %[temp0], %[temp2] \n\t" 241 "mfhi %[temp5] \n\t" 242 "sw %[temp1], -4(%[irow]) \n\t" 243 "sb %[temp5], -1(%[dst]) \n\t" 244 "bne %[frow], %[loop_end], 1b \n\t" 245 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), 246 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow), 247 [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end) 248 : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp6]"r"(temp6) 249 : "memory", "hi", "lo" 250 ); 251 } else { 252 __asm__ volatile ( 253 "li %[temp3], 0x10000 \n\t" 254 "li %[temp4], 0x8000 \n\t" 255 "addu %[loop_end], %[irow], %[temp6] \n\t" 256 "1: \n\t" 257 "lw %[temp0], 0(%[irow]) \n\t" 258 "addiu %[dst], %[dst], 1 \n\t" 259 "addiu %[irow], %[irow], 4 \n\t" 260 "mult %[temp3], %[temp4] \n\t" 261 "maddu %[temp0], %[temp2] \n\t" 262 "mfhi %[temp5] \n\t" 263 "sw $zero, -4(%[irow]) \n\t" 264 "sb %[temp5], -1(%[dst]) \n\t" 265 "bne %[irow], %[loop_end], 1b \n\t" 266 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), 267 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow), 268 [dst]"+r"(dst), [loop_end]"=&r"(loop_end) 269 : [temp2]"r"(temp2), [temp6]"r"(temp6) 270 : "memory", "hi", "lo" 271 ); 272 } 273 } 274 275 //------------------------------------------------------------------------------ 276 // Entry point 277 278 extern void WebPRescalerDspInitMIPS32(void); 279 280 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPS32(void) { 281 WebPRescalerImportRowExpand = ImportRowExpand; 282 WebPRescalerImportRowShrink = ImportRowShrink; 283 WebPRescalerExportRowExpand = ExportRowExpand; 284 WebPRescalerExportRowShrink = ExportRowShrink; 285 } 286 287 #else // !WEBP_USE_MIPS32 288 289 WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPS32) 290 291 #endif // WEBP_USE_MIPS32 292