/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkColorData.h"
#include "SkCpu.h"
#include "SkJumper.h"
#include "SkOnce.h"
#include "SkRasterPipeline.h"
#include "SkTemplates.h"

#if !defined(SK_JUMPER_USE_ASSEMBLY)
    // We'll use __has_feature(memory_sanitizer) to detect MSAN.
    // SkJumper_generated.S is not compiled with MSAN, so MSAN would yell really loud.
    #if !defined(__has_feature)
        #define __has_feature(x) 0
    #endif

    #if 0 || __has_feature(memory_sanitizer)
        #define SK_JUMPER_USE_ASSEMBLY 0
    #else
        #define SK_JUMPER_USE_ASSEMBLY 1
    #endif
#endif

#define M(st) +1
static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M);
#undef M

#ifndef SK_JUMPER_DISABLE_8BIT
    // Intentionally commented out; optional logging for local debugging.
    #if 0 && SK_JUMPER_USE_ASSEMBLY && (defined(__x86_64__) || defined(_M_X64))
        #include <atomic>

        #define M(st) #st,
        static const char* kStageNames[] = { SK_RASTER_PIPELINE_STAGES(M) };
        #undef M

        static std::atomic<int> gMissingStageCounters[kNumStages];

        static void log_missing(SkRasterPipeline::StockStage st) {
            static SkOnce once;
            once([] { atexit([] {
                int total = 0;
                for (int i = 0; i < kNumStages; i++) {
                    if (int count = gMissingStageCounters[i].load()) {
                        SkDebugf("%7d\t%s\n", count, kStageNames[i]);
                        total += count;
                    }
                }
                SkDebugf("%7d total\n", total);
            }); });

            gMissingStageCounters[st]++;
        }
    #else
        static void log_missing(SkRasterPipeline::StockStage) {}
    #endif
#endif

// We can't express the real types of most stage functions portably, so we use a stand-in.
// We'll only ever call start_pipeline(), which then chains into the rest.
using StageFn         = void(void);
using StartPipelineFn = void(size_t,size_t,size_t,size_t, void**);

// Some platforms expect C "name" to map to asm "_name", others to "name".
#if defined(__APPLE__)
    #define ASM(name, suffix)  sk_##name##_##suffix
#else
    #define ASM(name, suffix) _sk_##name##_##suffix
#endif

extern "C" {

#if !SK_JUMPER_USE_ASSEMBLY
    // We'll just run baseline code.

#elif defined(__x86_64__) || defined(_M_X64)
    StartPipelineFn ASM(start_pipeline,       skx),
                    ASM(start_pipeline,       hsw),
                    ASM(start_pipeline,       avx),
                    ASM(start_pipeline,     sse41),
                    ASM(start_pipeline,      sse2),
                    ASM(start_pipeline,  hsw_lowp),
                    ASM(start_pipeline,sse41_lowp),
                    ASM(start_pipeline, sse2_lowp);

    StageFn ASM(just_return,       skx),
            ASM(just_return,       hsw),
            ASM(just_return,       avx),
            ASM(just_return,     sse41),
            ASM(just_return,      sse2),
            ASM(just_return,  hsw_lowp),
            ASM(just_return,sse41_lowp),
            ASM(just_return, sse2_lowp);

    #define M(st) StageFn ASM(st,       skx),  \
                          ASM(st,       hsw),  \
                          ASM(st,       avx),  \
                          ASM(st,     sse41),  \
                          ASM(st,      sse2),  \
                          ASM(st,  hsw_lowp),  \
                          ASM(st,sse41_lowp),  \
                          ASM(st, sse2_lowp);
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M

#elif defined(__i386__) || defined(_M_IX86)
    StartPipelineFn ASM(start_pipeline,sse2),
                    ASM(start_pipeline,sse2_lowp);
    StageFn ASM(just_return,sse2),
            ASM(just_return,sse2_lowp);
    #define M(st) StageFn ASM(st,sse2),  \
                          ASM(st,sse2_lowp);
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M

#endif
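
    // An illustrative, hand-written view (not generated code) of what the x86-64
    // block above declares for a single stage; the stage name is just an example:
    //
    //     StageFn _sk_seed_shader_skx,        _sk_seed_shader_hsw,
    //             _sk_seed_shader_avx,        _sk_seed_shader_sse41,
    //             _sk_seed_shader_sse2,       _sk_seed_shader_hsw_lowp,
    //             _sk_seed_shader_sse41_lowp, _sk_seed_shader_sse2_lowp;
    //
    // On Apple targets ASM() drops the leading underscore from the C name, because
    // the toolchain adds it back when turning C names into asm symbols.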

    // Baseline code compiled as a normal part of Skia.
    StartPipelineFn sk_start_pipeline;
    StageFn sk_just_return;
    #define M(st) StageFn sk_##st;
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M

#if defined(JUMPER_HAS_NEON_LOWP)
    StartPipelineFn sk_start_pipeline_lowp;
    StageFn sk_just_return_lowp;
    #define M(st) StageFn sk_##st##_lowp;
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M
#endif

}

#if SK_JUMPER_USE_ASSEMBLY
    #if defined(__x86_64__) || defined(_M_X64)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* hsw_lowp();

        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse41_lowp();

        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse2_lowp();

        #define LOWP(st)                                                        \
            template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() {   \
                return ASM(st,hsw_lowp);                                        \
            }                                                                   \
            template <> constexpr StageFn* sse41_lowp<SkRasterPipeline::st>() { \
                return ASM(st,sse41_lowp);                                      \
            }                                                                   \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return ASM(st,sse2_lowp);                                       \
            }
        #define NOPE(st)                                                        \
            template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() {   \
                return nullptr;                                                 \
            }                                                                   \
            template <> constexpr StageFn* sse41_lowp<SkRasterPipeline::st>() { \
                return nullptr;                                                 \
            }                                                                   \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return nullptr;                                                 \
            }

    #elif defined(__i386__) || defined(_M_IX86)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse2_lowp();

        #define LOWP(st)                                                        \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return ASM(st,sse2_lowp);                                       \
            }
        #define NOPE(st)                                                        \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return nullptr;                                                 \
            }

    #elif defined(JUMPER_HAS_NEON_LOWP)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* neon_lowp();

        #define LOWP(st)                                                        \
            template <> constexpr StageFn* neon_lowp<SkRasterPipeline::st>() {  \
                return sk_##st##_lowp;                                          \
            }
        #define NOPE(st)                                                        \
            template <> constexpr StageFn* neon_lowp<SkRasterPipeline::st>() {  \
                return nullptr;                                                 \
            }

    #else
        #define LOWP(st)
        #define NOPE(st)

    #endif

    #define TODO(st) NOPE(st)  // stages that should be implemented in lowp, but aren't.
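
    // A rough expansion sketch (illustrative only, assuming the x86-64 branch above)
    // of one line from the table below: LOWP(premul) defines
    //
    //     template <> constexpr StageFn* hsw_lowp  <SkRasterPipeline::premul>() { return ASM(premul,  hsw_lowp); }
    //     template <> constexpr StageFn* sse41_lowp<SkRasterPipeline::premul>() { return ASM(premul,sse41_lowp); }
    //     template <> constexpr StageFn* sse2_lowp <SkRasterPipeline::premul>() { return ASM(premul, sse2_lowp); }
    //
    // while NOPE(premul) would make each of those return nullptr instead, so the
    // table below doubles as a summary of which stages have lowp implementations.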

    NOPE(callback)
    LOWP(move_src_dst) LOWP(move_dst_src)
    NOPE(clamp_0) NOPE(clamp_1) LOWP(clamp_a) LOWP(clamp_a_dst)
    NOPE(unpremul) LOWP(premul) LOWP(premul_dst)
    LOWP(force_opaque) LOWP(force_opaque_dst)
    LOWP(set_rgb) LOWP(swap_rb) LOWP(invert)
    NOPE(from_srgb) NOPE(from_srgb_dst) NOPE(to_srgb)
    LOWP(black_color) LOWP(white_color) LOWP(uniform_color)
    LOWP(seed_shader) NOPE(dither)
    LOWP(load_a8)   LOWP(load_a8_dst)   LOWP(store_a8)   LOWP(gather_a8)
    LOWP(load_g8)   LOWP(load_g8_dst)                    LOWP(gather_g8)
    LOWP(load_565)  LOWP(load_565_dst)  LOWP(store_565)  LOWP(gather_565)
    LOWP(load_4444) LOWP(load_4444_dst) LOWP(store_4444) LOWP(gather_4444)
    NOPE(load_f16)  NOPE(load_f16_dst)  NOPE(store_f16)  NOPE(gather_f16)
    NOPE(load_f32)  NOPE(load_f32_dst)  NOPE(store_f32)
    LOWP(load_8888) LOWP(load_8888_dst) LOWP(store_8888) LOWP(gather_8888)
    LOWP(load_bgra) LOWP(load_bgra_dst) LOWP(store_bgra) LOWP(gather_bgra)
    NOPE(load_1010102) NOPE(load_1010102_dst) NOPE(store_1010102) NOPE(gather_1010102)
    TODO(bilerp_clamp_8888)
    TODO(load_u16_be) TODO(load_rgb_u16_be) TODO(store_u16_be)
    NOPE(load_tables_u16_be) NOPE(load_tables_rgb_u16_be) NOPE(load_tables)
    NOPE(load_rgba) NOPE(store_rgba)
    LOWP(scale_u8) LOWP(scale_565) LOWP(scale_1_float)
    LOWP( lerp_u8) LOWP( lerp_565) LOWP( lerp_1_float)
    LOWP(dstatop) LOWP(dstin) LOWP(dstout) LOWP(dstover)
    LOWP(srcatop) LOWP(srcin) LOWP(srcout) LOWP(srcover)
    LOWP(clear) LOWP(modulate) LOWP(multiply) LOWP(plus_) LOWP(screen) LOWP(xor_)
    NOPE(colorburn) NOPE(colordodge) LOWP(darken) LOWP(difference)
    LOWP(exclusion) LOWP(hardlight) LOWP(lighten) LOWP(overlay) NOPE(softlight)
    NOPE(hue) NOPE(saturation) NOPE(color) NOPE(luminosity)
    LOWP(srcover_rgba_8888) LOWP(srcover_bgra_8888)
    LOWP(luminance_to_alpha)
    LOWP(matrix_translate) LOWP(matrix_scale_translate)
    LOWP(matrix_2x3) NOPE(matrix_3x4) TODO(matrix_4x5) TODO(matrix_4x3)
    LOWP(matrix_perspective)
    NOPE(parametric_r) NOPE(parametric_g) NOPE(parametric_b)
    NOPE(parametric_a) NOPE(gamma) NOPE(gamma_dst)
    NOPE(table_r) NOPE(table_g) NOPE(table_b) NOPE(table_a)
    NOPE(lab_to_xyz)
    TODO(mirror_x)   TODO(repeat_x)
    TODO(mirror_y)   TODO(repeat_y)
    TODO(bilinear_nx) TODO(bilinear_px) TODO(bilinear_ny) TODO(bilinear_py)
    TODO(bicubic_n3x) TODO(bicubic_n1x) TODO(bicubic_p1x) TODO(bicubic_p3x)
    TODO(bicubic_n3y) TODO(bicubic_n1y) TODO(bicubic_p1y) TODO(bicubic_p3y)
    TODO(save_xy) TODO(accumulate)
    LOWP(clamp_x_1) LOWP(mirror_x_1) LOWP(repeat_x_1)
    LOWP(evenly_spaced_gradient)
    LOWP(gradient)
    LOWP(evenly_spaced_2_stop_gradient)
    LOWP(xy_to_unit_angle)
    LOWP(xy_to_radius)
    TODO(negate_x)
    TODO(xy_to_2pt_conical_strip)
    TODO(xy_to_2pt_conical_focal_on_circle)
    TODO(xy_to_2pt_conical_well_behaved)
    TODO(xy_to_2pt_conical_greater)
    TODO(xy_to_2pt_conical_smaller)
    TODO(alter_2pt_conical_compensate_focal)
    TODO(alter_2pt_conical_unswap)
    TODO(mask_2pt_conical_nan) TODO(mask_2pt_conical_degenerates) TODO(apply_vector_mask)
    TODO(byte_tables) TODO(byte_tables_rgb)
    NOPE(rgb_to_hsl) NOPE(hsl_to_rgb)
    NOPE(clut_3D) NOPE(clut_4D)
    NOPE(gauss_a_to_rgba)

    #undef LOWP
    #undef TODO
    #undef NOPE
#endif

// Engines comprise everything we need to run SkRasterPipelines.
struct SkJumper_Engine {
    StageFn*         stages[kNumStages];
    StartPipelineFn* start_pipeline;
    StageFn*         just_return;
};

// We'll default to this baseline engine, but try to choose a better one at runtime.
static const SkJumper_Engine kBaseline = {
#define M(stage) sk_##stage,
    { SK_RASTER_PIPELINE_STAGES(M) },
#undef M
    sk_start_pipeline,
    sk_just_return,
};
static SkJumper_Engine gEngine = kBaseline;
static SkOnce gChooseEngineOnce;

static SkJumper_Engine choose_engine() {
#if !SK_JUMPER_USE_ASSEMBLY
    // We'll just run baseline code.

#elif defined(__x86_64__) || defined(_M_X64)
    #if !defined(_MSC_VER)   // No _skx stages for Windows yet.
        if (1 && SkCpu::Supports(SkCpu::SKX)) {
            return {
            #define M(stage) ASM(stage, skx),
                { SK_RASTER_PIPELINE_STAGES(M) },
                M(start_pipeline)
                M(just_return)
            #undef M
            };
        }
    #endif
    if (1 && SkCpu::Supports(SkCpu::HSW)) {
        return {
        #define M(stage) ASM(stage, hsw),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }
    if (1 && SkCpu::Supports(SkCpu::AVX)) {
        return {
        #define M(stage) ASM(stage, avx),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }
    if (1 && SkCpu::Supports(SkCpu::SSE41)) {
        return {
        #define M(stage) ASM(stage, sse41),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }
    if (1 && SkCpu::Supports(SkCpu::SSE2)) {
        return {
        #define M(stage) ASM(stage, sse2),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }

#elif defined(__i386__) || defined(_M_IX86)
    if (1 && SkCpu::Supports(SkCpu::SSE2)) {
        return {
        #define M(stage) ASM(stage, sse2),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }

#endif
    return kBaseline;
}
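
// For intuition (a hedged sketch, not generated code): the HSW branch above
// builds an engine roughly like
//
//     SkJumper_Engine hsw = {
//         { /* one ASM(stage, hsw) pointer per stage, in SK_RASTER_PIPELINE_STAGES order */ },
//         ASM(start_pipeline, hsw),
//         ASM(just_return, hsw),
//     };
//
// so choosing an instruction set just swaps one table of function pointers for
// another, decided once at runtime via SkCpu::Supports().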

#ifndef SK_JUMPER_DISABLE_8BIT
    static const SkJumper_Engine kNone = {
    #define M(stage) nullptr,
        { SK_RASTER_PIPELINE_STAGES(M) },
    #undef M
        nullptr,
        nullptr,
    };
    static SkJumper_Engine gLowp = kNone;
    static SkOnce gChooseLowpOnce;

    static SkJumper_Engine choose_lowp() {
    #if SK_JUMPER_USE_ASSEMBLY
        #if defined(__x86_64__) || defined(_M_X64)
            if (1 && SkCpu::Supports(SkCpu::HSW)) {
                return {
                #define M(st) hsw_lowp<SkRasterPipeline::st>(),
                    { SK_RASTER_PIPELINE_STAGES(M) },
                    ASM(start_pipeline,hsw_lowp),
                    ASM(just_return   ,hsw_lowp),
                #undef M
                };
            }
            if (1 && SkCpu::Supports(SkCpu::SSE41)) {
                return {
                #define M(st) sse41_lowp<SkRasterPipeline::st>(),
                    { SK_RASTER_PIPELINE_STAGES(M) },
                    ASM(start_pipeline,sse41_lowp),
                    ASM(just_return   ,sse41_lowp),
                #undef M
                };
            }
            if (1 && SkCpu::Supports(SkCpu::SSE2)) {
                return {
                #define M(st) sse2_lowp<SkRasterPipeline::st>(),
                    { SK_RASTER_PIPELINE_STAGES(M) },
                    ASM(start_pipeline,sse2_lowp),
                    ASM(just_return   ,sse2_lowp),
                #undef M
                };
            }
        #elif defined(__i386__) || defined(_M_IX86)
            if (1 && SkCpu::Supports(SkCpu::SSE2)) {
                return {
                #define M(st) sse2_lowp<SkRasterPipeline::st>(),
                    { SK_RASTER_PIPELINE_STAGES(M) },
                    ASM(start_pipeline,sse2_lowp),
                    ASM(just_return   ,sse2_lowp),
                #undef M
                };
            }

        #elif defined(JUMPER_HAS_NEON_LOWP)
            return {
            #define M(st) neon_lowp<SkRasterPipeline::st>(),
                { SK_RASTER_PIPELINE_STAGES(M) },
                sk_start_pipeline_lowp,
                sk_just_return_lowp,
            #undef M
            };
        #endif
    #endif
        return kNone;
    }
#endif

const SkJumper_Engine& SkRasterPipeline::build_pipeline(void** ip) const {
#ifndef SK_JUMPER_DISABLE_8BIT
    gChooseLowpOnce([]{ gLowp = choose_lowp(); });

    // First try to build a lowp pipeline.  If that fails, fall back to normal float gEngine.
    void** reset_point = ip;
    *--ip = (void*)gLowp.just_return;
    for (const StageList* st = fStages; st; st = st->prev) {
        if (st->stage == SkRasterPipeline::clamp_0 ||
            st->stage == SkRasterPipeline::clamp_1) {
            continue;  // No-ops in lowp.
        }
        if (StageFn* fn = gLowp.stages[st->stage]) {
            if (st->ctx) {
                *--ip = st->ctx;
            }
            *--ip = (void*)fn;
        } else {
            log_missing(st->stage);
            ip = reset_point;
            break;
        }
    }
    if (ip != reset_point) {
        return gLowp;
    }
#endif

    gChooseEngineOnce([]{ gEngine = choose_engine(); });
    // We're building the pipeline backwards, so we start with the final stage just_return.
    *--ip = (void*)gEngine.just_return;

    // Still going backwards, each stage's context pointer then its StageFn.
    for (const StageList* st = fStages; st; st = st->prev) {
        if (st->ctx) {
            *--ip = st->ctx;
        }
        *--ip = (void*)gEngine.stages[st->stage];
    }
    return gEngine;
}

void SkRasterPipeline::run(size_t x, size_t y, size_t w, size_t h) const {
    if (this->empty()) {
        return;
    }

    // Best to not use fAlloc here... we can't bound how often run() will be called.
    SkAutoSTMalloc<64, void*> program(fSlotsNeeded);

    const SkJumper_Engine& engine = this->build_pipeline(program.get() + fSlotsNeeded);
    engine.start_pipeline(x,y,x+w,y+h, program.get());
}

std::function<void(size_t, size_t, size_t, size_t)> SkRasterPipeline::compile() const {
    if (this->empty()) {
        return [](size_t, size_t, size_t, size_t) {};
    }

    void** program = fAlloc->makeArray<void*>(fSlotsNeeded);
    const SkJumper_Engine& engine = this->build_pipeline(program + fSlotsNeeded);

    auto start_pipeline = engine.start_pipeline;
    return [=](size_t x, size_t y, size_t w, size_t h) {
        start_pipeline(x,y,x+w,y+h, program);
    };
}
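
// A hedged usage sketch (the stage choices and the loadCtx/storeCtx contexts are
// purely illustrative, not part of this file):
//
//     SkSTArenaAlloc<256> alloc;
//     SkRasterPipeline p(&alloc);
//     p.append(SkRasterPipeline::load_8888,  &loadCtx);
//     p.append(SkRasterPipeline::srcover);
//     p.append(SkRasterPipeline::store_8888, &storeCtx);
//     p.run(x,y, w,h);
//
// Both run() and compile() funnel through build_pipeline(), which writes the
// program backwards so that, read forwards, it looks like
//
//     { &load_8888, &loadCtx, &srcover, &store_8888, &storeCtx, &just_return }
//
// with each entry being either a StageFn* or the preceding stage's context pointer.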