/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkColorData.h"
#include "SkCpu.h"
#include "SkJumper.h"
#include "SkOnce.h"
#include "SkRasterPipeline.h"
#include "SkTemplates.h"

// Decide whether we can link against the offline-assembled stage code.
// Users may predefine SK_JUMPER_USE_ASSEMBLY to force the choice.
#if !defined(SK_JUMPER_USE_ASSEMBLY)
    // We'll use __has_feature(memory_sanitizer) to detect MSAN.
    // SkJumper_generated.S is not compiled with MSAN, so MSAN would yell really loud.
    #if !defined(__has_feature)
        #define __has_feature(x) 0
    #endif

    // The leading "0 ||" is a convenient local toggle: flip it to 1 to
    // force baseline (non-assembly) code while debugging.
    #if 0 || __has_feature(memory_sanitizer)
        #define SK_JUMPER_USE_ASSEMBLY 0
    #else
        #define SK_JUMPER_USE_ASSEMBLY 1
    #endif
#endif

// Count the stages: expanding M once per stage as "+1" sums to the stage count.
#define M(st) +1
static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M);
#undef M

#ifndef SK_JUMPER_DISABLE_8BIT
    // Intentionally commented out; optional logging for local debugging.
    // Flip the "0 &&" to "1 &&" to report, at process exit, which stages
    // forced a fallback from the 8-bit (lowp) pipeline to the float one.
    #if 0 && SK_JUMPER_USE_ASSEMBLY && (defined(__x86_64__) || defined(_M_X64))
        #include <atomic>

        // One printable name per stage, in stage-enum order.
        #define M(st) #st,
        static const char* kStageNames[] = { SK_RASTER_PIPELINE_STAGES(M) };
        #undef M

        // Per-stage counters of lowp-pipeline misses; atomic so any thread may bump them.
        static std::atomic<int> gMissingStageCounters[kNumStages];

        // Record that stage `st` had no lowp implementation.  On first call,
        // registers an atexit hook that dumps all non-zero counters.
        static void log_missing(SkRasterPipeline::StockStage st) {
            static SkOnce once;
            once([] { atexit([] {
                int total = 0;
                for (int i = 0; i < kNumStages; i++) {
                    if (int count = gMissingStageCounters[i].load()) {
                        SkDebugf("%7d\t%s\n", count, kStageNames[i]);
                        total += count;
                    }
                }
                SkDebugf("%7d total\n", total);
            }); });

            gMissingStageCounters[st]++;
        }
    #else
        // Logging disabled: a no-op keeps call sites unconditional.
        static void log_missing(SkRasterPipeline::StockStage) {}
    #endif
#endif

// We can't express the real types of most stage functions portably, so we use a stand-in.
// We'll only ever call start_pipeline(), which then chains into the rest.
// Stand-in function types (see comment above): StageFn is never called directly
// from C++; StartPipelineFn is the only entry point we invoke.
using StageFn         = void(void);
using StartPipelineFn = void(size_t,size_t,size_t,size_t, void**);

// Some platforms expect C "name" maps to asm "_name", others to "name".
#if defined(__APPLE__)
    #define ASM(name, suffix)  sk_##name##_##suffix
#else
    #define ASM(name, suffix) _sk_##name##_##suffix
#endif

// Declare every symbol provided by the offline-assembled SkJumper_generated.S,
// one set per instruction-set variant.  (extern "C" block is closed below,
// after the baseline declarations.)
extern "C" {

#if !SK_JUMPER_USE_ASSEMBLY
    // We'll just run baseline code.

#elif defined(__x86_64__) || defined(_M_X64)
    // x86-64 ships five float (highp) variants and three 8-bit (lowp) variants.
    StartPipelineFn ASM(start_pipeline,       skx),
                    ASM(start_pipeline,       hsw),
                    ASM(start_pipeline,       avx),
                    ASM(start_pipeline,     sse41),
                    ASM(start_pipeline,      sse2),
                    ASM(start_pipeline,  hsw_lowp),
                    ASM(start_pipeline,sse41_lowp),
                    ASM(start_pipeline, sse2_lowp);

    StageFn ASM(just_return,       skx),
            ASM(just_return,       hsw),
            ASM(just_return,       avx),
            ASM(just_return,     sse41),
            ASM(just_return,      sse2),
            ASM(just_return,  hsw_lowp),
            ASM(just_return,sse41_lowp),
            ASM(just_return, sse2_lowp);

    // Declare every stage in every variant.
    #define M(st) StageFn ASM(st,       skx), \
                          ASM(st,       hsw), \
                          ASM(st,       avx), \
                          ASM(st,     sse41), \
                          ASM(st,      sse2), \
                          ASM(st,  hsw_lowp), \
                          ASM(st,sse41_lowp), \
                          ASM(st, sse2_lowp);
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M

#elif defined(__i386__) || defined(_M_IX86)
    // 32-bit x86 ships only SSE2, in highp and lowp flavors.
    StartPipelineFn ASM(start_pipeline,sse2),
                    ASM(start_pipeline,sse2_lowp);
    StageFn ASM(just_return,sse2),
            ASM(just_return,sse2_lowp);
    #define M(st) StageFn ASM(st,sse2), \
                          ASM(st,sse2_lowp);
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M

#endif

    // Baseline code compiled as a normal part of Skia.
    // Baseline (portable C++) variants, always available as the fallback engine.
    StartPipelineFn sk_start_pipeline;
    StageFn sk_just_return;
    #define M(st) StageFn sk_##st;
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M

#if defined(JUMPER_HAS_NEON_LOWP)
    // On NEON, lowp stages are compiled as normal C++ too (no offline asm).
    StartPipelineFn sk_start_pipeline_lowp;
    StageFn sk_just_return_lowp;
    #define M(st) StageFn sk_##st##_lowp;
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M
#endif

}  // extern "C"

#if SK_JUMPER_USE_ASSEMBLY
    // Per-stage lookup of the lowp implementation, done at compile time with
    // constexpr function template specializations:
    //   LOWP(st) -> specializations returning the lowp symbol for stage `st`,
    //   NOPE(st) -> specializations returning nullptr (no lowp version).
    #if defined(__x86_64__) || defined(_M_X64)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* hsw_lowp();

        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse41_lowp();

        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse2_lowp();

        #define LOWP(st) \
            template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() {   \
                return ASM(st,hsw_lowp);                                        \
            }                                                                   \
            template <> constexpr StageFn* sse41_lowp<SkRasterPipeline::st>() { \
                return ASM(st,sse41_lowp);                                      \
            }                                                                   \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return ASM(st,sse2_lowp);                                       \
            }
        #define NOPE(st) \
            template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() {   \
                return nullptr;                                                 \
            }                                                                   \
            template <> constexpr StageFn* sse41_lowp<SkRasterPipeline::st>() { \
                return nullptr;                                                 \
            }                                                                   \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return nullptr;                                                 \
            }

    #elif defined(__i386__) || defined(_M_IX86)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse2_lowp();

        #define LOWP(st) \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() { \
                return ASM(st,sse2_lowp);                                      \
            }
        #define NOPE(st) \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() { \
                return nullptr;                                                \
            }

    #elif defined(JUMPER_HAS_NEON_LOWP)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* neon_lowp();

        #define LOWP(st) \
            template <> constexpr StageFn* neon_lowp<SkRasterPipeline::st>() { \
                return sk_##st##_lowp;                                         \
            }
        #define NOPE(st) \
            template <> constexpr StageFn* neon_lowp<SkRasterPipeline::st>() { \
                return nullptr;                                                \
            }

    #else
        // No lowp backend on this platform: both macros expand to nothing.
        #define LOWP(st)
        #define NOPE(st)

    #endif

    #define TODO(st) NOPE(st)  // stages that should be implemented in lowp, but aren't.

    // The capability table: every stock stage must appear exactly once below,
    // classified as LOWP (has a lowp version), NOPE (float-only by design),
    // or TODO (lowp version not written yet).
    NOPE(callback)
    LOWP(move_src_dst) LOWP(move_dst_src)
    NOPE(clamp_0) NOPE(clamp_1) LOWP(clamp_a) LOWP(clamp_a_dst)
    NOPE(unpremul) LOWP(premul) LOWP(premul_dst)
    LOWP(force_opaque) LOWP(force_opaque_dst)
    LOWP(set_rgb) LOWP(swap_rb) LOWP(invert)
    NOPE(from_srgb) NOPE(from_srgb_dst) NOPE(to_srgb)
    LOWP(black_color) LOWP(white_color) LOWP(uniform_color)
    LOWP(seed_shader) NOPE(dither)
    LOWP(load_a8)   LOWP(load_a8_dst)   LOWP(store_a8)   LOWP(gather_a8)
    LOWP(load_g8)   LOWP(load_g8_dst)   LOWP(gather_g8)
    LOWP(load_565)  LOWP(load_565_dst)  LOWP(store_565)  LOWP(gather_565)
    LOWP(load_4444) LOWP(load_4444_dst) LOWP(store_4444) LOWP(gather_4444)
    NOPE(load_f16)  NOPE(load_f16_dst)  NOPE(store_f16)  NOPE(gather_f16)
    NOPE(load_f32)  NOPE(load_f32_dst)  NOPE(store_f32)
    LOWP(load_8888) LOWP(load_8888_dst) LOWP(store_8888) LOWP(gather_8888)
    LOWP(load_bgra) LOWP(load_bgra_dst) LOWP(store_bgra) LOWP(gather_bgra)
    NOPE(load_1010102) NOPE(load_1010102_dst) NOPE(store_1010102) NOPE(gather_1010102)
    TODO(bilerp_clamp_8888)
    TODO(load_u16_be) TODO(load_rgb_u16_be) TODO(store_u16_be)
    NOPE(load_tables_u16_be) NOPE(load_tables_rgb_u16_be) NOPE(load_tables)
    NOPE(load_rgba) NOPE(store_rgba)
    LOWP(scale_u8) LOWP(scale_565) LOWP(scale_1_float)
    LOWP( lerp_u8) LOWP( lerp_565) LOWP( lerp_1_float)
    LOWP(dstatop) LOWP(dstin) LOWP(dstout) LOWP(dstover)
    LOWP(srcatop) LOWP(srcin) LOWP(srcout) LOWP(srcover)
    LOWP(clear) LOWP(modulate) LOWP(multiply) LOWP(plus_) LOWP(screen) LOWP(xor_)
    NOPE(colorburn) NOPE(colordodge) LOWP(darken) LOWP(difference)
    LOWP(exclusion) LOWP(hardlight) LOWP(lighten) LOWP(overlay) NOPE(softlight)
    NOPE(hue) NOPE(saturation) NOPE(color) NOPE(luminosity)
    LOWP(srcover_rgba_8888) LOWP(srcover_bgra_8888)
    LOWP(luminance_to_alpha)
    LOWP(matrix_translate) LOWP(matrix_scale_translate)
    LOWP(matrix_2x3) NOPE(matrix_3x4) TODO(matrix_4x5) TODO(matrix_4x3)
    LOWP(matrix_perspective)
    NOPE(parametric_r) NOPE(parametric_g) NOPE(parametric_b)
    NOPE(parametric_a) NOPE(gamma) NOPE(gamma_dst)
    NOPE(table_r) NOPE(table_g) NOPE(table_b) NOPE(table_a)
    NOPE(lab_to_xyz)
    TODO(mirror_x)   TODO(repeat_x)
    TODO(mirror_y)   TODO(repeat_y)
    LOWP(decal_x) LOWP(decal_y) LOWP(decal_x_and_y)
    LOWP(check_decal_mask)
    TODO(bilinear_nx) TODO(bilinear_px) TODO(bilinear_ny) TODO(bilinear_py)
    TODO(bicubic_n3x) TODO(bicubic_n1x) TODO(bicubic_p1x) TODO(bicubic_p3x)
    TODO(bicubic_n3y) TODO(bicubic_n1y) TODO(bicubic_p1y) TODO(bicubic_p3y)
    TODO(save_xy) TODO(accumulate)
    LOWP(clamp_x_1) LOWP(mirror_x_1) LOWP(repeat_x_1)
    LOWP(evenly_spaced_gradient)
    LOWP(gradient)
    LOWP(evenly_spaced_2_stop_gradient)
    LOWP(xy_to_unit_angle)
    LOWP(xy_to_radius)
    TODO(negate_x)
    TODO(xy_to_2pt_conical_strip)
    TODO(xy_to_2pt_conical_focal_on_circle)
    TODO(xy_to_2pt_conical_well_behaved)
    TODO(xy_to_2pt_conical_greater)
    TODO(xy_to_2pt_conical_smaller)
    TODO(alter_2pt_conical_compensate_focal)
    TODO(alter_2pt_conical_unswap)
    TODO(mask_2pt_conical_nan) TODO(mask_2pt_conical_degenerates) TODO(apply_vector_mask)
    TODO(byte_tables) TODO(byte_tables_rgb)
    NOPE(rgb_to_hsl) NOPE(hsl_to_rgb)
    NOPE(clut_3D) NOPE(clut_4D)
    NOPE(gauss_a_to_rgba)

    #undef LOWP
    #undef TODO
    #undef NOPE
#endif

// Engines comprise everything we need to run SkRasterPipelines.
// An engine bundles everything needed to run a pipeline in one variant:
// a function pointer per stage plus the entry and terminal trampolines.
struct SkJumper_Engine {
    StageFn*         stages[kNumStages];  // Indexed by SkRasterPipeline::StockStage.
    StartPipelineFn* start_pipeline;      // The only function we call from C++.
    StageFn*         just_return;         // Final stage ending every pipeline.
};

// We'll default to this baseline engine, but try to choose a better one at runtime.
static const SkJumper_Engine kBaseline = {
#define M(stage) sk_##stage,
    { SK_RASTER_PIPELINE_STAGES(M) },
#undef M
    sk_start_pipeline,
    sk_just_return,
};
static SkJumper_Engine gEngine = kBaseline;
static SkOnce gChooseEngineOnce;   // Guards the one-time CPU-feature probe below.

// Pick the best float (highp) engine for this CPU, probing from the widest
// instruction set down to the narrowest.  The "1 &&" prefixes are convenient
// local toggles for disabling a branch while debugging.
static SkJumper_Engine choose_engine() {
#if !SK_JUMPER_USE_ASSEMBLY
    // We'll just run baseline code.

#elif defined(__x86_64__) || defined(_M_X64)
    #if !defined(_MSC_VER)   // No _skx stages for Windows yet.
        if (1 && SkCpu::Supports(SkCpu::SKX)) {
            return {
            #define M(stage) ASM(stage, skx),
                { SK_RASTER_PIPELINE_STAGES(M) },
                M(start_pipeline)
                M(just_return)
            #undef M
            };
        }
    #endif
    if (1 && SkCpu::Supports(SkCpu::HSW)) {
        return {
        #define M(stage) ASM(stage, hsw),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }
    if (1 && SkCpu::Supports(SkCpu::AVX)) {
        return {
        #define M(stage) ASM(stage, avx),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }
    if (1 && SkCpu::Supports(SkCpu::SSE41)) {
        return {
        #define M(stage) ASM(stage, sse41),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }
    if (1 && SkCpu::Supports(SkCpu::SSE2)) {
        return {
        #define M(stage) ASM(stage, sse2),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }

#elif defined(__i386__) || defined(_M_IX86)
    if (1 && SkCpu::Supports(SkCpu::SSE2)) {
        return {
        #define M(stage) ASM(stage, sse2),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }

#endif
    // Nothing better available: portable C++ baseline.
    return kBaseline;
}

#ifndef SK_JUMPER_DISABLE_8BIT
// The "no lowp engine" sentinel: all-null stage pointers, so build_pipeline()
// below falls back to the float engine for every stage.
static const SkJumper_Engine kNone = {
#define M(stage) nullptr,
    { SK_RASTER_PIPELINE_STAGES(M) },
#undef M
    nullptr,
    nullptr,
};
static SkJumper_Engine gLowp = kNone;
static SkOnce gChooseLowpOnce;   // Guards the one-time lowp-engine probe below.

// Pick the best 8-bit (lowp) engine for this CPU, or kNone if unsupported.
// Individual stage slots may still be nullptr (stages without lowp versions).
static SkJumper_Engine choose_lowp() {
#if SK_JUMPER_USE_ASSEMBLY
    #if defined(__x86_64__) || defined(_M_X64)
        if (1 && SkCpu::Supports(SkCpu::HSW)) {
            return {
            #define M(st) hsw_lowp<SkRasterPipeline::st>(),
                { SK_RASTER_PIPELINE_STAGES(M) },
                ASM(start_pipeline,hsw_lowp),
                ASM(just_return  ,hsw_lowp),
            #undef M
            };
        }
        if (1 && SkCpu::Supports(SkCpu::SSE41)) {
            return {
            #define M(st) sse41_lowp<SkRasterPipeline::st>(),
                { SK_RASTER_PIPELINE_STAGES(M) },
                ASM(start_pipeline,sse41_lowp),
                ASM(just_return  ,sse41_lowp),
            #undef M
            };
        }
        if (1 && SkCpu::Supports(SkCpu::SSE2)) {
            return {
            #define M(st) sse2_lowp<SkRasterPipeline::st>(),
                { SK_RASTER_PIPELINE_STAGES(M) },
                ASM(start_pipeline,sse2_lowp),
                ASM(just_return  ,sse2_lowp),
            #undef M
            };
        }
    #elif defined(__i386__) || defined(_M_IX86)
        if (1 && SkCpu::Supports(SkCpu::SSE2)) {
            return {
            #define M(st) sse2_lowp<SkRasterPipeline::st>(),
                { SK_RASTER_PIPELINE_STAGES(M) },
                ASM(start_pipeline,sse2_lowp),
                ASM(just_return  ,sse2_lowp),
            #undef M
            };
        }

    #elif defined(JUMPER_HAS_NEON_LOWP)
        // NEON lowp needs no runtime CPU check; always use it when compiled in.
        return {
        #define M(st) neon_lowp<SkRasterPipeline::st>(),
            { SK_RASTER_PIPELINE_STAGES(M) },
            sk_start_pipeline_lowp,
            sk_just_return_lowp,
        #undef M
        };
    #endif
#endif
    return kNone;
}
#endif

// Write the program for this pipeline into the slots ending at `ip` (the
// program is built backwards, from just_return toward the first stage), and
// return the engine whose stages the program refers to.
const SkJumper_Engine& SkRasterPipeline::build_pipeline(void** ip) const {
#ifndef SK_JUMPER_DISABLE_8BIT
    gChooseLowpOnce([]{ gLowp = choose_lowp(); });

    // First try to build a lowp pipeline.
    // (Continuing the lowp attempt.)  If any stage is missing a lowp
    // implementation, fall back to the normal float gEngine below.
    void** reset_point = ip;
    *--ip = (void*)gLowp.just_return;
    for (const StageList* st = fStages; st; st = st->prev) {
        if (st->stage == SkRasterPipeline::clamp_0 ||
            st->stage == SkRasterPipeline::clamp_1) {
            continue;  // No-ops in lowp.
        }
        if (StageFn* fn = gLowp.stages[st->stage]) {
            // Stage has a lowp version: emit its optional context, then the function.
            if (st->ctx) {
                *--ip = st->ctx;
            }
            *--ip = (void*)fn;
        } else {
            // No lowp version for this stage: abandon the lowp program entirely.
            log_missing(st->stage);
            ip = reset_point;
            break;
        }
    }
    if (ip != reset_point) {
        return gLowp;   // The lowp program was built completely; use it.
    }
#endif

    gChooseEngineOnce([]{ gEngine = choose_engine(); });
    // We're building the pipeline backwards, so we start with the final stage just_return.
    *--ip = (void*)gEngine.just_return;

    // Still going backwards, each stage's context pointer then its StageFn.
    for (const StageList* st = fStages; st; st = st->prev) {
        if (st->ctx) {
            *--ip = st->ctx;
        }
        *--ip = (void*)gEngine.stages[st->stage];
    }
    return gEngine;
}

// Build and immediately run this pipeline over the rectangle [x,x+w) x [y,y+h).
void SkRasterPipeline::run(size_t x, size_t y, size_t w, size_t h) const {
    if (this->empty()) {
        return;   // Nothing to do for an empty pipeline.
    }

    // Best to not use fAlloc here... we can't bound how often run() will be called.
    SkAutoSTMalloc<64, void*> program(fSlotsNeeded);

    // The program is written backwards from the end of the buffer.
    const SkJumper_Engine& engine = this->build_pipeline(program.get() + fSlotsNeeded);
    engine.start_pipeline(x,y,x+w,y+h, program.get());
}

// Build the pipeline once and return a reusable callable that runs it.
// The program is allocated from fAlloc, so it lives as long as the pipeline's
// allocator; the returned std::function captures only pointers into it.
std::function<void(size_t, size_t, size_t, size_t)> SkRasterPipeline::compile() const {
    if (this->empty()) {
        return [](size_t, size_t, size_t, size_t) {};   // No-op for an empty pipeline.
    }

    void** program = fAlloc->makeArray<void*>(fSlotsNeeded);
    const SkJumper_Engine& engine = this->build_pipeline(program + fSlotsNeeded);

    auto start_pipeline = engine.start_pipeline;
    return [=](size_t x, size_t y, size_t w, size_t h) {
        start_pipeline(x,y,x+w,y+h, program);
    };
}