/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkColorData.h"
#include "SkCpu.h"
#include "SkJumper.h"
#include "SkOnce.h"
#include "SkRasterPipeline.h"
#include "SkTemplates.h"

#if !defined(SK_JUMPER_USE_ASSEMBLY)
    // We'll use __has_feature(memory_sanitizer) to detect MSAN.
    // SkJumper_generated.S is not compiled with MSAN, so MSAN would yell really loud.
    #if !defined(__has_feature)
        #define __has_feature(x) 0
    #endif

    #if 0 || __has_feature(memory_sanitizer)
        #define SK_JUMPER_USE_ASSEMBLY 0
    #else
        #define SK_JUMPER_USE_ASSEMBLY 1
    #endif
#endif

#define M(st) +1
static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M);
#undef M
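
// A quick sketch of how that count works: SK_RASTER_PIPELINE_STAGES(M) expands M once
// per stage, so with M(st) defined as +1 the whole expansion is just a sum of ones.
// With a made-up three-stage list (not the real one) it would read:
//
//     static const int kNumStages = +1 +1 +1;   // == 3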

#ifndef SK_JUMPER_DISABLE_8BIT
    // Disabled by the leading '#if 0 &&' below; enable it for optional logging of
    // missing lowp stages when debugging locally.
    #if 0 && SK_JUMPER_USE_ASSEMBLY && (defined(__x86_64__) || defined(_M_X64))
        #include <atomic>

        #define M(st) #st,
        static const char* kStageNames[] = { SK_RASTER_PIPELINE_STAGES(M) };
        #undef M

        static std::atomic<int> gMissingStageCounters[kNumStages];

        static void log_missing(SkRasterPipeline::StockStage st) {
            static SkOnce once;
            once([] { atexit([] {
                int total = 0;
                for (int i = 0; i < kNumStages; i++) {
                    if (int count = gMissingStageCounters[i].load()) {
                        SkDebugf("%7d\t%s\n", count, kStageNames[i]);
                        total += count;
                    }
                }
                SkDebugf("%7d total\n", total);
            }); });

            gMissingStageCounters[st]++;
        }
    #else
        static void log_missing(SkRasterPipeline::StockStage) {}
    #endif
#endif

// We can't express the real types of most stage functions portably, so we use a stand-in.
// We'll only ever call start_pipeline(), which then chains into the rest.
using StageFn         = void(void);
using StartPipelineFn = void(size_t,size_t,size_t,size_t, void**);
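
// A rough sketch of how these stand-ins get used later in this file (condensed from the
// real call sites below):
//
//     StartPipelineFn* start = gEngine.start_pipeline;     // called directly in run()
//     StageFn*         fn    = gEngine.stages[st->stage];  // only ever stored...
//     *--ip = (void*)fn;                                   // ...as a void* in the program
//
// The stages' real calling convention lives inside start_pipeline() and the stage code
// itself (assembly or baseline), so the deliberately vague StageFn signature is never
// used to make a call from here.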

// Some platforms expect C "name" to map to asm "_name", others to "name".
#if defined(__APPLE__)
    #define ASM(name, suffix)  sk_##name##_##suffix
#else
    #define ASM(name, suffix) _sk_##name##_##suffix
#endif
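
// For example, sketching the expansion (the stage and suffix are arbitrary picks):
//
//     ASM(just_return, hsw)  ->   sk_just_return_hsw   on Apple targets
//     ASM(just_return, hsw)  ->  _sk_just_return_hsw   everywhere else
//
// Both spellings are meant to land on the same assembly symbol, since Apple toolchains
// prepend the underscore to C names themselves.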

extern "C" {

#if !SK_JUMPER_USE_ASSEMBLY
    // We'll just run baseline code.

#elif defined(__x86_64__) || defined(_M_X64)
    StartPipelineFn ASM(start_pipeline,       skx),
                    ASM(start_pipeline,       hsw),
                    ASM(start_pipeline,       avx),
                    ASM(start_pipeline,     sse41),
                    ASM(start_pipeline,      sse2),
                    ASM(start_pipeline,  hsw_lowp),
                    ASM(start_pipeline,sse41_lowp),
                    ASM(start_pipeline, sse2_lowp);

    StageFn ASM(just_return,       skx),
            ASM(just_return,       hsw),
            ASM(just_return,       avx),
            ASM(just_return,     sse41),
            ASM(just_return,      sse2),
            ASM(just_return,  hsw_lowp),
            ASM(just_return,sse41_lowp),
            ASM(just_return, sse2_lowp);

    #define M(st) StageFn ASM(st,  skx),      \
                          ASM(st,  hsw),      \
                          ASM(st,  avx),      \
                          ASM(st,sse41),      \
                          ASM(st, sse2),      \
                          ASM(st,  hsw_lowp), \
                          ASM(st,sse41_lowp), \
                          ASM(st, sse2_lowp);
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M

#elif defined(__i386__) || defined(_M_IX86)
    StartPipelineFn ASM(start_pipeline,sse2),
                    ASM(start_pipeline,sse2_lowp);
    StageFn ASM(just_return,sse2),
            ASM(just_return,sse2_lowp);
    #define M(st) StageFn ASM(st,sse2),      \
                          ASM(st,sse2_lowp);
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M

#endif

    // Baseline code compiled as a normal part of Skia.
    StartPipelineFn sk_start_pipeline;
    StageFn sk_just_return;
    #define M(st) StageFn sk_##st;
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M

#if defined(JUMPER_HAS_NEON_LOWP)
    StartPipelineFn sk_start_pipeline_lowp;
    StageFn sk_just_return_lowp;
    #define M(st) StageFn sk_##st##_lowp;
        SK_RASTER_PIPELINE_STAGES(M)
    #undef M
#endif

}

#if SK_JUMPER_USE_ASSEMBLY
    #if defined(__x86_64__) || defined(_M_X64)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* hsw_lowp();

        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse41_lowp();

        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse2_lowp();

        #define LOWP(st) \
            template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() {   \
                return ASM(st,hsw_lowp);                                        \
            }                                                                   \
            template <> constexpr StageFn* sse41_lowp<SkRasterPipeline::st>() { \
                return ASM(st,sse41_lowp);                                      \
            }                                                                   \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return ASM(st,sse2_lowp);                                       \
            }
        #define NOPE(st) \
            template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() {   \
                return nullptr;                                                 \
            }                                                                   \
            template <> constexpr StageFn* sse41_lowp<SkRasterPipeline::st>() { \
                return nullptr;                                                 \
            }                                                                   \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return nullptr;                                                 \
            }

    #elif defined(__i386__) || defined(_M_IX86)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* sse2_lowp();

        #define LOWP(st) \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return ASM(st,sse2_lowp);                                       \
            }
        #define NOPE(st) \
            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                return nullptr;                                                 \
            }

    #elif defined(JUMPER_HAS_NEON_LOWP)
        template <SkRasterPipeline::StockStage st>
        static constexpr StageFn* neon_lowp();

        #define LOWP(st)                                                         \
            template <> constexpr StageFn* neon_lowp<SkRasterPipeline::st>() {   \
                return sk_##st##_lowp;                                           \
            }
        #define NOPE(st)                                                         \
            template <> constexpr StageFn* neon_lowp<SkRasterPipeline::st>() {   \
                return nullptr;                                                  \
            }

    #else
        #define LOWP(st)
        #define NOPE(st)

    #endif

    #define TODO(st) NOPE(st)  // stages that should be implemented in lowp, but aren't.
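
    // A rough sketch of what one entry in the table below expands to (x86-64 case, stage
    // chosen arbitrarily): LOWP(load_8888) specializes all three function templates above,
    //
    //     hsw_lowp  <SkRasterPipeline::load_8888>()  ->  ASM(load_8888,  hsw_lowp)
    //     sse41_lowp<SkRasterPipeline::load_8888>()  ->  ASM(load_8888,sse41_lowp)
    //     sse2_lowp <SkRasterPipeline::load_8888>()  ->  ASM(load_8888, sse2_lowp)
    //
    // while NOPE(softlight) makes the same specializations return nullptr, which
    // choose_lowp() copies into its stage table and build_pipeline() reads as "no lowp
    // version, fall back to the float pipeline".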

    NOPE(callback)
    LOWP(move_src_dst) LOWP(move_dst_src)
    NOPE(clamp_0) NOPE(clamp_1) LOWP(clamp_a) LOWP(clamp_a_dst)
    NOPE(unpremul) LOWP(premul) LOWP(premul_dst)
    LOWP(force_opaque) LOWP(force_opaque_dst)
    LOWP(set_rgb) LOWP(swap_rb) LOWP(invert)
    NOPE(from_srgb) NOPE(from_srgb_dst) NOPE(to_srgb)
    LOWP(black_color) LOWP(white_color) LOWP(uniform_color)
    LOWP(seed_shader) NOPE(dither)
    LOWP(load_a8)   LOWP(load_a8_dst)   LOWP(store_a8)   LOWP(gather_a8)
    LOWP(load_g8)   LOWP(load_g8_dst)                    LOWP(gather_g8)
    LOWP(load_565)  LOWP(load_565_dst)  LOWP(store_565)  LOWP(gather_565)
    LOWP(load_4444) LOWP(load_4444_dst) LOWP(store_4444) LOWP(gather_4444)
    NOPE(load_f16)  NOPE(load_f16_dst)  NOPE(store_f16)  NOPE(gather_f16)
    NOPE(load_f32)  NOPE(load_f32_dst)  NOPE(store_f32)
    LOWP(load_8888) LOWP(load_8888_dst) LOWP(store_8888) LOWP(gather_8888)
    LOWP(load_bgra) LOWP(load_bgra_dst) LOWP(store_bgra) LOWP(gather_bgra)
    NOPE(load_1010102) NOPE(load_1010102_dst) NOPE(store_1010102) NOPE(gather_1010102)
    TODO(bilerp_clamp_8888)
    TODO(load_u16_be) TODO(load_rgb_u16_be) TODO(store_u16_be)
    NOPE(load_tables_u16_be) NOPE(load_tables_rgb_u16_be) NOPE(load_tables)
    NOPE(load_rgba) NOPE(store_rgba)
    LOWP(scale_u8) LOWP(scale_565) LOWP(scale_1_float)
    LOWP( lerp_u8) LOWP( lerp_565) LOWP( lerp_1_float)
    LOWP(dstatop) LOWP(dstin) LOWP(dstout) LOWP(dstover)
    LOWP(srcatop) LOWP(srcin) LOWP(srcout) LOWP(srcover)
    LOWP(clear) LOWP(modulate) LOWP(multiply) LOWP(plus_) LOWP(screen) LOWP(xor_)
    NOPE(colorburn) NOPE(colordodge) LOWP(darken) LOWP(difference)
    LOWP(exclusion) LOWP(hardlight) LOWP(lighten) LOWP(overlay) NOPE(softlight)
    NOPE(hue) NOPE(saturation) NOPE(color) NOPE(luminosity)
    LOWP(srcover_rgba_8888) LOWP(srcover_bgra_8888)
    LOWP(luminance_to_alpha)
    LOWP(matrix_translate) LOWP(matrix_scale_translate)
    LOWP(matrix_2x3) NOPE(matrix_3x4) TODO(matrix_4x5) TODO(matrix_4x3)
    LOWP(matrix_perspective)
    NOPE(parametric_r) NOPE(parametric_g) NOPE(parametric_b)
    NOPE(parametric_a) NOPE(gamma) NOPE(gamma_dst)
    NOPE(table_r) NOPE(table_g) NOPE(table_b) NOPE(table_a)
    NOPE(lab_to_xyz)
                    TODO(mirror_x)   TODO(repeat_x)
                    TODO(mirror_y)   TODO(repeat_y)
    TODO(bilinear_nx) TODO(bilinear_px) TODO(bilinear_ny) TODO(bilinear_py)
    TODO(bicubic_n3x) TODO(bicubic_n1x) TODO(bicubic_p1x) TODO(bicubic_p3x)
    TODO(bicubic_n3y) TODO(bicubic_n1y) TODO(bicubic_p1y) TODO(bicubic_p3y)
    TODO(save_xy) TODO(accumulate)
    LOWP(clamp_x_1) LOWP(mirror_x_1) LOWP(repeat_x_1)
    LOWP(evenly_spaced_gradient)
    LOWP(gradient)
    LOWP(evenly_spaced_2_stop_gradient)
    LOWP(xy_to_unit_angle)
    LOWP(xy_to_radius)
    TODO(negate_x)
    TODO(xy_to_2pt_conical_strip)
    TODO(xy_to_2pt_conical_focal_on_circle)
    TODO(xy_to_2pt_conical_well_behaved)
    TODO(xy_to_2pt_conical_greater)
    TODO(xy_to_2pt_conical_smaller)
    TODO(alter_2pt_conical_compensate_focal)
    TODO(alter_2pt_conical_unswap)
    TODO(mask_2pt_conical_nan) TODO(mask_2pt_conical_degenerates) TODO(apply_vector_mask)
    TODO(byte_tables) TODO(byte_tables_rgb)
    NOPE(rgb_to_hsl) NOPE(hsl_to_rgb)
    NOPE(clut_3D) NOPE(clut_4D)
    NOPE(gauss_a_to_rgba)

    #undef LOWP
    #undef TODO
    #undef NOPE
#endif

// Engines comprise everything we need to run SkRasterPipelines.
struct SkJumper_Engine {
    StageFn*         stages[kNumStages];
    StartPipelineFn* start_pipeline;
    StageFn*         just_return;
};
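
// A small illustration (the stage named here is arbitrary): each StockStage enum value
// doubles as that stage's index into stages[], so once an engine has been chosen,
//
//     StageFn* fn = gEngine.stages[SkRasterPipeline::srcover];
//
// is that instruction set's srcover, with start_pipeline and just_return being the
// matching entry point and terminator from the same instruction set.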

// We'll default to this baseline engine, but try to choose a better one at runtime.
static const SkJumper_Engine kBaseline = {
#define M(stage) sk_##stage,
    { SK_RASTER_PIPELINE_STAGES(M) },
#undef M
    sk_start_pipeline,
    sk_just_return,
};
static SkJumper_Engine gEngine = kBaseline;
static SkOnce gChooseEngineOnce;

static SkJumper_Engine choose_engine() {
#if !SK_JUMPER_USE_ASSEMBLY
    // We'll just run baseline code.

#elif defined(__x86_64__) || defined(_M_X64)
    #if !defined(_MSC_VER)  // No _skx stages for Windows yet.
        if (1 && SkCpu::Supports(SkCpu::SKX)) {
            return {
            #define M(stage) ASM(stage, skx),
                { SK_RASTER_PIPELINE_STAGES(M) },
                M(start_pipeline)
                M(just_return)
            #undef M
            };
        }
    #endif
    if (1 && SkCpu::Supports(SkCpu::HSW)) {
        return {
        #define M(stage) ASM(stage, hsw),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }
    if (1 && SkCpu::Supports(SkCpu::AVX)) {
        return {
        #define M(stage) ASM(stage, avx),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }
    if (1 && SkCpu::Supports(SkCpu::SSE41)) {
        return {
        #define M(stage) ASM(stage, sse41),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }
    if (1 && SkCpu::Supports(SkCpu::SSE2)) {
        return {
        #define M(stage) ASM(stage, sse2),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }

#elif defined(__i386__) || defined(_M_IX86)
    if (1 && SkCpu::Supports(SkCpu::SSE2)) {
        return {
        #define M(stage) ASM(stage, sse2),
            { SK_RASTER_PIPELINE_STAGES(M) },
            M(start_pipeline)
            M(just_return)
        #undef M
        };
    }

#endif
    return kBaseline;
}

#ifndef SK_JUMPER_DISABLE_8BIT
    static const SkJumper_Engine kNone = {
    #define M(stage) nullptr,
        { SK_RASTER_PIPELINE_STAGES(M) },
    #undef M
        nullptr,
        nullptr,
    };
    static SkJumper_Engine gLowp = kNone;
    static SkOnce gChooseLowpOnce;

    static SkJumper_Engine choose_lowp() {
    #if SK_JUMPER_USE_ASSEMBLY
        #if defined(__x86_64__) || defined(_M_X64)
            if (1 && SkCpu::Supports(SkCpu::HSW)) {
                return {
                #define M(st) hsw_lowp<SkRasterPipeline::st>(),
                    { SK_RASTER_PIPELINE_STAGES(M) },
                    ASM(start_pipeline,hsw_lowp),
                    ASM(just_return   ,hsw_lowp),
                #undef M
                };
            }
            if (1 && SkCpu::Supports(SkCpu::SSE41)) {
                return {
                #define M(st) sse41_lowp<SkRasterPipeline::st>(),
                    { SK_RASTER_PIPELINE_STAGES(M) },
                    ASM(start_pipeline,sse41_lowp),
                    ASM(just_return   ,sse41_lowp),
                #undef M
                };
            }
            if (1 && SkCpu::Supports(SkCpu::SSE2)) {
                return {
                #define M(st) sse2_lowp<SkRasterPipeline::st>(),
                    { SK_RASTER_PIPELINE_STAGES(M) },
                    ASM(start_pipeline,sse2_lowp),
                    ASM(just_return   ,sse2_lowp),
                #undef M
                };
            }
        #elif defined(__i386__) || defined(_M_IX86)
            if (1 && SkCpu::Supports(SkCpu::SSE2)) {
                return {
                #define M(st) sse2_lowp<SkRasterPipeline::st>(),
                    { SK_RASTER_PIPELINE_STAGES(M) },
                    ASM(start_pipeline,sse2_lowp),
                    ASM(just_return   ,sse2_lowp),
                #undef M
                };
            }

        #elif defined(JUMPER_HAS_NEON_LOWP)
            return {
            #define M(st) neon_lowp<SkRasterPipeline::st>(),
                { SK_RASTER_PIPELINE_STAGES(M) },
                sk_start_pipeline_lowp,
                sk_just_return_lowp,
            #undef M
            };
        #endif
    #endif
        return kNone;
    }
#endif

const SkJumper_Engine& SkRasterPipeline::build_pipeline(void** ip) const {
#ifndef SK_JUMPER_DISABLE_8BIT
    gChooseLowpOnce([]{ gLowp = choose_lowp(); });

    // First try to build a lowp pipeline.  If that fails, fall back to the normal float gEngine.
    void** reset_point = ip;
    *--ip = (void*)gLowp.just_return;
    for (const StageList* st = fStages; st; st = st->prev) {
        if (st->stage == SkRasterPipeline::clamp_0 ||
            st->stage == SkRasterPipeline::clamp_1) {
            continue;  // No-ops in lowp.
        }
        if (StageFn* fn = gLowp.stages[st->stage]) {
            if (st->ctx) {
                *--ip = st->ctx;
            }
            *--ip = (void*)fn;
        } else {
            log_missing(st->stage);
            ip = reset_point;
            break;
        }
    }
    if (ip != reset_point) {
        return gLowp;
    }
#endif

    gChooseEngineOnce([]{ gEngine = choose_engine(); });
    // We're building the pipeline backwards, so we start with the final stage, just_return.
    *--ip = (void*)gEngine.just_return;

    // Still going backwards, we push each stage's context pointer (if any), then its StageFn.
    for (const StageList* st = fStages; st; st = st->prev) {
        if (st->ctx) {
            *--ip = st->ctx;
        }
        *--ip = (void*)gEngine.stages[st->stage];
    }
    return gEngine;
}
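
// To make the backwards construction concrete, here is a sketch of the finished program
// array for a hypothetical two-stage pipeline, load_8888 (which carries a ctx) followed
// by swap_rb (which doesn't):
//
//     program:  [ load_8888 fn ][ load_8888 ctx ][ swap_rb fn ][ just_return fn ]
//
// ip starts one past the end (see run() and compile() below), so the last pointer pushed,
// the first stage's StageFn, lands at program[0]; start_pipeline() can then walk the
// array forward, each stage consuming its ctx slot if it has one, until just_return ends
// the run.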

void SkRasterPipeline::run(size_t x, size_t y, size_t w, size_t h) const {
    if (this->empty()) {
        return;
    }

    // Best to not use fAlloc here... we can't bound how often run() will be called.
    SkAutoSTMalloc<64, void*> program(fSlotsNeeded);

    const SkJumper_Engine& engine = this->build_pipeline(program.get() + fSlotsNeeded);
    engine.start_pipeline(x,y,x+w,y+h, program.get());
}

std::function<void(size_t, size_t, size_t, size_t)> SkRasterPipeline::compile() const {
    if (this->empty()) {
        return [](size_t, size_t, size_t, size_t) {};
    }

    void** program = fAlloc->makeArray<void*>(fSlotsNeeded);
    const SkJumper_Engine& engine = this->build_pipeline(program + fSlotsNeeded);

    auto start_pipeline = engine.start_pipeline;
    return [=](size_t x, size_t y, size_t w, size_t h) {
        start_pipeline(x,y,x+w,y+h, program);
    };
}
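
// A hedged usage sketch (the pipeline, contexts, and coordinates below are illustrative,
// not taken from a real caller): compile() pays the build_pipeline() cost once, so code
// that reruns the same pipeline can hold onto the returned std::function.
//
//     SkArenaAlloc alloc(1024);
//     SkRasterPipeline p(&alloc);
//     p.append(SkRasterPipeline::load_8888,  &load_ctx);    // load_ctx/store_ctx stand in
//     p.append(SkRasterPipeline::srcover);                  // for whatever context structs
//     p.append(SkRasterPipeline::store_8888, &store_ctx);   // these stages expect
//
//     auto blit = p.compile();
//     blit(x,y, w,h);   // same x,y,w,h meaning as run()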