Home | History | Annotate | Download | only in kernels
      1 /*
      2  * Copyright 2018 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can
      5  * be found in the LICENSE file.
      6  *
      7  */
      8 
      9 //
     10 // NOTE THAT THE SEGMENT TTCK KERNEL IS ENTIRELY DEPENDENT ON THE
     11 // LAYOUT OF THE TTCK KEY.  IF THE TTCK KEY IS ALTERED THEN THIS
     12 // KERNEL WILL NEED TO BE UPDATED
     13 //
     14 
     15 #include "tile.h"
     16 #include "atomic_cl.h"
     17 #include "kernel_cl_12.h"
     18 #include "hs/cl/intel/gen8/u64/hs_config.h"
     19 #include "hs/cl/intel/hs_cl_macros.h"
     20 
     21 //
     22 //
     23 //
     24 
     25 #define HS_LANE_MASK (HS_SLAB_WIDTH - 1)
     26 
     27 //
     28 //
     29 //
     30 
     31 #define SKC_YX_NEQ(row,prev)                \
     32   (((as_uint2(r##row).hi ^ as_uint2(r##prev).hi) & SKC_TTCK_HI_MASK_YX) != 0)
     33 
     34 //
     35 //
     36 //
     37 
     38 __kernel
     39 __attribute__((intel_reqd_sub_group_size(HS_SLAB_WIDTH)))
     40 void
     41 skc_kernel_segment_ttck(__global HS_KEY_TYPE              * SKC_RESTRICT const vout,
     42                         __global uint                     * SKC_RESTRICT const indices,
     43                         __global SKC_ATOMIC_UINT volatile * SKC_RESTRICT const atomics)
     44 {
     45   uint const global_id = get_global_id(0);
     46   uint const gmem_base = (global_id >> HS_SLAB_WIDTH_LOG2) * HS_SLAB_KEYS;
     47   uint const gmem_idx  = gmem_base + (global_id & HS_LANE_MASK);
     48   uint const lane_idx  = gmem_base + (global_id & HS_LANE_MASK) * HS_SLAB_HEIGHT;
     49 
     50   //
     51   // LOAD ALL THE ROWS
     52   //
     53 #undef  HS_SLAB_ROW
     54 #define HS_SLAB_ROW(row,prev)                                           \
     55   HS_KEY_TYPE const r##row = (vout + gmem_idx)[prev * HS_SLAB_WIDTH];
     56 
     57   HS_SLAB_ROWS();
     58 
     59   //
     60   // LOAD LAST REGISTER FROM COLUMN TO LEFT
     61   //
     62   uint  diffs = 0;
     63   uint2 r0    = r1;
     64 
     65   if (gmem_base > 0) {
     66     // if this is the first key in any slab but the first then it
     67     // broadcast loads the last key in previous slab
     68     r0.hi = as_uint2(vout[gmem_base - 1]).hi;
     69   } else if (get_sub_group_local_id() == 0) {
     70     // if this is the first lane in the first slab
     71     diffs = 1;
     72   }
     73 
     74   // now shuffle in the last key from the column to the left
     75   r0.hi = intel_sub_group_shuffle_up(r0.hi,as_uint2(HS_REG_LAST(r)).hi,1);
     76 
     77   //
     78   // FIND ALL DIFFERENCES IN SLAB
     79   //
     80   uint valid = 0;
     81 
     82 #undef  HS_SLAB_ROW
     83 #define HS_SLAB_ROW(row,prev)                   \
     84   valid |= ((r##row != SKC_ULONG_MAX) << prev);
     85 
     86   HS_SLAB_ROWS();
     87 
     88 #undef  HS_SLAB_ROW
     89 #define HS_SLAB_ROW(row,prev)                   \
     90   diffs |= (SKC_YX_NEQ(row,prev) << prev);
     91 
     92   HS_SLAB_ROWS();
     93 
     94   //
     95   // SUM UP THE DIFFERENCES
     96   //
     97   uint const valid_diffs = valid & diffs;
     98   uint const count       = popcount(valid_diffs);
     99   uint const inclusive   = sub_group_scan_inclusive_add(count);
    100   uint const exclusive   = inclusive - count;
    101 
    102   //
    103   // RESERVE SPACE IN THE INDICES ARRAY
    104   //
    105   uint next = 0;
    106 
    107   if (get_sub_group_local_id() == HS_SLAB_WIDTH-1)
    108     next = atomic_add(atomics+1,inclusive); // FIXME -- need a symbolic offset
    109 
    110   // distribute base across subgroup
    111   next = exclusive + sub_group_broadcast(next,HS_SLAB_WIDTH-1);
    112 
    113   //
    114   // STORE THE INDICES
    115   //
    116 #undef  HS_SLAB_ROW
    117 #define HS_SLAB_ROW(row,prev)                   \
    118   if (valid_diffs & (1 << prev))                \
    119     indices[next++] = lane_idx + prev;
    120 
    121   HS_SLAB_ROWS();
    122 
    123   //
    124   // TRANSPOSE THE SLAB AND STORE IT
    125   //
    126   HS_TRANSPOSE_SLAB();
    127 }
    128 
    129 //
    130 //
    131 //
    132