//
// Copyright 2016 Google Inc.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

#ifndef HS_CUDA_CONFIG_ONCE
#define HS_CUDA_CONFIG_ONCE

//
// Compile-time configuration: scalar parameters first, then two
// X-macro tables.  The tables expand to nothing useful unless the
// including file has already defined the callback macros they invoke
// (HS_SLAB_ROW, HS_TRANSPOSE_STAGE, HS_TRANSPOSE_BLEND and
// HS_TRANSPOSE_REMAP); none of those callbacks is defined here.
//
// NOTE(review): the regular layout suggests this file is machine
// generated -- confirm before editing values by hand.
//

//
// Slab geometry.  The *_LOG2 values are the base-2 logarithms of the
// derived power-of-two counts: 32 threads and 32 columns.
//
#define HS_SLAB_THREADS_LOG2    5
#define HS_SLAB_THREADS         (1 << HS_SLAB_THREADS_LOG2)
#define HS_SLAB_WIDTH_LOG2      5
#define HS_SLAB_WIDTH           (1 << HS_SLAB_WIDTH_LOG2)
#define HS_SLAB_HEIGHT          8
#define HS_SLAB_KEYS            (HS_SLAB_WIDTH * HS_SLAB_HEIGHT)

//
// Pastes the suffix 8 onto a prefix, e.g. HS_REG_LAST(r) -> r8.
// The suffix equals HS_SLAB_HEIGHT, i.e. the last row index used by
// the tables below.
//
#define HS_REG_LAST(c)          c##8

//
// Key/value layout.
// NOTE(review): presumably HS_KEY_WORDS / HS_VAL_WORDS count 32-bit
// words (two words for a u64 key, no value payload) -- confirm.
//
#define HS_KEY_TYPE_PRETTY      u64
#define HS_KEY_WORDS            2
#define HS_VAL_WORDS            0

//
// Per-kernel tuning limits.  HS_BS_SLABS_LOG2_RU (4) is log2 of
// HS_BS_SLABS (16).
// NOTE(review): the BS/BC/FM/HM prefixes are not explained in this
// file -- see the code that consumes them.
//
#define HS_BS_SLABS             16
#define HS_BS_SLABS_LOG2_RU     4
#define HS_BC_SLABS_LOG2_MAX    4

#define HS_FM_BLOCK_HEIGHT      1
#define HS_FM_SCALE_MIN         0
#define HS_FM_SCALE_MAX         0

#define HS_HM_BLOCK_HEIGHT      1
#define HS_HM_SCALE_MIN         0
#define HS_HM_SCALE_MAX         0

//
// Expands to nothing; used to terminate the backslash-continued
// tables below.
//
#define HS_EMPTY

//
// NOTE(review): presumably a target-architecture marker tested
// elsewhere with #ifdef -- confirm against the consumers.
//
#define HS_NVIDIA_SM35

//
// Row table: invokes HS_SLAB_ROW(n, n-1) once for each n in 1..8,
// in ascending order.
//
#define HS_SLAB_ROWS()  \
  HS_SLAB_ROW(1, 0)     \
  HS_SLAB_ROW(2, 1)     \
  HS_SLAB_ROW(3, 2)     \
  HS_SLAB_ROW(4, 3)     \
  HS_SLAB_ROW(5, 4)     \
  HS_SLAB_ROW(6, 5)     \
  HS_SLAB_ROW(7, 6)     \
  HS_SLAB_ROW(8, 7)     \
  HS_EMPTY

//
// Transpose table, emitted in three phases:
//
//   1. HS_TRANSPOSE_STAGE(k) for k = 1..5.
//   2. Four HS_TRANSPOSE_BLEND(from, to, k, a, b) invocations per
//      stage k, walking the prefix chain r -> s -> t -> u -> v -> w.
//   3. HS_TRANSPOSE_REMAP(w, i, j) for i = 1..8, where j runs through
//      1, 3, 5, 7, 2, 4, 6, 8.
//
// The meaning of each argument is fixed by the callback definitions
// supplied by the including file.
//
#define HS_TRANSPOSE_SLAB()             \
  HS_TRANSPOSE_STAGE(1)                 \
  HS_TRANSPOSE_STAGE(2)                 \
  HS_TRANSPOSE_STAGE(3)                 \
  HS_TRANSPOSE_STAGE(4)                 \
  HS_TRANSPOSE_STAGE(5)                 \
  HS_TRANSPOSE_BLEND(r, s, 1, 2, 1)     \
  HS_TRANSPOSE_BLEND(r, s, 1, 4, 3)     \
  HS_TRANSPOSE_BLEND(r, s, 1, 6, 5)     \
  HS_TRANSPOSE_BLEND(r, s, 1, 8, 7)     \
  HS_TRANSPOSE_BLEND(s, t, 2, 3, 1)     \
  HS_TRANSPOSE_BLEND(s, t, 2, 4, 2)     \
  HS_TRANSPOSE_BLEND(s, t, 2, 7, 5)     \
  HS_TRANSPOSE_BLEND(s, t, 2, 8, 6)     \
  HS_TRANSPOSE_BLEND(t, u, 3, 5, 1)     \
  HS_TRANSPOSE_BLEND(t, u, 3, 6, 2)     \
  HS_TRANSPOSE_BLEND(t, u, 3, 7, 3)     \
  HS_TRANSPOSE_BLEND(t, u, 3, 8, 4)     \
  HS_TRANSPOSE_BLEND(u, v, 4, 2, 1)     \
  HS_TRANSPOSE_BLEND(u, v, 4, 4, 3)     \
  HS_TRANSPOSE_BLEND(u, v, 4, 6, 5)     \
  HS_TRANSPOSE_BLEND(u, v, 4, 8, 7)     \
  HS_TRANSPOSE_BLEND(v, w, 5, 3, 1)     \
  HS_TRANSPOSE_BLEND(v, w, 5, 4, 2)     \
  HS_TRANSPOSE_BLEND(v, w, 5, 7, 5)     \
  HS_TRANSPOSE_BLEND(v, w, 5, 8, 6)     \
  HS_TRANSPOSE_REMAP(w, 1, 1)           \
  HS_TRANSPOSE_REMAP(w, 2, 3)           \
  HS_TRANSPOSE_REMAP(w, 3, 5)           \
  HS_TRANSPOSE_REMAP(w, 4, 7)           \
  HS_TRANSPOSE_REMAP(w, 5, 2)           \
  HS_TRANSPOSE_REMAP(w, 6, 4)           \
  HS_TRANSPOSE_REMAP(w, 7, 6)           \
  HS_TRANSPOSE_REMAP(w, 8, 8)           \
  HS_EMPTY

#endif // HS_CUDA_CONFIG_ONCE