Home | History | Annotate | Download | only in u64
      1 /*
      2  * Copyright 2018 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can
      5  * be found in the LICENSE file.
      6  *
      7  */
      8 
      9 #pragma once
     10 
     11 //
     12 //
     13 //
     14 
     15 #include <cuda.h>
     16 #include <stdint.h>
     17 #include <stdbool.h>
     18 
     19 //
     20 // Info about the algorithm configuration.
     21 //
     22 
     23 void // prototype only: reports info about the algorithm configuration
     24 hs_cuda_info_u64(uint32_t * const key_words,        // presumably an out-param (non-const pointee); meaning inferred from name — TODO confirm
     25                  uint32_t * const val_words,        // presumably an out-param; meaning inferred from name — TODO confirm
     26                  uint32_t * const slab_height,      // presumably an out-param; units not stated in this header
     27                  uint32_t * const slab_width_log2); // presumably an out-param; name suggests log2 of the slab width
     28 
     29 //
     30 // Determine what padding will be applied to the input and output
     31 // buffers.
     32 //
     33 // Always check to see if the allocated buffers are large enough.
     34 //
     35 // count                    : number of keys
     36 // count + count_padded_in  : additional keys required for sorting
     37 // count + count_padded_out : additional keys required for merging
     38 //
     39 
     40 void // prototype only: determines the padding applied to the input and output buffers
     41 hs_cuda_pad_u64(uint32_t   const count,             // number of keys
     42                 uint32_t * const count_padded_in,   // presumably written by the callee: padding related to sorting (input buffer) — TODO confirm exact meaning
     43                 uint32_t * const count_padded_out); // presumably written by the callee: padding related to merging (output buffer) — TODO confirm exact meaning
     44 
     45 //
     46 // Sort the keys in the vin buffer and store them in the vout buffer.
     47 //
     48 // If vout is NULL then the sort will be performed in place.
     49 //
     50 // The implementation assumes the command queue is out-of-order.
     51 //
     52 
     53 void // prototype only: sorts the keys in vin and stores them in vout
     54 hs_cuda_sort_u64(uint64_t * const vin,               // buffer holding the keys to sort
     55                  uint64_t * const vout,              // destination buffer; NULL means sort in place
     56                  uint32_t   const count,             // presumably the number of keys, as in hs_cuda_pad_u64 — TODO confirm
     57                  uint32_t   const count_padded_in,   // presumably the value reported by hs_cuda_pad_u64 — TODO confirm
     58                  uint32_t   const count_padded_out,  // presumably the value reported by hs_cuda_pad_u64 — TODO confirm
     59                  bool       const linearize,         // NOTE(review): effect is not documented in this header
     60                  cudaStream_t     stream0,  // primary stream
     61                  cudaStream_t     stream1,  // auxiliary streams
     62                  cudaStream_t     stream2); // for concurrency
     63 
     64 //
     65 //
     66 //
     67