Home | History | Annotate | Download | only in gpu

Lines Matching refs:thunk

20 #include "tensorflow/compiler/xla/service/gpu/thunk.h"
84 // Same as `EmitTargetElementLoop`, but in given `thunk` rather than
88 KernelThunk* thunk);
91 // Builds the appropriate thunk for the instruction hlo and returns the owning
93 // of the returned Thunk object.
94 std::unique_ptr<Thunk> BuildThunk(const HloInstruction* hlo);
151 // Emits code to initialize buffer of `inst` in given `thunk`.
152 Status EmitInitializer(const HloInstruction* inst, KernelThunk* thunk);
156 // Thunk object.
157 std::unique_ptr<Thunk> BuildKernelThunk(const HloInstruction* inst);
160 std::unique_ptr<Thunk> BuildFftThunk(const HloInstruction* inst);
163 // to make sure `inst` outlives the lifetime of the returned Thunk object.
164 std::unique_ptr<Thunk> BuildGemmThunk(const HloInstruction* inst);
166 // Returns a thunk that calls host-to-device cuMemcpy to implement `inst`.
167 std::unique_ptr<Thunk> BuildHostToDeviceCopyThunk(const HloInstruction* inst);
169 // Returns a thunk that calls device-to-device cuMemcpy to implement `inst`.
170 std::unique_ptr<Thunk> BuildDeviceToDeviceCopyThunk(
175 std::unique_ptr<Thunk> BuildInfeedThunk(const HloInstruction* inst);
177 // Returns a WhileThunk that invokes thunk sequences for 'condition' and
179 std::unique_ptr<Thunk> BuildWhileThunk(const HloInstruction* hlo);
181 // Returns a ForThunk which executes 'loop_limit' invocations of a thunk
183 std::unique_ptr<Thunk> BuildForThunk(const HloInstruction* hlo,
186 // Returns a ConditionalThunk that executes the thunk sequence for
189 std::unique_ptr<Thunk> BuildConditionalThunk(const HloInstruction* hlo);
193 // Returns the last generated thunk.
194 Thunk* LastThunk() const { return thunk_sequence_->back().get(); }
196 // The thunk sequence this IrEmitter generates for the input computation.