Home | History | Annotate | Download | only in generators

Lines Matching full:emitter

28 def GenerateZipLanes(emitter, registers, zip_lanes, input_address, stride):
32 emitter: ARM/NEON emitter.
53 emitter.EmitAdd(address_register, last_address_register, stride)
67 def GenerateClearAggregators(emitter, lanes):
69 emitter.EmitVMov('i16', lane.aggregator, emitter.ImmediateConstant(0))
72 def GenerateLoadAggregateStore(emitter, lanes, output_address, alignment):
74 emitter.EmitNewline()
75 emitter.EmitComment('Load Aggregate Store.')
78 emitter.EmitVLoad(
80 emitter.DereferenceIncrement(lane.input_address, alignment))
84 emitter.EmitVAddw('u8', lane.aggregator, lane.aggregator, lane.load)
87 emitter.EmitVStoreA('1.8', store_registers,
88 emitter.DereferenceIncrement(output_address, 64))
92 emitter, leftovers, lanes, output_address):
94 emitter.EmitNewline()
95 emitter.EmitComment('Leftover Load Aggregate Store.')
99 emitter.EmitVMov('i8', lane.load, emitter.ImmediateConstant(0))
104 emitter.EmitVLoad('1.8', emitter.Lane(lane.load, 0),
105 emitter.Dereference(lane.input_address, None))
109 emitter.EmitVLoad('1.16', emitter.Lane(lane.load, 0),
110 emitter.Dereference(lane.input_address, None))
114 emitter.EmitVLoad(
115 '1.16', emitter.Lane(lane.load, 0),
116 emitter.DereferenceIncrement(lane.input_address, None))
119 emitter.EmitVLoad('1.8', emitter.Lane(lane.load, 2),
120 emitter.Dereference(lane.input_address, None))
124 emitter.EmitVLoad('1.32', emitter.Lane(lane.load, 0),
125 emitter.Dereference(lane.input_address, None))
129 emitter.EmitVLoad(
130 '1.32', emitter.Lane(lane.load, 0),
131 emitter.DereferenceIncrement(lane.input_address, None))
134 emitter.EmitVLoad('1.8', emitter.Lane(lane.load, 4),
135 emitter.Dereference(lane.input_address, None))
139 emitter.EmitVLoad(
140 '1.32', emitter.Lane(lane.load, 0),
141 emitter.DereferenceIncrement(lane.input_address, None))
144 emitter.EmitVLoad('1.16', emitter.Lane(lane.load, 2),
145 emitter.Dereference(lane.input_address, None))
149 emitter.EmitVLoad(
150 '1.32', emitter.Lane(lane.load, 0),
151 emitter.DereferenceIncrement(lane.input_address, None))
154 emitter.EmitVLoad(
155 '1.16', emitter.Lane(lane.load, 2),
156 emitter.DereferenceIncrement(lane.input_address, None))
159 emitter.EmitVLoad('1.8', emitter.Lane(lane.load, 6),
160 emitter.Dereference(lane.input_address, None))
167 emitter.EmitVAddw('u8', lane.aggregator, lane.aggregator, lane.load)
171 emitter.EmitVStoreA('1.8', store_registers,
172 emitter.DereferenceIncrement(output_address, 64))
175 def GenerateAggregatorReduction(emitter,
182 emitter.EmitNewline()
183 emitter.EmitComment('Aggregator Reduction.')
186 emitter.EmitVMov('32', emitter.Lane(multiplier, 0), multiplicative_offset)
188 emitter.EmitVDup('32', offset, additive_offset)
192 emitter.EmitVPaddl('u16', lane.aggregator, lane.aggregator)
197 emitter.EmitVPadd('u32',
207 emitter.EmitVPadd('u32', low, lane_temps[0], lane_temps[0])
209 emitter.EmitVPadd('u32', low, lane_temps[0], lane_temps[1])
211 emitter.EmitVPadd('u32', low, lane_temps[0], lane_temps[1])
212 emitter.EmitVPadd('u32', high, lane_temps[2], lane_temps[2])
214 emitter.EmitVPadd('u32', low, lane_temps[0], lane_temps[1])
215 emitter.EmitVPadd('u32', high, lane_temps[2], lane_temps[3])
220 emitter.EmitVMul('i32', temp, temp, emitter.Lane(multiplier, 0))
221 emitter.EmitVAdd('i32', temp, temp, offset)
224 emitter.EmitVStore(
225 '1.32', emitter.Lane(low, 0), emitter.Dereference(output_address, None))
227 emitter.EmitVStore('1.32', low, emitter.Dereference(output_address, 64))
229 emitter.EmitVStore(
230 '1.32', low, emitter.DereferenceIncrement(output_address, 64))
231 emitter.EmitVStore(
232 '1.32', emitter.Lane(high, 0),
233 emitter.Dereference(output_address, None))
235 emitter.EmitVStore(
236 '1.32', low, emitter.DereferenceIncrement(output_address, 64))
237 emitter.EmitVStore('1.32', high, emitter.Dereference(output_address, 64))
240 def GenerateZipNx8(emitter, zip_lanes, leftovers, aligned):
249 emitter.EmitFunctionBeginA(name,
257 emitter.EmitAssert('count %% 8 == %d' % leftovers)
258 emitter.EmitAssert('count <= 2048')
259 emitter.EmitAssert('count >= 8')
260 emitter.EmitAssert('reinterpret_cast<std::uintptr_t>(destination) % 8 == 0')
262 emitter.EmitAssert('reinterpret_cast<std::uintptr_t>(source) % 8 == 0')
264 emitter.EmitAssert('stride % 8 == 0')
265 emitter.EmitAsmBegin()
272 lanes = GenerateZipLanes(emitter,
279 emitter.EmitSub(count, count, emitter.ImmediateConstant(leftovers))
281 GenerateClearAggregators(emitter, lanes)
283 emitter.EmitNewline()
284 emitter.EmitNumericalLabel(1)
285 emitter.EmitSubs(count, count, emitter.ImmediateConstant(8))
288 emitter, lanes, output_address, 64 if aligned else None)
290 emitter.EmitNewline()
291 emitter.EmitBneBack(1)
295 emitter, leftovers, lanes, output_address)
297 GenerateAggregatorReduction(emitter,
304 emitter.EmitAsmEnd(registers.MappedParameters(),
307 emitter.EmitFunctionEnd()
310 def GenerateFunctions(emitter):
314 GenerateZipNx8(emitter, lanes, leftovers, aligned)
315 emitter.EmitNewline()