Home | History | Annotate | Download | only in kernels

Lines Matching refs:packet_size

29           typename nocontract_t, typename contract_t, int Side, int packet_size,
37 nocontract_t, contract_t, packet_size, inner_dim_contiguous,
47 nocontract_t, contract_t, packet_size, inner_dim_contiguous,
56 nocontract_t, contract_t, packet_size, inner_dim_contiguous,
238 nocontract_t, contract_t, packet_size, inner_dim_contiguous,
458 typename nocontract_t, typename contract_t, int Side, int packet_size,
466 nocontract_t, contract_t, packet_size, inner_dim_contiguous,
478 nocontract_t, contract_t, packet_size, inner_dim_contiguous,
487 nocontract_t, contract_t, packet_size, inner_dim_contiguous,
628 typename nocontract_t, typename contract_t, int packet_size,
639 nocontract_t, contract_t, packet_size, inner_dim_contiguous,
648 nocontract_t, contract_t, packet_size, inner_dim_contiguous,
667 const Index peeled_k = (depth / packet_size) * packet_size;
677 if ((packet_size % 4) == 0 && !non_standard_patches) {
679 if ((patch_depth % packet_size) == 0) {
718 eigen_assert((max_depth - startDepth) % packet_size == 0);
719 for (Index d = startDepth; d < max_depth; d += packet_size) {
731 pstoreu(block + 0 * packet_size, kernel.packet[0]);
732 pstoreu(block + 1 * packet_size, kernel.packet[1]);
733 pstoreu(block + 2 * packet_size, kernel.packet[2]);
734 pstoreu(block + 3 * packet_size, kernel.packet[3]);
735 block += 4 * packet_size;
736 k += packet_size;
741 for (; k < peeled_k; k += packet_size) {
748 pstoreu(block + 0 * packet_size, kernel.packet[0]);
749 pstoreu(block + 1 * packet_size, kernel.packet[1]);
750 pstoreu(block + 2 * packet_size, kernel.packet[2]);
751 pstoreu(block + 3 * packet_size, kernel.packet[3]);
752 block += 4 * packet_size;
755 for (; k < peeled_k; k += packet_size) {
762 pstoreu(block + 0 * packet_size, kernel.packet[0]);
763 pstoreu(block + 1 * packet_size, kernel.packet[1]);
764 pstoreu(block + 2 * packet_size, kernel.packet[2]);
765 pstoreu(block + 3 * packet_size, kernel.packet[3]);
766 block += 4 * packet_size;