Home | History | Annotate | Download | only in standalone

Lines Matching defs:Cell

160 // 3 cells, each cell having dimensions (width=3, depth=4), laid out in
187 // a cell (see explanation above).
191 // out in a cell. That is, a CellOrder together with actual dimensions.
209 typedef tCellFormat Cell;
211 static const int kWidth = kCells * Cell::kWidth;
212 static const int kDepth = Cell::kDepth;
222 static_assert(Lhs::Cell::kDepth == Rhs::Cell::kDepth, "");
223 static const int kDepth = Lhs::Cell::kDepth;
224 static const int kRows = Lhs::Cell::kWidth * Lhs::kCells;
225 static const int kCols = Rhs::Cell::kWidth * Rhs::kCells;
242 // Returns the offset into a cell, at which a given coefficient is stored.
276 // Load 1 Rhs cell of size 2x4
305 // A 2x4 cell of Rhs is stored in 16bit in d0--d1 (q0).
848 // Load 1 Rhs cell of size 1x4
1106 // Load 1 Rhs cell of size 1x4
1191 // A 1x4 cell of Rhs is stored in d0--d1 (q0).
1223 // Load Rhs cell
1228 // Load 1st Lhs Cell
1234 "vldr d4, [%[lhs_ptr], #16]\n" // Load 1st half of 2nd Lhs cell
1235 "vmov d1, r2, r3\n" // Prepare 2nd half of Rhs cell
1236 "vmla.f32 q4, q1, d0[0]\n" // Multiply 1st Lhs cell with column 0
1237 "ldr r2, [%[lhs_ptr], #24]\n" // Load 2nd half of 2nd Lhs cell, part 1
1238 "vmla.f32 q5, q1, d0[1]\n" // Multiply 1st Lhs cell with column 1
1239 "ldr r3, [%[lhs_ptr], #28]\n" // Load 2nd half of 2nd Lhs cell, part 2
1240 "vmla.f32 q6, q1, d1[0]\n" // Multiply 1st Lhs cell with column 2
1243 "vldr d6, [%[lhs_ptr], #32]\n" // Load 1st half of 3rd Lhs cell
1244 "vmov d5, r2, r3\n" // Prepare 2nd half of 2nd Lhs cell
1245 "vmla.f32 q7, q1, d1[1]\n" // Multiply 1st Lhs cell with column 3
1246 "ldr r2, [%[lhs_ptr], #40]\n" // Load 2nd half of 3rd Lhs cell, part 1
1247 "vmla.f32 q8, q2, d0[0]\n" // Multiply 2nd Lhs cell with column 0
1248 "ldr r3, [%[lhs_ptr], #44]\n" // Load 2nd half of 3rd Lhs cell, part 2
1249 "vmla.f32 q9, q2, d0[1]\n" // Multiply 2nd Lhs cell with column 1
1250 "add %[rhs_ptr], %[rhs_ptr], #16\n" // Move forward by 1 Rhs cell
1252 "vldr d2, [%[lhs_ptr], #48]\n" // Load 1st half of 1st Lhs cell of next
1254 "vmov d7, r2, r3\n" // Prepare 2nd half of 3rd Lhs cell
1255 "vmla.f32 q10, q2, d1[0]\n" // Multiply 2nd Lhs cell with column 2
1256 "ldr r2, [%[lhs_ptr], #56]\n" // Load 2nd half of 1st Lhs cell of next
1258 "vmla.f32 q12, q3, d0[0]\n" // Multiply 3rd Lhs cell with column 0
1259 "ldr r3, [%[lhs_ptr], #60]\n" // Load 2nd half of 1st Lhs cell of next
1261 "vmla.f32 q13, q3, d0[1]\n" // Multiply 3rd Lhs cell with column 1
1264 "vldr d0, [%[rhs_ptr]]\n" // Load 1st half of Rhs cell of next
1266 "vmov d3, r2, r3\n" // Prepare 2nd half of 1st Lhs cell of next
1268 "vmla.f32 q11, q2, d1[1]\n" // Multiply 2nd Lhs cell with column 3
1269 "ldr r2, [%[rhs_ptr], #8]\n" // Load 2nd half of Rhs cell of next
1271 "vmla.f32 q14, q3, d1[0]\n" // Multiply 3rd Lhs cell with column 2
1272 "ldr r3, [%[rhs_ptr], #12]\n" // Load 2nd half of Rhs cell of next
1274 "vmla.f32 q15, q3, d1[1]\n" // Multiply 3rd Lhs cell with column 3
1334 // A 1x4 cell of Rhs is stored in d0--d1 (q0).
1366 // Load Rhs cell
1371 // Load 1st Lhs Cell
1380 "vldr d4, [%[lhs_ptr], #32]\n" // Load 1st half of 2nd Lhs cell
1381 "vmov d1, r2, r3\n" // Prepare 2nd half of Rhs cell
1382 "vmla.f32 q4, q1, d0[0]\n" // Multiply 1st Lhs cell with column 0
1383 "ldr r2, [%[lhs_ptr], #40]\n" // Load 2nd half of 2nd Lhs cell, part 1
1384 "vmla.f32 q5, q1, d0[1]\n" // Multiply 1st Lhs cell with column 1
1385 "ldr r3, [%[lhs_ptr], #44]\n" // Load 2nd half of 2nd Lhs cell, part 2
1386 "vmla.f32 q6, q1, d1[0]\n" // Multiply 1st Lhs cell with column 2
1388 "vldr d6, [%[lhs_ptr], #64]\n" // Load 1st half of 3rd Lhs cell
1389 "vmov d5, r2, r3\n" // Prepare 2nd half of 2nd Lhs cell
1390 "vmla.f32 q7, q1, d1[1]\n" // Multiply 1st Lhs cell with column 3
1391 "ldr r2, [%[lhs_ptr], #72]\n" // Load 2nd half of 3rd Lhs cell, part 1
1392 "vmla.f32 q8, q2, d0[0]\n" // Multiply 2nd Lhs cell with column 0
1393 "ldr r3, [%[lhs_ptr], #76]\n" // Load 2nd half of 3rd Lhs cell, part 2
1394 "vmla.f32 q9, q2, d0[1]\n" // Multiply 2nd Lhs cell with column 1
1396 "vldr d2, [%[lhs_ptr], #16]\n" // Load 1st half of 1st Lhs cell of next
1398 "vmov d7, r2, r3\n" // Prepare 2nd half of 3rd Lhs cell
1399 "vmla.f32 q10, q2, d1[0]\n" // Multiply 2nd Lhs cell with column 2
1400 "ldr r2, [%[lhs_ptr], #24]\n" // Load 2nd half of 1st Lhs cell of next
1402 "vmla.f32 q12, q3, d0[0]\n" // Multiply 3rd Lhs cell with column 0
1403 "ldr r3, [%[lhs_ptr], #28]\n" // Load 2nd half of 1st Lhs cell of next
1405 "vmla.f32 q13, q3, d0[1]\n" // Multiply 3rd Lhs cell with column 1
1407 "vldr d0, [%[rhs_ptr], #16]\n" // Load 1st half of Rhs cell of next
1409 "vmov d3, r2, r3\n" // Prepare 2nd half of 1st Lhs cell of next
1411 "vmla.f32 q11, q2, d1[1]\n" // Multiply 2nd Lhs cell with column 3
1412 "ldr r2, [%[rhs_ptr], #24]\n" // Load 2nd half of Rhs cell of next
1414 "vmla.f32 q14, q3, d1[0]\n" // Multiply 3rd Lhs cell with column 2
1415 "ldr r3, [%[rhs_ptr], #28]\n" // Load 2nd half of Rhs cell of next
1417 "vmla.f32 q15, q3, d1[1]\n" // Multiply 3rd Lhs cell with column 3
1420 "vldr d4, [%[lhs_ptr], #48]\n" // Load 1st half of 2nd Lhs cell
1421 "vmov d1, r2, r3\n" // Prepare 2nd half of Rhs cell
1422 "vmla.f32 q4, q1, d0[0]\n" // Multiply 1st Lhs cell with column 0
1423 "ldr r2, [%[lhs_ptr], #56]\n" // Load 2nd half of 2nd Lhs cell, part 1
1424 "vmla.f32 q5, q1, d0[1]\n" // Multiply 1st Lhs cell with column 1
1425 "ldr r3, [%[lhs_ptr], #60]\n" // Load 2nd half of 2nd Lhs cell, part 2
1426 "vmla.f32 q6, q1, d1[0]\n" // Multiply 1st Lhs cell with column 2
1429 "vldr d6, [%[lhs_ptr], #80]\n" // Load 1st half of 3rd Lhs cell
1430 "vmov d5, r2, r3\n" // Prepare 2nd half of 2nd Lhs cell
1431 "vmla.f32 q7, q1, d1[1]\n" // Multiply 1st Lhs cell with column 3
1432 "ldr r2, [%[lhs_ptr], #88]\n" // Load 2nd half of 3rd Lhs cell, part 1
1433 "vmla.f32 q8, q2, d0[0]\n" // Multiply 2nd Lhs cell with column 0
1434 "ldr r3, [%[lhs_ptr], #92]\n" // Load 2nd half of 3rd Lhs cell, part 2
1435 "vmla.f32 q9, q2, d0[1]\n" // Multiply 2nd Lhs cell with column 1
1436 "add %[rhs_ptr], %[rhs_ptr], #32\n" // Move forward by 1 Rhs cell
1438 "vldr d2, [%[lhs_ptr], #96]\n" // Load 1st half of 1st Lhs cell of next
1440 "vmov d7, r2, r3\n" // Prepare 2nd half of 3rd Lhs cell
1441 "vmla.f32 q10, q2, d1[0]\n" // Multiply 2nd Lhs cell with column 2
1442 "ldr r2, [%[lhs_ptr], #104]\n" // Load 2nd half of 1st Lhs cell of next
1444 "vmla.f32 q12, q3, d0[0]\n" // Multiply 3rd Lhs cell with column 0
1445 "ldr r3, [%[lhs_ptr], #108]\n" // Load 2nd half of 1st Lhs cell of next
1447 "vmla.f32 q13, q3, d0[1]\n" // Multiply 3rd Lhs cell with column 1
1450 "vldr d0, [%[rhs_ptr]]\n" // Load 1st half of Rhs cell of next
1452 "vmov d3, r2, r3\n" // Prepare 2nd half of 1st Lhs cell of next
1454 "vmla.f32 q11, q2, d1[1]\n" // Multiply 2nd Lhs cell with column 3
1455 "ldr r2, [%[rhs_ptr], #8]\n" // Load 2nd half of Rhs cell of next
1457 "vmla.f32 q14, q3, d1[0]\n" // Multiply 3rd Lhs cell with column 2
1458 "ldr r3, [%[rhs_ptr], #12]\n" // Load 2nd half of Rhs cell of next
1460 "vmla.f32 q15, q3, d1[1]\n" // Multiply 3rd Lhs cell with column 3
1562 // Load 1 Rhs cell of size 1x4
1659 // Load 1 Rhs cell of size 1x4
1735 // Load 1 Rhs cell of size 2x8
2547 // Load 2 Rhs cell of size 1x4 each
2669 // Load 2 Rhs cell of size 1x4 each
2799 // Load 2 Rhs cell of size 1x4 each
2917 // The start of the loop assumes first Rhs cell is already loaded, so
2921 // And the same for the first Lhs cell.
2927 // Start the MACs at the head of the loop - 1st cell from each side
2931 "ld1 {v1.4s}, [%[rhs_ptr]], #16\n" // Load second Rhs cell.
2934 "ld1 {v3.4s}, [%[lhs_ptr]], #16\n" // Load second Lhs cell.
2937 "ld1 {v4.4s}, [%[lhs_ptr]], #16\n" // Load third Lhs cell.
2940 "ld1 {v2.4s}, [%[lhs_ptr]], #16\n" // Done with first Lhs cell - load
2954 "ld1 {v0.4s}, [%[rhs_ptr]], #16\n" // Done with the first Rhs cell -
3072 "ldr d0, [%[rhs_ptr]]\n" // Bottom half of first Rhs cell
3081 "ld1 {v2.4s}, [%[lhs_ptr]], #16\n" // first Lhs cell
3090 // (second Rhs cell).
3102 // Second block. Start loading v3 (second Lhs cell), finish loading v1.
3111 // Third block. Start loading v4 (third Lhs cell), finish loading v3.
3120 // Fourth block. v2 (first Lhs cell) is now finished with, so start
3413 Format::Lhs::Cell::kWidth * Format::kDepth;
3417 Format::Rhs::Cell::kWidth * Format::kDepth;
3419 // Now we are inside one cell of the Lhs and inside one cell
3423 for (int ri = 0; ri < Format::Lhs::Cell::kWidth; ri++) {
3424 for (int ci = 0; ci < Format::Rhs::Cell::kWidth; ci++) {
3427 OffsetIntoCell<typename Format::Lhs::Cell>(ri, di);
3430 OffsetIntoCell<typename Format::Rhs::Cell>(ci, di);
3432 accum_ptr + (ri + rc * Format::Lhs::Cell::kWidth) +
3433 (ci + cc * Format::Rhs::Cell::kWidth) * Format::kRows;