1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.cts.rsblas; 18 19 import android.renderscript.*; 20 import android.util.Log; 21 import java.util.ArrayList; 22 23 public class IntrinsicBLAS extends IntrinsicBase { 24 private ScriptIntrinsicBLAS mBLAS; 25 private BLASData mBLASData; 26 private boolean mInitialized = false; 27 28 private ArrayList<Allocation> mMatrixS; 29 private final float alphaS = 1.0f; 30 private final float betaS = 1.0f; 31 32 private ArrayList<Allocation> mMatrixD; 33 private final double alphaD = 1.0; 34 private final double betaD = 1.0; 35 36 private ArrayList<Allocation> mMatrixC; 37 private final Float2 alphaC = new Float2(1.0f, 0.0f); 38 private final Float2 betaC = new Float2(1.0f, 0.0f); 39 40 private ArrayList<Allocation> mMatrixZ; 41 private final Double2 alphaZ = new Double2(1.0, 0.0); 42 private final Double2 betaZ = new Double2(1.0, 0.0); 43 44 private int[] mTranspose = {ScriptIntrinsicBLAS.NO_TRANSPOSE, 45 ScriptIntrinsicBLAS.TRANSPOSE, 46 ScriptIntrinsicBLAS.CONJ_TRANSPOSE, 47 0}; 48 49 private int[] mUplo = {ScriptIntrinsicBLAS.UPPER, 50 ScriptIntrinsicBLAS.LOWER, 51 0}; 52 53 private int[] mDiag = {ScriptIntrinsicBLAS.NON_UNIT, 54 ScriptIntrinsicBLAS.UNIT, 55 0}; 56 57 private int[] mSide = {ScriptIntrinsicBLAS.LEFT, 58 ScriptIntrinsicBLAS.RIGHT, 59 0}; 60 61 private int[] mInc = {0, 1, 2}; 62 private int[] mK = {-1, 0, 1}; 63 private int[] mDim = {1, 2, 3, 256}; 64 65 @Override 66 protected void setUp() throws Exception { 67 super.setUp(); 68 69 // Now populate the test Matrixes and Vectors. 70 if (!mInitialized) { 71 mBLASData = new BLASData(); 72 mBLASData.loadData(mCtx); 73 mBLAS = ScriptIntrinsicBLAS.create(mRS); 74 mMatrixS = new ArrayList<Allocation>(); 75 mMatrixD = new ArrayList<Allocation>(); 76 mMatrixC = new ArrayList<Allocation>(); 77 mMatrixZ = new ArrayList<Allocation>(); 78 for (int x : mDim) { 79 for (int y : mDim) { 80 mMatrixS.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), x, y))); 81 mMatrixD.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), x, y))); 82 mMatrixC.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), x, y))); 83 mMatrixZ.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), x, y))); 84 } 85 } 86 // Also need Allocation with mismatch Element. 87 Allocation misAlloc = Allocation.createTyped(mRS, Type.createXY(mRS, Element.U8(mRS), 1, 1)); 88 mMatrixS.add(misAlloc); 89 mMatrixD.add(misAlloc); 90 mMatrixC.add(misAlloc); 91 mMatrixZ.add(misAlloc); 92 mInitialized = true; 93 } 94 } 95 96 @Override 97 protected void tearDown() throws Exception { 98 super.tearDown(); 99 } 100 101 // Calculate the square of the L2 norm of a matrix. 102 private double calcL2Norm(float[] input) { 103 double l2Norm = 0; 104 for (int i = 0; i < input.length; ++i) { 105 l2Norm += input[i] * input[i]; 106 } 107 return l2Norm; 108 } 109 110 private double calcL2Norm(double[] input) { 111 double l2Norm = 0; 112 for (int i = 0; i < input.length; ++i) { 113 l2Norm += input[i] * input[i]; 114 } 115 return l2Norm; 116 } 117 118 // Routine to verify if matrix are equivalent. 119 private void verifyMatrix(Allocation ref, Allocation out) { 120 verifyMatrix(ref, out, false); 121 } 122 123 // Use L2 norm of a matrix as the scale to determine whether two matrices are equivalent: 124 // if the absolute square error of any elements is smaller than the average L2 Norm 125 // per element times an allowed error range (1e-6), then the two matrices are considered equivalent. 126 // Criterion: (a[i,j] - a'[i,j])^2 < epsilon * ||A||/(M*N) 127 // M, N: the dimensions of the matrix; epsilon: allowed relative error. 128 private void verifyMatrix(Allocation ref, Allocation out, boolean isUpperMatrix) { 129 double l2Norm; 130 int size; 131 Element e = ref.getType().getElement(); 132 if (e.isCompatible(Element.F32(mRS)) || e.isCompatible(Element.F32_2(mRS))) { 133 size = out.getBytesSize() / 4; 134 float[] outArr = new float[size]; 135 float[] refArr = new float[size]; 136 out.copyTo(outArr); 137 ref.copyTo(refArr); 138 139 double l2NormOut = calcL2Norm(outArr); 140 double l2NormRef = calcL2Norm(refArr); 141 l2Norm = (l2NormOut < l2NormRef ? l2NormOut : l2NormRef) / size; 142 } else { 143 size = out.getBytesSize() / 8; 144 double[] outArr = new double[size]; 145 double[] refArr = new double[size]; 146 out.copyTo(outArr); 147 ref.copyTo(refArr); 148 149 double l2NormOut = calcL2Norm(outArr); 150 double l2NormRef = calcL2Norm(refArr); 151 l2Norm = (l2NormOut < l2NormRef ? l2NormOut : l2NormRef) / size; 152 } 153 mVerify.invoke_verifyMatrix(ref, out, l2Norm, isUpperMatrix); 154 } 155 156 157 private boolean validateSide(int Side) { 158 if (Side != ScriptIntrinsicBLAS.LEFT && Side != ScriptIntrinsicBLAS.RIGHT) { 159 return false; 160 } 161 return true; 162 } 163 164 private boolean validateTranspose(int Trans) { 165 if (Trans != ScriptIntrinsicBLAS.NO_TRANSPOSE && 166 Trans != ScriptIntrinsicBLAS.TRANSPOSE && 167 Trans != ScriptIntrinsicBLAS.CONJ_TRANSPOSE) { 168 return false; 169 } 170 return true; 171 } 172 173 private boolean validateConjTranspose(int Trans) { 174 if (Trans != ScriptIntrinsicBLAS.NO_TRANSPOSE && 175 Trans != ScriptIntrinsicBLAS.CONJ_TRANSPOSE) { 176 return false; 177 } 178 return true; 179 } 180 181 private boolean validateDiag(int Diag) { 182 if (Diag != ScriptIntrinsicBLAS.NON_UNIT && 183 Diag != ScriptIntrinsicBLAS.UNIT) { 184 return false; 185 } 186 return true; 187 } 188 189 private boolean validateUplo(int Uplo) { 190 if (Uplo != ScriptIntrinsicBLAS.UPPER && 191 Uplo != ScriptIntrinsicBLAS.LOWER) { 192 return false; 193 } 194 return true; 195 } 196 197 private boolean validateVecInput(Allocation X) { 198 if (X.getType().getY() > 2) { 199 // For testing vector, need a mismatch Y for complete test coverage. 200 return false; 201 } 202 return true; 203 } 204 205 private boolean validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) { 206 if (!validateTranspose(TransA)) { 207 return false; 208 } 209 int M = A.getType().getY(); 210 int N = A.getType().getX(); 211 if (!A.getType().getElement().isCompatible(e) || 212 !X.getType().getElement().isCompatible(e) || 213 !Y.getType().getElement().isCompatible(e)) { 214 return false; 215 } 216 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 217 return false; 218 } 219 220 if (incX <= 0 || incY <= 0) { 221 return false; 222 } 223 int expectedXDim = -1, expectedYDim = -1; 224 if (TransA == ScriptIntrinsicBLAS.NO_TRANSPOSE) { 225 expectedXDim = 1 + (N - 1) * incX; 226 expectedYDim = 1 + (M - 1) * incY; 227 } else { 228 expectedXDim = 1 + (M - 1) * incX; 229 expectedYDim = 1 + (N - 1) * incY; 230 } 231 if (X.getType().getX() != expectedXDim || 232 Y.getType().getX() != expectedYDim) { 233 return false; 234 } 235 return true; 236 } 237 238 private void xGEMV_API_test(int trans, int incX, int incY, ArrayList<Allocation> mMatrix) { 239 for (Allocation matA : mMatrix) { 240 for (Allocation vecX : mMatrix) { 241 if (!validateVecInput(vecX)) { 242 continue; 243 } 244 for (Allocation vecY : mMatrix) { 245 if (!validateVecInput(vecY)) { 246 continue; 247 } 248 Element elemA = matA.getType().getElement(); 249 if (validateGEMV(elemA, trans, matA, vecX, incX, vecY, incY)) { 250 try { 251 if (elemA.isCompatible(Element.F32(mRS))) { 252 mBLAS.SGEMV(trans, alphaS, matA, vecX, incX, betaS, vecY, incY); 253 } else if (elemA.isCompatible(Element.F64(mRS))) { 254 mBLAS.DGEMV(trans, alphaD, matA, vecX, incX, betaD, vecY, incY); 255 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 256 mBLAS.CGEMV(trans, alphaC, matA, vecX, incX, betaC, vecY, incY); 257 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 258 mBLAS.ZGEMV(trans, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 259 } 260 } catch (RSRuntimeException e) { 261 fail("should NOT throw RSRuntimeException"); 262 } 263 } else { 264 try { 265 mBLAS.SGEMV(trans, alphaS, matA, vecX, incX, betaS, vecY, incY); 266 fail("should throw RSRuntimeException for SGEMV"); 267 } catch (RSRuntimeException e) { 268 } 269 try { 270 mBLAS.DGEMV(trans, alphaD, matA, vecX, incX, betaD, vecY, incY); 271 fail("should throw RSRuntimeException for DGEMV"); 272 } catch (RSRuntimeException e) { 273 } 274 try { 275 mBLAS.CGEMV(trans, alphaC, matA, vecX, incX, betaC, vecY, incY); 276 fail("should throw RSRuntimeException for CGEMV"); 277 } catch (RSRuntimeException e) { 278 } 279 try { 280 mBLAS.ZGEMV(trans, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 281 fail("should throw RSRuntimeException for ZGEMV"); 282 } catch (RSRuntimeException e) { 283 } 284 } 285 } 286 } 287 } 288 } 289 290 public void L2_xGEMV_API(ArrayList<Allocation> mMatrix) { 291 for (int trans : mTranspose) { 292 for (int incX : mInc) { 293 xGEMV_API_test(trans, incX, incX, mMatrix); 294 } 295 } 296 } 297 298 public void test_L2_SGEMV_API() { 299 L2_xGEMV_API(mMatrixS); 300 } 301 302 public void test_L2_DGEMV_API() { 303 L2_xGEMV_API(mMatrixD); 304 } 305 306 public void test_L2_CGEMV_API() { 307 L2_xGEMV_API(mMatrixC); 308 } 309 310 public void test_L2_ZGEMV_API() { 311 L2_xGEMV_API(mMatrixZ); 312 } 313 314 public void test_L2_SGEMV_Correctness() { 315 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 316 int incX = 1; 317 int incY = 1; 318 319 // Populate input allocations 320 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 321 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 322 Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1)); 323 matrixAS.copyFrom(mBLASData.L2_sGEMV_A_mn); 324 vectorXS.copyFrom(mBLASData.L2_sGEMV_x_n1); 325 vectorYS.copyFrom(mBLASData.L2_sGEMV_y_m1); 326 327 // Test for the default case: NO_TRANS 328 mBLAS.SGEMV(trans, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY); 329 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1)); 330 vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_N); 331 verifyMatrix(vectorYRef, vectorYS); 332 333 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 334 trans = ScriptIntrinsicBLAS.TRANSPOSE; 335 // Reload vector Y, since it was overwritten by BLAS. 336 vectorYS.copyFrom(mBLASData.L2_sGEMV_y_m1); 337 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 338 mBLAS.SGEMV(trans, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX); 339 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 340 vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_T); 341 verifyMatrix(vectorYRef, vectorXS); 342 343 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 344 vectorXS.copyFrom(mBLASData.L2_sGEMV_x_n1); 345 mBLAS.SGEMV(trans, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX); 346 vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_H); 347 verifyMatrix(vectorYRef, vectorXS); 348 349 // Test for incX = 2 & incY = 3; 350 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 351 incX = 2; 352 incY = 3; 353 int dimX = 1 + (mBLASData.dN - 1) * incX; 354 int dimY = 1 + (mBLASData.dM - 1) * incY; 355 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 356 vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 357 vectorXS.copyFrom(mBLASData.L2_sGEMV_x_n2); 358 vectorYS.copyFrom(mBLASData.L2_sGEMV_y_m2); 359 360 mBLAS.SGEMV(trans, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY); 361 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 362 vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_N2); 363 verifyMatrix(vectorYRef, vectorYS); 364 365 mRS.finish(); 366 checkError(); 367 } 368 369 public void test_L2_DGEMV_Correctness() { 370 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 371 int incX = 1; 372 int incY = 1; 373 374 // Populate input allocations 375 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 376 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 377 Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1)); 378 matrixAD.copyFrom(mBLASData.L2_dGEMV_A_mn); 379 vectorXD.copyFrom(mBLASData.L2_dGEMV_x_n1); 380 vectorYD.copyFrom(mBLASData.L2_dGEMV_y_m1); 381 382 // Test for the default case: NO_TRANS 383 mBLAS.DGEMV(trans, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 384 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1)); 385 vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_N); 386 verifyMatrix(vectorYRef, vectorYD); 387 388 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 389 trans = ScriptIntrinsicBLAS.TRANSPOSE; 390 // Reload vector Y, since it was overwritten by BLAS. 391 vectorYD.copyFrom(mBLASData.L2_dGEMV_y_m1); 392 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 393 mBLAS.DGEMV(trans, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX); 394 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 395 vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_T); 396 verifyMatrix(vectorYRef, vectorXD); 397 398 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 399 vectorXD.copyFrom(mBLASData.L2_dGEMV_x_n1); 400 mBLAS.DGEMV(trans, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX); 401 vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_H); 402 verifyMatrix(vectorYRef, vectorXD); 403 404 // Test for incX = 2 & incY = 3; 405 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 406 incX = 2; 407 incY = 3; 408 int dimX = 1 + (mBLASData.dN - 1) * incX; 409 int dimY = 1 + (mBLASData.dM - 1) * incY; 410 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 411 vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 412 vectorXD.copyFrom(mBLASData.L2_dGEMV_x_n2); 413 vectorYD.copyFrom(mBLASData.L2_dGEMV_y_m2); 414 415 mBLAS.DGEMV(trans, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 416 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 417 vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_N2); 418 verifyMatrix(vectorYRef, vectorYD); 419 420 mRS.finish(); 421 checkError(); 422 } 423 424 public void test_L2_CGEMV_Correctness() { 425 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 426 int incX = 1; 427 int incY = 1; 428 429 // Populate input allocations 430 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 431 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 432 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1)); 433 matrixAC.copyFrom(mBLASData.L2_cGEMV_A_mn); 434 vectorXC.copyFrom(mBLASData.L2_cGEMV_x_n1); 435 vectorYC.copyFrom(mBLASData.L2_cGEMV_y_m1); 436 437 // Test for the default case: NO_TRANS 438 mBLAS.CGEMV(trans, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY); 439 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1)); 440 vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_N); 441 verifyMatrix(vectorYRef, vectorYC); 442 443 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 444 trans = ScriptIntrinsicBLAS.TRANSPOSE; 445 // Reload vector Y, since it was overwritten by BLAS. 446 vectorYC.copyFrom(mBLASData.L2_cGEMV_y_m1); 447 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 448 mBLAS.CGEMV(trans, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX); 449 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 450 vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_T); 451 verifyMatrix(vectorYRef, vectorXC); 452 453 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 454 vectorXC.copyFrom(mBLASData.L2_cGEMV_x_n1); 455 mBLAS.CGEMV(trans, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX); 456 vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_H); 457 verifyMatrix(vectorYRef, vectorXC); 458 459 // Test for incX = 2 & incY = 3; 460 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 461 incX = 2; 462 incY = 3; 463 int dimX = 1 + (mBLASData.dN - 1) * incX; 464 int dimY = 1 + (mBLASData.dM - 1) * incY; 465 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 466 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 467 vectorXC.copyFrom(mBLASData.L2_cGEMV_x_n2); 468 vectorYC.copyFrom(mBLASData.L2_cGEMV_y_m2); 469 470 mBLAS.CGEMV(trans, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY); 471 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 472 vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_N2); 473 verifyMatrix(vectorYRef, vectorYC); 474 475 mRS.finish(); 476 checkError(); 477 } 478 479 public void test_L2_ZGEMV_Correctness() { 480 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 481 int incX = 1; 482 int incY = 1; 483 484 // Populate input allocations 485 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 486 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 487 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1)); 488 matrixAZ.copyFrom(mBLASData.L2_zGEMV_A_mn); 489 vectorXZ.copyFrom(mBLASData.L2_zGEMV_x_n1); 490 vectorYZ.copyFrom(mBLASData.L2_zGEMV_y_m1); 491 492 // Test for the default case: NO_TRANS 493 mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY); 494 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1)); 495 vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_N); 496 verifyMatrix(vectorYRef, vectorYZ); 497 498 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 499 trans = ScriptIntrinsicBLAS.TRANSPOSE; 500 // Reload vector Y, since it was overwritten by BLAS. 501 vectorYZ.copyFrom(mBLASData.L2_zGEMV_y_m1); 502 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 503 mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorYZ, incY, betaZ, vectorXZ, incX); 504 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 505 vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_T); 506 verifyMatrix(vectorYRef, vectorXZ); 507 508 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 509 vectorXZ.copyFrom(mBLASData.L2_zGEMV_x_n1); 510 mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorYZ, incY, betaZ, vectorXZ, incX); 511 vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_H); 512 verifyMatrix(vectorYRef, vectorXZ); 513 514 // Test for incX = 2 & incY = 3; 515 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 516 incX = 2; 517 incY = 3; 518 int dimX = 1 + (mBLASData.dN - 1) * incX; 519 int dimY = 1 + (mBLASData.dM - 1) * incY; 520 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 521 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 522 vectorXZ.copyFrom(mBLASData.L2_zGEMV_x_n2); 523 vectorYZ.copyFrom(mBLASData.L2_zGEMV_y_m2); 524 525 mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY); 526 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 527 vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_N2); 528 verifyMatrix(vectorYRef, vectorYZ); 529 530 mRS.finish(); 531 checkError(); 532 } 533 534 535 536 private void xGBMV_API_test(int trans, int KL, int KU, int incX, int incY, ArrayList<Allocation> mMatrix) { 537 for (Allocation matA : mMatrix) { 538 for (Allocation vecX : mMatrix) { 539 if (!validateVecInput(vecX)) { 540 continue; 541 } 542 for (Allocation vecY : mMatrix) { 543 if (!validateVecInput(vecY)) { 544 continue; 545 } 546 Element elemA = matA.getType().getElement(); 547 if (validateGEMV(elemA, trans, matA, vecX, incX, vecY, incY) && KU >= 0 && KL >= 0) { 548 try { 549 if (elemA.isCompatible(Element.F32(mRS))) { 550 mBLAS.SGBMV(trans, KL, KU, alphaS, matA, vecX, incX, betaS, vecY, incY); 551 } else if (elemA.isCompatible(Element.F64(mRS))) { 552 mBLAS.DGBMV(trans, KL, KU, alphaD, matA, vecX, incX, betaD, vecY, incY); 553 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 554 mBLAS.CGBMV(trans, KL, KU, alphaC, matA, vecX, incX, betaC, vecY, incY); 555 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 556 mBLAS.ZGBMV(trans, KL, KU, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 557 } 558 } catch (RSRuntimeException e) { 559 fail("should NOT throw RSRuntimeException"); 560 } 561 } else { 562 try { 563 mBLAS.SGBMV(trans, KL, KU, alphaS, matA, vecX, incX, betaS, vecY, incY); 564 fail("should throw RSRuntimeException for SGBMV"); 565 } catch (RSRuntimeException e) { 566 } 567 try { 568 mBLAS.DGBMV(trans, KL, KU, alphaD, matA, vecX, incX, betaD, vecY, incY); 569 fail("should throw RSRuntimeException for DGBMV"); 570 } catch (RSRuntimeException e) { 571 } 572 try { 573 mBLAS.CGBMV(trans, KL, KU, alphaC, matA, vecX, incX, betaC, vecY, incY); 574 fail("should throw RSRuntimeException for CGBMV"); 575 } catch (RSRuntimeException e) { 576 } 577 try { 578 mBLAS.ZGBMV(trans, KL, KU, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 579 fail("should throw RSRuntimeException for ZGBMV"); 580 } catch (RSRuntimeException e) { 581 } 582 } 583 } 584 } 585 } 586 } 587 588 public void L2_xGBMV_API(ArrayList<Allocation> mMatrix) { 589 for (int trans : mTranspose) { 590 for (int incX : mInc) { 591 for (int K : mK) { 592 xGBMV_API_test(trans, K, K, incX, incX, mMatrix); 593 } 594 } 595 } 596 } 597 598 public void test_L2_SGBMV_API() { 599 L2_xGBMV_API(mMatrixS); 600 } 601 602 public void test_L2_DGBMV_API() { 603 L2_xGBMV_API(mMatrixD); 604 } 605 606 public void test_L2_CGBMV_API() { 607 L2_xGBMV_API(mMatrixC); 608 } 609 610 public void test_L2_ZGBMV_API() { 611 L2_xGBMV_API(mMatrixZ); 612 } 613 614 public void test_L2_SGBMV_Correctness() { 615 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 616 int incX = 1; 617 int incY = 1; 618 619 // Populate input allocations 620 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 621 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 622 Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1)); 623 matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_sGBMV_A_mn); 624 vectorXS.copyFrom(mBLASData.L2_sGBMV_x_n1); 625 vectorYS.copyFrom(mBLASData.L2_sGBMV_y_m1); 626 627 // Test for the default case: NO_TRANS 628 mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY); 629 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1)); 630 vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_N); 631 verifyMatrix(vectorYRef, vectorYS); 632 633 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 634 trans = ScriptIntrinsicBLAS.TRANSPOSE; 635 // Reload vector Y, since it was overwritten by BLAS. 636 vectorYS.copyFrom(mBLASData.L2_sGBMV_y_m1); 637 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 638 mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX); 639 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 640 vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_T); 641 verifyMatrix(vectorYRef, vectorXS); 642 643 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 644 vectorXS.copyFrom(mBLASData.L2_sGBMV_x_n1); 645 mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX); 646 vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_H); 647 verifyMatrix(vectorYRef, vectorXS); 648 649 // Test for incX = 2 & incY = 3; 650 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 651 incX = 2; 652 incY = 3; 653 int dimX = 1 + (mBLASData.dN - 1) * incX; 654 int dimY = 1 + (mBLASData.dM - 1) * incY; 655 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 656 vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 657 vectorXS.copyFrom(mBLASData.L2_sGBMV_x_n2); 658 vectorYS.copyFrom(mBLASData.L2_sGBMV_y_m2); 659 660 mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY); 661 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 662 vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_N2); 663 verifyMatrix(vectorYRef, vectorYS); 664 665 mRS.finish(); 666 checkError(); 667 } 668 669 public void test_L2_DGBMV_Correctness() { 670 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 671 int incX = 1; 672 int incY = 1; 673 674 // Populate input allocations 675 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 676 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 677 Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1)); 678 matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_dGBMV_A_mn); 679 vectorXD.copyFrom(mBLASData.L2_dGBMV_x_n1); 680 vectorYD.copyFrom(mBLASData.L2_dGBMV_y_m1); 681 682 // Test for the default case: NO_TRANS 683 mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 684 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1)); 685 vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_N); 686 verifyMatrix(vectorYRef, vectorYD); 687 688 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 689 trans = ScriptIntrinsicBLAS.TRANSPOSE; 690 // Reload vector Y, since it was overwritten by BLAS. 691 vectorYD.copyFrom(mBLASData.L2_dGBMV_y_m1); 692 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 693 mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX); 694 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 695 vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_T); 696 verifyMatrix(vectorYRef, vectorXD); 697 698 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 699 vectorXD.copyFrom(mBLASData.L2_dGBMV_x_n1); 700 mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX); 701 vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_H); 702 verifyMatrix(vectorYRef, vectorXD); 703 704 // Test for incX = 2 & incY = 3; 705 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 706 incX = 2; 707 incY = 3; 708 int dimX = 1 + (mBLASData.dN - 1) * incX; 709 int dimY = 1 + (mBLASData.dM - 1) * incY; 710 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 711 vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 712 vectorXD.copyFrom(mBLASData.L2_dGBMV_x_n2); 713 vectorYD.copyFrom(mBLASData.L2_dGBMV_y_m2); 714 715 mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 716 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 717 vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_N2); 718 verifyMatrix(vectorYRef, vectorYD); 719 720 mRS.finish(); 721 checkError(); 722 } 723 724 public void test_L2_CGBMV_Correctness() { 725 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 726 int incX = 1; 727 int incY = 1; 728 729 // Populate input allocations 730 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 731 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 732 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1)); 733 matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_cGBMV_A_mn); 734 vectorXC.copyFrom(mBLASData.L2_cGBMV_x_n1); 735 vectorYC.copyFrom(mBLASData.L2_cGBMV_y_m1); 736 737 // Test for the default case: NO_TRANS 738 mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY); 739 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1)); 740 vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_N); 741 verifyMatrix(vectorYRef, vectorYC); 742 743 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 744 trans = ScriptIntrinsicBLAS.TRANSPOSE; 745 // Reload vector Y, since it was overwritten by BLAS. 746 vectorYC.copyFrom(mBLASData.L2_cGBMV_y_m1); 747 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 748 mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX); 749 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 750 vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_T); 751 verifyMatrix(vectorYRef, vectorXC); 752 753 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 754 vectorXC.copyFrom(mBLASData.L2_cGBMV_x_n1); 755 mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX); 756 vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_H); 757 verifyMatrix(vectorYRef, vectorXC); 758 759 // Test for incX = 2 & incY = 3; 760 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 761 incX = 2; 762 incY = 3; 763 int dimX = 1 + (mBLASData.dN - 1) * incX; 764 int dimY = 1 + (mBLASData.dM - 1) * incY; 765 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 766 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 767 vectorXC.copyFrom(mBLASData.L2_cGBMV_x_n2); 768 vectorYC.copyFrom(mBLASData.L2_cGBMV_y_m2); 769 770 mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY); 771 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 772 vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_N2); 773 verifyMatrix(vectorYRef, vectorYC); 774 775 mRS.finish(); 776 checkError(); 777 } 778 779 public void test_L2_ZGBMV_Correctness() { 780 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 781 int incX = 1; 782 int incY = 1; 783 784 // Populate input allocations 785 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 786 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 787 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1)); 788 matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_zGBMV_A_mn); 789 vectorXZ.copyFrom(mBLASData.L2_zGBMV_x_n1); 790 vectorYZ.copyFrom(mBLASData.L2_zGBMV_y_m1); 791 792 // Test for the default case: NO_TRANS 793 mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY); 794 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1)); 795 vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_N); 796 verifyMatrix(vectorYRef, vectorYZ); 797 798 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 799 trans = ScriptIntrinsicBLAS.TRANSPOSE; 800 // Reload vector Y, since it was overwritten by BLAS. 801 vectorYZ.copyFrom(mBLASData.L2_zGBMV_y_m1); 802 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 803 mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorYZ, incY, betaZ, vectorXZ, incX); 804 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 805 vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_T); 806 verifyMatrix(vectorYRef, vectorXZ); 807 808 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 809 vectorXZ.copyFrom(mBLASData.L2_zGBMV_x_n1); 810 mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorYZ, incX, betaZ, vectorXZ, incY); 811 vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_H); 812 verifyMatrix(vectorYRef, vectorXZ); 813 814 // Test for incX = 2 & incY = 3; 815 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 816 incX = 2; 817 incY = 3; 818 int dimX = 1 + (mBLASData.dN - 1) * incX; 819 int dimY = 1 + (mBLASData.dM - 1) * incY; 820 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 821 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 822 vectorXZ.copyFrom(mBLASData.L2_zGBMV_x_n2); 823 vectorYZ.copyFrom(mBLASData.L2_zGBMV_y_m2); 824 825 mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY); 826 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 827 vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_N2); 828 verifyMatrix(vectorYRef, vectorYZ); 829 830 mRS.finish(); 831 checkError(); 832 } 833 834 835 private void xHEMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) { 836 for (Allocation matA : mMatrix) { 837 for (Allocation vecX : mMatrix) { 838 if (!validateVecInput(vecX)) { 839 continue; 840 } 841 for (Allocation vecY : mMatrix) { 842 if (!validateVecInput(vecY)) { 843 continue; 844 } 845 Element elemA = matA.getType().getElement(); 846 if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) { 847 try { 848 if (elemA.isCompatible(Element.F32_2(mRS))) { 849 mBLAS.CHEMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY); 850 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 851 mBLAS.ZHEMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 852 } 853 } catch (RSRuntimeException e) { 854 fail("should NOT throw RSRuntimeException"); 855 } 856 } else { 857 try { 858 mBLAS.CHEMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY); 859 fail("should throw RSRuntimeException for CHEMV"); 860 } catch (RSRuntimeException e) { 861 } 862 try { 863 mBLAS.ZHEMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 864 fail("should throw RSRuntimeException for ZHEMV"); 865 } catch (RSRuntimeException e) { 866 } 867 } 868 } 869 } 870 } 871 } 872 873 public void L2_xHEMV_API(ArrayList<Allocation> mMatrix) { 874 for (int Uplo : mUplo) { 875 for (int incX : mInc) { 876 xHEMV_API_test(Uplo, incX, incX, mMatrix); 877 } 878 } 879 } 880 881 public void test_L2_CHEMV_API() { 882 L2_xHEMV_API(mMatrixC); 883 } 884 885 public void test_L2_ZHEMV_API() { 886 L2_xHEMV_API(mMatrixZ); 887 } 888 889 public void test_L2_CHEMV_Correctness() { 890 int uplo = ScriptIntrinsicBLAS.UPPER; 891 int incX = 1; 892 int incY = 1; 893 894 // Populate input allocations 895 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 896 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 897 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 898 matrixAC.copyFrom(mBLASData.L2_cHEMV_A_nn); 899 vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n1); 900 vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n1); 901 902 // Test for the default case: 903 mBLAS.CHEMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY); 904 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 905 vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N); 906 verifyMatrix(vectorYRef, vectorYC); 907 908 // Test for incX = 2 & incY = 3; 909 incX = 2; 910 incY = 3; 911 int dimX = 1 + (mBLASData.dN - 1) * incX; 912 int dimY = 1 + (mBLASData.dN - 1) * incY; 913 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 914 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 915 vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n2); 916 vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n2); 917 918 mBLAS.CHEMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY); 919 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 920 vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N2); 921 verifyMatrix(vectorYRef, vectorYC); 922 923 mRS.finish(); 924 checkError(); 925 } 926 927 public void test_L2_ZHEMV_Correctness() { 928 int uplo = ScriptIntrinsicBLAS.UPPER; 929 int incX = 1; 930 int incY = 1; 931 932 // Populate input allocations 933 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 934 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 935 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 936 matrixAZ.copyFrom(mBLASData.L2_zHEMV_A_nn); 937 vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n1); 938 vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n1); 939 940 // Test for the default case: NO_TRANS 941 mBLAS.ZHEMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY); 942 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 943 vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N); 944 verifyMatrix(vectorYRef, vectorYZ); 945 946 // Test for incX = 2 & incY = 3; 947 incX = 2; 948 incY = 3; 949 int dimX = 1 + (mBLASData.dN - 1) * incX; 950 int dimY = 1 + (mBLASData.dN - 1) * incY; 951 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 952 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 953 vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n2); 954 vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n2); 955 956 mBLAS.ZHEMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY); 957 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 958 vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N2); 959 verifyMatrix(vectorYRef, vectorYZ); 960 961 mRS.finish(); 962 checkError(); 963 } 964 965 966 967 private void xHBMV_API_test(int Uplo, int K, int incX, int incY, ArrayList<Allocation> mMatrix) { 968 for (Allocation matA : mMatrix) { 969 for (Allocation vecX : mMatrix) { 970 if (!validateVecInput(vecX)) { 971 continue; 972 } 973 for (Allocation vecY : mMatrix) { 974 if (!validateVecInput(vecY)) { 975 continue; 976 } 977 Element elemA = matA.getType().getElement(); 978 if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA) && K >= 0) { 979 try { 980 if (elemA.isCompatible(Element.F32_2(mRS))) { 981 mBLAS.CHBMV(Uplo, K, alphaC, matA, vecX, incX, betaC, vecY, incY); 982 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 983 mBLAS.ZHBMV(Uplo, K, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 984 } 985 } catch (RSRuntimeException e) { 986 fail("should NOT throw RSRuntimeException"); 987 } 988 } else { 989 try { 990 mBLAS.CHBMV(Uplo, K, alphaC, matA, vecX, incX, betaC, vecY, incY); 991 fail("should throw RSRuntimeException for CHBMV"); 992 } catch (RSRuntimeException e) { 993 } 994 try { 995 mBLAS.ZHBMV(Uplo, K, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 996 fail("should throw RSRuntimeException for ZHBMV"); 997 } catch (RSRuntimeException e) { 998 } 999 } 1000 } 1001 } 1002 } 1003 } 1004 1005 public void L2_xHBMV_API(ArrayList<Allocation> mMatrix) { 1006 for (int Uplo : mUplo) { 1007 for (int K : mK) { 1008 for (int incX : mInc) { 1009 xHBMV_API_test(Uplo, K, incX, incX, mMatrix); 1010 } 1011 } 1012 } 1013 } 1014 1015 public void test_L2_CHBMV_API() { 1016 L2_xHBMV_API(mMatrixC); 1017 } 1018 1019 public void test_L2_ZHBMV_API() { 1020 L2_xHBMV_API(mMatrixZ); 1021 } 1022 1023 public void test_L2_CHBMV_Correctness() { 1024 int uplo = ScriptIntrinsicBLAS.UPPER; 1025 int incX = 1; 1026 int incY = 1; 1027 1028 // Populate input allocations 1029 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 1030 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 1031 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 1032 matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_cHBMV_A_nn); 1033 vectorXC.copyFrom(mBLASData.L2_cHBMV_x_n1); 1034 vectorYC.copyFrom(mBLASData.L2_cHBMV_y_n1); 1035 1036 // Test for the default case: 1037 mBLAS.CHBMV(uplo, mBLASData.KL, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY); 1038 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 1039 vectorYRef.copyFrom(mBLASData.L2_cHBMV_o_N); 1040 verifyMatrix(vectorYRef, vectorYC); 1041 1042 // Test for incX = 2 & incY = 3; 1043 incX = 2; 1044 incY = 3; 1045 int dimX = 1 + (mBLASData.dN - 1) * incX; 1046 int dimY = 1 + (mBLASData.dN - 1) * incY; 1047 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 1048 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 1049 vectorXC.copyFrom(mBLASData.L2_cHBMV_x_n2); 1050 vectorYC.copyFrom(mBLASData.L2_cHBMV_y_n2); 1051 1052 mBLAS.CHBMV(uplo, mBLASData.KL, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY); 1053 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 1054 vectorYRef.copyFrom(mBLASData.L2_cHBMV_o_N2); 1055 verifyMatrix(vectorYRef, vectorYC); 1056 1057 mRS.finish(); 1058 checkError(); 1059 } 1060 1061 public void test_L2_ZHBMV_Correctness() { 1062 int uplo = ScriptIntrinsicBLAS.UPPER; 1063 int incX = 1; 1064 int incY = 1; 1065 1066 // Populate input allocations 1067 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 1068 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 1069 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 1070 matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_zHBMV_A_nn); 1071 vectorXZ.copyFrom(mBLASData.L2_zHBMV_x_n1); 1072 vectorYZ.copyFrom(mBLASData.L2_zHBMV_y_n1); 1073 1074 // Test for the default case: NO_TRANS 1075 mBLAS.ZHBMV(uplo, mBLASData.KL, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY); 1076 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 1077 vectorYRef.copyFrom(mBLASData.L2_zHBMV_o_N); 1078 verifyMatrix(vectorYRef, vectorYZ); 1079 1080 // Test for incX = 2 & incY = 3; 1081 incX = 2; 1082 incY = 3; 1083 int dimX = 1 + (mBLASData.dN - 1) * incX; 1084 int dimY = 1 + (mBLASData.dN - 1) * incY; 1085 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 1086 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 1087 vectorXZ.copyFrom(mBLASData.L2_zHBMV_x_n2); 1088 vectorYZ.copyFrom(mBLASData.L2_zHBMV_y_n2); 1089 1090 mBLAS.ZHBMV(uplo, mBLASData.KL, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY); 1091 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 1092 vectorYRef.copyFrom(mBLASData.L2_zHBMV_o_N2); 1093 verifyMatrix(vectorYRef, vectorYZ); 1094 1095 mRS.finish(); 1096 checkError(); 1097 } 1098 1099 1100 private void xHPMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) { 1101 for (Allocation matA : mMatrix) { 1102 for (Allocation vecX : mMatrix) { 1103 if (!validateVecInput(vecX)) { 1104 continue; 1105 } 1106 for (Allocation vecY : mMatrix) { 1107 if (!validateVecInput(vecY)) { 1108 continue; 1109 } 1110 Element elemA = matA.getType().getElement(); 1111 if (validateSPR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) { 1112 try { 1113 if (elemA.isCompatible(Element.F32_2(mRS))) { 1114 mBLAS.CHPMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY); 1115 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 1116 mBLAS.ZHPMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 1117 } 1118 } catch (RSRuntimeException e) { 1119 fail("should NOT throw RSRuntimeException"); 1120 } 1121 } else { 1122 try { 1123 mBLAS.CHPMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY); 1124 fail("should throw RSRuntimeException for CHPMV"); 1125 } catch (RSRuntimeException e) { 1126 } 1127 try { 1128 mBLAS.ZHPMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 1129 fail("should throw RSRuntimeException for ZHPMV"); 1130 } catch (RSRuntimeException e) { 1131 } 1132 } 1133 } 1134 } 1135 } 1136 } 1137 1138 public void L2_xHPMV_API(ArrayList<Allocation> mMatrix) { 1139 for (int Uplo : mUplo) { 1140 for (int incX : mInc) { 1141 xHPMV_API_test(Uplo, incX, incX, mMatrix); 1142 } 1143 } 1144 } 1145 1146 public void test_L2_CHPMV_API() { 1147 L2_xHPMV_API(mMatrixC); 1148 } 1149 1150 public void test_L2_ZHPMV_API() { 1151 L2_xHPMV_API(mMatrixZ); 1152 } 1153 1154 public void test_L2_CHPMV_Correctness() { 1155 int uplo = ScriptIntrinsicBLAS.UPPER; 1156 int incX = 1; 1157 int incY = 1; 1158 1159 // Populate input allocations 1160 int N = mBLASData.dN; 1161 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1)); 1162 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 1163 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 1164 matrixAC.copyFrom(mBLASData.L2_cHEMV_A_nn_pu); 1165 vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n1); 1166 vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n1); 1167 1168 // Test for the default case: 1169 mBLAS.CHPMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY); 1170 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 1171 vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N); 1172 verifyMatrix(vectorYRef, vectorYC); 1173 1174 // Test for incX = 2 & incY = 3; 1175 incX = 2; 1176 incY = 3; 1177 int dimX = 1 + (N - 1) * incX; 1178 int dimY = 1 + (N - 1) * incY; 1179 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 1180 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 1181 vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n2); 1182 vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n2); 1183 1184 mBLAS.CHPMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY); 1185 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 1186 vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N2); 1187 verifyMatrix(vectorYRef, vectorYC); 1188 1189 mRS.finish(); 1190 checkError(); 1191 } 1192 1193 public void test_L2_ZHPMV_Correctness() { 1194 int uplo = ScriptIntrinsicBLAS.UPPER; 1195 int incX = 1; 1196 int incY = 1; 1197 1198 // Populate input allocations 1199 int N = mBLASData.dN; 1200 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1)); 1201 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 1202 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 1203 matrixAZ.copyFrom(mBLASData.L2_zHEMV_A_nn_pu); 1204 vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n1); 1205 vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n1); 1206 1207 // Test for the default case: NO_TRANS 1208 mBLAS.ZHPMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY); 1209 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 1210 vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N); 1211 verifyMatrix(vectorYRef, vectorYZ); 1212 1213 // Test for incX = 2 & incY = 3; 1214 incX = 2; 1215 incY = 3; 1216 int dimX = 1 + (N - 1) * incX; 1217 int dimY = 1 + (N - 1) * incY; 1218 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 1219 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 1220 vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n2); 1221 vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n2); 1222 1223 mBLAS.ZHPMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY); 1224 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 1225 vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N2); 1226 verifyMatrix(vectorYRef, vectorYZ); 1227 1228 mRS.finish(); 1229 checkError(); 1230 } 1231 1232 1233 private boolean validateSYMV(Element e, int Uplo, Allocation A, Allocation X, int incX, Allocation Y, int incY) { 1234 if (!validateUplo(Uplo)) { 1235 return false; 1236 } 1237 int N = A.getType().getY(); 1238 if (A.getType().getX() != N) { 1239 return false; 1240 } 1241 if (!A.getType().getElement().isCompatible(e) || 1242 !X.getType().getElement().isCompatible(e) || 1243 !Y.getType().getElement().isCompatible(e) ) { 1244 return false; 1245 } 1246 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1247 return false; 1248 } 1249 1250 if (incX <= 0 || incY <= 0) { 1251 return false; 1252 } 1253 int expectedXDim = 1 + (N - 1) * incX; 1254 if (X.getType().getX() != expectedXDim) { 1255 return false; 1256 } 1257 int expectedYDim = 1 + (N - 1) * incY; 1258 if (Y.getType().getX() != expectedYDim) { 1259 return false; 1260 } 1261 return true; 1262 } 1263 1264 private void xSYMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) { 1265 for (Allocation matA : mMatrix) { 1266 for (Allocation vecX : mMatrix) { 1267 if (!validateVecInput(vecX)) { 1268 continue; 1269 } 1270 for (Allocation vecY : mMatrix) { 1271 if (!validateVecInput(vecY)) { 1272 continue; 1273 } 1274 Element elemA = matA.getType().getElement(); 1275 if (validateSYMV(elemA, Uplo, matA, vecX, incX, vecY, incY)) { 1276 try { 1277 if (elemA.isCompatible(Element.F32(mRS))) { 1278 mBLAS.SSYMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY); 1279 } else if (elemA.isCompatible(Element.F64(mRS))) { 1280 mBLAS.DSYMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY); 1281 } 1282 } catch (RSRuntimeException e) { 1283 fail("should NOT throw RSRuntimeException"); 1284 } 1285 } else { 1286 try { 1287 mBLAS.SSYMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY); 1288 fail("should throw RSRuntimeException for SSYMV"); 1289 } catch (RSRuntimeException e) { 1290 } 1291 try { 1292 mBLAS.DSYMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY); 1293 fail("should throw RSRuntimeException for DSYMV"); 1294 } catch (RSRuntimeException e) { 1295 } 1296 } 1297 } 1298 } 1299 } 1300 } 1301 1302 public void L2_xSYMV_API(ArrayList<Allocation> mMatrix) { 1303 for (int Uplo : mUplo) { 1304 for (int incX : mInc) { 1305 xSYMV_API_test(Uplo, incX, incX, mMatrix); 1306 } 1307 } 1308 } 1309 1310 public void test_L2_SSYMV_API() { 1311 L2_xSYMV_API(mMatrixS); 1312 } 1313 1314 public void test_L2_DSYMV_API() { 1315 L2_xSYMV_API(mMatrixD); 1316 } 1317 1318 public void test_L2_SSYMV_Correctness() { 1319 int uplo = ScriptIntrinsicBLAS.UPPER; 1320 int incX = 1; 1321 int incY = 1; 1322 1323 // Populate input allocations 1324 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 1325 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 1326 Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 1327 matrixAS.copyFrom(mBLASData.L2_sSYMV_A_nn); 1328 vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n1); 1329 vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n1); 1330 1331 // Test for the default case: 1332 mBLAS.SSYMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY); 1333 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 1334 vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N); 1335 verifyMatrix(vectorYRef, vectorYS); 1336 1337 // Test for incX = 2 & incY = 3; 1338 incX = 2; 1339 incY = 3; 1340 int dimX = 1 + (mBLASData.dN - 1) * incX; 1341 int dimY = 1 + (mBLASData.dN - 1) * incY; 1342 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 1343 vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 1344 vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n2); 1345 vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n2); 1346 1347 mBLAS.SSYMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY); 1348 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 1349 vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N2); 1350 verifyMatrix(vectorYRef, vectorYS); 1351 1352 mRS.finish(); 1353 checkError(); 1354 } 1355 1356 public void test_L2_DSYMV_Correctness() { 1357 int uplo = ScriptIntrinsicBLAS.UPPER; 1358 int incX = 1; 1359 int incY = 1; 1360 1361 // Populate input allocations 1362 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 1363 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 1364 Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 1365 matrixAD.copyFrom(mBLASData.L2_dSYMV_A_nn); 1366 vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n1); 1367 vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n1); 1368 1369 // Test for the default case: 1370 mBLAS.DSYMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 1371 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 1372 vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N); 1373 verifyMatrix(vectorYRef, vectorYD); 1374 1375 // Test for incX = 2 & incY = 3; 1376 incX = 2; 1377 incY = 3; 1378 int dimX = 1 + (mBLASData.dN - 1) * incX; 1379 int dimY = 1 + (mBLASData.dN - 1) * incY; 1380 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 1381 vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 1382 vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n2); 1383 vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n2); 1384 1385 mBLAS.DSYMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 1386 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 1387 vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N2); 1388 verifyMatrix(vectorYRef, vectorYD); 1389 1390 mRS.finish(); 1391 checkError(); 1392 } 1393 1394 1395 1396 private void xSBMV_API_test(int Uplo, int K, int incX, int incY, ArrayList<Allocation> mMatrix) { 1397 for (Allocation matA : mMatrix) { 1398 for (Allocation vecX : mMatrix) { 1399 if (!validateVecInput(vecX)) { 1400 continue; 1401 } 1402 for (Allocation vecY : mMatrix) { 1403 if (!validateVecInput(vecY)) { 1404 continue; 1405 } 1406 Element elemA = matA.getType().getElement(); 1407 if (validateSYMV(elemA, Uplo, matA, vecX, incX, vecY, incY) && K >= 0) { 1408 try { 1409 if (elemA.isCompatible(Element.F32(mRS))) { 1410 mBLAS.SSBMV(Uplo, K, alphaS, matA, vecX, incX, betaS, vecY, incY); 1411 } else if (elemA.isCompatible(Element.F64(mRS))) { 1412 mBLAS.DSBMV(Uplo, K, alphaD, matA, vecX, incX, betaD, vecY, incY); 1413 } 1414 } catch (RSRuntimeException e) { 1415 fail("should NOT throw RSRuntimeException"); 1416 } 1417 } else { 1418 try { 1419 mBLAS.SSBMV(Uplo, K, alphaS, matA, vecX, incX, betaS, vecY, incY); 1420 fail("should throw RSRuntimeException for SSBMV"); 1421 } catch (RSRuntimeException e) { 1422 } 1423 try { 1424 mBLAS.DSBMV(Uplo, K, alphaD, matA, vecX, incX, betaD, vecY, incY); 1425 fail("should throw RSRuntimeException for DSBMV"); 1426 } catch (RSRuntimeException e) { 1427 } 1428 } 1429 } 1430 } 1431 } 1432 } 1433 1434 public void L2_xSBMV_API(ArrayList<Allocation> mMatrix) { 1435 for (int Uplo : mUplo) { 1436 for (int K : mK) { 1437 for (int incX : mInc) { 1438 xSBMV_API_test(Uplo, K, incX, incX, mMatrix); 1439 } 1440 } 1441 } 1442 } 1443 1444 public void test_L2_SSBMV_API() { 1445 L2_xSBMV_API(mMatrixS); 1446 } 1447 1448 public void test_L2_DSBMV_API() { 1449 L2_xSBMV_API(mMatrixD); 1450 } 1451 1452 public void test_L2_SSBMV_Correctness() { 1453 int uplo = ScriptIntrinsicBLAS.UPPER; 1454 int incX = 1; 1455 int incY = 1; 1456 1457 // Populate input allocations 1458 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 1459 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 1460 Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 1461 matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_sSBMV_A_nn); 1462 vectorXS.copyFrom(mBLASData.L2_sSBMV_x_n1); 1463 vectorYS.copyFrom(mBLASData.L2_sSBMV_y_n1); 1464 1465 // Test for the default case: 1466 mBLAS.SSBMV(uplo, mBLASData.KL, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY); 1467 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 1468 vectorYRef.copyFrom(mBLASData.L2_sSBMV_o_N); 1469 verifyMatrix(vectorYRef, vectorYS); 1470 1471 // Test for incX = 2 & incY = 3; 1472 incX = 2; 1473 incY = 3; 1474 int dimX = 1 + (mBLASData.dN - 1) * incX; 1475 int dimY = 1 + (mBLASData.dN - 1) * incY; 1476 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 1477 vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 1478 vectorXS.copyFrom(mBLASData.L2_sSBMV_x_n2); 1479 vectorYS.copyFrom(mBLASData.L2_sSBMV_y_n2); 1480 1481 mBLAS.SSBMV(uplo, mBLASData.KL, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY); 1482 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 1483 vectorYRef.copyFrom(mBLASData.L2_sSBMV_o_N2); 1484 verifyMatrix(vectorYRef, vectorYS); 1485 1486 mRS.finish(); 1487 checkError(); 1488 } 1489 1490 public void test_L2_DSBMV_Correctness() { 1491 int uplo = ScriptIntrinsicBLAS.UPPER; 1492 int incX = 1; 1493 int incY = 1; 1494 1495 // Populate input allocations 1496 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 1497 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 1498 Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 1499 matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_dSBMV_A_nn); 1500 vectorXD.copyFrom(mBLASData.L2_dSBMV_x_n1); 1501 vectorYD.copyFrom(mBLASData.L2_dSBMV_y_n1); 1502 1503 // Test for the default case: 1504 mBLAS.DSBMV(uplo, mBLASData.KL, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 1505 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 1506 vectorYRef.copyFrom(mBLASData.L2_dSBMV_o_N); 1507 verifyMatrix(vectorYRef, vectorYD); 1508 1509 // Test for incX = 2 & incY = 3; 1510 incX = 2; 1511 incY = 3; 1512 int dimX = 1 + (mBLASData.dN - 1) * incX; 1513 int dimY = 1 + (mBLASData.dN - 1) * incY; 1514 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 1515 vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 1516 vectorXD.copyFrom(mBLASData.L2_dSBMV_x_n2); 1517 vectorYD.copyFrom(mBLASData.L2_dSBMV_y_n2); 1518 1519 mBLAS.DSBMV(uplo, mBLASData.KL, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 1520 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 1521 vectorYRef.copyFrom(mBLASData.L2_dSBMV_o_N2); 1522 verifyMatrix(vectorYRef, vectorYD); 1523 1524 mRS.finish(); 1525 checkError(); 1526 } 1527 1528 1529 private boolean validateSPMV(Element e, int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) { 1530 if (!validateUplo(Uplo)) { 1531 return false; 1532 } 1533 if (!Ap.getType().getElement().isCompatible(e) || 1534 !X.getType().getElement().isCompatible(e) || 1535 !Y.getType().getElement().isCompatible(e)) { 1536 return false; 1537 } 1538 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1539 return false; 1540 } 1541 1542 if (Ap.getType().getY() > 1) { 1543 return false; 1544 } 1545 1546 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1547 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1548 return false; 1549 } 1550 if (incX <= 0 || incY <= 0) { 1551 return false; 1552 } 1553 int expectedXDim = 1 + (N - 1) * incX; 1554 if (X.getType().getX() != expectedXDim) { 1555 return false; 1556 } 1557 int expectedYDim = 1 + (N - 1) * incY; 1558 if (Y.getType().getX() != expectedYDim) { 1559 return false; 1560 } 1561 1562 return true; 1563 } 1564 1565 private void xSPMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) { 1566 for (Allocation matA : mMatrix) { 1567 for (Allocation vecX : mMatrix) { 1568 if (!validateVecInput(vecX)) { 1569 continue; 1570 } 1571 for (Allocation vecY : mMatrix) { 1572 if (!validateVecInput(vecY)) { 1573 continue; 1574 } 1575 Element elemA = matA.getType().getElement(); 1576 if (validateSPMV(elemA, Uplo, matA, vecX, incX, vecY, incY)) { 1577 try { 1578 if (elemA.isCompatible(Element.F32(mRS))) { 1579 mBLAS.SSPMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY); 1580 } else if (elemA.isCompatible(Element.F64(mRS))) { 1581 mBLAS.DSPMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY); 1582 } 1583 } catch (RSRuntimeException e) { 1584 fail("should NOT throw RSRuntimeException"); 1585 } 1586 } else { 1587 try { 1588 mBLAS.SSPMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY); 1589 fail("should throw RSRuntimeException for SSPMV"); 1590 } catch (RSRuntimeException e) { 1591 } 1592 try { 1593 mBLAS.DSPMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY); 1594 fail("should throw RSRuntimeException for DSPMV"); 1595 } catch (RSRuntimeException e) { 1596 } 1597 } 1598 } 1599 } 1600 } 1601 } 1602 1603 public void L2_xSPMV_API(ArrayList<Allocation> mMatrix) { 1604 for (int Uplo : mUplo) { 1605 for (int incX : mInc) { 1606 xSPMV_API_test(Uplo, incX, incX, mMatrix); 1607 } 1608 } 1609 } 1610 1611 public void test_L2_SSPMV_API() { 1612 L2_xSPMV_API(mMatrixS); 1613 } 1614 1615 public void test_L2_DSPMV_API() { 1616 L2_xSPMV_API(mMatrixD); 1617 } 1618 1619 public void test_L2_SSPMV_Correctness() { 1620 int uplo = ScriptIntrinsicBLAS.UPPER; 1621 int incX = 1; 1622 int incY = 1; 1623 1624 // Populate input allocations 1625 int N = mBLASData.dN; 1626 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1)); 1627 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 1628 Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 1629 matrixAS.copyFrom(mBLASData.L2_sSYMV_A_nn_pu); 1630 vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n1); 1631 vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n1); 1632 1633 // Test for the default case: 1634 mBLAS.SSPMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY); 1635 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 1636 vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N); 1637 verifyMatrix(vectorYRef, vectorYS); 1638 1639 // Test for incX = 2 & incY = 3; 1640 incX = 2; 1641 incY = 3; 1642 int dimX = 1 + (N - 1) * incX; 1643 int dimY = 1 + (N - 1) * incY; 1644 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 1645 vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 1646 vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n2); 1647 vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n2); 1648 1649 mBLAS.SSPMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY); 1650 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 1651 vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N2); 1652 verifyMatrix(vectorYRef, vectorYS); 1653 1654 mRS.finish(); 1655 checkError(); 1656 } 1657 1658 public void test_L2_DSPMV_Correctness() { 1659 int uplo = ScriptIntrinsicBLAS.UPPER; 1660 int incX = 1; 1661 int incY = 1; 1662 1663 // Populate input allocations 1664 int N = mBLASData.dN; 1665 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1)); 1666 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 1667 Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 1668 matrixAD.copyFrom(mBLASData.L2_dSYMV_A_nn_pu); 1669 vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n1); 1670 vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n1); 1671 1672 // Test for the default case: 1673 mBLAS.DSPMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 1674 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 1675 vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N); 1676 verifyMatrix(vectorYRef, vectorYD); 1677 1678 // Test for incX = 2 & incY = 3; 1679 incX = 2; 1680 incY = 3; 1681 int dimX = 1 + (N - 1) * incX; 1682 int dimY = 1 + (N - 1) * incY; 1683 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 1684 vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 1685 vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n2); 1686 vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n2); 1687 1688 mBLAS.DSPMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 1689 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 1690 vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N2); 1691 verifyMatrix(vectorYRef, vectorYD); 1692 1693 mRS.finish(); 1694 checkError(); 1695 } 1696 1697 1698 1699 private boolean validateTRMV(Element e, int Uplo, int TransA, int Diag, Allocation A, Allocation X, int incX) { 1700 if (!validateUplo(Uplo)) { 1701 return false; 1702 } 1703 if (!validateTranspose(TransA)) { 1704 return false; 1705 } 1706 if (!validateDiag(Diag)) { 1707 return false; 1708 } 1709 int N = A.getType().getY(); 1710 if (A.getType().getX() != N) { 1711 return false; 1712 } 1713 if (!A.getType().getElement().isCompatible(e) || 1714 !X.getType().getElement().isCompatible(e)) { 1715 return false; 1716 } 1717 if (X.getType().getY() > 1) { 1718 return false; 1719 } 1720 1721 if (incX <= 0) { 1722 return false; 1723 } 1724 int expectedXDim = 1 + (N - 1) * incX; 1725 if (X.getType().getX() != expectedXDim) { 1726 return false; 1727 } 1728 return true; 1729 } 1730 1731 private void xTRMV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) { 1732 for (Allocation matA : mMatrix) { 1733 for (Allocation vecX : mMatrix) { 1734 if (!validateVecInput(vecX)) { 1735 continue; 1736 } 1737 Element elemA = matA.getType().getElement(); 1738 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) { 1739 try { 1740 if (elemA.isCompatible(Element.F32(mRS))) { 1741 mBLAS.STRMV(Uplo, TransA, Diag, matA, vecX, incX); 1742 } else if (elemA.isCompatible(Element.F64(mRS))) { 1743 mBLAS.DTRMV(Uplo, TransA, Diag, matA, vecX, incX); 1744 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 1745 mBLAS.CTRMV(Uplo, TransA, Diag, matA, vecX, incX); 1746 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 1747 mBLAS.ZTRMV(Uplo, TransA, Diag, matA, vecX, incX); 1748 } 1749 } catch (RSRuntimeException e) { 1750 fail("should NOT throw RSRuntimeException"); 1751 } 1752 } else { 1753 try { 1754 mBLAS.STRMV(Uplo, TransA, Diag, matA, vecX, incX); 1755 fail("should throw RSRuntimeException for STRMV"); 1756 } catch (RSRuntimeException e) { 1757 } 1758 try { 1759 mBLAS.DTRMV(Uplo, TransA, Diag, matA, vecX, incX); 1760 fail("should throw RSRuntimeException for DTRMV"); 1761 } catch (RSRuntimeException e) { 1762 } 1763 try { 1764 mBLAS.CTRMV(Uplo, TransA, Diag, matA, vecX, incX); 1765 fail("should throw RSRuntimeException for CTRMV"); 1766 } catch (RSRuntimeException e) { 1767 } 1768 try { 1769 mBLAS.ZTRMV(Uplo, TransA, Diag, matA, vecX, incX); 1770 fail("should throw RSRuntimeException for ZTRMV"); 1771 } catch (RSRuntimeException e) { 1772 } 1773 } 1774 } 1775 } 1776 } 1777 1778 public void L2_xTRMV_API(ArrayList<Allocation> mMatrix) { 1779 for (int Uplo : mUplo) { 1780 for (int TransA : mTranspose) { 1781 for (int Diag : mDiag) { 1782 for (int incX : mInc) { 1783 xTRMV_API_test(Uplo, TransA, Diag, incX, mMatrix); 1784 } 1785 } 1786 } 1787 } 1788 } 1789 1790 public void test_L2_STRMV_API() { 1791 L2_xTRMV_API(mMatrixS); 1792 } 1793 1794 public void test_L2_DTRMV_API() { 1795 L2_xTRMV_API(mMatrixD); 1796 } 1797 1798 public void test_L2_CTRMV_API() { 1799 L2_xTRMV_API(mMatrixC); 1800 } 1801 1802 public void test_L2_ZTRMV_API() { 1803 L2_xTRMV_API(mMatrixZ); 1804 } 1805 1806 public void test_L2_STRMV_Correctness() { 1807 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1808 int uplo = ScriptIntrinsicBLAS.UPPER; 1809 int diag = ScriptIntrinsicBLAS.NON_UNIT; 1810 int incX = 1; 1811 1812 // Populate input allocations 1813 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 1814 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 1815 matrixAS.copyFrom(mBLASData.L2_sTRMV_A_nn); 1816 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1); 1817 1818 // Test for the default case: NO_TRANS 1819 mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX); 1820 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 1821 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN); 1822 verifyMatrix(vectorXRef, vectorXS); 1823 1824 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 1825 trans = ScriptIntrinsicBLAS.TRANSPOSE; 1826 // Reload vector X, since it was overwritten by BLAS. 1827 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1); 1828 mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX); 1829 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UT); 1830 verifyMatrix(vectorXRef, vectorXS); 1831 1832 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 1833 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1); 1834 mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX); 1835 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UH); 1836 verifyMatrix(vectorXRef, vectorXS); 1837 1838 // Test for incX = 2; 1839 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1840 incX = 2; 1841 int dimX = 1 + (mBLASData.dN - 1) * incX; 1842 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 1843 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n2); 1844 1845 mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX); 1846 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 1847 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN2); 1848 verifyMatrix(vectorXRef, vectorXS); 1849 1850 mRS.finish(); 1851 checkError(); 1852 } 1853 1854 public void test_L2_DTRMV_Correctness() { 1855 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1856 int uplo = ScriptIntrinsicBLAS.UPPER; 1857 int diag = ScriptIntrinsicBLAS.NON_UNIT; 1858 int incX = 1; 1859 1860 // Populate input allocations 1861 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 1862 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 1863 matrixAD.copyFrom(mBLASData.L2_dTRMV_A_nn); 1864 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1); 1865 1866 // Test for the default case: NO_TRANS 1867 mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX); 1868 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 1869 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN); 1870 verifyMatrix(vectorXRef, vectorXD); 1871 1872 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 1873 trans = ScriptIntrinsicBLAS.TRANSPOSE; 1874 // Reload vector X, since it was overwritten by BLAS. 1875 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1); 1876 mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX); 1877 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UT); 1878 verifyMatrix(vectorXRef, vectorXD); 1879 1880 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 1881 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1); 1882 mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX); 1883 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UH); 1884 verifyMatrix(vectorXRef, vectorXD); 1885 1886 // Test for incX = 2; 1887 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1888 incX = 2; 1889 int dimX = 1 + (mBLASData.dN - 1) * incX; 1890 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 1891 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n2); 1892 1893 mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX); 1894 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 1895 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN2); 1896 verifyMatrix(vectorXRef, vectorXD); 1897 1898 mRS.finish(); 1899 checkError(); 1900 } 1901 1902 public void test_L2_CTRMV_Correctness() { 1903 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1904 int uplo = ScriptIntrinsicBLAS.UPPER; 1905 int diag = ScriptIntrinsicBLAS.NON_UNIT; 1906 int incX = 1; 1907 1908 // Populate input allocations 1909 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 1910 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 1911 matrixAC.copyFrom(mBLASData.L2_cTRMV_A_nn); 1912 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1); 1913 1914 // Test for the default case: NO_TRANS 1915 mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX); 1916 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 1917 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN); 1918 verifyMatrix(vectorXRef, vectorXC); 1919 1920 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 1921 trans = ScriptIntrinsicBLAS.TRANSPOSE; 1922 // Reload vector X, since it was overwritten by BLAS. 1923 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1); 1924 mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX); 1925 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UT); 1926 verifyMatrix(vectorXRef, vectorXC); 1927 1928 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 1929 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1); 1930 mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX); 1931 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UH); 1932 verifyMatrix(vectorXRef, vectorXC); 1933 1934 // Test for incX = 2; 1935 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1936 incX = 2; 1937 int dimX = 1 + (mBLASData.dN - 1) * incX; 1938 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 1939 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n2); 1940 1941 mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX); 1942 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 1943 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN2); 1944 verifyMatrix(vectorXRef, vectorXC); 1945 1946 mRS.finish(); 1947 checkError(); 1948 } 1949 1950 public void test_L2_ZTRMV_Correctness() { 1951 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1952 int uplo = ScriptIntrinsicBLAS.UPPER; 1953 int diag = ScriptIntrinsicBLAS.NON_UNIT; 1954 int incX = 1; 1955 1956 // Populate input allocations 1957 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 1958 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 1959 matrixAZ.copyFrom(mBLASData.L2_zTRMV_A_nn); 1960 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1); 1961 1962 // Test for the default case: NO_TRANS 1963 mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 1964 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 1965 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN); 1966 verifyMatrix(vectorXRef, vectorXZ); 1967 1968 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 1969 trans = ScriptIntrinsicBLAS.TRANSPOSE; 1970 // Reload vector X, since it was overwritten by BLAS. 1971 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1); 1972 mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 1973 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UT); 1974 verifyMatrix(vectorXRef, vectorXZ); 1975 1976 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 1977 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1); 1978 mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 1979 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UH); 1980 verifyMatrix(vectorXRef, vectorXZ); 1981 1982 // Test for incX = 2; 1983 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1984 incX = 2; 1985 int dimX = 1 + (mBLASData.dN - 1) * incX; 1986 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 1987 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n2); 1988 1989 mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 1990 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 1991 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN2); 1992 verifyMatrix(vectorXRef, vectorXZ); 1993 1994 mRS.finish(); 1995 checkError(); 1996 } 1997 1998 1999 2000 private void xTBMV_API_test(int Uplo, int TransA, int Diag, int K, int incX, ArrayList<Allocation> mMatrix) { 2001 for (Allocation matA : mMatrix) { 2002 for (Allocation vecX : mMatrix) { 2003 Element elemA = matA.getType().getElement(); 2004 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX) && K >= 0) { 2005 try { 2006 if (elemA.isCompatible(Element.F32(mRS))) { 2007 mBLAS.STBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2008 } else if (elemA.isCompatible(Element.F64(mRS))) { 2009 mBLAS.DTBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2010 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 2011 mBLAS.CTBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2012 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 2013 mBLAS.ZTBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2014 } 2015 } catch (RSRuntimeException e) { 2016 fail("should NOT throw RSRuntimeException"); 2017 } 2018 } else { 2019 try { 2020 mBLAS.STBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2021 fail("should throw RSRuntimeException for STBMV"); 2022 } catch (RSRuntimeException e) { 2023 } 2024 try { 2025 mBLAS.DTBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2026 fail("should throw RSRuntimeException for DTBMV"); 2027 } catch (RSRuntimeException e) { 2028 } 2029 try { 2030 mBLAS.CTBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2031 fail("should throw RSRuntimeException for CTBMV"); 2032 } catch (RSRuntimeException e) { 2033 } 2034 try { 2035 mBLAS.ZTBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2036 fail("should throw RSRuntimeException for ZTBMV"); 2037 } catch (RSRuntimeException e) { 2038 } 2039 } 2040 } 2041 } 2042 } 2043 2044 public void L2_xTBMV_API(ArrayList<Allocation> mMatrix) { 2045 for (int Uplo : mUplo) { 2046 for (int TransA : mTranspose) { 2047 for (int Diag : mDiag) { 2048 for (int K : mK) { 2049 for (int incX : mInc) { 2050 xTBMV_API_test(Uplo, TransA, Diag, K, incX, mMatrix); 2051 } 2052 } 2053 } 2054 } 2055 } 2056 } 2057 2058 public void test_L2_STBMV_API() { 2059 L2_xTBMV_API(mMatrixS); 2060 } 2061 2062 public void test_L2_DTBMV_API() { 2063 L2_xTBMV_API(mMatrixD); 2064 } 2065 2066 public void test_L2_CTBMV_API() { 2067 L2_xTBMV_API(mMatrixC); 2068 } 2069 2070 public void test_L2_ZTBMV_API() { 2071 L2_xTBMV_API(mMatrixZ); 2072 } 2073 2074 public void test_L2_STBMV_Correctness() { 2075 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2076 int uplo = ScriptIntrinsicBLAS.UPPER; 2077 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2078 int incX = 1; 2079 2080 // Populate input allocations 2081 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 2082 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 2083 matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_sTBMV_A_nn); 2084 vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n1); 2085 2086 // Test for the default case: NO_TRANS 2087 mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2088 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 2089 vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UN); 2090 verifyMatrix(vectorXRef, vectorXS); 2091 2092 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2093 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2094 // Reload vector X, since it was overwritten by BLAS. 2095 vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n1); 2096 mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2097 vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UT); 2098 verifyMatrix(vectorXRef, vectorXS); 2099 2100 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2101 vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n1); 2102 mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2103 vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UH); 2104 verifyMatrix(vectorXRef, vectorXS); 2105 2106 // Test for incX = 2; 2107 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2108 incX = 2; 2109 int dimX = 1 + (mBLASData.dN - 1) * incX; 2110 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2111 vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n2); 2112 2113 mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2114 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2115 vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UN2); 2116 verifyMatrix(vectorXRef, vectorXS); 2117 2118 mRS.finish(); 2119 checkError(); 2120 } 2121 2122 public void test_L2_DTBMV_Correctness() { 2123 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2124 int uplo = ScriptIntrinsicBLAS.UPPER; 2125 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2126 int incX = 1; 2127 2128 // Populate input allocations 2129 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 2130 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 2131 matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_dTBMV_A_nn); 2132 vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n1); 2133 2134 // Test for the default case: NO_TRANS 2135 mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 2136 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 2137 vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UN); 2138 verifyMatrix(vectorXRef, vectorXD); 2139 2140 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2141 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2142 // Reload vector X, since it was overwritten by BLAS. 2143 vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n1); 2144 mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 2145 vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UT); 2146 verifyMatrix(vectorXRef, vectorXD); 2147 2148 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2149 vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n1); 2150 mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 2151 vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UH); 2152 verifyMatrix(vectorXRef, vectorXD); 2153 2154 // Test for incX = 2; 2155 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2156 incX = 2; 2157 int dimX = 1 + (mBLASData.dN - 1) * incX; 2158 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 2159 vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n2); 2160 2161 mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 2162 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 2163 vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UN2); 2164 verifyMatrix(vectorXRef, vectorXD); 2165 2166 mRS.finish(); 2167 checkError(); 2168 } 2169 2170 public void test_L2_CTBMV_Correctness() { 2171 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2172 int uplo = ScriptIntrinsicBLAS.UPPER; 2173 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2174 int incX = 1; 2175 2176 // Populate input allocations 2177 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 2178 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 2179 matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_cTBMV_A_nn); 2180 vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n1); 2181 2182 // Test for the default case: NO_TRANS 2183 mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 2184 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 2185 vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UN); 2186 verifyMatrix(vectorXRef, vectorXC); 2187 2188 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2189 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2190 // Reload vector X, since it was overwritten by BLAS. 2191 vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n1); 2192 mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 2193 vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UT); 2194 verifyMatrix(vectorXRef, vectorXC); 2195 2196 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2197 vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n1); 2198 mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 2199 vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UH); 2200 verifyMatrix(vectorXRef, vectorXC); 2201 2202 // Test for incX = 2; 2203 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2204 incX = 2; 2205 int dimX = 1 + (mBLASData.dN - 1) * incX; 2206 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 2207 vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n2); 2208 2209 mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 2210 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 2211 vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UN2); 2212 verifyMatrix(vectorXRef, vectorXC); 2213 2214 mRS.finish(); 2215 checkError(); 2216 } 2217 2218 public void test_L2_ZTBMV_Correctness() { 2219 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2220 int uplo = ScriptIntrinsicBLAS.UPPER; 2221 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2222 int incX = 1; 2223 2224 // Populate input allocations 2225 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 2226 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 2227 matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_zTBMV_A_nn); 2228 vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n1); 2229 2230 // Test for the default case: NO_TRANS 2231 mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 2232 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 2233 vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UN); 2234 verifyMatrix(vectorXRef, vectorXZ); 2235 2236 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2237 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2238 // Reload vector X, since it was overwritten by BLAS. 2239 vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n1); 2240 mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 2241 vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UT); 2242 verifyMatrix(vectorXRef, vectorXZ); 2243 2244 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2245 vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n1); 2246 mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 2247 vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UH); 2248 verifyMatrix(vectorXRef, vectorXZ); 2249 2250 // Test for incX = 2; 2251 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2252 incX = 2; 2253 int dimX = 1 + (mBLASData.dN - 1) * incX; 2254 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 2255 vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n2); 2256 2257 mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 2258 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 2259 vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UN2); 2260 verifyMatrix(vectorXRef, vectorXZ); 2261 2262 mRS.finish(); 2263 checkError(); 2264 } 2265 2266 2267 private boolean validateTPMV(Element e, int Uplo, int TransA, int Diag, Allocation Ap, Allocation X, int incX) { 2268 if (!validateUplo(Uplo)) { 2269 return false; 2270 } 2271 if (!validateTranspose(TransA)) { 2272 return false; 2273 } 2274 if (!validateDiag(Diag)) { 2275 return false; 2276 } 2277 if (!Ap.getType().getElement().isCompatible(e) || 2278 !X.getType().getElement().isCompatible(e)) { 2279 return false; 2280 } 2281 if (X.getType().getY() > 1) { 2282 return false; 2283 } 2284 2285 if (Ap.getType().getY() > 1) { 2286 return false; 2287 } 2288 2289 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 2290 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 2291 return false; 2292 } 2293 if (incX <= 0) { 2294 return false; 2295 } 2296 int expectedXDim = 1 + (N - 1) * incX; 2297 if (X.getType().getX() != expectedXDim) { 2298 return false; 2299 } 2300 2301 return true; 2302 } 2303 2304 private void xTPMV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) { 2305 for (Allocation matA : mMatrix) { 2306 for (Allocation vecX : mMatrix) { 2307 if (!validateVecInput(vecX)) { 2308 continue; 2309 } 2310 Element elemA = matA.getType().getElement(); 2311 if (validateTPMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) { 2312 try { 2313 if (elemA.isCompatible(Element.F32(mRS))) { 2314 mBLAS.STPMV(Uplo, TransA, Diag, matA, vecX, incX); 2315 } else if (elemA.isCompatible(Element.F64(mRS))) { 2316 mBLAS.DTPMV(Uplo, TransA, Diag, matA, vecX, incX); 2317 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 2318 mBLAS.CTPMV(Uplo, TransA, Diag, matA, vecX, incX); 2319 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 2320 mBLAS.ZTPMV(Uplo, TransA, Diag, matA, vecX, incX); 2321 } 2322 } catch (RSRuntimeException e) { 2323 fail("should NOT throw RSRuntimeException"); 2324 } 2325 } else { 2326 try { 2327 mBLAS.STPMV(Uplo, TransA, Diag, matA, vecX, incX); 2328 fail("should throw RSRuntimeException for STPMV"); 2329 } catch (RSRuntimeException e) { 2330 } 2331 try { 2332 mBLAS.DTPMV(Uplo, TransA, Diag, matA, vecX, incX); 2333 fail("should throw RSRuntimeException for DTPMV"); 2334 } catch (RSRuntimeException e) { 2335 } 2336 try { 2337 mBLAS.CTPMV(Uplo, TransA, Diag, matA, vecX, incX); 2338 fail("should throw RSRuntimeException for CTPMV"); 2339 } catch (RSRuntimeException e) { 2340 } 2341 try { 2342 mBLAS.ZTPMV(Uplo, TransA, Diag, matA, vecX, incX); 2343 fail("should throw RSRuntimeException for ZTPMV"); 2344 } catch (RSRuntimeException e) { 2345 } 2346 } 2347 } 2348 } 2349 } 2350 2351 public void L2_xTPMV_API(ArrayList<Allocation> mMatrix) { 2352 for (int Uplo : mUplo) { 2353 for (int TransA : mTranspose) { 2354 for (int Diag : mDiag) { 2355 for (int incX : mInc) { 2356 xTPMV_API_test(Uplo, TransA, Diag, incX, mMatrix); 2357 } 2358 } 2359 } 2360 } 2361 } 2362 2363 public void test_L2_STPMV_API() { 2364 L2_xTPMV_API(mMatrixS); 2365 } 2366 2367 public void test_L2_DTPMV_API() { 2368 L2_xTPMV_API(mMatrixD); 2369 } 2370 2371 public void test_L2_CTPMV_API() { 2372 L2_xTPMV_API(mMatrixC); 2373 } 2374 2375 public void test_L2_ZTPMV_API() { 2376 L2_xTPMV_API(mMatrixZ); 2377 } 2378 2379 public void test_L2_STPMV_Correctness() { 2380 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2381 int uplo = ScriptIntrinsicBLAS.UPPER; 2382 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2383 int incX = 1; 2384 2385 // Populate input allocations 2386 int N = mBLASData.dN; 2387 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1)); 2388 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 2389 matrixAS.copyFrom(mBLASData.L2_sTRMV_A_nn_pu); 2390 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1); 2391 2392 // Test for the default case: NO_TRANS 2393 mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX); 2394 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 2395 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN); 2396 verifyMatrix(vectorXRef, vectorXS); 2397 2398 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2399 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2400 // Reload vector X, since it was overwritten by BLAS. 2401 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1); 2402 mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX); 2403 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UT); 2404 verifyMatrix(vectorXRef, vectorXS); 2405 2406 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2407 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1); 2408 mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX); 2409 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UH); 2410 verifyMatrix(vectorXRef, vectorXS); 2411 2412 // Test for incX = 2; 2413 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2414 incX = 2; 2415 int dimX = 1 + (N - 1) * incX; 2416 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2417 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n2); 2418 2419 mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX); 2420 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2421 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN2); 2422 verifyMatrix(vectorXRef, vectorXS); 2423 2424 mRS.finish(); 2425 checkError(); 2426 } 2427 2428 public void test_L2_DTPMV_Correctness() { 2429 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2430 int uplo = ScriptIntrinsicBLAS.UPPER; 2431 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2432 int incX = 1; 2433 2434 // Populate input allocations 2435 int N = mBLASData.dN; 2436 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1)); 2437 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 2438 matrixAD.copyFrom(mBLASData.L2_dTRMV_A_nn_pu); 2439 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1); 2440 2441 // Test for the default case: NO_TRANS 2442 mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX); 2443 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 2444 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN); 2445 verifyMatrix(vectorXRef, vectorXD); 2446 2447 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2448 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2449 // Reload vector X, since it was overwritten by BLAS. 2450 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1); 2451 mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX); 2452 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UT); 2453 verifyMatrix(vectorXRef, vectorXD); 2454 2455 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2456 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1); 2457 mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX); 2458 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UH); 2459 verifyMatrix(vectorXRef, vectorXD); 2460 2461 // Test for incX = 2; 2462 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2463 incX = 2; 2464 int dimX = 1 + (N - 1) * incX; 2465 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 2466 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n2); 2467 2468 mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX); 2469 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 2470 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN2); 2471 verifyMatrix(vectorXRef, vectorXD); 2472 2473 mRS.finish(); 2474 checkError(); 2475 } 2476 2477 public void test_L2_CTPMV_Correctness() { 2478 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2479 int uplo = ScriptIntrinsicBLAS.UPPER; 2480 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2481 int incX = 1; 2482 2483 // Populate input allocations 2484 int N = mBLASData.dN; 2485 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1)); 2486 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 2487 matrixAC.copyFrom(mBLASData.L2_cTRMV_A_nn_pu); 2488 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1); 2489 2490 // Test for the default case: NO_TRANS 2491 mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX); 2492 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 2493 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN); 2494 verifyMatrix(vectorXRef, vectorXC); 2495 2496 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2497 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2498 // Reload vector X, since it was overwritten by BLAS. 2499 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1); 2500 mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX); 2501 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UT); 2502 verifyMatrix(vectorXRef, vectorXC); 2503 2504 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2505 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1); 2506 mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX); 2507 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UH); 2508 verifyMatrix(vectorXRef, vectorXC); 2509 2510 // Test for incX = 2; 2511 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2512 incX = 2; 2513 int dimX = 1 + (N - 1) * incX; 2514 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 2515 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n2); 2516 2517 mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX); 2518 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 2519 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN2); 2520 verifyMatrix(vectorXRef, vectorXC); 2521 2522 mRS.finish(); 2523 checkError(); 2524 } 2525 2526 public void test_L2_ZTPMV_Correctness() { 2527 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2528 int uplo = ScriptIntrinsicBLAS.UPPER; 2529 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2530 int incX = 1; 2531 2532 // Populate input allocations 2533 int N = mBLASData.dN; 2534 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1)); 2535 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 2536 matrixAZ.copyFrom(mBLASData.L2_zTRMV_A_nn_pu); 2537 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1); 2538 2539 // Test for the default case: NO_TRANS 2540 mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2541 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 2542 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN); 2543 verifyMatrix(vectorXRef, vectorXZ); 2544 2545 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2546 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2547 // Reload vector X, since it was overwritten by BLAS. 2548 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1); 2549 mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2550 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UT); 2551 verifyMatrix(vectorXRef, vectorXZ); 2552 2553 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2554 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1); 2555 mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2556 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UH); 2557 verifyMatrix(vectorXRef, vectorXZ); 2558 2559 // Test for incX = 2; 2560 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2561 incX = 2; 2562 int dimX = 1 + (N - 1) * incX; 2563 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 2564 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n2); 2565 2566 mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2567 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 2568 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN2); 2569 verifyMatrix(vectorXRef, vectorXZ); 2570 2571 mRS.finish(); 2572 checkError(); 2573 } 2574 2575 2576 private void xTRSV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) { 2577 for (Allocation matA : mMatrix) { 2578 for (Allocation vecX : mMatrix) { 2579 if (!validateVecInput(vecX)) { 2580 continue; 2581 } 2582 Element elemA = matA.getType().getElement(); 2583 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) { 2584 try { 2585 if (elemA.isCompatible(Element.F32(mRS))) { 2586 mBLAS.STRSV(Uplo, TransA, Diag, matA, vecX, incX); 2587 } else if (elemA.isCompatible(Element.F64(mRS))) { 2588 mBLAS.DTRSV(Uplo, TransA, Diag, matA, vecX, incX); 2589 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 2590 mBLAS.CTRSV(Uplo, TransA, Diag, matA, vecX, incX); 2591 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 2592 mBLAS.ZTRSV(Uplo, TransA, Diag, matA, vecX, incX); 2593 } 2594 } catch (RSRuntimeException e) { 2595 fail("should NOT throw RSRuntimeException"); 2596 } 2597 } else { 2598 try { 2599 mBLAS.STRSV(Uplo, TransA, Diag, matA, vecX, incX); 2600 fail("should throw RSRuntimeException for STRSV"); 2601 } catch (RSRuntimeException e) { 2602 } 2603 try { 2604 mBLAS.DTRSV(Uplo, TransA, Diag, matA, vecX, incX); 2605 fail("should throw RSRuntimeException for DTRSV"); 2606 } catch (RSRuntimeException e) { 2607 } 2608 try { 2609 mBLAS.CTRSV(Uplo, TransA, Diag, matA, vecX, incX); 2610 fail("should throw RSRuntimeException for CTRSV"); 2611 } catch (RSRuntimeException e) { 2612 } 2613 try { 2614 mBLAS.ZTRSV(Uplo, TransA, Diag, matA, vecX, incX); 2615 fail("should throw RSRuntimeException for ZTRSV"); 2616 } catch (RSRuntimeException e) { 2617 } 2618 } 2619 } 2620 } 2621 } 2622 2623 public void L2_xTRSV_API(ArrayList<Allocation> mMatrix) { 2624 for (int Uplo : mUplo) { 2625 for (int TransA : mTranspose) { 2626 for (int Diag : mDiag) { 2627 for (int incX : mInc) { 2628 xTRSV_API_test(Uplo, TransA, Diag, incX, mMatrix); 2629 } 2630 } 2631 } 2632 } 2633 } 2634 2635 public void test_L2_STRSV_API() { 2636 L2_xTRSV_API(mMatrixS); 2637 } 2638 2639 public void test_L2_DTRSV_API() { 2640 L2_xTRSV_API(mMatrixD); 2641 } 2642 2643 public void test_L2_CTRSV_API() { 2644 L2_xTRSV_API(mMatrixC); 2645 } 2646 2647 public void test_L2_ZTRSV_API() { 2648 L2_xTRSV_API(mMatrixZ); 2649 } 2650 2651 public void test_L2_STRSV_Correctness() { 2652 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2653 int uplo = ScriptIntrinsicBLAS.UPPER; 2654 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2655 int incX = 1; 2656 2657 // Populate input allocations 2658 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 2659 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 2660 matrixAS.copyFrom(mBLASData.L2_sTRSV_A_nn); 2661 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1); 2662 2663 // Test for the default case: NO_TRANS 2664 mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX); 2665 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 2666 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN); 2667 verifyMatrix(vectorXRef, vectorXS); 2668 2669 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2670 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2671 // Reload vector X, since it was overwritten by BLAS. 2672 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1); 2673 mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX); 2674 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UT); 2675 verifyMatrix(vectorXRef, vectorXS); 2676 2677 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2678 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1); 2679 mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX); 2680 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UH); 2681 verifyMatrix(vectorXRef, vectorXS); 2682 2683 // Test for incX = 2; 2684 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2685 incX = 2; 2686 int dimX = 1 + (mBLASData.dN - 1) * incX; 2687 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2688 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n2); 2689 2690 mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX); 2691 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2692 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN2); 2693 verifyMatrix(vectorXRef, vectorXS); 2694 2695 mRS.finish(); 2696 checkError(); 2697 } 2698 2699 public void test_L2_DTRSV_Correctness() { 2700 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2701 int uplo = ScriptIntrinsicBLAS.UPPER; 2702 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2703 int incX = 1; 2704 2705 // Populate input allocations 2706 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 2707 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 2708 matrixAD.copyFrom(mBLASData.L2_dTRSV_A_nn); 2709 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1); 2710 2711 // Test for the default case: NO_TRANS 2712 mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX); 2713 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 2714 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN); 2715 verifyMatrix(vectorXRef, vectorXD); 2716 2717 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2718 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2719 // Reload vector X, since it was overwritten by BLAS. 2720 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1); 2721 mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX); 2722 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UT); 2723 verifyMatrix(vectorXRef, vectorXD); 2724 2725 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2726 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1); 2727 mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX); 2728 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UH); 2729 verifyMatrix(vectorXRef, vectorXD); 2730 2731 // Test for incX = 2; 2732 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2733 incX = 2; 2734 int dimX = 1 + (mBLASData.dN - 1) * incX; 2735 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 2736 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n2); 2737 2738 mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX); 2739 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 2740 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN2); 2741 verifyMatrix(vectorXRef, vectorXD); 2742 2743 mRS.finish(); 2744 checkError(); 2745 } 2746 2747 public void test_L2_CTRSV_Correctness() { 2748 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2749 int uplo = ScriptIntrinsicBLAS.UPPER; 2750 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2751 int incX = 1; 2752 2753 // Populate input allocations 2754 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 2755 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 2756 matrixAC.copyFrom(mBLASData.L2_cTRSV_A_nn); 2757 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1); 2758 2759 // Test for the default case: NO_TRANS 2760 mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX); 2761 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 2762 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN); 2763 verifyMatrix(vectorXRef, vectorXC); 2764 2765 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2766 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2767 // Reload vector X, since it was overwritten by BLAS. 2768 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1); 2769 mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX); 2770 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UT); 2771 verifyMatrix(vectorXRef, vectorXC); 2772 2773 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2774 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1); 2775 mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX); 2776 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UH); 2777 verifyMatrix(vectorXRef, vectorXC); 2778 2779 // Test for incX = 2; 2780 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2781 incX = 2; 2782 int dimX = 1 + (mBLASData.dN - 1) * incX; 2783 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 2784 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n2); 2785 2786 mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX); 2787 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 2788 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN2); 2789 verifyMatrix(vectorXRef, vectorXC); 2790 2791 mRS.finish(); 2792 checkError(); 2793 } 2794 2795 public void test_L2_ZTRSV_Correctness() { 2796 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2797 int uplo = ScriptIntrinsicBLAS.UPPER; 2798 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2799 int incX = 1; 2800 2801 // Populate input allocations 2802 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 2803 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 2804 matrixAZ.copyFrom(mBLASData.L2_zTRSV_A_nn); 2805 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1); 2806 2807 // Test for the default case: NO_TRANS 2808 mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2809 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 2810 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN); 2811 verifyMatrix(vectorXRef, vectorXZ); 2812 2813 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2814 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2815 // Reload vector X, since it was overwritten by BLAS. 2816 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1); 2817 mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2818 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UT); 2819 verifyMatrix(vectorXRef, vectorXZ); 2820 2821 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2822 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1); 2823 mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2824 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UH); 2825 verifyMatrix(vectorXRef, vectorXZ); 2826 2827 // Test for incX = 2; 2828 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2829 incX = 2; 2830 int dimX = 1 + (mBLASData.dN - 1) * incX; 2831 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 2832 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n2); 2833 2834 mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2835 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 2836 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN2); 2837 verifyMatrix(vectorXRef, vectorXZ); 2838 2839 mRS.finish(); 2840 checkError(); 2841 } 2842 2843 2844 private void xTBSV_API_test(int Uplo, int TransA, int Diag, int K, int incX, ArrayList<Allocation> mMatrix) { 2845 for (Allocation matA : mMatrix) { 2846 for (Allocation vecX : mMatrix) { 2847 if (!validateVecInput(vecX)) { 2848 continue; 2849 } 2850 Element elemA = matA.getType().getElement(); 2851 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX) && K >= 0) { 2852 try { 2853 if (elemA.isCompatible(Element.F32(mRS))) { 2854 mBLAS.STBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2855 } else if (elemA.isCompatible(Element.F64(mRS))) { 2856 mBLAS.DTBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2857 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 2858 mBLAS.CTBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2859 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 2860 mBLAS.ZTBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2861 } 2862 } catch (RSRuntimeException e) { 2863 fail("should NOT throw RSRuntimeException"); 2864 } 2865 } else { 2866 try { 2867 mBLAS.STBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2868 fail("should throw RSRuntimeException for STBSV"); 2869 } catch (RSRuntimeException e) { 2870 } 2871 try { 2872 mBLAS.DTBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2873 fail("should throw RSRuntimeException for DTBSV"); 2874 } catch (RSRuntimeException e) { 2875 } 2876 try { 2877 mBLAS.CTBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2878 fail("should throw RSRuntimeException for CTBSV"); 2879 } catch (RSRuntimeException e) { 2880 } 2881 try { 2882 mBLAS.ZTBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2883 fail("should throw RSRuntimeException for ZTBSV"); 2884 } catch (RSRuntimeException e) { 2885 } 2886 } 2887 } 2888 } 2889 } 2890 2891 public void L2_xTBSV_API(ArrayList<Allocation> mMatrix) { 2892 for (int Uplo : mUplo) { 2893 for (int TransA : mTranspose) { 2894 for (int Diag : mDiag) { 2895 for (int K : mK) { 2896 for (int incX : mInc) { 2897 xTBSV_API_test(Uplo, TransA, Diag, K, incX, mMatrix); 2898 } 2899 } 2900 } 2901 } 2902 } 2903 } 2904 2905 public void test_L2_STBSV_API() { 2906 L2_xTBSV_API(mMatrixS); 2907 } 2908 2909 public void test_L2_DTBSV_API() { 2910 L2_xTBSV_API(mMatrixD); 2911 } 2912 2913 public void test_L2_CTBSV_API() { 2914 L2_xTBSV_API(mMatrixC); 2915 } 2916 2917 public void test_L2_ZTBSV_API() { 2918 L2_xTBSV_API(mMatrixZ); 2919 } 2920 2921 public void test_L2_STBSV_Correctness() { 2922 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2923 int uplo = ScriptIntrinsicBLAS.UPPER; 2924 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2925 int incX = 1; 2926 2927 // Populate input allocations 2928 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 2929 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 2930 matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_sTBSV_A_nn); 2931 vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n1); 2932 2933 // Test for the default case: NO_TRANS 2934 mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2935 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 2936 vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UN); 2937 verifyMatrix(vectorXRef, vectorXS); 2938 2939 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2940 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2941 // Reload vector X, since it was overwritten by BLAS. 2942 vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n1); 2943 mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2944 vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UT); 2945 verifyMatrix(vectorXRef, vectorXS); 2946 2947 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2948 vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n1); 2949 mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2950 vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UH); 2951 verifyMatrix(vectorXRef, vectorXS); 2952 2953 // Test for incX = 2; 2954 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2955 incX = 2; 2956 int dimX = 1 + (mBLASData.dN - 1) * incX; 2957 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2958 vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n2); 2959 2960 mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2961 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2962 vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UN2); 2963 verifyMatrix(vectorXRef, vectorXS); 2964 2965 mRS.finish(); 2966 checkError(); 2967 } 2968 2969 public void test_L2_DTBSV_Correctness() { 2970 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2971 int uplo = ScriptIntrinsicBLAS.UPPER; 2972 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2973 int incX = 1; 2974 2975 // Populate input allocations 2976 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 2977 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 2978 matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_dTBSV_A_nn); 2979 vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n1); 2980 2981 // Test for the default case: NO_TRANS 2982 mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 2983 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 2984 vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UN); 2985 verifyMatrix(vectorXRef, vectorXD); 2986 2987 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2988 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2989 // Reload vector X, since it was overwritten by BLAS. 2990 vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n1); 2991 mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 2992 vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UT); 2993 verifyMatrix(vectorXRef, vectorXD); 2994 2995 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2996 vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n1); 2997 mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 2998 vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UH); 2999 verifyMatrix(vectorXRef, vectorXD); 3000 3001 // Test for incX = 2; 3002 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3003 incX = 2; 3004 int dimX = 1 + (mBLASData.dN - 1) * incX; 3005 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 3006 vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n2); 3007 3008 mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 3009 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 3010 vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UN2); 3011 verifyMatrix(vectorXRef, vectorXD); 3012 3013 mRS.finish(); 3014 checkError(); 3015 } 3016 3017 public void test_L2_CTBSV_Correctness() { 3018 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3019 int uplo = ScriptIntrinsicBLAS.UPPER; 3020 int diag = ScriptIntrinsicBLAS.NON_UNIT; 3021 int incX = 1; 3022 3023 // Populate input allocations 3024 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 3025 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 3026 matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_cTBSV_A_nn); 3027 vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n1); 3028 3029 // Test for the default case: NO_TRANS 3030 mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 3031 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 3032 vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UN); 3033 verifyMatrix(vectorXRef, vectorXC); 3034 3035 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 3036 trans = ScriptIntrinsicBLAS.TRANSPOSE; 3037 // Reload vector X, since it was overwritten by BLAS. 3038 vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n1); 3039 mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 3040 vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UT); 3041 verifyMatrix(vectorXRef, vectorXC); 3042 3043 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 3044 vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n1); 3045 mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 3046 vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UH); 3047 verifyMatrix(vectorXRef, vectorXC); 3048 3049 // Test for incX = 2; 3050 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3051 incX = 2; 3052 int dimX = 1 + (mBLASData.dN - 1) * incX; 3053 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 3054 vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n2); 3055 3056 mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 3057 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 3058 vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UN2); 3059 verifyMatrix(vectorXRef, vectorXC); 3060 3061 mRS.finish(); 3062 checkError(); 3063 } 3064 3065 public void test_L2_ZTBSV_Correctness() { 3066 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3067 int uplo = ScriptIntrinsicBLAS.UPPER; 3068 int diag = ScriptIntrinsicBLAS.NON_UNIT; 3069 int incX = 1; 3070 3071 // Populate input allocations 3072 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 3073 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 3074 matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_zTBSV_A_nn); 3075 vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n1); 3076 3077 // Test for the default case: NO_TRANS 3078 mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 3079 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 3080 vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UN); 3081 verifyMatrix(vectorXRef, vectorXZ); 3082 3083 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 3084 trans = ScriptIntrinsicBLAS.TRANSPOSE; 3085 // Reload vector X, since it was overwritten by BLAS. 3086 vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n1); 3087 mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 3088 vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UT); 3089 verifyMatrix(vectorXRef, vectorXZ); 3090 3091 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 3092 vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n1); 3093 mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 3094 vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UH); 3095 verifyMatrix(vectorXRef, vectorXZ); 3096 3097 // Test for incX = 2; 3098 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3099 incX = 2; 3100 int dimX = 1 + (mBLASData.dN - 1) * incX; 3101 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 3102 vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n2); 3103 3104 mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 3105 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 3106 vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UN2); 3107 verifyMatrix(vectorXRef, vectorXZ); 3108 3109 mRS.finish(); 3110 checkError(); 3111 } 3112 3113 3114 private void xTPSV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) { 3115 for (Allocation matA : mMatrix) { 3116 for (Allocation vecX : mMatrix) { 3117 if (!validateVecInput(vecX)) { 3118 continue; 3119 } 3120 Element elemA = matA.getType().getElement(); 3121 if (validateTPMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) { 3122 try { 3123 if (elemA.isCompatible(Element.F32(mRS))) { 3124 mBLAS.STPSV(Uplo, TransA, Diag, matA, vecX, incX); 3125 } else if (elemA.isCompatible(Element.F64(mRS))) { 3126 mBLAS.DTPSV(Uplo, TransA, Diag, matA, vecX, incX); 3127 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 3128 mBLAS.CTPSV(Uplo, TransA, Diag, matA, vecX, incX); 3129 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 3130 mBLAS.ZTPSV(Uplo, TransA, Diag, matA, vecX, incX); 3131 } 3132 } catch (RSRuntimeException e) { 3133 fail("should NOT throw RSRuntimeException"); 3134 } 3135 } else { 3136 try { 3137 mBLAS.STPSV(Uplo, TransA, Diag, matA, vecX, incX); 3138 fail("should throw RSRuntimeException for STPSV"); 3139 } catch (RSRuntimeException e) { 3140 } 3141 try { 3142 mBLAS.DTPSV(Uplo, TransA, Diag, matA, vecX, incX); 3143 fail("should throw RSRuntimeException for DTPSV"); 3144 } catch (RSRuntimeException e) { 3145 } 3146 try { 3147 mBLAS.CTPSV(Uplo, TransA, Diag, matA, vecX, incX); 3148 fail("should throw RSRuntimeException for CTPSV"); 3149 } catch (RSRuntimeException e) { 3150 } 3151 try { 3152 mBLAS.ZTPSV(Uplo, TransA, Diag, matA, vecX, incX); 3153 fail("should throw RSRuntimeException for ZTPSV"); 3154 } catch (RSRuntimeException e) { 3155 } 3156 } 3157 } 3158 } 3159 } 3160 3161 public void L2_xTPSV_API(ArrayList<Allocation> mMatrix) { 3162 for (int Uplo : mUplo) { 3163 for (int TransA : mTranspose) { 3164 for (int Diag : mDiag) { 3165 for (int incX : mInc) { 3166 xTPSV_API_test(Uplo, TransA, Diag, incX, mMatrix); 3167 } 3168 } 3169 } 3170 } 3171 } 3172 3173 public void test_L2_STPSV_API() { 3174 L2_xTPSV_API(mMatrixS); 3175 } 3176 3177 public void test_L2_DTPSV_API() { 3178 L2_xTPSV_API(mMatrixD); 3179 } 3180 3181 public void test_L2_CTPSV_API() { 3182 L2_xTPSV_API(mMatrixC); 3183 } 3184 3185 public void test_L2_ZTPSV_API() { 3186 L2_xTPSV_API(mMatrixZ); 3187 } 3188 3189 public void test_L2_STPSV_Correctness() { 3190 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3191 int uplo = ScriptIntrinsicBLAS.UPPER; 3192 int diag = ScriptIntrinsicBLAS.NON_UNIT; 3193 int incX = 1; 3194 3195 // Populate input allocations 3196 int N = mBLASData.dN; 3197 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1)); 3198 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 3199 matrixAS.copyFrom(mBLASData.L2_sTRSV_A_nn_pu); 3200 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1); 3201 3202 // Test for the default case: NO_TRANS 3203 mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX); 3204 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 3205 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN); 3206 verifyMatrix(vectorXRef, vectorXS); 3207 3208 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 3209 trans = ScriptIntrinsicBLAS.TRANSPOSE; 3210 // Reload vector X, since it was overwritten by BLAS. 3211 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1); 3212 mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX); 3213 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UT); 3214 verifyMatrix(vectorXRef, vectorXS); 3215 3216 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 3217 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1); 3218 mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX); 3219 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UH); 3220 verifyMatrix(vectorXRef, vectorXS); 3221 3222 // Test for incX = 2; 3223 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3224 incX = 2; 3225 int dimX = 1 + (N - 1) * incX; 3226 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 3227 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n2); 3228 3229 mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX); 3230 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 3231 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN2); 3232 verifyMatrix(vectorXRef, vectorXS); 3233 3234 mRS.finish(); 3235 checkError(); 3236 } 3237 3238 public void test_L2_DTPSV_Correctness() { 3239 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3240 int uplo = ScriptIntrinsicBLAS.UPPER; 3241 int diag = ScriptIntrinsicBLAS.NON_UNIT; 3242 int incX = 1; 3243 3244 // Populate input allocations 3245 int N = mBLASData.dN; 3246 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1)); 3247 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 3248 matrixAD.copyFrom(mBLASData.L2_dTRSV_A_nn_pu); 3249 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1); 3250 3251 // Test for the default case: NO_TRANS 3252 mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX); 3253 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 3254 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN); 3255 verifyMatrix(vectorXRef, vectorXD); 3256 3257 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 3258 trans = ScriptIntrinsicBLAS.TRANSPOSE; 3259 // Reload vector X, since it was overwritten by BLAS. 3260 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1); 3261 mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX); 3262 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UT); 3263 verifyMatrix(vectorXRef, vectorXD); 3264 3265 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 3266 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1); 3267 mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX); 3268 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UH); 3269 verifyMatrix(vectorXRef, vectorXD); 3270 3271 // Test for incX = 2; 3272 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3273 incX = 2; 3274 int dimX = 1 + (N - 1) * incX; 3275 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 3276 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n2); 3277 3278 mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX); 3279 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 3280 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN2); 3281 verifyMatrix(vectorXRef, vectorXD); 3282 3283 mRS.finish(); 3284 checkError(); 3285 } 3286 3287 public void test_L2_CTPSV_Correctness() { 3288 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3289 int uplo = ScriptIntrinsicBLAS.UPPER; 3290 int diag = ScriptIntrinsicBLAS.NON_UNIT; 3291 int incX = 1; 3292 3293 // Populate input allocations 3294 int N = mBLASData.dN; 3295 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1)); 3296 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 3297 matrixAC.copyFrom(mBLASData.L2_cTRSV_A_nn_pu); 3298 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1); 3299 3300 // Test for the default case: NO_TRANS 3301 mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX); 3302 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 3303 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN); 3304 verifyMatrix(vectorXRef, vectorXC); 3305 3306 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 3307 trans = ScriptIntrinsicBLAS.TRANSPOSE; 3308 // Reload vector X, since it was overwritten by BLAS. 3309 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1); 3310 mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX); 3311 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UT); 3312 verifyMatrix(vectorXRef, vectorXC); 3313 3314 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 3315 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1); 3316 mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX); 3317 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UH); 3318 verifyMatrix(vectorXRef, vectorXC); 3319 3320 // Test for incX = 2; 3321 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3322 incX = 2; 3323 int dimX = 1 + (N - 1) * incX; 3324 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 3325 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n2); 3326 3327 mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX); 3328 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 3329 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN2); 3330 verifyMatrix(vectorXRef, vectorXC); 3331 3332 mRS.finish(); 3333 checkError(); 3334 } 3335 3336 public void test_L2_ZTPSV_Correctness() { 3337 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3338 int uplo = ScriptIntrinsicBLAS.UPPER; 3339 int diag = ScriptIntrinsicBLAS.NON_UNIT; 3340 int incX = 1; 3341 3342 // Populate input allocations 3343 int N = mBLASData.dN; 3344 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1)); 3345 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 3346 matrixAZ.copyFrom(mBLASData.L2_zTRSV_A_nn_pu); 3347 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1); 3348 3349 // Test for the default case: NO_TRANS 3350 mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 3351 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 3352 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN); 3353 verifyMatrix(vectorXRef, vectorXZ); 3354 3355 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 3356 trans = ScriptIntrinsicBLAS.TRANSPOSE; 3357 // Reload vector X, since it was overwritten by BLAS. 3358 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1); 3359 mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 3360 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UT); 3361 verifyMatrix(vectorXRef, vectorXZ); 3362 3363 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 3364 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1); 3365 mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 3366 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UH); 3367 verifyMatrix(vectorXRef, vectorXZ); 3368 3369 // Test for incX = 2; 3370 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3371 incX = 2; 3372 int dimX = 1 + (N - 1) * incX; 3373 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 3374 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n2); 3375 3376 mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 3377 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 3378 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN2); 3379 verifyMatrix(vectorXRef, vectorXZ); 3380 3381 mRS.finish(); 3382 checkError(); 3383 } 3384 3385 3386 private boolean validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 3387 if (!A.getType().getElement().isCompatible(e) || 3388 !X.getType().getElement().isCompatible(e) || 3389 !Y.getType().getElement().isCompatible(e) ) { 3390 return false; 3391 } 3392 3393 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 3394 return false; 3395 } 3396 3397 int M = A.getType().getY(); 3398 int N = A.getType().getX(); 3399 3400 if (N < 1 || M < 1) { 3401 return false; 3402 } 3403 if (incX <= 0 || incY <= 0) { 3404 return false; 3405 } 3406 int expectedXDim = 1 + (M - 1) * incX; 3407 if (X.getType().getX() != expectedXDim) { 3408 return false; 3409 } 3410 int expectedYDim = 1 + (N - 1) * incY; 3411 if (Y.getType().getX() != expectedYDim) { 3412 return false; 3413 } 3414 return true; 3415 } 3416 3417 3418 private void xGER_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) { 3419 for (Allocation matA : mMatrix) { 3420 for (Allocation vecX : mMatrix) { 3421 if (!validateVecInput(vecX)) { 3422 continue; 3423 } 3424 for (Allocation vecY : mMatrix) { 3425 if (!validateVecInput(vecY)) { 3426 continue; 3427 } 3428 Element elemA = matA.getType().getElement(); 3429 if (validateGER(elemA, vecX, incX, vecY, incY, matA)) { 3430 try { 3431 if (elemA.isCompatible(Element.F32(mRS))) { 3432 mBLAS.SGER(alphaS, vecX, incX, vecY, incY, matA); 3433 } else if (elemA.isCompatible(Element.F64(mRS))) { 3434 mBLAS.DGER(alphaD, vecX, incX, vecY, incY, matA); 3435 } 3436 } catch (RSRuntimeException e) { 3437 fail("should NOT throw RSRuntimeException"); 3438 } 3439 } else { 3440 try { 3441 mBLAS.SGER(alphaS, vecX, incX, vecY, incY, matA); 3442 fail("should throw RSRuntimeException for SGER"); 3443 } catch (RSRuntimeException e) { 3444 } 3445 try { 3446 mBLAS.DGER(alphaD, vecX, incX, vecY, incY, matA); 3447 fail("should throw RSRuntimeException for DGER"); 3448 } catch (RSRuntimeException e) { 3449 } 3450 } 3451 } 3452 } 3453 } 3454 } 3455 3456 private void L2_xGER_API(ArrayList<Allocation> mMatrix) { 3457 for (int incX : mInc) { 3458 for (int incY : mInc) { 3459 xGERU_API_test(incX, incY, mMatrix); 3460 } 3461 } 3462 } 3463 3464 public void test_L2_SGER_API() { 3465 L2_xGER_API(mMatrixS); 3466 } 3467 3468 public void test_L2_DGER_API() { 3469 L2_xGER_API(mMatrixD); 3470 } 3471 3472 public void test_L2_SGER_Correctness() { 3473 int incX = 1; 3474 int incY = 1; 3475 3476 // Populate input allocations 3477 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 3478 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1)); 3479 Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 3480 matrixAS.copyFrom(mBLASData.L2_sGER_A_mn); 3481 vectorXS.copyFrom(mBLASData.L2_sGER_x_m1); 3482 vectorYS.copyFrom(mBLASData.L2_sGER_y_n1); 3483 3484 // Test for the default case: NO_TRANS 3485 mBLAS.SGER(alphaS, vectorXS, incX, vectorYS, incY, matrixAS); 3486 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 3487 matrixARef.copyFrom(mBLASData.L2_sGER_o_N); 3488 verifyMatrix(matrixARef, matrixAS); 3489 3490 // Test for incX = 2 & incY = 3; 3491 incX = 2; 3492 incY = 3; 3493 int dimX = 1 + (mBLASData.dM - 1) * incX; 3494 int dimY = 1 + (mBLASData.dN - 1) * incY; 3495 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 3496 vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 3497 vectorXS.copyFrom(mBLASData.L2_sGER_x_m2); 3498 vectorYS.copyFrom(mBLASData.L2_sGER_y_n2); 3499 matrixAS.copyFrom(mBLASData.L2_sGER_A_mn); 3500 3501 mBLAS.SGER(alphaS, vectorXS, incX, vectorYS, incY, matrixAS); 3502 verifyMatrix(matrixARef, matrixAS); 3503 3504 mRS.finish(); 3505 checkError(); 3506 } 3507 3508 public void test_L2_DGER_Correctness() { 3509 int incX = 1; 3510 int incY = 1; 3511 3512 // Populate input allocations 3513 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 3514 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1)); 3515 Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 3516 matrixAD.copyFrom(mBLASData.L2_dGER_A_mn); 3517 vectorXD.copyFrom(mBLASData.L2_dGER_x_m1); 3518 vectorYD.copyFrom(mBLASData.L2_dGER_y_n1); 3519 3520 // Test for the default case: NO_TRANS 3521 mBLAS.DGER(alphaD, vectorXD, incX, vectorYD, incY, matrixAD); 3522 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 3523 matrixARef.copyFrom(mBLASData.L2_dGER_o_N); 3524 verifyMatrix(matrixARef, matrixAD); 3525 3526 // Test for incX = 2 & incY = 3; 3527 incX = 2; 3528 incY = 3; 3529 int dimX = 1 + (mBLASData.dM - 1) * incX; 3530 int dimY = 1 + (mBLASData.dN - 1) * incY; 3531 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 3532 vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 3533 vectorXD.copyFrom(mBLASData.L2_dGER_x_m2); 3534 vectorYD.copyFrom(mBLASData.L2_dGER_y_n2); 3535 matrixAD.copyFrom(mBLASData.L2_dGER_A_mn); 3536 3537 mBLAS.DGER(alphaD, vectorXD, incX, vectorYD, incY, matrixAD); 3538 verifyMatrix(matrixARef, matrixAD); 3539 3540 mRS.finish(); 3541 checkError(); 3542 } 3543 3544 3545 private boolean validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 3546 if (!A.getType().getElement().isCompatible(e) || 3547 !X.getType().getElement().isCompatible(e) || 3548 !Y.getType().getElement().isCompatible(e)) { 3549 return false; 3550 } 3551 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 3552 return false; 3553 } 3554 3555 int M = A.getType().getY(); 3556 int N = A.getType().getX(); 3557 if (incX <= 0 || incY <= 0) { 3558 return false; 3559 } 3560 int expectedXDim = 1 + (M - 1) * incX; 3561 if (X.getType().getX() != expectedXDim) { 3562 return false; 3563 } 3564 int expectedYDim = 1 + (N - 1) * incY; 3565 if (Y.getType().getX() != expectedYDim) { 3566 return false; 3567 } 3568 return true; 3569 } 3570 3571 private void xGERU_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) { 3572 for (Allocation matA : mMatrix) { 3573 for (Allocation vecX : mMatrix) { 3574 if (!validateVecInput(vecX)) { 3575 continue; 3576 } 3577 for (Allocation vecY : mMatrix) { 3578 if (!validateVecInput(vecY)) { 3579 continue; 3580 } 3581 Element elemA = matA.getType().getElement(); 3582 if (validateGERU(elemA, vecX, incX, vecY, incY, matA)) { 3583 try { 3584 if (elemA.isCompatible(Element.F32_2(mRS))) { 3585 mBLAS.CGERU(alphaC, vecX, incX, vecY, incY, matA); 3586 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 3587 mBLAS.ZGERU(alphaZ, vecX, incX, vecY, incY, matA); 3588 } 3589 } catch (RSRuntimeException e) { 3590 fail("should NOT throw RSRuntimeException"); 3591 } 3592 } else { 3593 try { 3594 mBLAS.CGERU(alphaC, vecX, incX, vecY, incY, matA); 3595 fail("should throw RSRuntimeException for CGERU"); 3596 } catch (RSRuntimeException e) { 3597 } 3598 try { 3599 mBLAS.ZGERU(alphaZ, vecX, incX, vecY, incY, matA); 3600 fail("should throw RSRuntimeException for ZGERU"); 3601 } catch (RSRuntimeException e) { 3602 } 3603 } 3604 } 3605 } 3606 } 3607 } 3608 3609 private void L2_xGERU_API(ArrayList<Allocation> mMatrix) { 3610 for (int incX : mInc) { 3611 for (int incY : mInc) { 3612 xGERU_API_test(incX, incY, mMatrix); 3613 } 3614 } 3615 } 3616 3617 public void test_L2_CGERU_API() { 3618 L2_xGERU_API(mMatrixC); 3619 } 3620 3621 public void test_L2_ZGERU_API() { 3622 L2_xGERU_API(mMatrixZ); 3623 } 3624 3625 public void test_L2_CGERU_Correctness() { 3626 int incX = 1; 3627 int incY = 1; 3628 3629 // Populate input allocations 3630 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 3631 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1)); 3632 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 3633 matrixAC.copyFrom(mBLASData.L2_cGERU_A_mn); 3634 vectorXC.copyFrom(mBLASData.L2_cGERU_x_m1); 3635 vectorYC.copyFrom(mBLASData.L2_cGERU_y_n1); 3636 3637 // Test for the default case: NO_TRANS 3638 mBLAS.CGERU(alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 3639 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 3640 matrixARef.copyFrom(mBLASData.L2_cGERU_o_N); 3641 verifyMatrix(matrixARef, matrixAC); 3642 3643 // Test for incX = 2 & incY = 3; 3644 incX = 2; 3645 incY = 3; 3646 int dimX = 1 + (mBLASData.dM - 1) * incX; 3647 int dimY = 1 + (mBLASData.dN - 1) * incY; 3648 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 3649 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 3650 vectorXC.copyFrom(mBLASData.L2_cGERU_x_m2); 3651 vectorYC.copyFrom(mBLASData.L2_cGERU_y_n2); 3652 matrixAC.copyFrom(mBLASData.L2_cGERU_A_mn); 3653 3654 mBLAS.CGERU(alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 3655 verifyMatrix(matrixARef, matrixAC); 3656 3657 mRS.finish(); 3658 checkError(); 3659 } 3660 3661 public void test_L2_ZGERU_Correctness() { 3662 int incX = 1; 3663 int incY = 1; 3664 3665 // Populate input allocations 3666 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 3667 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1)); 3668 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 3669 matrixAZ.copyFrom(mBLASData.L2_zGERU_A_mn); 3670 vectorXZ.copyFrom(mBLASData.L2_zGERU_x_m1); 3671 vectorYZ.copyFrom(mBLASData.L2_zGERU_y_n1); 3672 3673 // Test for the default case: NO_TRANS 3674 mBLAS.ZGERU(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 3675 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 3676 matrixARef.copyFrom(mBLASData.L2_zGERU_o_N); 3677 verifyMatrix(matrixARef, matrixAZ); 3678 3679 // Test for incX = 2 & incY = 3; 3680 incX = 2; 3681 incY = 3; 3682 int dimX = 1 + (mBLASData.dM - 1) * incX; 3683 int dimY = 1 + (mBLASData.dN - 1) * incY; 3684 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 3685 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 3686 vectorXZ.copyFrom(mBLASData.L2_zGERU_x_m2); 3687 vectorYZ.copyFrom(mBLASData.L2_zGERU_y_n2); 3688 matrixAZ.copyFrom(mBLASData.L2_zGERU_A_mn); 3689 3690 mBLAS.ZGERU(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 3691 verifyMatrix(matrixARef, matrixAZ); 3692 3693 mRS.finish(); 3694 checkError(); 3695 } 3696 3697 3698 3699 private void xGERC_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) { 3700 for (Allocation matA : mMatrix) { 3701 for (Allocation vecX : mMatrix) { 3702 if (!validateVecInput(vecX)) { 3703 continue; 3704 } 3705 for (Allocation vecY : mMatrix) { 3706 if (!validateVecInput(vecY)) { 3707 continue; 3708 } 3709 Element elemA = matA.getType().getElement(); 3710 if (validateGERU(elemA, vecX, incX, vecY, incY, matA)) { 3711 try { 3712 if (elemA.isCompatible(Element.F32_2(mRS))) { 3713 mBLAS.CGERC(alphaC, vecX, incX, vecY, incY, matA); 3714 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 3715 mBLAS.ZGERC(alphaZ, vecX, incX, vecY, incY, matA); 3716 } 3717 } catch (RSRuntimeException e) { 3718 fail("should NOT throw RSRuntimeException"); 3719 } 3720 } else { 3721 try { 3722 mBLAS.CGERC(alphaC, vecX, incX, vecY, incY, matA); 3723 fail("should throw RSRuntimeException for CGERC"); 3724 } catch (RSRuntimeException e) { 3725 } 3726 try { 3727 mBLAS.ZGERC(alphaZ, vecX, incX, vecY, incY, matA); 3728 fail("should throw RSRuntimeException for ZGERC"); 3729 } catch (RSRuntimeException e) { 3730 } 3731 } 3732 } 3733 } 3734 } 3735 } 3736 3737 private void L2_xGERC_API(ArrayList<Allocation> mMatrix) { 3738 for (int incX : mInc) { 3739 for (int incY : mInc) { 3740 xGERC_API_test(incX, incY, mMatrix); 3741 } 3742 } 3743 } 3744 3745 public void test_L2_CGERC_API() { 3746 L2_xGERC_API(mMatrixC); 3747 } 3748 3749 public void test_L2_ZGERC_API() { 3750 L2_xGERC_API(mMatrixZ); 3751 } 3752 3753 public void test_L2_CGERC_Correctness() { 3754 int incX = 1; 3755 int incY = 1; 3756 3757 // Populate input allocations 3758 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 3759 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1)); 3760 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 3761 matrixAC.copyFrom(mBLASData.L2_cGERC_A_mn); 3762 vectorXC.copyFrom(mBLASData.L2_cGERC_x_m1); 3763 vectorYC.copyFrom(mBLASData.L2_cGERC_y_n1); 3764 3765 // Test for the default case: NO_TRANS 3766 mBLAS.CGERC(alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 3767 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 3768 matrixARef.copyFrom(mBLASData.L2_cGERC_o_N); 3769 verifyMatrix(matrixARef, matrixAC); 3770 3771 // Test for incX = 2 & incY = 3; 3772 incX = 2; 3773 incY = 3; 3774 int dimX = 1 + (mBLASData.dM - 1) * incX; 3775 int dimY = 1 + (mBLASData.dN - 1) * incY; 3776 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 3777 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 3778 vectorXC.copyFrom(mBLASData.L2_cGERC_x_m2); 3779 vectorYC.copyFrom(mBLASData.L2_cGERC_y_n2); 3780 matrixAC.copyFrom(mBLASData.L2_cGERC_A_mn); 3781 3782 mBLAS.CGERC(alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 3783 verifyMatrix(matrixARef, matrixAC); 3784 3785 mRS.finish(); 3786 checkError(); 3787 } 3788 3789 public void test_L2_ZGERC_Correctness() { 3790 int incX = 1; 3791 int incY = 1; 3792 3793 // Populate input allocations 3794 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 3795 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1)); 3796 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 3797 matrixAZ.copyFrom(mBLASData.L2_zGERC_A_mn); 3798 vectorXZ.copyFrom(mBLASData.L2_zGERC_x_m1); 3799 vectorYZ.copyFrom(mBLASData.L2_zGERC_y_n1); 3800 3801 // Test for the default case: NO_TRANS 3802 mBLAS.ZGERC(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 3803 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 3804 matrixARef.copyFrom(mBLASData.L2_zGERC_o_N); 3805 verifyMatrix(matrixARef, matrixAZ); 3806 3807 // Test for incX = 2 & incY = 3; 3808 incX = 2; 3809 incY = 3; 3810 int dimX = 1 + (mBLASData.dM - 1) * incX; 3811 int dimY = 1 + (mBLASData.dN - 1) * incY; 3812 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 3813 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 3814 vectorXZ.copyFrom(mBLASData.L2_zGERC_x_m2); 3815 vectorYZ.copyFrom(mBLASData.L2_zGERC_y_n2); 3816 matrixAZ.copyFrom(mBLASData.L2_zGERC_A_mn); 3817 3818 mBLAS.ZGERC(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 3819 verifyMatrix(matrixARef, matrixAZ); 3820 3821 mRS.finish(); 3822 checkError(); 3823 } 3824 3825 3826 private void xHER_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) { 3827 for (Allocation matA : mMatrix) { 3828 for (Allocation vecX : mMatrix) { 3829 if (!validateVecInput(vecX)) { 3830 continue; 3831 } 3832 Element elemA = matA.getType().getElement(); 3833 if (validateSYR(elemA, Uplo, vecX, incX, matA)) { 3834 try { 3835 if (elemA.isCompatible(Element.F32_2(mRS))) { 3836 mBLAS.CHER(Uplo, alphaS, vecX, incX, matA); 3837 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 3838 mBLAS.ZHER(Uplo, alphaD, vecX, incX, matA); 3839 } 3840 } catch (RSRuntimeException e) { 3841 fail("should NOT throw RSRuntimeException"); 3842 } 3843 } else { 3844 try { 3845 mBLAS.CHER(Uplo, alphaS, vecX, incX, matA); 3846 fail("should throw RSRuntimeException for CHER"); 3847 } catch (RSRuntimeException e) { 3848 } 3849 try { 3850 mBLAS.ZHER(Uplo, alphaD, vecX, incX, matA); 3851 fail("should throw RSRuntimeException for ZHER"); 3852 } catch (RSRuntimeException e) { 3853 } 3854 } 3855 } 3856 } 3857 } 3858 3859 public void L2_xHER_API(ArrayList<Allocation> mMatrix) { 3860 for (int Uplo : mUplo) { 3861 for (int incX : mInc) { 3862 xHER_API_test(Uplo, incX, mMatrix); 3863 } 3864 } 3865 } 3866 3867 public void test_L2_CHER_API() { 3868 L2_xHER_API(mMatrixC); 3869 } 3870 3871 public void test_L2_ZHER_API() { 3872 L2_xHER_API(mMatrixZ); 3873 } 3874 3875 public void test_L2_CHER_Correctness() { 3876 int uplo = ScriptIntrinsicBLAS.UPPER; 3877 int incX = 1; 3878 3879 // Populate input allocations 3880 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 3881 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 3882 matrixAC.copyFrom(mBLASData.L2_cHER_A_nn); 3883 vectorXC.copyFrom(mBLASData.L2_cHER_x_n1); 3884 3885 // Test for the default case: NO_TRANS 3886 mBLAS.CHER(uplo, alphaS, vectorXC, incX, matrixAC); 3887 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 3888 matrixARef.copyFrom(mBLASData.L2_cHER_o_N); 3889 verifyMatrix(matrixARef, matrixAC, true); 3890 3891 // Test for incX = 2; 3892 incX = 2; 3893 int dimX = 1 + (mBLASData.dN - 1) * incX; 3894 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 3895 vectorXC.copyFrom(mBLASData.L2_cHER_x_n2); 3896 matrixAC.copyFrom(mBLASData.L2_cHER_A_nn); 3897 3898 mBLAS.CHER(uplo, alphaS, vectorXC, incX, matrixAC); 3899 verifyMatrix(matrixARef, matrixAC, true); 3900 3901 mRS.finish(); 3902 checkError(); 3903 } 3904 3905 public void test_L2_ZHER_Correctness() { 3906 int uplo = ScriptIntrinsicBLAS.UPPER; 3907 int incX = 1; 3908 3909 // Populate input allocations 3910 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 3911 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 3912 matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn); 3913 vectorXZ.copyFrom(mBLASData.L2_zHER_x_n1); 3914 3915 // Test for the default case: NO_TRANS 3916 mBLAS.ZHER(uplo, alphaD, vectorXZ, incX, matrixAZ); 3917 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 3918 matrixARef.copyFrom(mBLASData.L2_zHER_o_N); 3919 verifyMatrix(matrixARef, matrixAZ, true); 3920 3921 // Test for incX = 2; 3922 incX = 2; 3923 int dimX = 1 + (mBLASData.dN - 1) * incX; 3924 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 3925 vectorXZ.copyFrom(mBLASData.L2_zHER_x_n2); 3926 matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn); 3927 3928 mBLAS.ZHER(uplo, alphaD, vectorXZ, incX, matrixAZ); 3929 verifyMatrix(matrixARef, matrixAZ, true); 3930 3931 mRS.finish(); 3932 checkError(); 3933 } 3934 3935 3936 private void xHPR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) { 3937 for (Allocation matA : mMatrix) { 3938 for (Allocation vecX : mMatrix) { 3939 if (!validateVecInput(vecX)) { 3940 continue; 3941 } 3942 Element elemA = matA.getType().getElement(); 3943 if (validateSPR(elemA, Uplo, vecX, incX, matA)) { 3944 try { 3945 if (elemA.isCompatible(Element.F32_2(mRS))) { 3946 mBLAS.CHPR(Uplo, alphaS, vecX, incX, matA); 3947 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 3948 mBLAS.ZHPR(Uplo, alphaD, vecX, incX, matA); 3949 } 3950 } catch (RSRuntimeException e) { 3951 fail("should NOT throw RSRuntimeException"); 3952 } 3953 } else { 3954 try { 3955 mBLAS.CHPR(Uplo, alphaS, vecX, incX, matA); 3956 fail("should throw RSRuntimeException for CHPR"); 3957 } catch (RSRuntimeException e) { 3958 } 3959 try { 3960 mBLAS.ZHPR(Uplo, alphaD, vecX, incX, matA); 3961 fail("should throw RSRuntimeException for ZHPR"); 3962 } catch (RSRuntimeException e) { 3963 } 3964 } 3965 } 3966 } 3967 } 3968 3969 public void L2_xHPR_API(ArrayList<Allocation> mMatrix) { 3970 for (int Uplo : mUplo) { 3971 for (int incX : mInc) { 3972 xHPR_API_test(Uplo, incX, mMatrix); 3973 } 3974 } 3975 } 3976 3977 public void test_L2_CHPR_API() { 3978 L2_xHPR_API(mMatrixC); 3979 } 3980 3981 public void test_L2_ZHPR_API() { 3982 L2_xHPR_API(mMatrixZ); 3983 } 3984 3985 public void test_L2_CHPR_Correctness() { 3986 int uplo = ScriptIntrinsicBLAS.UPPER; 3987 int incX = 1; 3988 3989 // Populate input allocations 3990 int N = mBLASData.dN; 3991 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1)); 3992 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 3993 matrixAC.copyFrom(mBLASData.L2_cHER_A_nn_pu); 3994 vectorXC.copyFrom(mBLASData.L2_cHER_x_n1); 3995 3996 // Test for the default case: NO_TRANS 3997 mBLAS.CHPR(uplo, alphaS, vectorXC, incX, matrixAC); 3998 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1)); 3999 matrixARef.copyFrom(mBLASData.L2_cHER_o_N_pu); 4000 verifyMatrix(matrixARef, matrixAC, true); 4001 4002 // Test for incX = 2; 4003 incX = 2; 4004 int dimX = 1 + (N - 1) * incX; 4005 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 4006 vectorXC.copyFrom(mBLASData.L2_cHER_x_n2); 4007 matrixAC.copyFrom(mBLASData.L2_cHER_A_nn_pu); 4008 4009 mBLAS.CHPR(uplo, alphaS, vectorXC, incX, matrixAC); 4010 verifyMatrix(matrixARef, matrixAC, true); 4011 4012 mRS.finish(); 4013 checkError(); 4014 } 4015 4016 public void test_L2_ZHPR_Correctness() { 4017 int uplo = ScriptIntrinsicBLAS.UPPER; 4018 int incX = 1; 4019 4020 // Populate input allocations 4021 int N = mBLASData.dN; 4022 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1)); 4023 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 4024 matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn_pu); 4025 vectorXZ.copyFrom(mBLASData.L2_zHER_x_n1); 4026 4027 // Test for the default case: NO_TRANS 4028 mBLAS.ZHPR(uplo, alphaD, vectorXZ, incX, matrixAZ); 4029 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1)); 4030 matrixARef.copyFrom(mBLASData.L2_zHER_o_N_pu); 4031 verifyMatrix(matrixARef, matrixAZ, true); 4032 4033 // Test for incX = 2; 4034 incX = 2; 4035 int dimX = 1 + (N - 1) * incX; 4036 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 4037 vectorXZ.copyFrom(mBLASData.L2_zHER_x_n2); 4038 matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn_pu); 4039 4040 mBLAS.ZHPR(uplo, alphaD, vectorXZ, incX, matrixAZ); 4041 verifyMatrix(matrixARef, matrixAZ, true); 4042 4043 mRS.finish(); 4044 checkError(); 4045 } 4046 4047 4048 private void xHER2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) { 4049 for (Allocation matA : mMatrix) { 4050 for (Allocation vecX : mMatrix) { 4051 if (!validateVecInput(vecX)) { 4052 continue; 4053 } 4054 for (Allocation vecY : mMatrix) { 4055 if (!validateVecInput(vecY)) { 4056 continue; 4057 } 4058 Element elemA = matA.getType().getElement(); 4059 if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) { 4060 try { 4061 if (elemA.isCompatible(Element.F32_2(mRS))) { 4062 mBLAS.CHER2(Uplo, alphaC, vecX, incX, vecY, incY, matA); 4063 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 4064 mBLAS.ZHER2(Uplo, alphaZ, vecX, incX, vecY, incY, matA); 4065 } 4066 } catch (RSRuntimeException e) { 4067 fail("should NOT throw RSRuntimeException"); 4068 } 4069 } else { 4070 try { 4071 mBLAS.CHER2(Uplo, alphaC, vecX, incX, vecY, incY, matA); 4072 fail("should throw RSRuntimeException for CHER2"); 4073 } catch (RSRuntimeException e) { 4074 } 4075 try { 4076 mBLAS.ZHER2(Uplo, alphaZ, vecX, incX, vecY, incY, matA); 4077 fail("should throw RSRuntimeException for ZHER2"); 4078 } catch (RSRuntimeException e) { 4079 } 4080 } 4081 } 4082 } 4083 } 4084 } 4085 4086 public void L2_xHER2_API(ArrayList<Allocation> mMatrix) { 4087 for (int Uplo : mUplo) { 4088 for (int incX : mInc) { 4089 xHER2_API_test(Uplo, incX, incX, mMatrix); 4090 } 4091 } 4092 } 4093 4094 public void test_L2_CHER2_API() { 4095 L2_xHER2_API(mMatrixC); 4096 } 4097 4098 public void test_L2_ZHER2_API() { 4099 L2_xHER2_API(mMatrixZ); 4100 } 4101 4102 public void test_L2_CHER2_Correctness() { 4103 int uplo = ScriptIntrinsicBLAS.UPPER; 4104 int incX = 1; 4105 int incY = 1; 4106 4107 // Populate input allocations 4108 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 4109 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 4110 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 4111 matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn); 4112 vectorXC.copyFrom(mBLASData.L2_cHER2_x_n1); 4113 vectorYC.copyFrom(mBLASData.L2_cHER2_y_n1); 4114 4115 // Test for the default case: NO_TRANS 4116 mBLAS.CHER2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 4117 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 4118 matrixARef.copyFrom(mBLASData.L2_cHER2_o_N); 4119 verifyMatrix(matrixARef, matrixAC, true); 4120 4121 // Test for incX = 2 & incY = 3; 4122 incX = 2; 4123 incY = 3; 4124 int dimX = 1 + (mBLASData.dN - 1) * incX; 4125 int dimY = 1 + (mBLASData.dN - 1) * incY; 4126 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 4127 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 4128 vectorXC.copyFrom(mBLASData.L2_cHER2_x_n2); 4129 vectorYC.copyFrom(mBLASData.L2_cHER2_y_n2); 4130 matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn); 4131 4132 mBLAS.CHER2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 4133 verifyMatrix(matrixARef, matrixAC, true); 4134 4135 mRS.finish(); 4136 checkError(); 4137 } 4138 4139 public void test_L2_ZHER2_Correctness() { 4140 int uplo = ScriptIntrinsicBLAS.UPPER; 4141 int incX = 1; 4142 int incY = 1; 4143 4144 // Populate input allocations 4145 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 4146 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 4147 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 4148 matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn); 4149 vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n1); 4150 vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n1); 4151 4152 // Test for the default case: NO_TRANS 4153 mBLAS.ZHER2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 4154 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 4155 matrixARef.copyFrom(mBLASData.L2_zHER2_o_N); 4156 verifyMatrix(matrixARef, matrixAZ, true); 4157 4158 // Test for incX = 2 & incY = 3; 4159 incX = 2; 4160 incY = 3; 4161 int dimX = 1 + (mBLASData.dN - 1) * incX; 4162 int dimY = 1 + (mBLASData.dN - 1) * incY; 4163 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 4164 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 4165 vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n2); 4166 vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n2); 4167 matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn); 4168 4169 mBLAS.ZHER2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 4170 verifyMatrix(matrixARef, matrixAZ, true); 4171 4172 mRS.finish(); 4173 checkError(); 4174 } 4175 4176 4177 4178 private void xHPR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) { 4179 for (Allocation matA : mMatrix) { 4180 for (Allocation vecX : mMatrix) { 4181 if (!validateVecInput(vecX)) { 4182 continue; 4183 } 4184 for (Allocation vecY : mMatrix) { 4185 if (!validateVecInput(vecY)) { 4186 continue; 4187 } 4188 Element elemA = matA.getType().getElement(); 4189 if (validateSPR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) { 4190 try { 4191 if (elemA.isCompatible(Element.F32_2(mRS))) { 4192 mBLAS.CHPR2(Uplo, alphaC, vecX, incX, vecY, incY, matA); 4193 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 4194 mBLAS.ZHPR2(Uplo, alphaZ, vecX, incX, vecY, incY, matA); 4195 } 4196 } catch (RSRuntimeException e) { 4197 fail("should NOT throw RSRuntimeException"); 4198 } 4199 } else { 4200 try { 4201 mBLAS.CHPR2(Uplo, alphaC, vecX, incX, vecY, incY, matA); 4202 fail("should throw RSRuntimeException for CHPR2"); 4203 } catch (RSRuntimeException e) { 4204 } 4205 try { 4206 mBLAS.ZHPR2(Uplo, alphaZ, vecX, incX, vecY, incY, matA); 4207 fail("should throw RSRuntimeException for ZHPR2"); 4208 } catch (RSRuntimeException e) { 4209 } 4210 } 4211 } 4212 } 4213 } 4214 } 4215 4216 public void L2_xHPR2_API(ArrayList<Allocation> mMatrix) { 4217 for (int Uplo : mUplo) { 4218 for (int incX : mInc) { 4219 xHPR2_API_test(Uplo, incX, incX, mMatrix); 4220 } 4221 } 4222 } 4223 4224 public void test_L2_CHPR2_API() { 4225 L2_xHPR2_API(mMatrixC); 4226 } 4227 4228 public void test_L2_ZHPR2_API() { 4229 L2_xHPR2_API(mMatrixZ); 4230 } 4231 4232 public void test_L2_CHPR2_Correctness() { 4233 int uplo = ScriptIntrinsicBLAS.UPPER; 4234 int incX = 1; 4235 int incY = 1; 4236 4237 // Populate input allocations 4238 int N = mBLASData.dN; 4239 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1)); 4240 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 4241 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 4242 matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn_pu); 4243 vectorXC.copyFrom(mBLASData.L2_cHER2_x_n1); 4244 vectorYC.copyFrom(mBLASData.L2_cHER2_y_n1); 4245 4246 // Test for the default case: NO_TRANS 4247 mBLAS.CHPR2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 4248 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1)); 4249 matrixARef.copyFrom(mBLASData.L2_cHER2_o_N_pu); 4250 verifyMatrix(matrixARef, matrixAC, true); 4251 4252 // Test for incX = 2 & incY = 3; 4253 incX = 2; 4254 incY = 3; 4255 int dimX = 1 + (N - 1) * incX; 4256 int dimY = 1 + (N - 1) * incY; 4257 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 4258 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 4259 vectorXC.copyFrom(mBLASData.L2_cHER2_x_n2); 4260 vectorYC.copyFrom(mBLASData.L2_cHER2_y_n2); 4261 matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn_pu); 4262 4263 mBLAS.CHPR2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 4264 verifyMatrix(matrixARef, matrixAC, true); 4265 4266 mRS.finish(); 4267 checkError(); 4268 } 4269 4270 public void test_L2_ZHPR2_Correctness() { 4271 int uplo = ScriptIntrinsicBLAS.UPPER; 4272 int incX = 1; 4273 int incY = 1; 4274 4275 // Populate input allocations 4276 int N = mBLASData.dN; 4277 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1)); 4278 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 4279 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 4280 matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn_pu); 4281 vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n1); 4282 vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n1); 4283 4284 // Test for the default case: NO_TRANS 4285 mBLAS.ZHPR2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 4286 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1)); 4287 matrixARef.copyFrom(mBLASData.L2_zHER2_o_N_pu); 4288 verifyMatrix(matrixARef, matrixAZ, true); 4289 4290 // Test for incX = 2 & incY = 3; 4291 incX = 2; 4292 incY = 3; 4293 int dimX = 1 + (N - 1) * incX; 4294 int dimY = 1 + (N - 1) * incY; 4295 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 4296 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 4297 vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n2); 4298 vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n2); 4299 matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn_pu); 4300 4301 mBLAS.ZHPR2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 4302 verifyMatrix(matrixARef, matrixAZ, true); 4303 4304 mRS.finish(); 4305 checkError(); 4306 } 4307 4308 4309 4310 private boolean validateSYR(Element e, int Uplo, Allocation X, int incX, Allocation A) { 4311 if (!validateUplo(Uplo)) { 4312 return false; 4313 } 4314 if (!A.getType().getElement().isCompatible(e) || 4315 !X.getType().getElement().isCompatible(e)) { 4316 return false; 4317 } 4318 4319 int N = A.getType().getX(); 4320 4321 if (X.getType().getY() > 1) { 4322 return false; 4323 } 4324 if (N != A.getType().getY()) { 4325 return false; 4326 } 4327 if (incX <= 0) { 4328 return false; 4329 } 4330 int expectedXDim = 1 + (N - 1) * incX; 4331 if (X.getType().getX() != expectedXDim) { 4332 return false; 4333 } 4334 return true; 4335 } 4336 4337 private void xSYR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) { 4338 for (Allocation matA : mMatrix) { 4339 for (Allocation vecX : mMatrix) { 4340 if (!validateVecInput(vecX)) { 4341 continue; 4342 } 4343 Element elemA = matA.getType().getElement(); 4344 if (validateSYR(elemA, Uplo, vecX, incX, matA)) { 4345 try { 4346 if (elemA.isCompatible(Element.F32(mRS))) { 4347 mBLAS.SSYR(Uplo, alphaS, vecX, incX, matA); 4348 } else if (elemA.isCompatible(Element.F64(mRS))) { 4349 mBLAS.DSYR(Uplo, alphaD, vecX, incX, matA); 4350 } 4351 } catch (RSRuntimeException e) { 4352 fail("should NOT throw RSRuntimeException"); 4353 } 4354 } else { 4355 try { 4356 mBLAS.SSYR(Uplo, alphaS, vecX, incX, matA); 4357 fail("should throw RSRuntimeException for SSYR"); 4358 } catch (RSRuntimeException e) { 4359 } 4360 try { 4361 mBLAS.DSYR(Uplo, alphaD, vecX, incX, matA); 4362 fail("should throw RSRuntimeException for DSYR"); 4363 } catch (RSRuntimeException e) { 4364 } 4365 } 4366 } 4367 } 4368 } 4369 4370 public void L2_xSYR_API(ArrayList<Allocation> mMatrix) { 4371 for (int Uplo : mUplo) { 4372 for (int incX : mInc) { 4373 xSYR_API_test(Uplo, incX, mMatrix); 4374 } 4375 } 4376 } 4377 4378 public void test_L2_SSYR_API() { 4379 L2_xSYR_API(mMatrixS); 4380 } 4381 4382 public void test_L2_DSYR_API() { 4383 L2_xSYR_API(mMatrixD); 4384 } 4385 4386 public void test_L2_SSYR_Correctness() { 4387 int uplo = ScriptIntrinsicBLAS.UPPER; 4388 int incX = 1; 4389 4390 // Populate input allocations 4391 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 4392 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 4393 matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn); 4394 vectorXS.copyFrom(mBLASData.L2_sSYR_x_n1); 4395 4396 // Test for the default case: NO_TRANS 4397 mBLAS.SSYR(uplo, alphaS, vectorXS, incX, matrixAS); 4398 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 4399 matrixARef.copyFrom(mBLASData.L2_sSYR_o_N); 4400 verifyMatrix(matrixARef, matrixAS, true); 4401 4402 // Test for incX = 2; 4403 incX = 2; 4404 int dimX = 1 + (mBLASData.dN - 1) * incX; 4405 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 4406 vectorXS.copyFrom(mBLASData.L2_sSYR_x_n2); 4407 matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn); 4408 4409 mBLAS.SSYR(uplo, alphaS, vectorXS, incX, matrixAS); 4410 verifyMatrix(matrixARef, matrixAS, true); 4411 4412 mRS.finish(); 4413 checkError(); 4414 } 4415 4416 public void test_L2_DSYR_Correctness() { 4417 int uplo = ScriptIntrinsicBLAS.UPPER; 4418 int incX = 1; 4419 4420 // Populate input allocations 4421 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 4422 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 4423 matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn); 4424 vectorXD.copyFrom(mBLASData.L2_dSYR_x_n1); 4425 4426 // Test for the default case: NO_TRANS 4427 mBLAS.DSYR(uplo, alphaD, vectorXD, incX, matrixAD); 4428 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 4429 matrixARef.copyFrom(mBLASData.L2_dSYR_o_N); 4430 verifyMatrix(matrixARef, matrixAD, true); 4431 4432 // Test for incX = 2; 4433 incX = 2; 4434 int dimX = 1 + (mBLASData.dN - 1) * incX; 4435 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 4436 vectorXD.copyFrom(mBLASData.L2_dSYR_x_n2); 4437 matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn); 4438 4439 mBLAS.DSYR(uplo, alphaD, vectorXD, incX, matrixAD); 4440 verifyMatrix(matrixARef, matrixAD, true); 4441 4442 mRS.finish(); 4443 checkError(); 4444 } 4445 4446 4447 private boolean validateSPR(Element e, int Uplo, Allocation X, int incX, Allocation Ap) { 4448 if (!validateUplo(Uplo)) { 4449 return false; 4450 } 4451 if (!Ap.getType().getElement().isCompatible(e) || 4452 !X.getType().getElement().isCompatible(e)) { 4453 return false; 4454 } 4455 if (X.getType().getY() > 1) { 4456 return false; 4457 } 4458 4459 if (Ap.getType().getY() > 1) { 4460 return false; 4461 } 4462 4463 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 4464 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 4465 return false; 4466 } 4467 if (incX <= 0) { 4468 return false; 4469 } 4470 int expectedXDim = 1 + (N - 1) * incX; 4471 if (X.getType().getX() != expectedXDim) { 4472 return false; 4473 } 4474 4475 return true; 4476 } 4477 4478 private void xSPR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) { 4479 for (Allocation matA : mMatrix) { 4480 for (Allocation vecX : mMatrix) { 4481 if (!validateVecInput(vecX)) { 4482 continue; 4483 } 4484 Element elemA = matA.getType().getElement(); 4485 if (validateSPR(elemA, Uplo, vecX, incX, matA)) { 4486 try { 4487 if (elemA.isCompatible(Element.F32(mRS))) { 4488 mBLAS.SSPR(Uplo, alphaS, vecX, incX, matA); 4489 } else if (elemA.isCompatible(Element.F64(mRS))) { 4490 mBLAS.DSPR(Uplo, alphaD, vecX, incX, matA); 4491 } 4492 } catch (RSRuntimeException e) { 4493 fail("should NOT throw RSRuntimeException"); 4494 } 4495 } else { 4496 try { 4497 mBLAS.SSPR(Uplo, alphaS, vecX, incX, matA); 4498 fail("should throw RSRuntimeException for SSPR"); 4499 } catch (RSRuntimeException e) { 4500 } 4501 try { 4502 mBLAS.DSPR(Uplo, alphaD, vecX, incX, matA); 4503 fail("should throw RSRuntimeException for DSPR"); 4504 } catch (RSRuntimeException e) { 4505 } 4506 } 4507 } 4508 } 4509 } 4510 4511 public void L2_xSPR_API(ArrayList<Allocation> mMatrix) { 4512 for (int Uplo : mUplo) { 4513 for (int incX : mInc) { 4514 xSPR_API_test(Uplo, incX, mMatrix); 4515 } 4516 } 4517 } 4518 4519 public void test_L2_SSPR_API() { 4520 L2_xSPR_API(mMatrixS); 4521 } 4522 4523 public void test_L2_DSPR_API() { 4524 L2_xSPR_API(mMatrixD); 4525 } 4526 4527 public void test_L2_SSPR_Correctness() { 4528 int uplo = ScriptIntrinsicBLAS.UPPER; 4529 int incX = 1; 4530 4531 // Populate input allocations 4532 int N = mBLASData.dN; 4533 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1)); 4534 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 4535 matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn_pu); 4536 vectorXS.copyFrom(mBLASData.L2_sSYR_x_n1); 4537 4538 // Test for the default case: NO_TRANS 4539 mBLAS.SSPR(uplo, alphaS, vectorXS, incX, matrixAS); 4540 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1)); 4541 matrixARef.copyFrom(mBLASData.L2_sSYR_o_N_pu); 4542 verifyMatrix(matrixARef, matrixAS, true); 4543 4544 // Test for incX = 2; 4545 incX = 2; 4546 int dimX = 1 + (N - 1) * incX; 4547 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 4548 vectorXS.copyFrom(mBLASData.L2_sSYR_x_n2); 4549 matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn_pu); 4550 4551 mBLAS.SSPR(uplo, alphaS, vectorXS, incX, matrixAS); 4552 verifyMatrix(matrixARef, matrixAS, true); 4553 4554 mRS.finish(); 4555 checkError(); 4556 } 4557 4558 public void test_L2_DSPR_Correctness() { 4559 int uplo = ScriptIntrinsicBLAS.UPPER; 4560 int incX = 1; 4561 4562 // Populate input allocations 4563 int N = mBLASData.dN; 4564 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1)); 4565 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 4566 matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn_pu); 4567 vectorXD.copyFrom(mBLASData.L2_dSYR_x_n1); 4568 4569 // Test for the default case: NO_TRANS 4570 mBLAS.DSPR(uplo, alphaD, vectorXD, incX, matrixAD); 4571 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1)); 4572 matrixARef.copyFrom(mBLASData.L2_dSYR_o_N_pu); 4573 verifyMatrix(matrixARef, matrixAD, true); 4574 4575 // Test for incX = 2; 4576 incX = 2; 4577 int dimX = 1 + (N - 1) * incX; 4578 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 4579 vectorXD.copyFrom(mBLASData.L2_dSYR_x_n2); 4580 matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn_pu); 4581 4582 mBLAS.DSPR(uplo, alphaD, vectorXD, incX, matrixAD); 4583 verifyMatrix(matrixARef, matrixAD, true); 4584 4585 mRS.finish(); 4586 checkError(); 4587 } 4588 4589 4590 private boolean validateSYR2(Element e, int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 4591 if (!validateUplo(Uplo)) { 4592 return false; 4593 } 4594 if (!A.getType().getElement().isCompatible(e) || 4595 !X.getType().getElement().isCompatible(e) || 4596 !Y.getType().getElement().isCompatible(e)) { 4597 return false; 4598 } 4599 4600 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 4601 return false; 4602 } 4603 4604 int N = A.getType().getX(); 4605 4606 if (N != A.getType().getY()) { 4607 return false; 4608 } 4609 if (incX <= 0 || incY <= 0) { 4610 return false; 4611 } 4612 int expectedXDim = 1 + (N - 1) * incX; 4613 int expectedYDim = 1 + (N - 1) * incY; 4614 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 4615 return false; 4616 } 4617 return true; 4618 } 4619 4620 private void xSYR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) { 4621 for (Allocation matA : mMatrix) { 4622 for (Allocation vecX : mMatrix) { 4623 if (!validateVecInput(vecX)) { 4624 continue; 4625 } 4626 for (Allocation vecY : mMatrix) { 4627 if (!validateVecInput(vecY)) { 4628 continue; 4629 } 4630 Element elemA = matA.getType().getElement(); 4631 if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) { 4632 try { 4633 if (elemA.isCompatible(Element.F32(mRS))) { 4634 mBLAS.SSYR2(Uplo, alphaS, vecX, incX, vecY, incY, matA); 4635 } else if (elemA.isCompatible(Element.F64(mRS))) { 4636 mBLAS.DSYR2(Uplo, alphaD, vecX, incX, vecY, incY, matA); 4637 } 4638 } catch (RSRuntimeException e) { 4639 fail("should NOT throw RSRuntimeException"); 4640 } 4641 } else { 4642 try { 4643 mBLAS.SSYR2(Uplo, alphaS, vecX, incX, vecY, incY, matA); 4644 fail("should throw RSRuntimeException for SSYR2"); 4645 } catch (RSRuntimeException e) { 4646 } 4647 try { 4648 mBLAS.DSYR2(Uplo, alphaD, vecX, incX, vecY, incY, matA); 4649 fail("should throw RSRuntimeException for DSYR2"); 4650 } catch (RSRuntimeException e) { 4651 } 4652 } 4653 } 4654 } 4655 } 4656 } 4657 4658 public void L2_xSYR2_API(ArrayList<Allocation> mMatrix) { 4659 for (int Uplo : mUplo) { 4660 for (int incX : mInc) { 4661 xSYR2_API_test(Uplo, incX, incX, mMatrix); 4662 } 4663 } 4664 } 4665 4666 public void test_L2_SSYR2_API() { 4667 L2_xSYR2_API(mMatrixS); 4668 } 4669 4670 public void test_L2_DSYR2_API() { 4671 L2_xSYR2_API(mMatrixD); 4672 } 4673 4674 public void test_L2_SSYR2_Correctness() { 4675 int uplo = ScriptIntrinsicBLAS.UPPER; 4676 int incX = 1; 4677 int incY = 1; 4678 4679 // Populate input allocations 4680 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 4681 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 4682 Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 4683 matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn); 4684 vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n1); 4685 vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n1); 4686 4687 // Test for the default case: NO_TRANS 4688 mBLAS.SSYR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS); 4689 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 4690 matrixARef.copyFrom(mBLASData.L2_sSYR2_o_N); 4691 verifyMatrix(matrixARef, matrixAS, true); 4692 4693 // Test for incX = 2 & incY = 3; 4694 incX = 2; 4695 incY = 3; 4696 int dimX = 1 + (mBLASData.dN - 1) * incX; 4697 int dimY = 1 + (mBLASData.dN - 1) * incY; 4698 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 4699 vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 4700 vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n2); 4701 vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n2); 4702 matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn); 4703 4704 mBLAS.SSYR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS); 4705 verifyMatrix(matrixARef, matrixAS, true); 4706 4707 mRS.finish(); 4708 checkError(); 4709 } 4710 4711 public void test_L2_DSYR2_Correctness() { 4712 int uplo = ScriptIntrinsicBLAS.UPPER; 4713 int incX = 1; 4714 int incY = 1; 4715 4716 // Populate input allocations 4717 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 4718 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 4719 Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 4720 matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn); 4721 vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n1); 4722 vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n1); 4723 4724 // Test for the default case: NO_TRANS 4725 mBLAS.DSYR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD); 4726 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 4727 matrixARef.copyFrom(mBLASData.L2_dSYR2_o_N); 4728 verifyMatrix(matrixARef, matrixAD, true); 4729 4730 // Test for incX = 2 & incY = 3; 4731 incX = 2; 4732 incY = 3; 4733 int dimX = 1 + (mBLASData.dN - 1) * incX; 4734 int dimY = 1 + (mBLASData.dN - 1) * incY; 4735 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 4736 vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 4737 vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n2); 4738 vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n2); 4739 matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn); 4740 4741 mBLAS.DSYR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD); 4742 verifyMatrix(matrixARef, matrixAD, true); 4743 4744 mRS.finish(); 4745 checkError(); 4746 } 4747 4748 4749 private boolean validateSPR2(Element e, int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 4750 if (!validateUplo(Uplo)) { 4751 return false; 4752 } 4753 if (!Ap.getType().getElement().isCompatible(e) || 4754 !X.getType().getElement().isCompatible(e) || 4755 !Y.getType().getElement().isCompatible(e)) { 4756 return false; 4757 } 4758 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 4759 return false; 4760 } 4761 4762 if (Ap.getType().getY() > 1) { 4763 return false; 4764 } 4765 4766 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 4767 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 4768 return false; 4769 } 4770 if (incX <= 0 || incY <= 0) { 4771 return false; 4772 } 4773 int expectedXDim = 1 + (N - 1) * incX; 4774 int expectedYDim = 1 + (N - 1) * incY; 4775 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 4776 return false; 4777 } 4778 4779 return true; 4780 } 4781 4782 private void xSPR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) { 4783 for (Allocation matA : mMatrix) { 4784 for (Allocation vecX : mMatrix) { 4785 if (!validateVecInput(vecX)) { 4786 continue; 4787 } 4788 for (Allocation vecY : mMatrix) { 4789 if (!validateVecInput(vecY)) { 4790 continue; 4791 } 4792 Element elemA = matA.getType().getElement(); 4793 if (validateSPR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) { 4794 try { 4795 if (elemA.isCompatible(Element.F32(mRS))) { 4796 mBLAS.SSPR2(Uplo, alphaS, vecX, incX, vecY, incY, matA); 4797 } else if (elemA.isCompatible(Element.F64(mRS))) { 4798 mBLAS.DSPR2(Uplo, alphaD, vecX, incX, vecY, incY, matA); 4799 } 4800 } catch (RSRuntimeException e) { 4801 fail("should NOT throw RSRuntimeException"); 4802 } 4803 } else { 4804 try { 4805 mBLAS.SSPR2(Uplo, alphaS, vecX, incX, vecY, incY, matA); 4806 fail("should throw RSRuntimeException for SSPR2"); 4807 } catch (RSRuntimeException e) { 4808 } 4809 try { 4810 mBLAS.DSPR2(Uplo, alphaD, vecX, incX, vecY, incY, matA); 4811 fail("should throw RSRuntimeException for DSPR2"); 4812 } catch (RSRuntimeException e) { 4813 } 4814 } 4815 } 4816 } 4817 } 4818 } 4819 4820 public void L2_xSPR2_API(ArrayList<Allocation> mMatrix) { 4821 for (int Uplo : mUplo) { 4822 for (int incX : mInc) { 4823 xSPR2_API_test(Uplo, incX, incX, mMatrix); 4824 } 4825 } 4826 } 4827 4828 public void test_L2_SSPR2_API() { 4829 L2_xSPR2_API(mMatrixS); 4830 } 4831 4832 public void test_L2_DSPR2_API() { 4833 L2_xSPR2_API(mMatrixD); 4834 } 4835 4836 public void test_L2_SSPR2_Correctness() { 4837 int uplo = ScriptIntrinsicBLAS.UPPER; 4838 int incX = 1; 4839 int incY = 1; 4840 4841 // Populate input allocations 4842 int N = mBLASData.dN; 4843 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1)); 4844 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 4845 Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 4846 matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn_pu); 4847 vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n1); 4848 vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n1); 4849 4850 // Test for the default case: NO_TRANS 4851 mBLAS.SSPR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS); 4852 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1)); 4853 matrixARef.copyFrom(mBLASData.L2_sSYR2_o_N_pu); 4854 verifyMatrix(matrixARef, matrixAS, true); 4855 4856 // Test for incX = 2 & incY = 3; 4857 incX = 2; 4858 incY = 3; 4859 int dimX = 1 + (N - 1) * incX; 4860 int dimY = 1 + (N - 1) * incY; 4861 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 4862 vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 4863 vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n2); 4864 vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n2); 4865 matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn_pu); 4866 4867 mBLAS.SSPR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS); 4868 verifyMatrix(matrixARef, matrixAS, true); 4869 4870 mRS.finish(); 4871 checkError(); 4872 } 4873 4874 public void test_L2_DSPR2_Correctness() { 4875 int uplo = ScriptIntrinsicBLAS.UPPER; 4876 int incX = 1; 4877 int incY = 1; 4878 4879 // Populate input allocations 4880 int N = mBLASData.dN; 4881 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1)); 4882 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 4883 Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 4884 matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn_pu); 4885 vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n1); 4886 vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n1); 4887 4888 // Test for the default case: NO_TRANS 4889 mBLAS.DSPR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD); 4890 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1)); 4891 matrixARef.copyFrom(mBLASData.L2_dSYR2_o_N_pu); 4892 verifyMatrix(matrixARef, matrixAD, true); 4893 4894 // Test for incX = 2 & incY = 3; 4895 incX = 2; 4896 incY = 3; 4897 int dimX = 1 + (N - 1) * incX; 4898 int dimY = 1 + (N - 1) * incY; 4899 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 4900 vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 4901 vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n2); 4902 vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n2); 4903 matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn_pu); 4904 4905 mBLAS.DSPR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD); 4906 verifyMatrix(matrixARef, matrixAD, true); 4907 4908 mRS.finish(); 4909 checkError(); 4910 } 4911 4912 4913 4914 private boolean validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) { 4915 int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1; 4916 if ((A != null && !A.getType().getElement().isCompatible(e)) || 4917 (B != null && !B.getType().getElement().isCompatible(e)) || 4918 (C != null && !C.getType().getElement().isCompatible(e))) { 4919 return false; 4920 } 4921 if (C == null) { 4922 //since matrix C is used to store the result, it cannot be null. 4923 return false; 4924 } 4925 cM = C.getType().getY(); 4926 cN = C.getType().getX(); 4927 4928 if (Side == ScriptIntrinsicBLAS.RIGHT) { 4929 if ((A == null && B != null) || (A != null && B == null)) { 4930 return false; 4931 } 4932 if (B != null) { 4933 bM = A.getType().getY(); 4934 bN = A.getType().getX(); 4935 } 4936 if (A != null) { 4937 aM = B.getType().getY(); 4938 aN = B.getType().getX(); 4939 } 4940 } else { 4941 if (A != null) { 4942 if (TransA == ScriptIntrinsicBLAS.TRANSPOSE || 4943 TransA == ScriptIntrinsicBLAS.CONJ_TRANSPOSE ) { 4944 aN = A.getType().getY(); 4945 aM = A.getType().getX(); 4946 } else { 4947 aM = A.getType().getY(); 4948 aN = A.getType().getX(); 4949 } 4950 } 4951 if (B != null) { 4952 if (TransB == ScriptIntrinsicBLAS.TRANSPOSE || 4953 TransB == ScriptIntrinsicBLAS.CONJ_TRANSPOSE ) { 4954 bN = B.getType().getY(); 4955 bM = B.getType().getX(); 4956 } else { 4957 bM = B.getType().getY(); 4958 bN = B.getType().getX(); 4959 } 4960 } 4961 } 4962 if (A != null && B != null && C != null) { 4963 if (aN != bM || aM != cM || bN != cN) { 4964 return false; 4965 } 4966 } else if (A != null && C != null) { 4967 // A and C only, for SYRK 4968 if (cM != cN) { 4969 return false; 4970 } 4971 if (aM != cM) { 4972 return false; 4973 } 4974 } else if (A != null && B != null) { 4975 // A and B only 4976 if (aN != bM) { 4977 return false; 4978 } 4979 } 4980 4981 return true; 4982 } 4983 4984 private boolean validateL3_xGEMM(Element e, int TransA, int TransB, Allocation A, Allocation B, Allocation C) { 4985 boolean result = true; 4986 result &= validateTranspose(TransA); 4987 result &= validateTranspose(TransB); 4988 result &= validateL3(e, TransA, TransB, 0, A, B, C); 4989 4990 return result; 4991 } 4992 4993 private void xGEMM_API_test(int transA, int transB, ArrayList<Allocation> mMatrix) { 4994 for (Allocation matA : mMatrix) { 4995 for (Allocation matB : mMatrix) { 4996 for (Allocation matC : mMatrix) { 4997 Element elemA = matA.getType().getElement(); 4998 if (validateL3_xGEMM(elemA, transA, transB, matA, matB, matC)) { 4999 try { 5000 if (elemA.isCompatible(Element.F32(mRS))) { 5001 mBLAS.SGEMM(transA, transB, alphaS, matA, matB, betaS, matC); 5002 } else if (elemA.isCompatible(Element.F64(mRS))) { 5003 mBLAS.DGEMM(transA, transB, alphaD, matA, matB, betaD, matC); 5004 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 5005 mBLAS.CGEMM(transA, transB, alphaC, matA, matB, betaC, matC); 5006 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 5007 mBLAS.ZGEMM(transA, transB, alphaZ, matA, matB, betaZ, matC); 5008 } 5009 } catch (RSRuntimeException e) { 5010 fail("should NOT throw RSRuntimeException"); 5011 } 5012 } else { 5013 try { 5014 mBLAS.SGEMM(transA, transB, alphaS, matA, matB, betaS, matC); 5015 fail("should throw RSRuntimeException for SGEMM"); 5016 } catch (RSRuntimeException e) { 5017 } 5018 try { 5019 mBLAS.DGEMM(transA, transB, alphaD, matA, matB, betaD, matC); 5020 fail("should throw RSRuntimeException for DGEMM"); 5021 } catch (RSRuntimeException e) { 5022 } 5023 try { 5024 mBLAS.CGEMM(transA, transB, alphaC, matA, matB, betaC, matC); 5025 fail("should throw RSRuntimeException for CGEMM"); 5026 } catch (RSRuntimeException e) { 5027 } 5028 try { 5029 mBLAS.ZGEMM(transA, transB, alphaZ, matA, matB, betaZ, matC); 5030 fail("should throw RSRuntimeException for ZGEMM"); 5031 } catch (RSRuntimeException e) { 5032 } 5033 } 5034 } 5035 } 5036 } 5037 } 5038 5039 private void L3_xGEMM_API(ArrayList<Allocation> mMatrix) { 5040 for (int transA : mTranspose) { 5041 for (int transB : mTranspose) { 5042 xGEMM_API_test(transA, transB, mMatrix); 5043 } 5044 } 5045 } 5046 5047 public void test_L3_SGEMM_API() { 5048 L3_xGEMM_API(mMatrixS); 5049 } 5050 5051 public void test_L3_DGEMM_API() { 5052 L3_xGEMM_API(mMatrixD); 5053 } 5054 5055 public void test_L3_CGEMM_API() { 5056 L3_xGEMM_API(mMatrixC); 5057 } 5058 5059 public void test_L3_ZGEMM_API() { 5060 L3_xGEMM_API(mMatrixZ); 5061 } 5062 5063 5064 public void test_L3_SGEMM_Correctness() { 5065 int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5066 int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5067 5068 // Populate input allocations 5069 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dM)); 5070 Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK)); 5071 Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 5072 matrixAS.copyFrom(mBLASData.L3_sGEMM_A_mk); 5073 matrixBS.copyFrom(mBLASData.L3_sGEMM_B_kn); 5074 matrixCS.copyFrom(mBLASData.L3_sGEMM_C_mn); 5075 5076 // Test for the default case: NO_TRANS 5077 mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS); 5078 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 5079 matrixCRef.copyFrom(mBLASData.L3_sGEMM_o_NN); 5080 verifyMatrix(matrixCRef, matrixCS); 5081 5082 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 5083 matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dK)); 5084 matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN)); 5085 matrixAS.copyFrom(mBLASData.L3_sGEMM_A_km); 5086 matrixBS.copyFrom(mBLASData.L3_sGEMM_B_nk); 5087 5088 transA = ScriptIntrinsicBLAS.TRANSPOSE; 5089 transB = ScriptIntrinsicBLAS.TRANSPOSE; 5090 // Reload matrix C, since it was overwritten by BLAS. 5091 matrixCS.copyFrom(mBLASData.L3_sGEMM_C_mn); 5092 mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS); 5093 matrixCRef.copyFrom(mBLASData.L3_sGEMM_o_TT); 5094 verifyMatrix(matrixCRef, matrixCS); 5095 5096 transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5097 transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5098 matrixCS.copyFrom(mBLASData.L3_sGEMM_C_mn); 5099 mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS); 5100 matrixCRef.copyFrom(mBLASData.L3_sGEMM_o_HH); 5101 verifyMatrix(matrixCRef, matrixCS); 5102 5103 mRS.finish(); 5104 checkError(); 5105 } 5106 5107 public void test_L3_DGEMM_Correctness() { 5108 int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5109 int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5110 5111 // Populate input allocations 5112 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dM)); 5113 Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK)); 5114 Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 5115 matrixAD.copyFrom(mBLASData.L3_dGEMM_A_mk); 5116 matrixBD.copyFrom(mBLASData.L3_dGEMM_B_kn); 5117 matrixCD.copyFrom(mBLASData.L3_dGEMM_C_mn); 5118 // Test for the default case: NO_TRANS 5119 mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD); 5120 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 5121 matrixCRef.copyFrom(mBLASData.L3_dGEMM_o_NN); 5122 verifyMatrix(matrixCRef, matrixCD); 5123 5124 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 5125 matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dK)); 5126 matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN)); 5127 matrixAD.copyFrom(mBLASData.L3_dGEMM_A_km); 5128 matrixBD.copyFrom(mBLASData.L3_dGEMM_B_nk); 5129 5130 transA = ScriptIntrinsicBLAS.TRANSPOSE; 5131 transB = ScriptIntrinsicBLAS.TRANSPOSE; 5132 // Reload matrix C, since it was overwritten by BLAS. 5133 matrixCD.copyFrom(mBLASData.L3_dGEMM_C_mn); 5134 mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD); 5135 matrixCRef.copyFrom(mBLASData.L3_dGEMM_o_TT); 5136 verifyMatrix(matrixCRef, matrixCD); 5137 5138 transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5139 transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5140 matrixCD.copyFrom(mBLASData.L3_dGEMM_C_mn); 5141 mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD); 5142 matrixCRef.copyFrom(mBLASData.L3_dGEMM_o_HH); 5143 verifyMatrix(matrixCRef, matrixCD); 5144 5145 mRS.finish(); 5146 checkError(); 5147 } 5148 5149 public void test_L3_CGEMM_Correctness() { 5150 int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5151 int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5152 5153 // Populate input allocations 5154 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dM)); 5155 Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK)); 5156 Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 5157 matrixAC.copyFrom(mBLASData.L3_cGEMM_A_mk); 5158 matrixBC.copyFrom(mBLASData.L3_cGEMM_B_kn); 5159 matrixCC.copyFrom(mBLASData.L3_cGEMM_C_mn); 5160 5161 // Test for the default case: NO_TRANS 5162 mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC); 5163 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 5164 matrixCRef.copyFrom(mBLASData.L3_cGEMM_o_NN); 5165 verifyMatrix(matrixCRef, matrixCC); 5166 5167 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 5168 matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dK)); 5169 matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN)); 5170 matrixAC.copyFrom(mBLASData.L3_cGEMM_A_km); 5171 matrixBC.copyFrom(mBLASData.L3_cGEMM_B_nk); 5172 5173 transA = ScriptIntrinsicBLAS.TRANSPOSE; 5174 transB = ScriptIntrinsicBLAS.TRANSPOSE; 5175 // Reload matrix C, since it was overwritten by BLAS. 5176 matrixCC.copyFrom(mBLASData.L3_cGEMM_C_mn); 5177 mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC); 5178 matrixCRef.copyFrom(mBLASData.L3_cGEMM_o_TT); 5179 verifyMatrix(matrixCRef, matrixCC); 5180 5181 transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5182 transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5183 matrixCC.copyFrom(mBLASData.L3_cGEMM_C_mn); 5184 mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC); 5185 matrixCRef.copyFrom(mBLASData.L3_cGEMM_o_HH); 5186 verifyMatrix(matrixCRef, matrixCC); 5187 5188 mRS.finish(); 5189 checkError(); 5190 } 5191 5192 public void test_L3_ZGEMM_Correctness() { 5193 int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5194 int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5195 5196 // Populate input allocations 5197 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dM)); 5198 Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK)); 5199 Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 5200 matrixAZ.copyFrom(mBLASData.L3_zGEMM_A_mk); 5201 matrixBZ.copyFrom(mBLASData.L3_zGEMM_B_kn); 5202 matrixCZ.copyFrom(mBLASData.L3_zGEMM_C_mn); 5203 5204 // Test for the default case: NO_TRANS 5205 mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 5206 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 5207 matrixCRef.copyFrom(mBLASData.L3_zGEMM_o_NN); 5208 verifyMatrix(matrixCRef, matrixCZ); 5209 5210 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 5211 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dK)); 5212 matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN)); 5213 matrixAZ.copyFrom(mBLASData.L3_zGEMM_A_km); 5214 matrixBZ.copyFrom(mBLASData.L3_zGEMM_B_nk); 5215 5216 transA = ScriptIntrinsicBLAS.TRANSPOSE; 5217 transB = ScriptIntrinsicBLAS.TRANSPOSE; 5218 // Reload matrix C, since it was overwritten by BLAS. 5219 matrixCZ.copyFrom(mBLASData.L3_zGEMM_C_mn); 5220 mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 5221 matrixCRef.copyFrom(mBLASData.L3_zGEMM_o_TT); 5222 verifyMatrix(matrixCRef, matrixCZ); 5223 5224 transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5225 transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5226 matrixCZ.copyFrom(mBLASData.L3_zGEMM_C_mn); 5227 mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 5228 matrixCRef.copyFrom(mBLASData.L3_zGEMM_o_HH); 5229 verifyMatrix(matrixCRef, matrixCZ); 5230 5231 mRS.finish(); 5232 checkError(); 5233 } 5234 5235 5236 5237 private boolean validateL3_xSYMM(Element e, int Side, int Uplo, Allocation A, Allocation B, Allocation C) { 5238 boolean result = true; 5239 result &= validateSide(Side); 5240 result &= validateUplo(Uplo); 5241 result &= validateL3(e, 0, 0, Side, A, B, C); 5242 result &= (A.getType().getX() == A.getType().getY()); 5243 return result; 5244 } 5245 5246 private void xSYMM_API_test(int Side, int Uplo, ArrayList<Allocation> mMatrix) { 5247 for (Allocation matA : mMatrix) { 5248 for (Allocation matB : mMatrix) { 5249 for (Allocation matC : mMatrix) { 5250 Element elemA = matA.getType().getElement(); 5251 if (validateL3_xSYMM(elemA, Side, Uplo, matA, matB, matC)) { 5252 try { 5253 if (elemA.isCompatible(Element.F32(mRS))) { 5254 mBLAS.SSYMM(Side, Uplo, alphaS, matA, matB, betaS, matC); 5255 } else if (elemA.isCompatible(Element.F64(mRS))) { 5256 mBLAS.DSYMM(Side, Uplo, alphaD, matA, matB, betaD, matC); 5257 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 5258 mBLAS.CSYMM(Side, Uplo, alphaC, matA, matB, betaC, matC); 5259 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 5260 mBLAS.ZSYMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC); 5261 } 5262 } catch (RSRuntimeException e) { 5263 fail("should NOT throw RSRuntimeException"); 5264 } 5265 } else { 5266 try { 5267 mBLAS.SSYMM(Side, Uplo, alphaS, matA, matB, betaS, matC); 5268 fail("should throw RSRuntimeException for SSYMM"); 5269 } catch (RSRuntimeException e) { 5270 } 5271 try { 5272 mBLAS.DSYMM(Side, Uplo, alphaD, matA, matB, betaD, matC); 5273 fail("should throw RSRuntimeException for DSYMM"); 5274 } catch (RSRuntimeException e) { 5275 } 5276 try { 5277 mBLAS.CSYMM(Side, Uplo, alphaC, matA, matB, betaC, matC); 5278 fail("should throw RSRuntimeException for CSYMM"); 5279 } catch (RSRuntimeException e) { 5280 } 5281 try { 5282 mBLAS.ZSYMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC); 5283 fail("should throw RSRuntimeException for ZSYMM"); 5284 } catch (RSRuntimeException e) { 5285 } 5286 } 5287 } 5288 } 5289 } 5290 } 5291 5292 private void L3_xSYMM_API(ArrayList<Allocation> mMatrix) { 5293 for (int Side : mSide) { 5294 for (int Uplo : mUplo) { 5295 xSYMM_API_test(Side, Uplo, mMatrix); 5296 } 5297 } 5298 } 5299 5300 public void test_L3_SSYMM_API() { 5301 L3_xSYMM_API(mMatrixS); 5302 } 5303 5304 public void test_L3_DSYMM_API() { 5305 L3_xSYMM_API(mMatrixD); 5306 } 5307 5308 public void test_L3_CSYMM_API() { 5309 L3_xSYMM_API(mMatrixC); 5310 } 5311 5312 public void test_L3_ZSYMM_API() { 5313 L3_xSYMM_API(mMatrixZ); 5314 } 5315 5316 5317 public void test_L3_SSYMM_Correctness() { 5318 int side = ScriptIntrinsicBLAS.LEFT; 5319 int uplo = ScriptIntrinsicBLAS.UPPER; 5320 5321 // Populate input allocations 5322 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dM)); 5323 Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 5324 Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 5325 matrixAS.copyFrom(mBLASData.L3_sSYMM_A_mm); 5326 matrixBS.copyFrom(mBLASData.L3_sSYMM_B_mn); 5327 matrixCS.copyFrom(mBLASData.L3_sSYMM_C_mn); 5328 5329 // Default case: SIDE = LEFT 5330 mBLAS.SSYMM(side, uplo, alphaS, matrixAS, matrixBS, betaS, matrixCS); 5331 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 5332 matrixCRef.copyFrom(mBLASData.L3_sSYMM_o_L); 5333 verifyMatrix(matrixCRef, matrixCS); 5334 5335 // SIDE = RIGHT 5336 side = ScriptIntrinsicBLAS.RIGHT; 5337 matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 5338 matrixAS.copyFrom(mBLASData.L3_sSYMM_A_nn); 5339 // Reload matrix C, since it was overwritten by BLAS. 5340 matrixCS.copyFrom(mBLASData.L3_sSYMM_C_mn); 5341 mBLAS.SSYMM(side, uplo, alphaS, matrixAS, matrixBS, betaS, matrixCS); 5342 matrixCRef.copyFrom(mBLASData.L3_sSYMM_o_R); 5343 verifyMatrix(matrixCRef, matrixCS); 5344 5345 mRS.finish(); 5346 checkError(); 5347 } 5348 5349 public void test_L3_DSYMM_Correctness() { 5350 int side = ScriptIntrinsicBLAS.LEFT; 5351 int uplo = ScriptIntrinsicBLAS.UPPER; 5352 5353 // Populate input allocations 5354 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dM)); 5355 Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 5356 Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 5357 matrixAD.copyFrom(mBLASData.L3_dSYMM_A_mm); 5358 matrixBD.copyFrom(mBLASData.L3_dSYMM_B_mn); 5359 matrixCD.copyFrom(mBLASData.L3_dSYMM_C_mn); 5360 5361 // Default case: SIDE = LEFT 5362 mBLAS.DSYMM(side, uplo, alphaD, matrixAD, matrixBD, betaD, matrixCD); 5363 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 5364 matrixCRef.copyFrom(mBLASData.L3_dSYMM_o_L); 5365 verifyMatrix(matrixCRef, matrixCD); 5366 5367 // SIDE = RIGHT 5368 side = ScriptIntrinsicBLAS.RIGHT; 5369 matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 5370 matrixAD.copyFrom(mBLASData.L3_dSYMM_A_nn); 5371 // Reload matrix C, since it was overwritten by BLAS. 5372 matrixCD.copyFrom(mBLASData.L3_dSYMM_C_mn); 5373 mBLAS.DSYMM(side, uplo, alphaD, matrixAD, matrixBD, betaD, matrixCD); 5374 matrixCRef.copyFrom(mBLASData.L3_dSYMM_o_R); 5375 verifyMatrix(matrixCRef, matrixCD); 5376 5377 mRS.finish(); 5378 checkError(); 5379 } 5380 5381 public void test_L3_CSYMM_Correctness() { 5382 int side = ScriptIntrinsicBLAS.LEFT; 5383 int uplo = ScriptIntrinsicBLAS.UPPER; 5384 5385 // Populate input allocations 5386 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM)); 5387 Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 5388 Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 5389 matrixAC.copyFrom(mBLASData.L3_cSYMM_A_mm); 5390 matrixBC.copyFrom(mBLASData.L3_cSYMM_B_mn); 5391 matrixCC.copyFrom(mBLASData.L3_cSYMM_C_mn); 5392 5393 // Default case: SIDE = LEFT 5394 mBLAS.CSYMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC); 5395 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 5396 matrixCRef.copyFrom(mBLASData.L3_cSYMM_o_L); 5397 verifyMatrix(matrixCRef, matrixCC); 5398 5399 // SIDE = RIGHT 5400 side = ScriptIntrinsicBLAS.RIGHT; 5401 matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 5402 matrixAC.copyFrom(mBLASData.L3_cSYMM_A_nn); 5403 // Reload matrix C, since it was overwritten by BLAS. 5404 matrixCC.copyFrom(mBLASData.L3_cSYMM_C_mn); 5405 mBLAS.CSYMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC); 5406 matrixCRef.copyFrom(mBLASData.L3_cSYMM_o_R); 5407 verifyMatrix(matrixCRef, matrixCC); 5408 5409 mRS.finish(); 5410 checkError(); 5411 } 5412 5413 public void test_L3_ZSYMM_Correctness() { 5414 int side = ScriptIntrinsicBLAS.LEFT; 5415 int uplo = ScriptIntrinsicBLAS.UPPER; 5416 5417 // Populate input allocations 5418 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM)); 5419 Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 5420 Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 5421 matrixAZ.copyFrom(mBLASData.L3_zSYMM_A_mm); 5422 matrixBZ.copyFrom(mBLASData.L3_zSYMM_B_mn); 5423 matrixCZ.copyFrom(mBLASData.L3_zSYMM_C_mn); 5424 5425 // Default case: SIDE = LEFT 5426 mBLAS.ZSYMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 5427 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 5428 matrixCRef.copyFrom(mBLASData.L3_zSYMM_o_L); 5429 verifyMatrix(matrixCRef, matrixCZ); 5430 5431 // SIDE = RIGHT 5432 side = ScriptIntrinsicBLAS.RIGHT; 5433 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 5434 matrixAZ.copyFrom(mBLASData.L3_zSYMM_A_nn); 5435 // Reload matrix C, since it was overwritten by BLAS. 5436 matrixCZ.copyFrom(mBLASData.L3_zSYMM_C_mn); 5437 mBLAS.ZSYMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 5438 matrixCRef.copyFrom(mBLASData.L3_zSYMM_o_R); 5439 verifyMatrix(matrixCRef, matrixCZ); 5440 5441 mRS.finish(); 5442 checkError(); 5443 } 5444 5445 5446 private boolean validateHEMM(Element e, int Side, int Uplo, Allocation A, Allocation B, Allocation C) { 5447 if (!validateSide(Side)) { 5448 return false; 5449 } 5450 5451 if (!validateUplo(Uplo)) { 5452 return false; 5453 } 5454 5455 if (!A.getType().getElement().isCompatible(e) || 5456 !B.getType().getElement().isCompatible(e) || 5457 !C.getType().getElement().isCompatible(e)) { 5458 return false; 5459 } 5460 5461 // A must be square; can potentially be relaxed similar to TRSM 5462 int adim = A.getType().getX(); 5463 if (adim != A.getType().getY()) { 5464 return false; 5465 } 5466 if ((Side == ScriptIntrinsicBLAS.LEFT && adim != B.getType().getY()) || 5467 (Side == ScriptIntrinsicBLAS.RIGHT && adim != B.getType().getX())) { 5468 return false; 5469 } 5470 if (B.getType().getX() != C.getType().getX() || 5471 B.getType().getY() != C.getType().getY()) { 5472 return false; 5473 } 5474 5475 return true; 5476 } 5477 5478 private void xHEMM_API_test(int Side, int Uplo, ArrayList<Allocation> mMatrix) { 5479 for (Allocation matA : mMatrix) { 5480 for (Allocation matB : mMatrix) { 5481 for (Allocation matC : mMatrix) { 5482 Element elemA = matA.getType().getElement(); 5483 if (validateHEMM(elemA, Side, Uplo, matA, matB, matC)) { 5484 try { 5485 if (elemA.isCompatible(Element.F32_2(mRS))) { 5486 mBLAS.CHEMM(Side, Uplo, alphaC, matA, matB, betaC, matC); 5487 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 5488 mBLAS.ZHEMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC); 5489 } 5490 } catch (RSRuntimeException e) { 5491 fail("should NOT throw RSRuntimeException"); 5492 } 5493 } else { 5494 try { 5495 mBLAS.CHEMM(Side, Uplo, alphaC, matA, matB, betaC, matC); 5496 fail("should throw RSRuntimeException for CHEMM"); 5497 } catch (RSRuntimeException e) { 5498 } 5499 try { 5500 mBLAS.ZHEMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC); 5501 fail("should throw RSRuntimeException for ZHEMM"); 5502 } catch (RSRuntimeException e) { 5503 } 5504 } 5505 } 5506 } 5507 } 5508 } 5509 5510 public void L3_xHEMM_API(ArrayList<Allocation> mMatrix) { 5511 for (int Side : mSide) { 5512 for (int Uplo : mUplo) { 5513 xHEMM_API_test(Side, Uplo, mMatrix); 5514 } 5515 } 5516 } 5517 5518 public void test_L3_CHEMM_API() { 5519 L3_xHEMM_API(mMatrixC); 5520 } 5521 5522 public void test_L3_ZHEMM_API() { 5523 L3_xHEMM_API(mMatrixZ); 5524 } 5525 5526 public void test_L3_CHEMM_Correctness() { 5527 int side = ScriptIntrinsicBLAS.LEFT; 5528 int uplo = ScriptIntrinsicBLAS.UPPER; 5529 5530 // Populate input allocations 5531 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM)); 5532 Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 5533 Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 5534 matrixAC.copyFrom(mBLASData.L3_cHEMM_A_mm); 5535 matrixBC.copyFrom(mBLASData.L3_cHEMM_B_mn); 5536 matrixCC.copyFrom(mBLASData.L3_cHEMM_C_mn); 5537 5538 // Default case: SIDE = LEFT 5539 mBLAS.CHEMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC); 5540 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 5541 matrixCRef.copyFrom(mBLASData.L3_cHEMM_o_L); 5542 verifyMatrix(matrixCRef, matrixCC); 5543 5544 // SIDE = RIGHT 5545 side = ScriptIntrinsicBLAS.RIGHT; 5546 matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 5547 matrixAC.copyFrom(mBLASData.L3_cHEMM_A_nn); 5548 // Reload matrix C, since it was overwritten by BLAS. 5549 matrixCC.copyFrom(mBLASData.L3_cHEMM_C_mn); 5550 mBLAS.CHEMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC); 5551 matrixCRef.copyFrom(mBLASData.L3_cHEMM_o_R); 5552 verifyMatrix(matrixCRef, matrixCC); 5553 5554 mRS.finish(); 5555 checkError(); 5556 } 5557 5558 public void test_L3_ZHEMM_Correctness() { 5559 int side = ScriptIntrinsicBLAS.LEFT; 5560 int uplo = ScriptIntrinsicBLAS.UPPER; 5561 5562 // Populate input allocations 5563 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM)); 5564 Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 5565 Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 5566 matrixAZ.copyFrom(mBLASData.L3_zHEMM_A_mm); 5567 matrixBZ.copyFrom(mBLASData.L3_zHEMM_B_mn); 5568 matrixCZ.copyFrom(mBLASData.L3_zHEMM_C_mn); 5569 5570 // Default case: SIDE = LEFT 5571 mBLAS.ZHEMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 5572 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 5573 matrixCRef.copyFrom(mBLASData.L3_zHEMM_o_L); 5574 verifyMatrix(matrixCRef, matrixCZ); 5575 5576 // SIDE = RIGHT 5577 side = ScriptIntrinsicBLAS.RIGHT; 5578 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 5579 matrixAZ.copyFrom(mBLASData.L3_zHEMM_A_nn); 5580 // Reload matrix C, since it was overwritten by BLAS. 5581 matrixCZ.copyFrom(mBLASData.L3_zHEMM_C_mn); 5582 mBLAS.ZHEMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 5583 matrixCRef.copyFrom(mBLASData.L3_zHEMM_o_R); 5584 verifyMatrix(matrixCRef, matrixCZ); 5585 5586 mRS.finish(); 5587 checkError(); 5588 } 5589 5590 5591 5592 private boolean validateL3_xSYRK(Element e, int Uplo, int Trans, Allocation A, Allocation C) { 5593 boolean result = true; 5594 result &= validateTranspose(Trans); 5595 result &= validateUplo(Uplo); 5596 result &= validateL3(e, Trans, 0, 0, A, null, C); 5597 5598 return result; 5599 } 5600 5601 private void xSYRK_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) { 5602 for (Allocation matA : mMatrix) { 5603 for (Allocation matC : mMatrix) { 5604 Element elemA = matA.getType().getElement(); 5605 if (validateL3_xSYRK(elemA, Uplo, Trans, matA, matC)) { 5606 try { 5607 if (elemA.isCompatible(Element.F32(mRS))) { 5608 mBLAS.SSYRK(Uplo, Trans, alphaS, matA, betaS, matC); 5609 } else if (elemA.isCompatible(Element.F64(mRS))) { 5610 mBLAS.DSYRK(Uplo, Trans, alphaD, matA, betaD, matC); 5611 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 5612 mBLAS.CSYRK(Uplo, Trans, alphaC, matA, betaC, matC); 5613 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 5614 mBLAS.ZSYRK(Uplo, Trans, alphaZ, matA, betaZ, matC); 5615 } 5616 } catch (RSRuntimeException e) { 5617 fail("should NOT throw RSRuntimeException"); 5618 } 5619 } else { 5620 try { 5621 mBLAS.SSYRK(Uplo, Trans, alphaS, matA, betaS, matC); 5622 fail("should throw RSRuntimeException for SSYRK"); 5623 } catch (RSRuntimeException e) { 5624 } 5625 try { 5626 mBLAS.DSYRK(Uplo, Trans, alphaD, matA, betaD, matC); 5627 fail("should throw RSRuntimeException for DSYRK"); 5628 } catch (RSRuntimeException e) { 5629 } 5630 try { 5631 mBLAS.CSYRK(Uplo, Trans, alphaC, matA, betaC, matC); 5632 fail("should throw RSRuntimeException for CSYRK"); 5633 } catch (RSRuntimeException e) { 5634 } 5635 try { 5636 mBLAS.ZSYRK(Uplo, Trans, alphaZ, matA, betaZ, matC); 5637 fail("should throw RSRuntimeException for ZSYRK"); 5638 } catch (RSRuntimeException e) { 5639 } 5640 } 5641 } 5642 } 5643 } 5644 5645 public void L3_xSYRK_API(ArrayList<Allocation> mMatrix) { 5646 for (int Uplo : mUplo) { 5647 for (int Trans : mTranspose) { 5648 xSYRK_API_test(Uplo, Trans, mMatrix); 5649 } 5650 } 5651 } 5652 5653 public void test_L3_SSYRK_API() { 5654 L3_xSYRK_API(mMatrixS); 5655 } 5656 5657 public void test_L3_DSYRK_API() { 5658 L3_xSYRK_API(mMatrixD); 5659 } 5660 5661 public void test_L3_CSYRK_API() { 5662 L3_xSYRK_API(mMatrixC); 5663 } 5664 5665 public void test_L3_ZSYRK_API() { 5666 L3_xSYRK_API(mMatrixZ); 5667 } 5668 5669 5670 public void test_L3_SSYRK_Correctness() { 5671 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5672 int uplo = ScriptIntrinsicBLAS.UPPER; 5673 5674 // Populate input allocations 5675 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN)); 5676 Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 5677 matrixAS.copyFrom(mBLASData.L3_sSYRK_A_nk); 5678 matrixCS.copyFrom(mBLASData.L3_sSYRK_C_nn); 5679 5680 // Default case: NO_TRANSPOSE 5681 mBLAS.SSYRK(uplo, trans, alphaS, matrixAS, betaS, matrixCS); 5682 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 5683 matrixCRef.copyFrom(mBLASData.L3_sSYRK_o_N); 5684 verifyMatrix(matrixCRef, matrixCS, true); 5685 5686 // Case: TRANSPOSE 5687 matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK)); 5688 matrixAS.copyFrom(mBLASData.L3_sSYRK_A_kn); 5689 // Reload matrix C, since it was overwritten by BLAS. 5690 matrixCS.copyFrom(mBLASData.L3_sSYRK_C_nn); 5691 5692 trans = ScriptIntrinsicBLAS.TRANSPOSE; 5693 mBLAS.SSYRK(uplo, trans, alphaS, matrixAS, betaS, matrixCS); 5694 matrixCRef.copyFrom(mBLASData.L3_sSYRK_o_T); 5695 verifyMatrix(matrixCRef, matrixCS, true); 5696 5697 mRS.finish(); 5698 checkError(); 5699 } 5700 5701 public void test_L3_DSYRK_Correctness() { 5702 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5703 int uplo = ScriptIntrinsicBLAS.UPPER; 5704 5705 // Populate input allocations 5706 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN)); 5707 Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 5708 matrixAD.copyFrom(mBLASData.L3_dSYRK_A_nk); 5709 matrixCD.copyFrom(mBLASData.L3_dSYRK_C_nn); 5710 5711 // Default case: NO_TRANSPOSE 5712 mBLAS.DSYRK(uplo, trans, alphaD, matrixAD, betaD, matrixCD); 5713 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 5714 matrixCRef.copyFrom(mBLASData.L3_dSYRK_o_N); 5715 verifyMatrix(matrixCRef, matrixCD, true); 5716 5717 // Case: TRANSPOSE 5718 matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK)); 5719 matrixAD.copyFrom(mBLASData.L3_dSYRK_A_kn); 5720 // Reload matrix C, since it was overwritten by BLAS. 5721 matrixCD.copyFrom(mBLASData.L3_dSYRK_C_nn); 5722 5723 trans = ScriptIntrinsicBLAS.TRANSPOSE; 5724 mBLAS.DSYRK(uplo, trans, alphaD, matrixAD, betaD, matrixCD); 5725 matrixCRef.copyFrom(mBLASData.L3_dSYRK_o_T); 5726 verifyMatrix(matrixCRef, matrixCD, true); 5727 5728 mRS.finish(); 5729 checkError(); 5730 } 5731 5732 public void test_L3_CSYRK_Correctness() { 5733 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5734 int uplo = ScriptIntrinsicBLAS.UPPER; 5735 5736 // Populate input allocations 5737 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN)); 5738 Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 5739 matrixAC.copyFrom(mBLASData.L3_cSYRK_A_nk); 5740 matrixCC.copyFrom(mBLASData.L3_cSYRK_C_nn); 5741 5742 // Default case: NO_TRANSPOSE 5743 mBLAS.CSYRK(uplo, trans, alphaC, matrixAC, betaC, matrixCC); 5744 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 5745 matrixCRef.copyFrom(mBLASData.L3_cSYRK_o_N); 5746 verifyMatrix(matrixCRef, matrixCC, true); 5747 5748 // Case: TRANSPOSE 5749 matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK)); 5750 matrixAC.copyFrom(mBLASData.L3_cSYRK_A_kn); 5751 // Reload matrix C, since it was overwritten by BLAS. 5752 matrixCC.copyFrom(mBLASData.L3_cSYRK_C_nn); 5753 5754 trans = ScriptIntrinsicBLAS.TRANSPOSE; 5755 mBLAS.CSYRK(uplo, trans, alphaC, matrixAC, betaC, matrixCC); 5756 matrixCRef.copyFrom(mBLASData.L3_cSYRK_o_T); 5757 verifyMatrix(matrixCRef, matrixCC, true); 5758 5759 mRS.finish(); 5760 checkError(); 5761 } 5762 5763 public void test_L3_ZSYRK_Correctness() { 5764 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5765 int uplo = ScriptIntrinsicBLAS.UPPER; 5766 5767 // Populate input allocations 5768 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN)); 5769 Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 5770 matrixAZ.copyFrom(mBLASData.L3_zSYRK_A_nk); 5771 matrixCZ.copyFrom(mBLASData.L3_zSYRK_C_nn); 5772 5773 // Default case: NO_TRANSPOSE 5774 mBLAS.ZSYRK(uplo, trans, alphaZ, matrixAZ, betaZ, matrixCZ); 5775 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 5776 matrixCRef.copyFrom(mBLASData.L3_zSYRK_o_N); 5777 verifyMatrix(matrixCRef, matrixCZ, true); 5778 5779 // Case: TRANSPOSE 5780 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK)); 5781 matrixAZ.copyFrom(mBLASData.L3_zSYRK_A_kn); 5782 // Reload matrix C, since it was overwritten by BLAS. 5783 matrixCZ.copyFrom(mBLASData.L3_zSYRK_C_nn); 5784 5785 trans = ScriptIntrinsicBLAS.TRANSPOSE; 5786 mBLAS.ZSYRK(uplo, trans, alphaZ, matrixAZ, betaZ, matrixCZ); 5787 matrixCRef.copyFrom(mBLASData.L3_zSYRK_o_T); 5788 verifyMatrix(matrixCRef, matrixCZ, true); 5789 5790 mRS.finish(); 5791 checkError(); 5792 } 5793 5794 5795 private boolean validateHERK(Element e, int Uplo, int Trans, Allocation A, Allocation C) { 5796 if (!validateUplo(Uplo)) { 5797 return false; 5798 } 5799 if (!A.getType().getElement().isCompatible(e) || 5800 !C.getType().getElement().isCompatible(e)) { 5801 return false; 5802 } 5803 if (!validateConjTranspose(Trans)) { 5804 return false; 5805 } 5806 int cdim = C.getType().getX(); 5807 if (cdim != C.getType().getY()) { 5808 return false; 5809 } 5810 if (Trans == ScriptIntrinsicBLAS.NO_TRANSPOSE) { 5811 if (cdim != A.getType().getY()) { 5812 return false; 5813 } 5814 } else { 5815 if (cdim != A.getType().getX()) { 5816 return false; 5817 } 5818 } 5819 return true; 5820 } 5821 5822 private void xHERK_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) { 5823 for (Allocation matA : mMatrix) { 5824 for (Allocation matC : mMatrix) { 5825 Element elemA = matA.getType().getElement(); 5826 if (validateHERK(elemA, Uplo, Trans, matA, matC)) { 5827 try { 5828 if (elemA.isCompatible(Element.F32_2(mRS))) { 5829 mBLAS.CHERK(Uplo, Trans, alphaS, matA, betaS, matC); 5830 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 5831 mBLAS.ZHERK(Uplo, Trans, alphaD, matA, betaD, matC); 5832 } 5833 } catch (RSRuntimeException e) { 5834 fail("should NOT throw RSRuntimeException"); 5835 } 5836 } else { 5837 try { 5838 mBLAS.CHERK(Uplo, Trans, alphaS, matA, betaS, matC); 5839 fail("should throw RSRuntimeException for CHERK"); 5840 } catch (RSRuntimeException e) { 5841 } 5842 try { 5843 mBLAS.ZHERK(Uplo, Trans, alphaD, matA, betaD, matC); 5844 fail("should throw RSRuntimeException for ZHERK"); 5845 } catch (RSRuntimeException e) { 5846 } 5847 } 5848 } 5849 } 5850 } 5851 5852 public void L3_xHERK_API(ArrayList<Allocation> mMatrix) { 5853 for (int Uplo : mUplo) { 5854 for (int Trans : mTranspose) { 5855 xHERK_API_test(Uplo, Trans, mMatrix); 5856 } 5857 } 5858 } 5859 5860 public void test_L3_CHERK_API() { 5861 L3_xHERK_API(mMatrixC); 5862 } 5863 5864 public void test_L3_ZHERK_API() { 5865 L3_xHERK_API(mMatrixZ); 5866 } 5867 5868 public void test_L3_CHERK_Correctness() { 5869 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5870 int uplo = ScriptIntrinsicBLAS.UPPER; 5871 5872 // Populate input allocations 5873 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN)); 5874 Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 5875 matrixAC.copyFrom(mBLASData.L3_cHERK_A_nk); 5876 matrixCC.copyFrom(mBLASData.L3_cHERK_C_nn); 5877 5878 // Default case: NO_TRANSPOSE 5879 mBLAS.CHERK(uplo, trans, alphaS, matrixAC, betaS, matrixCC); 5880 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 5881 matrixCRef.copyFrom(mBLASData.L3_cHERK_o_N); 5882 verifyMatrix(matrixCRef, matrixCC, true); 5883 5884 // Case: TRANSPOSE 5885 matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK)); 5886 matrixAC.copyFrom(mBLASData.L3_cHERK_A_kn); 5887 // Reload matrix C, since it was overwritten by BLAS. 5888 matrixCC.copyFrom(mBLASData.L3_cHERK_C_nn); 5889 5890 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5891 mBLAS.CHERK(uplo, trans, alphaS, matrixAC, betaS, matrixCC); 5892 matrixCRef.copyFrom(mBLASData.L3_cHERK_o_H); 5893 verifyMatrix(matrixCRef, matrixCC, true); 5894 5895 mRS.finish(); 5896 checkError(); 5897 } 5898 5899 public void test_L3_ZHERK_Correctness() { 5900 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5901 int uplo = ScriptIntrinsicBLAS.UPPER; 5902 5903 // Populate input allocations 5904 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN)); 5905 Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 5906 matrixAZ.copyFrom(mBLASData.L3_zHERK_A_nk); 5907 matrixCZ.copyFrom(mBLASData.L3_zHERK_C_nn); 5908 5909 // Default case: NO_TRANSPOSE 5910 mBLAS.ZHERK(uplo, trans, alphaD, matrixAZ, betaD, matrixCZ); 5911 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 5912 matrixCRef.copyFrom(mBLASData.L3_zHERK_o_N); 5913 verifyMatrix(matrixCRef, matrixCZ, true); 5914 5915 // Case: TRANSPOSE 5916 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK)); 5917 matrixAZ.copyFrom(mBLASData.L3_zHERK_A_kn); 5918 // Reload matrix C, since it was overwritten by BLAS. 5919 matrixCZ.copyFrom(mBLASData.L3_zHERK_C_nn); 5920 5921 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5922 mBLAS.ZHERK(uplo, trans, alphaD, matrixAZ, betaD, matrixCZ); 5923 matrixCRef.copyFrom(mBLASData.L3_zHERK_o_H); 5924 verifyMatrix(matrixCRef, matrixCZ, true); 5925 5926 mRS.finish(); 5927 checkError(); 5928 } 5929 5930 5931 private boolean validateSYR2K(Element e, int Uplo, int Trans, Allocation A, Allocation B, Allocation C) { 5932 if (!validateTranspose(Trans)) { 5933 return false; 5934 } 5935 if (!validateUplo(Uplo)) { 5936 return false; 5937 } 5938 5939 if (!A.getType().getElement().isCompatible(e) || 5940 !B.getType().getElement().isCompatible(e) || 5941 !C.getType().getElement().isCompatible(e)) { 5942 return false; 5943 } 5944 int Cdim = -1; 5945 // A is n x k if no transpose, k x n if transpose 5946 // C is n x n 5947 if (Trans == ScriptIntrinsicBLAS.TRANSPOSE) { 5948 // check columns versus C 5949 Cdim = A.getType().getX(); 5950 } else { 5951 // check rows versus C 5952 Cdim = A.getType().getY(); 5953 } 5954 if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) { 5955 return false; 5956 } 5957 // A dims == B dims 5958 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 5959 return false; 5960 } 5961 return true; 5962 } 5963 5964 private void xSYR2K_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) { 5965 for (Allocation matA : mMatrix) { 5966 for (Allocation matB : mMatrix) { 5967 for (Allocation matC : mMatrix) { 5968 Element elemA = matA.getType().getElement(); 5969 if (validateSYR2K(elemA, Uplo, Trans, matA, matB, matC)) { 5970 try { 5971 if (elemA.isCompatible(Element.F32(mRS))) { 5972 mBLAS.SSYR2K(Uplo, Trans, alphaS, matA, matB, betaS, matC); 5973 } else if (elemA.isCompatible(Element.F64(mRS))) { 5974 mBLAS.DSYR2K(Uplo, Trans, alphaD, matA, matB, betaD, matC); 5975 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 5976 mBLAS.CSYR2K(Uplo, Trans, alphaC, matA, matB, betaC, matC); 5977 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 5978 mBLAS.ZSYR2K(Uplo, Trans, alphaZ, matA, matB, betaZ, matC); 5979 } 5980 } catch (RSRuntimeException e) { 5981 fail("should NOT throw RSRuntimeException"); 5982 } 5983 } else { 5984 try { 5985 mBLAS.SSYR2K(Uplo, Trans, alphaS, matA, matB, betaS, matC); 5986 fail("should throw RSRuntimeException for SSYR2K"); 5987 } catch (RSRuntimeException e) { 5988 } 5989 try { 5990 mBLAS.DSYR2K(Uplo, Trans, alphaD, matA, matB, betaD, matC); 5991 fail("should throw RSRuntimeException for DSYR2K"); 5992 } catch (RSRuntimeException e) { 5993 } 5994 try { 5995 mBLAS.CSYR2K(Uplo, Trans, alphaC, matA, matB, betaC, matC); 5996 fail("should throw RSRuntimeException for CSYR2K"); 5997 } catch (RSRuntimeException e) { 5998 } 5999 try { 6000 mBLAS.ZSYR2K(Uplo, Trans, alphaZ, matA, matB, betaZ, matC); 6001 fail("should throw RSRuntimeException for ZSYR2K"); 6002 } catch (RSRuntimeException e) { 6003 } 6004 } 6005 } 6006 } 6007 } 6008 } 6009 6010 public void L3_xSYR2K_API(ArrayList<Allocation> mMatrix) { 6011 for (int Uplo : mUplo) { 6012 for (int Trans : mTranspose) { 6013 xSYR2K_API_test(Uplo, Trans, mMatrix); 6014 } 6015 } 6016 } 6017 6018 public void test_L3_SSYR2K_API() { 6019 L3_xSYR2K_API(mMatrixS); 6020 } 6021 6022 public void test_L3_DSYR2K_API() { 6023 L3_xSYR2K_API(mMatrixD); 6024 } 6025 6026 public void test_L3_CSYR2K_API() { 6027 L3_xSYR2K_API(mMatrixC); 6028 } 6029 6030 public void test_L3_ZSYR2K_API() { 6031 L3_xSYR2K_API(mMatrixZ); 6032 } 6033 6034 6035 public void test_L3_SSYR2K_Correctness() { 6036 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6037 int uplo = ScriptIntrinsicBLAS.UPPER; 6038 6039 // Populate input allocations 6040 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN)); 6041 Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN)); 6042 Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 6043 matrixAS.copyFrom(mBLASData.L3_sSYR2K_A_nk); 6044 matrixBS.copyFrom(mBLASData.L3_sSYR2K_B_nk); 6045 matrixCS.copyFrom(mBLASData.L3_sSYR2K_C_nn); 6046 6047 // Default case: NO_TRANSPOSE 6048 mBLAS.SSYR2K(uplo, trans, alphaS, matrixAS, matrixBS, betaS, matrixCS); 6049 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 6050 matrixCRef.copyFrom(mBLASData.L3_sSYR2K_o_N); 6051 verifyMatrix(matrixCRef, matrixCS, true); 6052 6053 // Case: TRANSPOSE 6054 matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK)); 6055 matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK)); 6056 matrixAS.copyFrom(mBLASData.L3_sSYR2K_A_kn); 6057 matrixBS.copyFrom(mBLASData.L3_sSYR2K_B_kn); 6058 // Reload matrix C, since it was overwritten by BLAS. 6059 matrixCS.copyFrom(mBLASData.L3_sSYR2K_C_nn); 6060 6061 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6062 mBLAS.SSYR2K(uplo, trans, alphaS, matrixAS, matrixBS, betaS, matrixCS); 6063 matrixCRef.copyFrom(mBLASData.L3_sSYR2K_o_T); 6064 verifyMatrix(matrixCRef, matrixCS, true); 6065 6066 mRS.finish(); 6067 checkError(); 6068 } 6069 6070 public void test_L3_DSYR2K_Correctness() { 6071 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6072 int uplo = ScriptIntrinsicBLAS.UPPER; 6073 6074 // Populate input allocations 6075 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN)); 6076 Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN)); 6077 Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 6078 matrixAD.copyFrom(mBLASData.L3_dSYR2K_A_nk); 6079 matrixBD.copyFrom(mBLASData.L3_dSYR2K_B_nk); 6080 matrixCD.copyFrom(mBLASData.L3_dSYR2K_C_nn); 6081 6082 // Default case: NO_TRANSPOSE 6083 mBLAS.DSYR2K(uplo, trans, alphaD, matrixAD, matrixBD, betaD, matrixCD); 6084 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 6085 matrixCRef.copyFrom(mBLASData.L3_dSYR2K_o_N); 6086 verifyMatrix(matrixCRef, matrixCD, true); 6087 6088 // Case: TRANSPOSE 6089 matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK)); 6090 matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK)); 6091 matrixAD.copyFrom(mBLASData.L3_dSYR2K_A_kn); 6092 matrixBD.copyFrom(mBLASData.L3_dSYR2K_B_kn); 6093 // Reload matrix C, since it was overwritten by BLAS. 6094 matrixCD.copyFrom(mBLASData.L3_dSYR2K_C_nn); 6095 6096 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6097 mBLAS.DSYR2K(uplo, trans, alphaD, matrixAD, matrixBD, betaD, matrixCD); 6098 matrixCRef.copyFrom(mBLASData.L3_dSYR2K_o_T); 6099 verifyMatrix(matrixCRef, matrixCD, true); 6100 6101 mRS.finish(); 6102 checkError(); 6103 } 6104 6105 public void test_L3_CSYR2K_Correctness() { 6106 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6107 int uplo = ScriptIntrinsicBLAS.UPPER; 6108 6109 // Populate input allocations 6110 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN)); 6111 Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN)); 6112 Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 6113 matrixAC.copyFrom(mBLASData.L3_cSYR2K_A_nk); 6114 matrixBC.copyFrom(mBLASData.L3_cSYR2K_B_nk); 6115 matrixCC.copyFrom(mBLASData.L3_cSYR2K_C_nn); 6116 6117 // Default case: NO_TRANSPOSE 6118 mBLAS.CSYR2K(uplo, trans, alphaC, matrixAC, matrixBC, betaC, matrixCC); 6119 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 6120 matrixCRef.copyFrom(mBLASData.L3_cSYR2K_o_N); 6121 verifyMatrix(matrixCRef, matrixCC, true); 6122 6123 // Case: TRANSPOSE 6124 matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK)); 6125 matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK)); 6126 matrixAC.copyFrom(mBLASData.L3_cSYR2K_A_kn); 6127 matrixBC.copyFrom(mBLASData.L3_cSYR2K_B_kn); 6128 // Reload matrix C, since it was overwritten by BLAS. 6129 matrixCC.copyFrom(mBLASData.L3_cSYR2K_C_nn); 6130 6131 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6132 mBLAS.CSYR2K(uplo, trans, alphaC, matrixAC, matrixBC, betaC, matrixCC); 6133 matrixCRef.copyFrom(mBLASData.L3_cSYR2K_o_T); 6134 verifyMatrix(matrixCRef, matrixCC, true); 6135 6136 mRS.finish(); 6137 checkError(); 6138 } 6139 6140 public void test_L3_ZSYR2K_Correctness() { 6141 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6142 int uplo = ScriptIntrinsicBLAS.UPPER; 6143 6144 // Populate input allocations 6145 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN)); 6146 Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN)); 6147 Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 6148 matrixAZ.copyFrom(mBLASData.L3_zSYR2K_A_nk); 6149 matrixBZ.copyFrom(mBLASData.L3_zSYR2K_B_nk); 6150 matrixCZ.copyFrom(mBLASData.L3_zSYR2K_C_nn); 6151 6152 // Default case: NO_TRANSPOSE 6153 mBLAS.ZSYR2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 6154 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 6155 matrixCRef.copyFrom(mBLASData.L3_zSYR2K_o_N); 6156 verifyMatrix(matrixCRef, matrixCZ, true); 6157 6158 // Case: TRANSPOSE 6159 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK)); 6160 matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK)); 6161 matrixAZ.copyFrom(mBLASData.L3_zSYR2K_A_kn); 6162 matrixBZ.copyFrom(mBLASData.L3_zSYR2K_B_kn); 6163 // Reload matrix C, since it was overwritten by BLAS. 6164 matrixCZ.copyFrom(mBLASData.L3_zSYR2K_C_nn); 6165 6166 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6167 mBLAS.ZSYR2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 6168 matrixCRef.copyFrom(mBLASData.L3_zSYR2K_o_T); 6169 verifyMatrix(matrixCRef, matrixCZ, true); 6170 6171 mRS.finish(); 6172 checkError(); 6173 } 6174 6175 6176 private boolean validateHER2K(Element e, int Uplo, int Trans, Allocation A, Allocation B, Allocation C) { 6177 if (!validateUplo(Uplo)) { 6178 return false; 6179 } 6180 if (!A.getType().getElement().isCompatible(e) || 6181 !B.getType().getElement().isCompatible(e) || 6182 !C.getType().getElement().isCompatible(e)) { 6183 return false; 6184 } 6185 if (!validateConjTranspose(Trans)) { 6186 return false; 6187 } 6188 int cdim = C.getType().getX(); 6189 if (cdim != C.getType().getY()) { 6190 return false; 6191 } 6192 if (Trans == ScriptIntrinsicBLAS.NO_TRANSPOSE) { 6193 if (A.getType().getY() != cdim) { 6194 return false; 6195 } 6196 } else { 6197 if (A.getType().getX() != cdim) { 6198 return false; 6199 } 6200 } 6201 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 6202 return false; 6203 } 6204 return true; 6205 } 6206 6207 private void xHER2K_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) { 6208 for (Allocation matA : mMatrix) { 6209 for (Allocation matB : mMatrix) { 6210 for (Allocation matC : mMatrix) { 6211 Element elemA = matA.getType().getElement(); 6212 if (validateHER2K(elemA, Uplo, Trans, matA, matB, matC)) { 6213 try { 6214 if (elemA.isCompatible(Element.F32_2(mRS))) { 6215 mBLAS.CHER2K(Uplo, Trans, alphaC, matA, matB, betaS, matC); 6216 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 6217 mBLAS.ZHER2K(Uplo, Trans, alphaZ, matA, matB, betaD, matC); 6218 } 6219 } catch (RSRuntimeException e) { 6220 fail("should NOT throw RSRuntimeException"); 6221 } 6222 } else { 6223 try { 6224 mBLAS.CHER2K(Uplo, Trans, alphaC, matA, matB, betaS, matC); 6225 fail("should throw RSRuntimeException for CHER2K"); 6226 } catch (RSRuntimeException e) { 6227 } 6228 try { 6229 mBLAS.ZHER2K(Uplo, Trans, alphaZ, matA, matB, betaD, matC); 6230 fail("should throw RSRuntimeException for ZHER2K"); 6231 } catch (RSRuntimeException e) { 6232 } 6233 } 6234 } 6235 } 6236 } 6237 } 6238 6239 public void L3_xHER2K_API(ArrayList<Allocation> mMatrix) { 6240 for (int Uplo : mUplo) { 6241 for (int Trans : mTranspose) { 6242 xHER2K_API_test(Uplo, Trans, mMatrix); 6243 } 6244 } 6245 } 6246 6247 public void test_L3_CHER2K_API() { 6248 L3_xHER2K_API(mMatrixC); 6249 } 6250 6251 public void test_L3_ZHER2K_API() { 6252 L3_xHER2K_API(mMatrixZ); 6253 } 6254 6255 public void test_L3_CHER2K_Correctness() { 6256 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6257 int uplo = ScriptIntrinsicBLAS.UPPER; 6258 6259 // Populate input allocations 6260 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN)); 6261 Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN)); 6262 Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 6263 matrixAC.copyFrom(mBLASData.L3_cHER2K_A_nk); 6264 matrixBC.copyFrom(mBLASData.L3_cHER2K_B_nk); 6265 matrixCC.copyFrom(mBLASData.L3_cHER2K_C_nn); 6266 6267 // Default case: NO_TRANSPOSE 6268 mBLAS.CHER2K(uplo, trans, alphaC, matrixAC, matrixBC, betaS, matrixCC); 6269 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 6270 matrixCRef.copyFrom(mBLASData.L3_cHER2K_o_N); 6271 verifyMatrix(matrixCRef, matrixCC, true); 6272 6273 // Case: TRANSPOSE 6274 matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK)); 6275 matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK)); 6276 matrixAC.copyFrom(mBLASData.L3_cHER2K_A_kn); 6277 matrixBC.copyFrom(mBLASData.L3_cHER2K_B_kn); 6278 // Reload matrix C, since it was overwritten by BLAS. 6279 matrixCC.copyFrom(mBLASData.L3_cHER2K_C_nn); 6280 6281 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 6282 mBLAS.CHER2K(uplo, trans, alphaC, matrixAC, matrixBC, betaS, matrixCC); 6283 matrixCRef.copyFrom(mBLASData.L3_cHER2K_o_H); 6284 verifyMatrix(matrixCRef, matrixCC, true); 6285 6286 mRS.finish(); 6287 checkError(); 6288 } 6289 6290 public void test_L3_ZHER2K_Correctness() { 6291 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6292 int uplo = ScriptIntrinsicBLAS.UPPER; 6293 6294 // Populate input allocations 6295 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN)); 6296 Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN)); 6297 Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 6298 matrixAZ.copyFrom(mBLASData.L3_zHER2K_A_nk); 6299 matrixBZ.copyFrom(mBLASData.L3_zHER2K_B_nk); 6300 matrixCZ.copyFrom(mBLASData.L3_zHER2K_C_nn); 6301 6302 // Default case: NO_TRANSPOSE 6303 mBLAS.ZHER2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaD, matrixCZ); 6304 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 6305 matrixCRef.copyFrom(mBLASData.L3_zHER2K_o_N); 6306 verifyMatrix(matrixCRef, matrixCZ, true); 6307 6308 // Case: TRANSPOSE 6309 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK)); 6310 matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK)); 6311 matrixAZ.copyFrom(mBLASData.L3_zHER2K_A_kn); 6312 matrixBZ.copyFrom(mBLASData.L3_zHER2K_B_kn); 6313 // Reload matrix C, since it was overwritten by BLAS. 6314 matrixCZ.copyFrom(mBLASData.L3_zHER2K_C_nn); 6315 6316 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 6317 mBLAS.ZHER2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaD, matrixCZ); 6318 matrixCRef.copyFrom(mBLASData.L3_zHER2K_o_H); 6319 verifyMatrix(matrixCRef, matrixCZ, true); 6320 6321 mRS.finish(); 6322 checkError(); 6323 } 6324 6325 6326 private boolean validateTRMM(Element e, int Side, int Uplo, int TransA, int Diag, Allocation A, Allocation B) { 6327 if (!validateSide(Side)) { 6328 return false; 6329 } 6330 if (!validateUplo(Uplo)) { 6331 return false; 6332 } 6333 if (!validateTranspose(TransA)) { 6334 return false; 6335 } 6336 if (!validateDiag(Diag)) { 6337 return false; 6338 } 6339 int aM = -1, aN = -1, bM = -1, bN = -1; 6340 if (!A.getType().getElement().isCompatible(e) || 6341 !B.getType().getElement().isCompatible(e)) { 6342 return false; 6343 } 6344 6345 aM = A.getType().getY(); 6346 aN = A.getType().getX(); 6347 if (aM != aN) { 6348 return false; 6349 } 6350 6351 bM = B.getType().getY(); 6352 bN = B.getType().getX(); 6353 if (Side == ScriptIntrinsicBLAS.LEFT) { 6354 if (aN != bM) { 6355 return false; 6356 } 6357 } else { 6358 if (bN != aM) { 6359 return false; 6360 } 6361 } 6362 return true; 6363 } 6364 6365 private void xTRMM_API_test(int Side, int Uplo, int TransA, int Diag, ArrayList<Allocation> mMatrix) { 6366 for (Allocation matA : mMatrix) { 6367 for (Allocation matB : mMatrix) { 6368 Element elemA = matA.getType().getElement(); 6369 if (validateTRMM(elemA, Side, Uplo, TransA, Diag, matA, matB)) { 6370 try { 6371 if (elemA.isCompatible(Element.F32(mRS))) { 6372 mBLAS.STRMM(Side, Uplo, TransA, Diag, alphaS, matA, matB); 6373 } else if (elemA.isCompatible(Element.F64(mRS))) { 6374 mBLAS.DTRMM(Side, Uplo, TransA, Diag, alphaD, matA, matB); 6375 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 6376 mBLAS.CTRMM(Side, Uplo, TransA, Diag, alphaC, matA, matB); 6377 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 6378 mBLAS.ZTRMM(Side, Uplo, TransA, Diag, alphaZ, matA, matB); 6379 } 6380 } catch (RSRuntimeException e) { 6381 fail("should NOT throw RSRuntimeException"); 6382 } 6383 } else { 6384 try { 6385 mBLAS.STRMM(Side, Uplo, TransA, Diag, alphaS, matA, matB); 6386 fail("should throw RSRuntimeException for STRMM"); 6387 } catch (RSRuntimeException e) { 6388 } 6389 try { 6390 mBLAS.DTRMM(Side, Uplo, TransA, Diag, alphaD, matA, matB); 6391 fail("should throw RSRuntimeException for DTRMM"); 6392 } catch (RSRuntimeException e) { 6393 } 6394 try { 6395 mBLAS.CTRMM(Side, Uplo, TransA, Diag, alphaC, matA, matB); 6396 fail("should throw RSRuntimeException for CTRMM"); 6397 } catch (RSRuntimeException e) { 6398 } 6399 try { 6400 mBLAS.ZTRMM(Side, Uplo, TransA, Diag, alphaZ, matA, matB); 6401 fail("should throw RSRuntimeException for ZTRMM"); 6402 } catch (RSRuntimeException e) { 6403 } 6404 } 6405 } 6406 } 6407 } 6408 6409 public void L3_xTRMM_API(ArrayList<Allocation> mMatrix) { 6410 for (int Side : mSide) { 6411 for (int Uplo : mUplo) { 6412 for (int TransA : mTranspose) { 6413 for (int Diag : mDiag) { 6414 xTRMM_API_test(Side, Uplo, TransA, Diag, mMatrix); 6415 } 6416 } 6417 } 6418 } 6419 } 6420 6421 public void test_L3_STRMM_API() { 6422 L3_xTRMM_API(mMatrixS); 6423 } 6424 6425 public void test_L3_DTRMM_API() { 6426 L3_xTRMM_API(mMatrixD); 6427 } 6428 6429 public void test_L3_CTRMM_API() { 6430 L3_xTRMM_API(mMatrixC); 6431 } 6432 6433 public void test_L3_ZTRMM_API() { 6434 L3_xTRMM_API(mMatrixZ); 6435 } 6436 6437 6438 public void test_L3_STRMM_Correctness() { 6439 int side = ScriptIntrinsicBLAS.LEFT; 6440 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6441 int uplo = ScriptIntrinsicBLAS.UPPER; 6442 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6443 6444 // Populate input allocations 6445 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dM)); 6446 Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 6447 matrixAS.copyFrom(mBLASData.L3_sTRMM_A_mm); 6448 matrixBS.copyFrom(mBLASData.L3_sTRMM_B_mn); 6449 6450 // Default case: LEFT, UPPER, NO_TRANSPOSE 6451 mBLAS.STRMM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS); 6452 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 6453 matrixBRef.copyFrom(mBLASData.L3_sTRMM_o_LUN); 6454 verifyMatrix(matrixBRef, matrixBS); 6455 6456 // Case: RIGHT, LOWER, TRANSPOSE 6457 matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 6458 matrixAS.copyFrom(mBLASData.L3_sTRMM_A_nn); 6459 // Reload matrix B, since it was overwritten by BLAS. 6460 matrixBS.copyFrom(mBLASData.L3_sTRMM_B_mn); 6461 6462 side = ScriptIntrinsicBLAS.RIGHT; 6463 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6464 uplo = ScriptIntrinsicBLAS.LOWER; 6465 mBLAS.STRMM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS); 6466 matrixBRef.copyFrom(mBLASData.L3_sTRMM_o_RLT); 6467 verifyMatrix(matrixBRef, matrixBS); 6468 6469 mRS.finish(); 6470 checkError(); 6471 } 6472 6473 public void test_L3_DTRMM_Correctness() { 6474 int side = ScriptIntrinsicBLAS.LEFT; 6475 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6476 int uplo = ScriptIntrinsicBLAS.UPPER; 6477 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6478 6479 // Populate input allocations 6480 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dM)); 6481 Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 6482 matrixAD.copyFrom(mBLASData.L3_dTRMM_A_mm); 6483 matrixBD.copyFrom(mBLASData.L3_dTRMM_B_mn); 6484 6485 // Default case: LEFT, UPPER, NO_TRANSPOSE 6486 mBLAS.DTRMM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD); 6487 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 6488 matrixBRef.copyFrom(mBLASData.L3_dTRMM_o_LUN); 6489 verifyMatrix(matrixBRef, matrixBD); 6490 6491 // Case: RIGHT, LOWER, TRANSPOSE 6492 matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 6493 matrixAD.copyFrom(mBLASData.L3_dTRMM_A_nn); 6494 // Reload matrix B, since it was overwritten by BLAS. 6495 matrixBD.copyFrom(mBLASData.L3_dTRMM_B_mn); 6496 6497 side = ScriptIntrinsicBLAS.RIGHT; 6498 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6499 uplo = ScriptIntrinsicBLAS.LOWER; 6500 mBLAS.DTRMM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD); 6501 matrixBRef.copyFrom(mBLASData.L3_dTRMM_o_RLT); 6502 verifyMatrix(matrixBRef, matrixBD); 6503 6504 mRS.finish(); 6505 checkError(); 6506 } 6507 6508 public void test_L3_CTRMM_Correctness() { 6509 int side = ScriptIntrinsicBLAS.LEFT; 6510 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6511 int uplo = ScriptIntrinsicBLAS.UPPER; 6512 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6513 6514 // Populate input allocations 6515 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM)); 6516 Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 6517 matrixAC.copyFrom(mBLASData.L3_cTRMM_A_mm); 6518 matrixBC.copyFrom(mBLASData.L3_cTRMM_B_mn); 6519 6520 // Default case: LEFT, UPPER, NO_TRANSPOSE 6521 mBLAS.CTRMM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC); 6522 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 6523 matrixBRef.copyFrom(mBLASData.L3_cTRMM_o_LUN); 6524 verifyMatrix(matrixBRef, matrixBC); 6525 6526 // Case: RIGHT, LOWER, TRANSPOSE 6527 matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 6528 matrixAC.copyFrom(mBLASData.L3_cTRMM_A_nn); 6529 // Reload matrix B, since it was overwritten by BLAS. 6530 matrixBC.copyFrom(mBLASData.L3_cTRMM_B_mn); 6531 6532 side = ScriptIntrinsicBLAS.RIGHT; 6533 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6534 uplo = ScriptIntrinsicBLAS.LOWER; 6535 mBLAS.CTRMM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC); 6536 matrixBRef.copyFrom(mBLASData.L3_cTRMM_o_RLT); 6537 verifyMatrix(matrixBRef, matrixBC); 6538 6539 mRS.finish(); 6540 checkError(); 6541 } 6542 6543 public void test_L3_ZTRMM_Correctness() { 6544 int side = ScriptIntrinsicBLAS.LEFT; 6545 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6546 int uplo = ScriptIntrinsicBLAS.UPPER; 6547 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6548 6549 // Populate input allocations 6550 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM)); 6551 Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 6552 matrixAZ.copyFrom(mBLASData.L3_zTRMM_A_mm); 6553 matrixBZ.copyFrom(mBLASData.L3_zTRMM_B_mn); 6554 6555 // Default case: LEFT, UPPER, NO_TRANSPOSE 6556 mBLAS.ZTRMM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ); 6557 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 6558 matrixBRef.copyFrom(mBLASData.L3_zTRMM_o_LUN); 6559 verifyMatrix(matrixBRef, matrixBZ); 6560 6561 // Case: RIGHT, LOWER, TRANSPOSE 6562 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 6563 matrixAZ.copyFrom(mBLASData.L3_zTRMM_A_nn); 6564 // Reload matrix B, since it was overwritten by BLAS. 6565 matrixBZ.copyFrom(mBLASData.L3_zTRMM_B_mn); 6566 6567 side = ScriptIntrinsicBLAS.RIGHT; 6568 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6569 uplo = ScriptIntrinsicBLAS.LOWER; 6570 mBLAS.ZTRMM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ); 6571 matrixBRef.copyFrom(mBLASData.L3_zTRMM_o_RLT); 6572 verifyMatrix(matrixBRef, matrixBZ); 6573 6574 mRS.finish(); 6575 checkError(); 6576 } 6577 6578 6579 private boolean validateTRSM(Element e, int Side, int Uplo, int TransA, int Diag, Allocation A, Allocation B) { 6580 int adim = -1, bM = -1, bN = -1; 6581 if (!validateSide(Side)) { 6582 return false; 6583 } 6584 if (!validateTranspose(TransA)) { 6585 return false; 6586 } 6587 if (!validateUplo(Uplo)) { 6588 return false; 6589 } 6590 if (!validateDiag(Diag)) { 6591 return false; 6592 } 6593 if (!A.getType().getElement().isCompatible(e) || 6594 !B.getType().getElement().isCompatible(e)) { 6595 return false; 6596 } 6597 adim = A.getType().getX(); 6598 if (adim != A.getType().getY()) { 6599 // this may be unnecessary, the restriction could potentially be relaxed 6600 // A needs to contain at least that symmetric matrix but could theoretically be larger 6601 // for now we assume adapters are sufficient, will reevaluate in the future 6602 return false; 6603 } 6604 bM = B.getType().getY(); 6605 bN = B.getType().getX(); 6606 if (Side == ScriptIntrinsicBLAS.LEFT) { 6607 // A is M*M 6608 if (adim != bM) { 6609 return false; 6610 } 6611 } else { 6612 // A is N*N 6613 if (adim != bN) { 6614 return false; 6615 } 6616 } 6617 return true; 6618 } 6619 6620 private void xTRSM_API_test(int Side, int Uplo, int TransA, int Diag, ArrayList<Allocation> mMatrix) { 6621 for (Allocation matA : mMatrix) { 6622 for (Allocation matB : mMatrix) { 6623 Element elemA = matA.getType().getElement(); 6624 if (validateTRSM(elemA, Side, Uplo, TransA, Diag, matA, matB)) { 6625 try { 6626 if (elemA.isCompatible(Element.F32(mRS))) { 6627 mBLAS.STRSM(Side, Uplo, TransA, Diag, alphaS, matA, matB); 6628 } else if (elemA.isCompatible(Element.F64(mRS))) { 6629 mBLAS.DTRSM(Side, Uplo, TransA, Diag, alphaD, matA, matB); 6630 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 6631 mBLAS.CTRSM(Side, Uplo, TransA, Diag, alphaC, matA, matB); 6632 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 6633 mBLAS.ZTRSM(Side, Uplo, TransA, Diag, alphaZ, matA, matB); 6634 } 6635 } catch (RSRuntimeException e) { 6636 fail("should NOT throw RSRuntimeException"); 6637 } 6638 } else { 6639 try { 6640 mBLAS.STRSM(Side, Uplo, TransA, Diag, alphaS, matA, matB); 6641 fail("should throw RSRuntimeException for STRSM"); 6642 } catch (RSRuntimeException e) { 6643 } 6644 try { 6645 mBLAS.DTRSM(Side, Uplo, TransA, Diag, alphaD, matA, matB); 6646 fail("should throw RSRuntimeException for DTRSM"); 6647 } catch (RSRuntimeException e) { 6648 } 6649 try { 6650 mBLAS.CTRSM(Side, Uplo, TransA, Diag, alphaC, matA, matB); 6651 fail("should throw RSRuntimeException for CTRSM"); 6652 } catch (RSRuntimeException e) { 6653 } 6654 try { 6655 mBLAS.ZTRSM(Side, Uplo, TransA, Diag, alphaZ, matA, matB); 6656 fail("should throw RSRuntimeException for ZTRSM"); 6657 } catch (RSRuntimeException e) { 6658 } 6659 } 6660 } 6661 } 6662 } 6663 6664 public void L3_xTRSM_API(ArrayList<Allocation> mMatrix) { 6665 for (int Side : mSide) { 6666 for (int Uplo : mUplo) { 6667 for (int TransA : mTranspose) { 6668 for (int Diag : mDiag) { 6669 xTRSM_API_test(Side, Uplo, TransA, Diag, mMatrix); 6670 } 6671 } 6672 } 6673 } 6674 } 6675 6676 public void test_L3_STRSM_API() { 6677 L3_xTRSM_API(mMatrixS); 6678 } 6679 6680 public void test_L3_DTRSM_API() { 6681 L3_xTRSM_API(mMatrixD); 6682 } 6683 6684 public void test_L3_CTRSM_API() { 6685 L3_xTRSM_API(mMatrixC); 6686 } 6687 6688 public void test_L3_ZTRSM_API() { 6689 L3_xTRSM_API(mMatrixZ); 6690 } 6691 6692 public void test_L3_STRSM_Correctness() { 6693 int side = ScriptIntrinsicBLAS.LEFT; 6694 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6695 int uplo = ScriptIntrinsicBLAS.UPPER; 6696 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6697 6698 // Populate input allocations 6699 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dM)); 6700 Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 6701 matrixAS.copyFrom(mBLASData.L3_sTRSM_A_mm); 6702 matrixBS.copyFrom(mBLASData.L3_sTRSM_B_mn); 6703 6704 // Default case: LEFT, UPPER, NO_TRANSPOSE 6705 mBLAS.STRSM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS); 6706 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 6707 matrixBRef.copyFrom(mBLASData.L3_sTRSM_o_LUN); 6708 verifyMatrix(matrixBRef, matrixBS); 6709 6710 // Case: RIGHT, LOWER, TRANSPOSE 6711 matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 6712 matrixAS.copyFrom(mBLASData.L3_sTRSM_A_nn); 6713 // Reload matrix B, since it was overwritten by BLAS. 6714 matrixBS.copyFrom(mBLASData.L3_sTRSM_B_mn); 6715 6716 side = ScriptIntrinsicBLAS.RIGHT; 6717 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6718 uplo = ScriptIntrinsicBLAS.LOWER; 6719 mBLAS.STRSM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS); 6720 matrixBRef.copyFrom(mBLASData.L3_sTRSM_o_RLT); 6721 verifyMatrix(matrixBRef, matrixBS); 6722 6723 mRS.finish(); 6724 checkError(); 6725 } 6726 6727 public void test_L3_DTRSM_Correctness() { 6728 int side = ScriptIntrinsicBLAS.LEFT; 6729 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6730 int uplo = ScriptIntrinsicBLAS.UPPER; 6731 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6732 6733 // Populate input allocations 6734 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dM)); 6735 Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 6736 matrixAD.copyFrom(mBLASData.L3_dTRSM_A_mm); 6737 matrixBD.copyFrom(mBLASData.L3_dTRSM_B_mn); 6738 6739 // Default case: LEFT, UPPER, NO_TRANSPOSE 6740 mBLAS.DTRSM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD); 6741 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 6742 matrixBRef.copyFrom(mBLASData.L3_dTRSM_o_LUN); 6743 verifyMatrix(matrixBRef, matrixBD); 6744 6745 // Case: RIGHT, LOWER, TRANSPOSE 6746 matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 6747 matrixAD.copyFrom(mBLASData.L3_dTRSM_A_nn); 6748 // Reload matrix B, since it was overwritten by BLAS. 6749 matrixBD.copyFrom(mBLASData.L3_dTRSM_B_mn); 6750 6751 side = ScriptIntrinsicBLAS.RIGHT; 6752 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6753 uplo = ScriptIntrinsicBLAS.LOWER; 6754 mBLAS.DTRSM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD); 6755 matrixBRef.copyFrom(mBLASData.L3_dTRSM_o_RLT); 6756 verifyMatrix(matrixBRef, matrixBD); 6757 6758 mRS.finish(); 6759 checkError(); 6760 } 6761 6762 public void test_L3_CTRSM_Correctness() { 6763 int side = ScriptIntrinsicBLAS.LEFT; 6764 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6765 int uplo = ScriptIntrinsicBLAS.UPPER; 6766 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6767 6768 // Populate input allocations 6769 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM)); 6770 Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 6771 matrixAC.copyFrom(mBLASData.L3_cTRSM_A_mm); 6772 matrixBC.copyFrom(mBLASData.L3_cTRSM_B_mn); 6773 6774 // Default case: LEFT, UPPER, NO_TRANSPOSE 6775 mBLAS.CTRSM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC); 6776 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 6777 matrixBRef.copyFrom(mBLASData.L3_cTRSM_o_LUN); 6778 verifyMatrix(matrixBRef, matrixBC); 6779 6780 // Case: RIGHT, LOWER, TRANSPOSE 6781 matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 6782 matrixAC.copyFrom(mBLASData.L3_cTRSM_A_nn); 6783 // Reload matrix B, since it was overwritten by BLAS. 6784 matrixBC.copyFrom(mBLASData.L3_cTRSM_B_mn); 6785 6786 side = ScriptIntrinsicBLAS.RIGHT; 6787 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6788 uplo = ScriptIntrinsicBLAS.LOWER; 6789 mBLAS.CTRSM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC); 6790 matrixBRef.copyFrom(mBLASData.L3_cTRSM_o_RLT); 6791 verifyMatrix(matrixBRef, matrixBC); 6792 6793 mRS.finish(); 6794 checkError(); 6795 } 6796 6797 public void test_L3_ZTRSM_Correctness() { 6798 int side = ScriptIntrinsicBLAS.LEFT; 6799 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6800 int uplo = ScriptIntrinsicBLAS.UPPER; 6801 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6802 6803 // Populate input allocations 6804 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM)); 6805 Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 6806 matrixAZ.copyFrom(mBLASData.L3_zTRSM_A_mm); 6807 matrixBZ.copyFrom(mBLASData.L3_zTRSM_B_mn); 6808 6809 // Default case: LEFT, UPPER, NO_TRANSPOSE 6810 mBLAS.ZTRSM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ); 6811 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 6812 matrixBRef.copyFrom(mBLASData.L3_zTRSM_o_LUN); 6813 verifyMatrix(matrixBRef, matrixBZ); 6814 6815 // Case: RIGHT, LOWER, TRANSPOSE 6816 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 6817 matrixAZ.copyFrom(mBLASData.L3_zTRSM_A_nn); 6818 // Reload matrix B, since it was overwritten by BLAS. 6819 matrixBZ.copyFrom(mBLASData.L3_zTRSM_B_mn); 6820 6821 side = ScriptIntrinsicBLAS.RIGHT; 6822 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6823 uplo = ScriptIntrinsicBLAS.LOWER; 6824 mBLAS.ZTRSM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ); 6825 matrixBRef.copyFrom(mBLASData.L3_zTRSM_o_RLT); 6826 verifyMatrix(matrixBRef, matrixBZ); 6827 6828 mRS.finish(); 6829 checkError(); 6830 } 6831 } 6832