Home | History | Annotate | Download | only in cts
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package android.cts.rsblas;
     18 
     19 import android.renderscript.*;
     20 import android.util.Log;
     21 import java.util.ArrayList;
     22 
     23 public class IntrinsicBLAS extends IntrinsicBase {
     24     private ScriptIntrinsicBLAS mBLAS;
     25     private BLASData mBLASData;
     26     private boolean mInitialized = false;
     27 
     28     private ArrayList<Allocation> mMatrixS;
     29     private final float alphaS = 1.0f;
     30     private final float betaS = 1.0f;
     31 
     32     private ArrayList<Allocation> mMatrixD;
     33     private final double alphaD = 1.0;
     34     private final double betaD = 1.0;
     35 
     36     private ArrayList<Allocation> mMatrixC;
     37     private final Float2 alphaC = new Float2(1.0f, 0.0f);
     38     private final Float2 betaC = new Float2(1.0f, 0.0f);
     39 
     40     private ArrayList<Allocation> mMatrixZ;
     41     private final Double2 alphaZ = new Double2(1.0, 0.0);
     42     private final Double2 betaZ = new Double2(1.0, 0.0);
     43 
     44     private int[] mTranspose = {ScriptIntrinsicBLAS.NO_TRANSPOSE,
     45                                 ScriptIntrinsicBLAS.TRANSPOSE,
     46                                 ScriptIntrinsicBLAS.CONJ_TRANSPOSE,
     47                                 0};
     48 
     49     private int[] mUplo = {ScriptIntrinsicBLAS.UPPER,
     50                            ScriptIntrinsicBLAS.LOWER,
     51                            0};
     52 
     53     private int[] mDiag = {ScriptIntrinsicBLAS.NON_UNIT,
     54                            ScriptIntrinsicBLAS.UNIT,
     55                            0};
     56 
     57     private int[] mSide = {ScriptIntrinsicBLAS.LEFT,
     58                            ScriptIntrinsicBLAS.RIGHT,
     59                            0};
     60 
     61     private int[] mInc = {0, 1, 2};
     62     private int[] mK = {-1, 0, 1};
     63     private int[] mDim = {1, 2, 3, 256};
     64 
     65     @Override
     66     protected void setUp() throws Exception {
     67         super.setUp();
     68 
     69         // Now populate the test Matrixes and Vectors.
     70         if (!mInitialized) {
     71             mBLASData = new BLASData();
     72             mBLASData.loadData(mCtx);
     73             mBLAS = ScriptIntrinsicBLAS.create(mRS);
     74             mMatrixS = new ArrayList<Allocation>();
     75             mMatrixD = new ArrayList<Allocation>();
     76             mMatrixC = new ArrayList<Allocation>();
     77             mMatrixZ = new ArrayList<Allocation>();
     78             for (int x : mDim) {
     79                 for (int y : mDim) {
     80                     mMatrixS.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), x, y)));
     81                     mMatrixD.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), x, y)));
     82                     mMatrixC.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), x, y)));
     83                     mMatrixZ.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), x, y)));
     84                 }
     85             }
     86             // Also need Allocation with mismatch Element.
     87             Allocation misAlloc = Allocation.createTyped(mRS, Type.createXY(mRS, Element.U8(mRS), 1, 1));
     88             mMatrixS.add(misAlloc);
     89             mMatrixD.add(misAlloc);
     90             mMatrixC.add(misAlloc);
     91             mMatrixZ.add(misAlloc);
     92             mInitialized = true;
     93         }
     94     }
     95 
    // No class-specific cleanup is performed here; this simply defers to the superclass.
    @Override
    protected void tearDown() throws Exception {
        super.tearDown();
    }
    100 
    101     // Calculate the square of the L2 norm of a matrix.
    102     private double calcL2Norm(float[] input) {
    103         double l2Norm = 0;
    104         for (int i = 0; i < input.length; ++i) {
    105             l2Norm += input[i] * input[i];
    106         }
    107         return l2Norm;
    108     }
    109 
    110     private double calcL2Norm(double[] input) {
    111         double l2Norm = 0;
    112         for (int i = 0; i < input.length; ++i) {
    113             l2Norm += input[i] * input[i];
    114         }
    115         return l2Norm;
    116     }
    117 
    // Routine to verify that two matrices are equivalent.
    // Convenience overload: forwards to verifyMatrix(ref, out, isUpperMatrix) with
    // isUpperMatrix set to false.
    private void verifyMatrix(Allocation ref, Allocation out) {
        verifyMatrix(ref, out, false);
    }
    122 
    123     // Use L2 norm of a matrix as the scale to determine whether two matrices are equivalent:
    124     // if the absolute square error of any elements is smaller than the average L2 Norm
    125     // per element times an allowed error range (1e-6), then the two matrices are considered equivalent.
    126     // Criterion: (a[i,j] - a'[i,j])^2 < epsilon * ||A||/(M*N)
    127     // M, N: the dimensions of the matrix; epsilon: allowed relative error.
    128     private void verifyMatrix(Allocation ref, Allocation out, boolean isUpperMatrix) {
    129         double l2Norm;
    130         int size;
    131         Element e = ref.getType().getElement();
    132         if (e.isCompatible(Element.F32(mRS)) || e.isCompatible(Element.F32_2(mRS))) {
    133             size = out.getBytesSize() / 4;
    134             float[] outArr = new float[size];
    135             float[] refArr = new float[size];
    136             out.copyTo(outArr);
    137             ref.copyTo(refArr);
    138 
    139             double l2NormOut = calcL2Norm(outArr);
    140             double l2NormRef = calcL2Norm(refArr);
    141             l2Norm = (l2NormOut < l2NormRef ? l2NormOut : l2NormRef) / size;
    142         } else {
    143             size = out.getBytesSize() / 8;
    144             double[] outArr = new double[size];
    145             double[] refArr = new double[size];
    146             out.copyTo(outArr);
    147             ref.copyTo(refArr);
    148 
    149             double l2NormOut = calcL2Norm(outArr);
    150             double l2NormRef = calcL2Norm(refArr);
    151             l2Norm = (l2NormOut < l2NormRef ? l2NormOut : l2NormRef) / size;
    152         }
    153         mVerify.invoke_verifyMatrix(ref, out, l2Norm, isUpperMatrix);
    154     }
    155 
    156 
    157     private boolean validateSide(int Side) {
    158         if (Side != ScriptIntrinsicBLAS.LEFT && Side != ScriptIntrinsicBLAS.RIGHT) {
    159             return false;
    160         }
    161         return true;
    162     }
    163 
    164     private boolean validateTranspose(int Trans) {
    165         if (Trans != ScriptIntrinsicBLAS.NO_TRANSPOSE &&
    166             Trans != ScriptIntrinsicBLAS.TRANSPOSE &&
    167             Trans != ScriptIntrinsicBLAS.CONJ_TRANSPOSE) {
    168             return false;
    169         }
    170         return true;
    171     }
    172 
    173     private boolean validateConjTranspose(int Trans) {
    174         if (Trans != ScriptIntrinsicBLAS.NO_TRANSPOSE &&
    175             Trans != ScriptIntrinsicBLAS.CONJ_TRANSPOSE) {
    176             return false;
    177         }
    178         return true;
    179     }
    180 
    181     private boolean validateDiag(int Diag) {
    182         if (Diag != ScriptIntrinsicBLAS.NON_UNIT &&
    183             Diag != ScriptIntrinsicBLAS.UNIT) {
    184             return false;
    185         }
    186         return true;
    187     }
    188 
    189     private boolean validateUplo(int Uplo) {
    190         if (Uplo != ScriptIntrinsicBLAS.UPPER &&
    191             Uplo != ScriptIntrinsicBLAS.LOWER) {
    192             return false;
    193         }
    194         return true;
    195     }
    196 
    197     private boolean validateVecInput(Allocation X) {
    198         if (X.getType().getY() > 2) {
    199             // For testing vector, need a mismatch Y for complete test coverage.
    200             return false;
    201         }
    202         return true;
    203     }
    204 
    205     private boolean validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
    206         if (!validateTranspose(TransA)) {
    207             return false;
    208         }
    209         int M = A.getType().getY();
    210         int N = A.getType().getX();
    211         if (!A.getType().getElement().isCompatible(e) ||
    212             !X.getType().getElement().isCompatible(e) ||
    213             !Y.getType().getElement().isCompatible(e)) {
    214             return false;
    215         }
    216         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
    217             return false;
    218         }
    219 
    220         if (incX <= 0 || incY <= 0) {
    221             return false;
    222         }
    223         int expectedXDim = -1, expectedYDim = -1;
    224         if (TransA == ScriptIntrinsicBLAS.NO_TRANSPOSE) {
    225             expectedXDim = 1 + (N - 1) * incX;
    226             expectedYDim = 1 + (M - 1) * incY;
    227         } else {
    228             expectedXDim = 1 + (M - 1) * incX;
    229             expectedYDim = 1 + (N - 1) * incY;
    230         }
    231         if (X.getType().getX() != expectedXDim ||
    232             Y.getType().getX() != expectedYDim) {
    233             return false;
    234         }
    235         return true;
    236     }
    237 
    238     private void xGEMV_API_test(int trans, int incX, int incY, ArrayList<Allocation> mMatrix) {
    239         for (Allocation matA : mMatrix) {
    240             for (Allocation vecX : mMatrix) {
    241                 if (!validateVecInput(vecX)) {
    242                     continue;
    243                 }
    244                 for (Allocation vecY : mMatrix) {
    245                     if (!validateVecInput(vecY)) {
    246                         continue;
    247                     }
    248                     Element elemA = matA.getType().getElement();
    249                     if (validateGEMV(elemA, trans, matA, vecX, incX, vecY, incY)) {
    250                         try {
    251                             if (elemA.isCompatible(Element.F32(mRS))) {
    252                                 mBLAS.SGEMV(trans, alphaS, matA, vecX, incX, betaS, vecY, incY);
    253                             } else if (elemA.isCompatible(Element.F64(mRS))) {
    254                                 mBLAS.DGEMV(trans, alphaD, matA, vecX, incX, betaD, vecY, incY);
    255                             } else if (elemA.isCompatible(Element.F32_2(mRS))) {
    256                                 mBLAS.CGEMV(trans, alphaC, matA, vecX, incX, betaC, vecY, incY);
    257                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
    258                                 mBLAS.ZGEMV(trans, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
    259                             }
    260                         } catch (RSRuntimeException e) {
    261                             fail("should NOT throw RSRuntimeException");
    262                         }
    263                     } else {
    264                         try {
    265                             mBLAS.SGEMV(trans, alphaS, matA, vecX, incX, betaS, vecY, incY);
    266                             fail("should throw RSRuntimeException for SGEMV");
    267                         } catch (RSRuntimeException e) {
    268                         }
    269                         try {
    270                             mBLAS.DGEMV(trans, alphaD, matA, vecX, incX, betaD, vecY, incY);
    271                             fail("should throw RSRuntimeException for DGEMV");
    272                         } catch (RSRuntimeException e) {
    273                         }
    274                         try {
    275                             mBLAS.CGEMV(trans, alphaC, matA, vecX, incX, betaC, vecY, incY);
    276                             fail("should throw RSRuntimeException for CGEMV");
    277                         } catch (RSRuntimeException e) {
    278                         }
    279                         try {
    280                             mBLAS.ZGEMV(trans, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
    281                             fail("should throw RSRuntimeException for ZGEMV");
    282                         } catch (RSRuntimeException e) {
    283                         }
    284                     }
    285                 }
    286             }
    287         }
    288     }
    289 
    290     public void L2_xGEMV_API(ArrayList<Allocation> mMatrix) {
    291         for (int trans : mTranspose) {
    292             for (int incX : mInc) {
    293                 xGEMV_API_test(trans, incX, incX, mMatrix);
    294             }
    295         }
    296     }
    297 
    // Runs the GEMV argument-validation sweep over mMatrixS (the F32 pool built in setUp).
    public void test_L2_SGEMV_API() {
        L2_xGEMV_API(mMatrixS);
    }
    301 
    // Runs the GEMV argument-validation sweep over mMatrixD (the F64 pool built in setUp).
    public void test_L2_DGEMV_API() {
        L2_xGEMV_API(mMatrixD);
    }
    305 
    // Runs the GEMV argument-validation sweep over mMatrixC (the F32_2 pool built in setUp).
    public void test_L2_CGEMV_API() {
        L2_xGEMV_API(mMatrixC);
    }
    309 
    // Runs the GEMV argument-validation sweep over mMatrixZ (the F64_2 pool built in setUp).
    public void test_L2_ZGEMV_API() {
        L2_xGEMV_API(mMatrixZ);
    }
    313 
    314     public void test_L2_SGEMV_Correctness() {
    315         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    316         int incX = 1;
    317         int incY = 1;
    318 
    319         // Populate input allocations
    320         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
    321         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
    322         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1));
    323         matrixAS.copyFrom(mBLASData.L2_sGEMV_A_mn);
    324         vectorXS.copyFrom(mBLASData.L2_sGEMV_x_n1);
    325         vectorYS.copyFrom(mBLASData.L2_sGEMV_y_m1);
    326 
    327         // Test for the default case: NO_TRANS
    328         mBLAS.SGEMV(trans, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
    329         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1));
    330         vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_N);
    331         verifyMatrix(vectorYRef, vectorYS);
    332 
    333         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
    334         trans = ScriptIntrinsicBLAS.TRANSPOSE;
    335         // Reload vector Y, since it was overwritten by BLAS.
    336         vectorYS.copyFrom(mBLASData.L2_sGEMV_y_m1);
    337         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
    338         mBLAS.SGEMV(trans, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX);
    339         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
    340         vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_T);
    341         verifyMatrix(vectorYRef, vectorXS);
    342 
    343         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
    344         vectorXS.copyFrom(mBLASData.L2_sGEMV_x_n1);
    345         mBLAS.SGEMV(trans, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX);
    346         vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_H);
    347         verifyMatrix(vectorYRef, vectorXS);
    348 
    349         // Test for incX = 2 & incY = 3;
    350         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    351         incX = 2;
    352         incY = 3;
    353         int dimX = 1 + (mBLASData.dN - 1) * incX;
    354         int dimY = 1 + (mBLASData.dM - 1) * incY;
    355         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
    356         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
    357         vectorXS.copyFrom(mBLASData.L2_sGEMV_x_n2);
    358         vectorYS.copyFrom(mBLASData.L2_sGEMV_y_m2);
    359 
    360         mBLAS.SGEMV(trans, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
    361         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
    362         vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_N2);
    363         verifyMatrix(vectorYRef, vectorYS);
    364 
    365         mRS.finish();
    366         checkError();
    367     }
    368 
    369     public void test_L2_DGEMV_Correctness() {
    370         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    371         int incX = 1;
    372         int incY = 1;
    373 
    374         // Populate input allocations
    375         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
    376         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
    377         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1));
    378         matrixAD.copyFrom(mBLASData.L2_dGEMV_A_mn);
    379         vectorXD.copyFrom(mBLASData.L2_dGEMV_x_n1);
    380         vectorYD.copyFrom(mBLASData.L2_dGEMV_y_m1);
    381 
    382         // Test for the default case: NO_TRANS
    383         mBLAS.DGEMV(trans, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
    384         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1));
    385         vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_N);
    386         verifyMatrix(vectorYRef, vectorYD);
    387 
    388         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
    389         trans = ScriptIntrinsicBLAS.TRANSPOSE;
    390         // Reload vector Y, since it was overwritten by BLAS.
    391         vectorYD.copyFrom(mBLASData.L2_dGEMV_y_m1);
    392         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
    393         mBLAS.DGEMV(trans, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX);
    394         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
    395         vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_T);
    396         verifyMatrix(vectorYRef, vectorXD);
    397 
    398         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
    399         vectorXD.copyFrom(mBLASData.L2_dGEMV_x_n1);
    400         mBLAS.DGEMV(trans, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX);
    401         vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_H);
    402         verifyMatrix(vectorYRef, vectorXD);
    403 
    404         // Test for incX = 2 & incY = 3;
    405         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    406         incX = 2;
    407         incY = 3;
    408         int dimX = 1 + (mBLASData.dN - 1) * incX;
    409         int dimY = 1 + (mBLASData.dM - 1) * incY;
    410         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
    411         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
    412         vectorXD.copyFrom(mBLASData.L2_dGEMV_x_n2);
    413         vectorYD.copyFrom(mBLASData.L2_dGEMV_y_m2);
    414 
    415         mBLAS.DGEMV(trans, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
    416         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
    417         vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_N2);
    418         verifyMatrix(vectorYRef, vectorYD);
    419 
    420         mRS.finish();
    421         checkError();
    422     }
    423 
    424     public void test_L2_CGEMV_Correctness() {
    425         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    426         int incX = 1;
    427         int incY = 1;
    428 
    429         // Populate input allocations
    430         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
    431         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
    432         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1));
    433         matrixAC.copyFrom(mBLASData.L2_cGEMV_A_mn);
    434         vectorXC.copyFrom(mBLASData.L2_cGEMV_x_n1);
    435         vectorYC.copyFrom(mBLASData.L2_cGEMV_y_m1);
    436 
    437         // Test for the default case: NO_TRANS
    438         mBLAS.CGEMV(trans, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
    439         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1));
    440         vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_N);
    441         verifyMatrix(vectorYRef, vectorYC);
    442 
    443         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
    444         trans = ScriptIntrinsicBLAS.TRANSPOSE;
    445         // Reload vector Y, since it was overwritten by BLAS.
    446         vectorYC.copyFrom(mBLASData.L2_cGEMV_y_m1);
    447         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
    448         mBLAS.CGEMV(trans, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX);
    449         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
    450         vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_T);
    451         verifyMatrix(vectorYRef, vectorXC);
    452 
    453         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
    454         vectorXC.copyFrom(mBLASData.L2_cGEMV_x_n1);
    455         mBLAS.CGEMV(trans, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX);
    456         vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_H);
    457         verifyMatrix(vectorYRef, vectorXC);
    458 
    459         // Test for incX = 2 & incY = 3;
    460         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    461         incX = 2;
    462         incY = 3;
    463         int dimX = 1 + (mBLASData.dN - 1) * incX;
    464         int dimY = 1 + (mBLASData.dM - 1) * incY;
    465         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
    466         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
    467         vectorXC.copyFrom(mBLASData.L2_cGEMV_x_n2);
    468         vectorYC.copyFrom(mBLASData.L2_cGEMV_y_m2);
    469 
    470         mBLAS.CGEMV(trans, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
    471         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
    472         vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_N2);
    473         verifyMatrix(vectorYRef, vectorYC);
    474 
    475         mRS.finish();
    476         checkError();
    477     }
    478 
    479     public void test_L2_ZGEMV_Correctness() {
    480         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    481         int incX = 1;
    482         int incY = 1;
    483 
    484         // Populate input allocations
    485         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
    486         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
    487         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1));
    488         matrixAZ.copyFrom(mBLASData.L2_zGEMV_A_mn);
    489         vectorXZ.copyFrom(mBLASData.L2_zGEMV_x_n1);
    490         vectorYZ.copyFrom(mBLASData.L2_zGEMV_y_m1);
    491 
    492         // Test for the default case: NO_TRANS
    493         mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
    494         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1));
    495         vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_N);
    496         verifyMatrix(vectorYRef, vectorYZ);
    497 
    498         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
    499         trans = ScriptIntrinsicBLAS.TRANSPOSE;
    500         // Reload vector Y, since it was overwritten by BLAS.
    501         vectorYZ.copyFrom(mBLASData.L2_zGEMV_y_m1);
    502         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
    503         mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorYZ, incY, betaZ, vectorXZ, incX);
    504         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
    505         vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_T);
    506         verifyMatrix(vectorYRef, vectorXZ);
    507 
    508         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
    509         vectorXZ.copyFrom(mBLASData.L2_zGEMV_x_n1);
    510         mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorYZ, incY, betaZ, vectorXZ, incX);
    511         vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_H);
    512         verifyMatrix(vectorYRef, vectorXZ);
    513 
    514         // Test for incX = 2 & incY = 3;
    515         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    516         incX = 2;
    517         incY = 3;
    518         int dimX = 1 + (mBLASData.dN - 1) * incX;
    519         int dimY = 1 + (mBLASData.dM - 1) * incY;
    520         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
    521         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
    522         vectorXZ.copyFrom(mBLASData.L2_zGEMV_x_n2);
    523         vectorYZ.copyFrom(mBLASData.L2_zGEMV_y_m2);
    524 
    525         mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
    526         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
    527         vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_N2);
    528         verifyMatrix(vectorYRef, vectorYZ);
    529 
    530         mRS.finish();
    531         checkError();
    532     }
    533 
    534 
    535 
    536     private void xGBMV_API_test(int trans, int KL, int KU, int incX, int incY, ArrayList<Allocation> mMatrix) {
    537         for (Allocation matA : mMatrix) {
    538             for (Allocation vecX : mMatrix) {
    539                 if (!validateVecInput(vecX)) {
    540                     continue;
    541                 }
    542                 for (Allocation vecY : mMatrix) {
    543                     if (!validateVecInput(vecY)) {
    544                         continue;
    545                     }
    546                     Element elemA = matA.getType().getElement();
    547                     if (validateGEMV(elemA, trans, matA, vecX, incX, vecY, incY) && KU >= 0 && KL >= 0) {
    548                         try {
    549                             if (elemA.isCompatible(Element.F32(mRS))) {
    550                                 mBLAS.SGBMV(trans, KL, KU, alphaS, matA, vecX, incX, betaS, vecY, incY);
    551                             } else if (elemA.isCompatible(Element.F64(mRS))) {
    552                                 mBLAS.DGBMV(trans, KL, KU, alphaD, matA, vecX, incX, betaD, vecY, incY);
    553                             } else if (elemA.isCompatible(Element.F32_2(mRS))) {
    554                                 mBLAS.CGBMV(trans, KL, KU, alphaC, matA, vecX, incX, betaC, vecY, incY);
    555                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
    556                                 mBLAS.ZGBMV(trans, KL, KU, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
    557                             }
    558                         } catch (RSRuntimeException e) {
    559                             fail("should NOT throw RSRuntimeException");
    560                         }
    561                     } else {
    562                         try {
    563                             mBLAS.SGBMV(trans, KL, KU, alphaS, matA, vecX, incX, betaS, vecY, incY);
    564                             fail("should throw RSRuntimeException for SGBMV");
    565                         } catch (RSRuntimeException e) {
    566                         }
    567                         try {
    568                             mBLAS.DGBMV(trans, KL, KU, alphaD, matA, vecX, incX, betaD, vecY, incY);
    569                             fail("should throw RSRuntimeException for DGBMV");
    570                         } catch (RSRuntimeException e) {
    571                         }
    572                         try {
    573                             mBLAS.CGBMV(trans, KL, KU, alphaC, matA, vecX, incX, betaC, vecY, incY);
    574                             fail("should throw RSRuntimeException for CGBMV");
    575                         } catch (RSRuntimeException e) {
    576                         }
    577                         try {
    578                             mBLAS.ZGBMV(trans, KL, KU, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
    579                             fail("should throw RSRuntimeException for ZGBMV");
    580                         } catch (RSRuntimeException e) {
    581                         }
    582                     }
    583                 }
    584             }
    585         }
    586     }
    587 
    588     public void L2_xGBMV_API(ArrayList<Allocation> mMatrix) {
    589         for (int trans : mTranspose) {
    590             for (int incX : mInc) {
    591                 for (int K : mK) {
    592                     xGBMV_API_test(trans, K, K, incX, incX, mMatrix);
    593                 }
    594             }
    595         }
    596     }
    597 
    // Runs the GBMV argument-validation sweep over mMatrixS (the F32 pool built in setUp).
    public void test_L2_SGBMV_API() {
        L2_xGBMV_API(mMatrixS);
    }
    601 
    // Runs the GBMV argument-validation sweep over mMatrixD (the F64 pool built in setUp).
    public void test_L2_DGBMV_API() {
        L2_xGBMV_API(mMatrixD);
    }
    605 
    // Runs the GBMV argument-validation sweep over mMatrixC (the F32_2 pool built in setUp).
    public void test_L2_CGBMV_API() {
        L2_xGBMV_API(mMatrixC);
    }
    609 
    // Runs the GBMV argument-validation sweep over mMatrixZ (the F64_2 pool built in setUp).
    public void test_L2_ZGBMV_API() {
        L2_xGBMV_API(mMatrixZ);
    }
    613 
    614     public void test_L2_SGBMV_Correctness() {
    615         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    616         int incX = 1;
    617         int incY = 1;
    618 
    619         // Populate input allocations
    620         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
    621         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
    622         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1));
    623         matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_sGBMV_A_mn);
    624         vectorXS.copyFrom(mBLASData.L2_sGBMV_x_n1);
    625         vectorYS.copyFrom(mBLASData.L2_sGBMV_y_m1);
    626 
    627         // Test for the default case: NO_TRANS
    628         mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
    629         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1));
    630         vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_N);
    631         verifyMatrix(vectorYRef, vectorYS);
    632 
    633         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
    634         trans = ScriptIntrinsicBLAS.TRANSPOSE;
    635         // Reload vector Y, since it was overwritten by BLAS.
    636         vectorYS.copyFrom(mBLASData.L2_sGBMV_y_m1);
    637         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
    638         mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX);
    639         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
    640         vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_T);
    641         verifyMatrix(vectorYRef, vectorXS);
    642 
    643         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
    644         vectorXS.copyFrom(mBLASData.L2_sGBMV_x_n1);
    645         mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX);
    646         vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_H);
    647         verifyMatrix(vectorYRef, vectorXS);
    648 
    649         // Test for incX = 2 & incY = 3;
    650         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    651         incX = 2;
    652         incY = 3;
    653         int dimX = 1 + (mBLASData.dN - 1) * incX;
    654         int dimY = 1 + (mBLASData.dM - 1) * incY;
    655         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
    656         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
    657         vectorXS.copyFrom(mBLASData.L2_sGBMV_x_n2);
    658         vectorYS.copyFrom(mBLASData.L2_sGBMV_y_m2);
    659 
    660         mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
    661         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
    662         vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_N2);
    663         verifyMatrix(vectorYRef, vectorYS);
    664 
    665         mRS.finish();
    666         checkError();
    667     }
    668 
    669     public void test_L2_DGBMV_Correctness() {
    670         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    671         int incX = 1;
    672         int incY = 1;
    673 
    674         // Populate input allocations
    675         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
    676         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
    677         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1));
    678         matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_dGBMV_A_mn);
    679         vectorXD.copyFrom(mBLASData.L2_dGBMV_x_n1);
    680         vectorYD.copyFrom(mBLASData.L2_dGBMV_y_m1);
    681 
    682         // Test for the default case: NO_TRANS
    683         mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
    684         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1));
    685         vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_N);
    686         verifyMatrix(vectorYRef, vectorYD);
    687 
    688         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
    689         trans = ScriptIntrinsicBLAS.TRANSPOSE;
    690         // Reload vector Y, since it was overwritten by BLAS.
    691         vectorYD.copyFrom(mBLASData.L2_dGBMV_y_m1);
    692         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
    693         mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX);
    694         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
    695         vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_T);
    696         verifyMatrix(vectorYRef, vectorXD);
    697 
    698         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
    699         vectorXD.copyFrom(mBLASData.L2_dGBMV_x_n1);
    700         mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX);
    701         vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_H);
    702         verifyMatrix(vectorYRef, vectorXD);
    703 
    704         // Test for incX = 2 & incY = 3;
    705         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    706         incX = 2;
    707         incY = 3;
    708         int dimX = 1 + (mBLASData.dN - 1) * incX;
    709         int dimY = 1 + (mBLASData.dM - 1) * incY;
    710         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
    711         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
    712         vectorXD.copyFrom(mBLASData.L2_dGBMV_x_n2);
    713         vectorYD.copyFrom(mBLASData.L2_dGBMV_y_m2);
    714 
    715         mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
    716         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
    717         vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_N2);
    718         verifyMatrix(vectorYRef, vectorYD);
    719 
    720         mRS.finish();
    721         checkError();
    722     }
    723 
    724     public void test_L2_CGBMV_Correctness() {
    725         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    726         int incX = 1;
    727         int incY = 1;
    728 
    729         // Populate input allocations
    730         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
    731         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
    732         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1));
    733         matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_cGBMV_A_mn);
    734         vectorXC.copyFrom(mBLASData.L2_cGBMV_x_n1);
    735         vectorYC.copyFrom(mBLASData.L2_cGBMV_y_m1);
    736 
    737         // Test for the default case: NO_TRANS
    738         mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
    739         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1));
    740         vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_N);
    741         verifyMatrix(vectorYRef, vectorYC);
    742 
    743         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
    744         trans = ScriptIntrinsicBLAS.TRANSPOSE;
    745         // Reload vector Y, since it was overwritten by BLAS.
    746         vectorYC.copyFrom(mBLASData.L2_cGBMV_y_m1);
    747         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
    748         mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX);
    749         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
    750         vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_T);
    751         verifyMatrix(vectorYRef, vectorXC);
    752 
    753         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
    754         vectorXC.copyFrom(mBLASData.L2_cGBMV_x_n1);
    755         mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX);
    756         vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_H);
    757         verifyMatrix(vectorYRef, vectorXC);
    758 
    759         // Test for incX = 2 & incY = 3;
    760         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    761         incX = 2;
    762         incY = 3;
    763         int dimX = 1 + (mBLASData.dN - 1) * incX;
    764         int dimY = 1 + (mBLASData.dM - 1) * incY;
    765         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
    766         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
    767         vectorXC.copyFrom(mBLASData.L2_cGBMV_x_n2);
    768         vectorYC.copyFrom(mBLASData.L2_cGBMV_y_m2);
    769 
    770         mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
    771         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
    772         vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_N2);
    773         verifyMatrix(vectorYRef, vectorYC);
    774 
    775         mRS.finish();
    776         checkError();
    777     }
    778 
    779     public void test_L2_ZGBMV_Correctness() {
    780         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    781         int incX = 1;
    782         int incY = 1;
    783 
    784         // Populate input allocations
    785         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
    786         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
    787         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1));
    788         matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_zGBMV_A_mn);
    789         vectorXZ.copyFrom(mBLASData.L2_zGBMV_x_n1);
    790         vectorYZ.copyFrom(mBLASData.L2_zGBMV_y_m1);
    791 
    792         // Test for the default case: NO_TRANS
    793         mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
    794         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1));
    795         vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_N);
    796         verifyMatrix(vectorYRef, vectorYZ);
    797 
    798         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
    799         trans = ScriptIntrinsicBLAS.TRANSPOSE;
    800         // Reload vector Y, since it was overwritten by BLAS.
    801         vectorYZ.copyFrom(mBLASData.L2_zGBMV_y_m1);
    802         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
    803         mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorYZ, incY, betaZ, vectorXZ, incX);
    804         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
    805         vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_T);
    806         verifyMatrix(vectorYRef, vectorXZ);
    807 
    808         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
    809         vectorXZ.copyFrom(mBLASData.L2_zGBMV_x_n1);
    810         mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorYZ, incX, betaZ, vectorXZ, incY);
    811         vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_H);
    812         verifyMatrix(vectorYRef, vectorXZ);
    813 
    814         // Test for incX = 2 & incY = 3;
    815         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
    816         incX = 2;
    817         incY = 3;
    818         int dimX = 1 + (mBLASData.dN - 1) * incX;
    819         int dimY = 1 + (mBLASData.dM - 1) * incY;
    820         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
    821         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
    822         vectorXZ.copyFrom(mBLASData.L2_zGBMV_x_n2);
    823         vectorYZ.copyFrom(mBLASData.L2_zGBMV_y_m2);
    824 
    825         mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
    826         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
    827         vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_N2);
    828         verifyMatrix(vectorYRef, vectorYZ);
    829 
    830         mRS.finish();
    831         checkError();
    832     }
    833 
    834 
    835     private void xHEMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
    836         for (Allocation matA : mMatrix) {
    837             for (Allocation vecX : mMatrix) {
    838                 if (!validateVecInput(vecX)) {
    839                     continue;
    840                 }
    841                 for (Allocation vecY : mMatrix) {
    842                     if (!validateVecInput(vecY)) {
    843                         continue;
    844                     }
    845                     Element elemA = matA.getType().getElement();
    846                     if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
    847                         try {
    848                             if (elemA.isCompatible(Element.F32_2(mRS))) {
    849                                 mBLAS.CHEMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY);
    850                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
    851                                 mBLAS.ZHEMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
    852                             }
    853                         } catch (RSRuntimeException e) {
    854                             fail("should NOT throw RSRuntimeException");
    855                         }
    856                     } else {
    857                         try {
    858                             mBLAS.CHEMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY);
    859                             fail("should throw RSRuntimeException for CHEMV");
    860                         } catch (RSRuntimeException e) {
    861                         }
    862                         try {
    863                             mBLAS.ZHEMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
    864                             fail("should throw RSRuntimeException for ZHEMV");
    865                         } catch (RSRuntimeException e) {
    866                         }
    867                     }
    868                 }
    869             }
    870         }
    871     }
    872 
    873     public void L2_xHEMV_API(ArrayList<Allocation> mMatrix) {
    874         for (int Uplo : mUplo) {
    875             for (int incX : mInc) {
    876                 xHEMV_API_test(Uplo, incX, incX, mMatrix);
    877             }
    878         }
    879     }
    880 
    881     public void test_L2_CHEMV_API() {
    882         L2_xHEMV_API(mMatrixC);
    883     }
    884 
    885     public void test_L2_ZHEMV_API() {
    886         L2_xHEMV_API(mMatrixZ);
    887     }
    888 
    889     public void test_L2_CHEMV_Correctness() {
    890         int uplo = ScriptIntrinsicBLAS.UPPER;
    891         int incX = 1;
    892         int incY = 1;
    893 
    894         // Populate input allocations
    895         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
    896         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
    897         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
    898         matrixAC.copyFrom(mBLASData.L2_cHEMV_A_nn);
    899         vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n1);
    900         vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n1);
    901 
    902         // Test for the default case:
    903         mBLAS.CHEMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
    904         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
    905         vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N);
    906         verifyMatrix(vectorYRef, vectorYC);
    907 
    908         // Test for incX = 2 & incY = 3;
    909         incX = 2;
    910         incY = 3;
    911         int dimX = 1 + (mBLASData.dN - 1) * incX;
    912         int dimY = 1 + (mBLASData.dN - 1) * incY;
    913         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
    914         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
    915         vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n2);
    916         vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n2);
    917 
    918         mBLAS.CHEMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
    919         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
    920         vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N2);
    921         verifyMatrix(vectorYRef, vectorYC);
    922 
    923         mRS.finish();
    924         checkError();
    925     }
    926 
    927     public void test_L2_ZHEMV_Correctness() {
    928         int uplo = ScriptIntrinsicBLAS.UPPER;
    929         int incX = 1;
    930         int incY = 1;
    931 
    932         // Populate input allocations
    933         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
    934         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
    935         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
    936         matrixAZ.copyFrom(mBLASData.L2_zHEMV_A_nn);
    937         vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n1);
    938         vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n1);
    939 
    940         // Test for the default case: NO_TRANS
    941         mBLAS.ZHEMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
    942         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
    943         vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N);
    944         verifyMatrix(vectorYRef, vectorYZ);
    945 
    946         // Test for incX = 2 & incY = 3;
    947         incX = 2;
    948         incY = 3;
    949         int dimX = 1 + (mBLASData.dN - 1) * incX;
    950         int dimY = 1 + (mBLASData.dN - 1) * incY;
    951         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
    952         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
    953         vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n2);
    954         vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n2);
    955 
    956         mBLAS.ZHEMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
    957         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
    958         vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N2);
    959         verifyMatrix(vectorYRef, vectorYZ);
    960 
    961         mRS.finish();
    962         checkError();
    963     }
    964 
    965 
    966 
    967     private void xHBMV_API_test(int Uplo, int K, int incX, int incY, ArrayList<Allocation> mMatrix) {
    968         for (Allocation matA : mMatrix) {
    969             for (Allocation vecX : mMatrix) {
    970                 if (!validateVecInput(vecX)) {
    971                     continue;
    972                 }
    973                 for (Allocation vecY : mMatrix) {
    974                     if (!validateVecInput(vecY)) {
    975                         continue;
    976                     }
    977                     Element elemA = matA.getType().getElement();
    978                     if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA) && K >= 0) {
    979                         try {
    980                             if (elemA.isCompatible(Element.F32_2(mRS))) {
    981                                 mBLAS.CHBMV(Uplo, K, alphaC, matA, vecX, incX, betaC, vecY, incY);
    982                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
    983                                 mBLAS.ZHBMV(Uplo, K, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
    984                             }
    985                         } catch (RSRuntimeException e) {
    986                             fail("should NOT throw RSRuntimeException");
    987                         }
    988                     } else {
    989                         try {
    990                             mBLAS.CHBMV(Uplo, K, alphaC, matA, vecX, incX, betaC, vecY, incY);
    991                             fail("should throw RSRuntimeException for CHBMV");
    992                         } catch (RSRuntimeException e) {
    993                         }
    994                         try {
    995                             mBLAS.ZHBMV(Uplo, K, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
    996                             fail("should throw RSRuntimeException for ZHBMV");
    997                         } catch (RSRuntimeException e) {
    998                         }
    999                     }
   1000                 }
   1001             }
   1002         }
   1003     }
   1004 
   1005     public void L2_xHBMV_API(ArrayList<Allocation> mMatrix) {
   1006         for (int Uplo : mUplo) {
   1007             for (int K : mK) {
   1008                 for (int incX : mInc) {
   1009                         xHBMV_API_test(Uplo, K, incX, incX, mMatrix);
   1010                 }
   1011             }
   1012         }
   1013     }
   1014 
   1015     public void test_L2_CHBMV_API() {
   1016         L2_xHBMV_API(mMatrixC);
   1017     }
   1018 
   1019     public void test_L2_ZHBMV_API() {
   1020         L2_xHBMV_API(mMatrixZ);
   1021     }
   1022 
   1023     public void test_L2_CHBMV_Correctness() {
   1024         int uplo = ScriptIntrinsicBLAS.UPPER;
   1025         int incX = 1;
   1026         int incY = 1;
   1027 
   1028         // Populate input allocations
   1029         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   1030         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   1031         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   1032         matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_cHBMV_A_nn);
   1033         vectorXC.copyFrom(mBLASData.L2_cHBMV_x_n1);
   1034         vectorYC.copyFrom(mBLASData.L2_cHBMV_y_n1);
   1035 
   1036         // Test for the default case:
   1037         mBLAS.CHBMV(uplo, mBLASData.KL, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
   1038         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   1039         vectorYRef.copyFrom(mBLASData.L2_cHBMV_o_N);
   1040         verifyMatrix(vectorYRef, vectorYC);
   1041 
   1042         // Test for incX = 2 & incY = 3;
   1043         incX = 2;
   1044         incY = 3;
   1045         int dimX = 1 + (mBLASData.dN - 1) * incX;
   1046         int dimY = 1 + (mBLASData.dN - 1) * incY;
   1047         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   1048         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
   1049         vectorXC.copyFrom(mBLASData.L2_cHBMV_x_n2);
   1050         vectorYC.copyFrom(mBLASData.L2_cHBMV_y_n2);
   1051 
   1052         mBLAS.CHBMV(uplo, mBLASData.KL, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
   1053         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
   1054         vectorYRef.copyFrom(mBLASData.L2_cHBMV_o_N2);
   1055         verifyMatrix(vectorYRef, vectorYC);
   1056 
   1057         mRS.finish();
   1058         checkError();
   1059     }
   1060 
   1061     public void test_L2_ZHBMV_Correctness() {
   1062         int uplo = ScriptIntrinsicBLAS.UPPER;
   1063         int incX = 1;
   1064         int incY = 1;
   1065 
   1066         // Populate input allocations
   1067         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   1068         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   1069         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   1070         matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_zHBMV_A_nn);
   1071         vectorXZ.copyFrom(mBLASData.L2_zHBMV_x_n1);
   1072         vectorYZ.copyFrom(mBLASData.L2_zHBMV_y_n1);
   1073 
   1074         // Test for the default case: NO_TRANS
   1075         mBLAS.ZHBMV(uplo, mBLASData.KL, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
   1076         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   1077         vectorYRef.copyFrom(mBLASData.L2_zHBMV_o_N);
   1078         verifyMatrix(vectorYRef, vectorYZ);
   1079 
   1080         // Test for incX = 2 & incY = 3;
   1081         incX = 2;
   1082         incY = 3;
   1083         int dimX = 1 + (mBLASData.dN - 1) * incX;
   1084         int dimY = 1 + (mBLASData.dN - 1) * incY;
   1085         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   1086         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
   1087         vectorXZ.copyFrom(mBLASData.L2_zHBMV_x_n2);
   1088         vectorYZ.copyFrom(mBLASData.L2_zHBMV_y_n2);
   1089 
   1090         mBLAS.ZHBMV(uplo, mBLASData.KL, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
   1091         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
   1092         vectorYRef.copyFrom(mBLASData.L2_zHBMV_o_N2);
   1093         verifyMatrix(vectorYRef, vectorYZ);
   1094 
   1095         mRS.finish();
   1096         checkError();
   1097     }
   1098 
   1099 
   1100     private void xHPMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
   1101         for (Allocation matA : mMatrix) {
   1102             for (Allocation vecX : mMatrix) {
   1103                 if (!validateVecInput(vecX)) {
   1104                     continue;
   1105                 }
   1106                 for (Allocation vecY : mMatrix) {
   1107                     if (!validateVecInput(vecY)) {
   1108                         continue;
   1109                     }
   1110                     Element elemA = matA.getType().getElement();
   1111                     if (validateSPR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
   1112                         try {
   1113                             if (elemA.isCompatible(Element.F32_2(mRS))) {
   1114                                 mBLAS.CHPMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY);
   1115                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   1116                                 mBLAS.ZHPMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
   1117                             }
   1118                         } catch (RSRuntimeException e) {
   1119                             fail("should NOT throw RSRuntimeException");
   1120                         }
   1121                     } else {
   1122                         try {
   1123                             mBLAS.CHPMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY);
   1124                             fail("should throw RSRuntimeException for CHPMV");
   1125                         } catch (RSRuntimeException e) {
   1126                         }
   1127                         try {
   1128                             mBLAS.ZHPMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
   1129                             fail("should throw RSRuntimeException for ZHPMV");
   1130                         } catch (RSRuntimeException e) {
   1131                         }
   1132                     }
   1133                 }
   1134             }
   1135         }
   1136     }
   1137 
   1138     public void L2_xHPMV_API(ArrayList<Allocation> mMatrix) {
   1139         for (int Uplo : mUplo) {
   1140             for (int incX : mInc) {
   1141                 xHPMV_API_test(Uplo, incX, incX, mMatrix);
   1142             }
   1143         }
   1144     }
   1145 
   1146     public void test_L2_CHPMV_API() {
   1147         L2_xHPMV_API(mMatrixC);
   1148     }
   1149 
   1150     public void test_L2_ZHPMV_API() {
   1151         L2_xHPMV_API(mMatrixZ);
   1152     }
   1153 
   1154     public void test_L2_CHPMV_Correctness() {
   1155         int uplo = ScriptIntrinsicBLAS.UPPER;
   1156         int incX = 1;
   1157         int incY = 1;
   1158 
   1159         // Populate input allocations
   1160         int N = mBLASData.dN;
   1161         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
   1162         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
   1163         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
   1164         matrixAC.copyFrom(mBLASData.L2_cHEMV_A_nn_pu);
   1165         vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n1);
   1166         vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n1);
   1167 
   1168         // Test for the default case:
   1169         mBLAS.CHPMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
   1170         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
   1171         vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N);
   1172         verifyMatrix(vectorYRef, vectorYC);
   1173 
   1174         // Test for incX = 2 & incY = 3;
   1175         incX = 2;
   1176         incY = 3;
   1177         int dimX = 1 + (N - 1) * incX;
   1178         int dimY = 1 + (N - 1) * incY;
   1179         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   1180         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
   1181         vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n2);
   1182         vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n2);
   1183 
   1184         mBLAS.CHPMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
   1185         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
   1186         vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N2);
   1187         verifyMatrix(vectorYRef, vectorYC);
   1188 
   1189         mRS.finish();
   1190         checkError();
   1191     }
   1192 
   1193     public void test_L2_ZHPMV_Correctness() {
   1194         int uplo = ScriptIntrinsicBLAS.UPPER;
   1195         int incX = 1;
   1196         int incY = 1;
   1197 
   1198         // Populate input allocations
   1199         int N = mBLASData.dN;
   1200         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
   1201         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
   1202         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
   1203         matrixAZ.copyFrom(mBLASData.L2_zHEMV_A_nn_pu);
   1204         vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n1);
   1205         vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n1);
   1206 
   1207         // Test for the default case: NO_TRANS
   1208         mBLAS.ZHPMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
   1209         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
   1210         vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N);
   1211         verifyMatrix(vectorYRef, vectorYZ);
   1212 
   1213         // Test for incX = 2 & incY = 3;
   1214         incX = 2;
   1215         incY = 3;
   1216         int dimX = 1 + (N - 1) * incX;
   1217         int dimY = 1 + (N - 1) * incY;
   1218         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   1219         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
   1220         vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n2);
   1221         vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n2);
   1222 
   1223         mBLAS.ZHPMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
   1224         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
   1225         vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N2);
   1226         verifyMatrix(vectorYRef, vectorYZ);
   1227 
   1228         mRS.finish();
   1229         checkError();
   1230     }
   1231 
   1232 
   1233     private boolean validateSYMV(Element e, int Uplo, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
   1234         if (!validateUplo(Uplo)) {
   1235             return false;
   1236         }
   1237         int N = A.getType().getY();
   1238         if (A.getType().getX() != N) {
   1239             return false;
   1240         }
   1241         if (!A.getType().getElement().isCompatible(e) ||
   1242             !X.getType().getElement().isCompatible(e) ||
   1243             !Y.getType().getElement().isCompatible(e) ) {
   1244             return false;
   1245         }
   1246         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
   1247             return false;
   1248         }
   1249 
   1250         if (incX <= 0 || incY <= 0) {
   1251             return false;
   1252         }
   1253         int expectedXDim = 1 + (N - 1) * incX;
   1254         if (X.getType().getX() != expectedXDim) {
   1255             return false;
   1256         }
   1257         int expectedYDim = 1 + (N - 1) * incY;
   1258         if (Y.getType().getX() != expectedYDim) {
   1259             return false;
   1260         }
   1261         return true;
   1262     }
   1263 
   1264     private void xSYMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
   1265         for (Allocation matA : mMatrix) {
   1266             for (Allocation vecX : mMatrix) {
   1267                 if (!validateVecInput(vecX)) {
   1268                     continue;
   1269                 }
   1270                 for (Allocation vecY : mMatrix) {
   1271                     if (!validateVecInput(vecY)) {
   1272                         continue;
   1273                     }
   1274                     Element elemA = matA.getType().getElement();
   1275                     if (validateSYMV(elemA, Uplo, matA, vecX, incX, vecY, incY)) {
   1276                         try {
   1277                             if (elemA.isCompatible(Element.F32(mRS))) {
   1278                                 mBLAS.SSYMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY);
   1279                             } else if (elemA.isCompatible(Element.F64(mRS))) {
   1280                                 mBLAS.DSYMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY);
   1281                             }
   1282                         } catch (RSRuntimeException e) {
   1283                             fail("should NOT throw RSRuntimeException");
   1284                         }
   1285                     } else {
   1286                         try {
   1287                             mBLAS.SSYMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY);
   1288                             fail("should throw RSRuntimeException for SSYMV");
   1289                         } catch (RSRuntimeException e) {
   1290                         }
   1291                         try {
   1292                             mBLAS.DSYMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY);
   1293                             fail("should throw RSRuntimeException for DSYMV");
   1294                         } catch (RSRuntimeException e) {
   1295                         }
   1296                     }
   1297                 }
   1298             }
   1299         }
   1300     }
   1301 
   1302     public void L2_xSYMV_API(ArrayList<Allocation> mMatrix) {
   1303         for (int Uplo : mUplo) {
   1304             for (int incX : mInc) {
   1305                 xSYMV_API_test(Uplo, incX, incX, mMatrix);
   1306             }
   1307         }
   1308     }
   1309 
   1310     public void test_L2_SSYMV_API() {
   1311         L2_xSYMV_API(mMatrixS);
   1312     }
   1313 
   1314     public void test_L2_DSYMV_API() {
   1315         L2_xSYMV_API(mMatrixD);
   1316     }
   1317 
   1318     public void test_L2_SSYMV_Correctness() {
   1319         int uplo = ScriptIntrinsicBLAS.UPPER;
   1320         int incX = 1;
   1321         int incY = 1;
   1322 
   1323         // Populate input allocations
   1324         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   1325         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   1326         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   1327         matrixAS.copyFrom(mBLASData.L2_sSYMV_A_nn);
   1328         vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n1);
   1329         vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n1);
   1330 
   1331         // Test for the default case:
   1332         mBLAS.SSYMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
   1333         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   1334         vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N);
   1335         verifyMatrix(vectorYRef, vectorYS);
   1336 
   1337         // Test for incX = 2 & incY = 3;
   1338         incX = 2;
   1339         incY = 3;
   1340         int dimX = 1 + (mBLASData.dN - 1) * incX;
   1341         int dimY = 1 + (mBLASData.dN - 1) * incY;
   1342         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   1343         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
   1344         vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n2);
   1345         vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n2);
   1346 
   1347         mBLAS.SSYMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
   1348         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
   1349         vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N2);
   1350         verifyMatrix(vectorYRef, vectorYS);
   1351 
   1352         mRS.finish();
   1353         checkError();
   1354     }
   1355 
   1356     public void test_L2_DSYMV_Correctness() {
   1357         int uplo = ScriptIntrinsicBLAS.UPPER;
   1358         int incX = 1;
   1359         int incY = 1;
   1360 
   1361         // Populate input allocations
   1362         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   1363         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   1364         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   1365         matrixAD.copyFrom(mBLASData.L2_dSYMV_A_nn);
   1366         vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n1);
   1367         vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n1);
   1368 
   1369         // Test for the default case:
   1370         mBLAS.DSYMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
   1371         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   1372         vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N);
   1373         verifyMatrix(vectorYRef, vectorYD);
   1374 
   1375         // Test for incX = 2 & incY = 3;
   1376         incX = 2;
   1377         incY = 3;
   1378         int dimX = 1 + (mBLASData.dN - 1) * incX;
   1379         int dimY = 1 + (mBLASData.dN - 1) * incY;
   1380         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   1381         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
   1382         vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n2);
   1383         vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n2);
   1384 
   1385         mBLAS.DSYMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
   1386         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
   1387         vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N2);
   1388         verifyMatrix(vectorYRef, vectorYD);
   1389 
   1390         mRS.finish();
   1391         checkError();
   1392     }
   1393 
   1394 
   1395 
   1396     private void xSBMV_API_test(int Uplo, int K, int incX, int incY, ArrayList<Allocation> mMatrix) {
   1397         for (Allocation matA : mMatrix) {
   1398             for (Allocation vecX : mMatrix) {
   1399                 if (!validateVecInput(vecX)) {
   1400                     continue;
   1401                 }
   1402                 for (Allocation vecY : mMatrix) {
   1403                     if (!validateVecInput(vecY)) {
   1404                         continue;
   1405                     }
   1406                     Element elemA = matA.getType().getElement();
   1407                     if (validateSYMV(elemA, Uplo, matA, vecX, incX, vecY, incY) && K >= 0) {
   1408                         try {
   1409                             if (elemA.isCompatible(Element.F32(mRS))) {
   1410                                 mBLAS.SSBMV(Uplo, K, alphaS, matA, vecX, incX, betaS, vecY, incY);
   1411                             } else if (elemA.isCompatible(Element.F64(mRS))) {
   1412                                 mBLAS.DSBMV(Uplo, K, alphaD, matA, vecX, incX, betaD, vecY, incY);
   1413                             }
   1414                         } catch (RSRuntimeException e) {
   1415                             fail("should NOT throw RSRuntimeException");
   1416                         }
   1417                     } else {
   1418                         try {
   1419                             mBLAS.SSBMV(Uplo, K, alphaS, matA, vecX, incX, betaS, vecY, incY);
   1420                             fail("should throw RSRuntimeException for SSBMV");
   1421                         } catch (RSRuntimeException e) {
   1422                         }
   1423                         try {
   1424                             mBLAS.DSBMV(Uplo, K, alphaD, matA, vecX, incX, betaD, vecY, incY);
   1425                             fail("should throw RSRuntimeException for DSBMV");
   1426                         } catch (RSRuntimeException e) {
   1427                         }
   1428                     }
   1429                 }
   1430             }
   1431         }
   1432     }
   1433 
   1434     public void L2_xSBMV_API(ArrayList<Allocation> mMatrix) {
   1435         for (int Uplo : mUplo) {
   1436             for (int K : mK) {
   1437                 for (int incX : mInc) {
   1438                     xSBMV_API_test(Uplo, K, incX, incX, mMatrix);
   1439                 }
   1440             }
   1441         }
   1442     }
   1443 
   1444     public void test_L2_SSBMV_API() {
   1445         L2_xSBMV_API(mMatrixS);
   1446     }
   1447 
   1448     public void test_L2_DSBMV_API() {
   1449         L2_xSBMV_API(mMatrixD);
   1450     }
   1451 
   1452     public void test_L2_SSBMV_Correctness() {
   1453         int uplo = ScriptIntrinsicBLAS.UPPER;
   1454         int incX = 1;
   1455         int incY = 1;
   1456 
   1457         // Populate input allocations
   1458         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   1459         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   1460         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   1461         matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_sSBMV_A_nn);
   1462         vectorXS.copyFrom(mBLASData.L2_sSBMV_x_n1);
   1463         vectorYS.copyFrom(mBLASData.L2_sSBMV_y_n1);
   1464 
   1465         // Test for the default case:
   1466         mBLAS.SSBMV(uplo, mBLASData.KL, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
   1467         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   1468         vectorYRef.copyFrom(mBLASData.L2_sSBMV_o_N);
   1469         verifyMatrix(vectorYRef, vectorYS);
   1470 
   1471         // Test for incX = 2 & incY = 3;
   1472         incX = 2;
   1473         incY = 3;
   1474         int dimX = 1 + (mBLASData.dN - 1) * incX;
   1475         int dimY = 1 + (mBLASData.dN - 1) * incY;
   1476         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   1477         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
   1478         vectorXS.copyFrom(mBLASData.L2_sSBMV_x_n2);
   1479         vectorYS.copyFrom(mBLASData.L2_sSBMV_y_n2);
   1480 
   1481         mBLAS.SSBMV(uplo, mBLASData.KL, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
   1482         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
   1483         vectorYRef.copyFrom(mBLASData.L2_sSBMV_o_N2);
   1484         verifyMatrix(vectorYRef, vectorYS);
   1485 
   1486         mRS.finish();
   1487         checkError();
   1488     }
   1489 
   1490     public void test_L2_DSBMV_Correctness() {
   1491         int uplo = ScriptIntrinsicBLAS.UPPER;
   1492         int incX = 1;
   1493         int incY = 1;
   1494 
   1495         // Populate input allocations
   1496         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   1497         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   1498         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   1499         matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_dSBMV_A_nn);
   1500         vectorXD.copyFrom(mBLASData.L2_dSBMV_x_n1);
   1501         vectorYD.copyFrom(mBLASData.L2_dSBMV_y_n1);
   1502 
   1503         // Test for the default case:
   1504         mBLAS.DSBMV(uplo, mBLASData.KL, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
   1505         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   1506         vectorYRef.copyFrom(mBLASData.L2_dSBMV_o_N);
   1507         verifyMatrix(vectorYRef, vectorYD);
   1508 
   1509         // Test for incX = 2 & incY = 3;
   1510         incX = 2;
   1511         incY = 3;
   1512         int dimX = 1 + (mBLASData.dN - 1) * incX;
   1513         int dimY = 1 + (mBLASData.dN - 1) * incY;
   1514         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   1515         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
   1516         vectorXD.copyFrom(mBLASData.L2_dSBMV_x_n2);
   1517         vectorYD.copyFrom(mBLASData.L2_dSBMV_y_n2);
   1518 
   1519         mBLAS.DSBMV(uplo, mBLASData.KL, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
   1520         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
   1521         vectorYRef.copyFrom(mBLASData.L2_dSBMV_o_N2);
   1522         verifyMatrix(vectorYRef, vectorYD);
   1523 
   1524         mRS.finish();
   1525         checkError();
   1526     }
   1527 
   1528 
   1529     private boolean validateSPMV(Element e, int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {
   1530         if (!validateUplo(Uplo)) {
   1531             return false;
   1532         }
   1533         if (!Ap.getType().getElement().isCompatible(e) ||
   1534             !X.getType().getElement().isCompatible(e) ||
   1535             !Y.getType().getElement().isCompatible(e)) {
   1536             return false;
   1537         }
   1538         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
   1539             return false;
   1540         }
   1541 
   1542         if (Ap.getType().getY() > 1) {
   1543             return false;
   1544         }
   1545 
   1546         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
   1547         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
   1548             return false;
   1549         }
   1550         if (incX <= 0 || incY <= 0) {
   1551             return false;
   1552         }
   1553         int expectedXDim = 1 + (N - 1) * incX;
   1554         if (X.getType().getX() != expectedXDim) {
   1555             return false;
   1556         }
   1557         int expectedYDim = 1 + (N - 1) * incY;
   1558         if (Y.getType().getX() != expectedYDim) {
   1559             return false;
   1560         }
   1561 
   1562         return true;
   1563     }
   1564 
   1565     private void xSPMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
   1566         for (Allocation matA : mMatrix) {
   1567             for (Allocation vecX : mMatrix) {
   1568                 if (!validateVecInput(vecX)) {
   1569                     continue;
   1570                 }
   1571                 for (Allocation vecY : mMatrix) {
   1572                     if (!validateVecInput(vecY)) {
   1573                         continue;
   1574                     }
   1575                     Element elemA = matA.getType().getElement();
   1576                     if (validateSPMV(elemA, Uplo, matA, vecX, incX, vecY, incY)) {
   1577                         try {
   1578                             if (elemA.isCompatible(Element.F32(mRS))) {
   1579                                 mBLAS.SSPMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY);
   1580                             } else if (elemA.isCompatible(Element.F64(mRS))) {
   1581                                 mBLAS.DSPMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY);
   1582                             }
   1583                         } catch (RSRuntimeException e) {
   1584                             fail("should NOT throw RSRuntimeException");
   1585                         }
   1586                     } else {
   1587                         try {
   1588                             mBLAS.SSPMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY);
   1589                             fail("should throw RSRuntimeException for SSPMV");
   1590                         } catch (RSRuntimeException e) {
   1591                         }
   1592                         try {
   1593                             mBLAS.DSPMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY);
   1594                             fail("should throw RSRuntimeException for DSPMV");
   1595                         } catch (RSRuntimeException e) {
   1596                         }
   1597                     }
   1598                 }
   1599             }
   1600         }
   1601     }
   1602 
   1603     public void L2_xSPMV_API(ArrayList<Allocation> mMatrix) {
   1604         for (int Uplo : mUplo) {
   1605             for (int incX : mInc) {
   1606                 xSPMV_API_test(Uplo, incX, incX, mMatrix);
   1607             }
   1608         }
   1609     }
   1610 
   1611     public void test_L2_SSPMV_API() {
   1612         L2_xSPMV_API(mMatrixS);
   1613     }
   1614 
   1615     public void test_L2_DSPMV_API() {
   1616         L2_xSPMV_API(mMatrixD);
   1617     }
   1618 
   1619     public void test_L2_SSPMV_Correctness() {
   1620         int uplo = ScriptIntrinsicBLAS.UPPER;
   1621         int incX = 1;
   1622         int incY = 1;
   1623 
   1624         // Populate input allocations
   1625         int N = mBLASData.dN;
   1626         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
   1627         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
   1628         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
   1629         matrixAS.copyFrom(mBLASData.L2_sSYMV_A_nn_pu);
   1630         vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n1);
   1631         vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n1);
   1632 
   1633         // Test for the default case:
   1634         mBLAS.SSPMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
   1635         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
   1636         vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N);
   1637         verifyMatrix(vectorYRef, vectorYS);
   1638 
   1639         // Test for incX = 2 & incY = 3;
   1640         incX = 2;
   1641         incY = 3;
   1642         int dimX = 1 + (N - 1) * incX;
   1643         int dimY = 1 + (N - 1) * incY;
   1644         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   1645         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
   1646         vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n2);
   1647         vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n2);
   1648 
   1649         mBLAS.SSPMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
   1650         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
   1651         vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N2);
   1652         verifyMatrix(vectorYRef, vectorYS);
   1653 
   1654         mRS.finish();
   1655         checkError();
   1656     }
   1657 
   1658     public void test_L2_DSPMV_Correctness() {
   1659         int uplo = ScriptIntrinsicBLAS.UPPER;
   1660         int incX = 1;
   1661         int incY = 1;
   1662 
   1663         // Populate input allocations
   1664         int N = mBLASData.dN;
   1665         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
   1666         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
   1667         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
   1668         matrixAD.copyFrom(mBLASData.L2_dSYMV_A_nn_pu);
   1669         vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n1);
   1670         vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n1);
   1671 
   1672         // Test for the default case:
   1673         mBLAS.DSPMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
   1674         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
   1675         vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N);
   1676         verifyMatrix(vectorYRef, vectorYD);
   1677 
   1678         // Test for incX = 2 & incY = 3;
   1679         incX = 2;
   1680         incY = 3;
   1681         int dimX = 1 + (N - 1) * incX;
   1682         int dimY = 1 + (N - 1) * incY;
   1683         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   1684         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
   1685         vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n2);
   1686         vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n2);
   1687 
   1688         mBLAS.DSPMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
   1689         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
   1690         vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N2);
   1691         verifyMatrix(vectorYRef, vectorYD);
   1692 
   1693         mRS.finish();
   1694         checkError();
   1695     }
   1696 
   1697 
   1698 
   1699     private boolean validateTRMV(Element e, int Uplo, int TransA, int Diag, Allocation A, Allocation X, int incX) {
   1700         if (!validateUplo(Uplo)) {
   1701             return false;
   1702         }
   1703         if (!validateTranspose(TransA)) {
   1704             return false;
   1705         }
   1706         if (!validateDiag(Diag)) {
   1707             return false;
   1708         }
   1709         int N = A.getType().getY();
   1710         if (A.getType().getX() != N) {
   1711             return false;
   1712         }
   1713         if (!A.getType().getElement().isCompatible(e) ||
   1714             !X.getType().getElement().isCompatible(e)) {
   1715             return false;
   1716         }
   1717         if (X.getType().getY() > 1) {
   1718             return false;
   1719         }
   1720 
   1721         if (incX <= 0) {
   1722             return false;
   1723         }
   1724         int expectedXDim = 1 + (N - 1) * incX;
   1725         if (X.getType().getX() != expectedXDim) {
   1726             return false;
   1727         }
   1728         return true;
   1729     }
   1730 
   1731     private void xTRMV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) {
   1732         for (Allocation matA : mMatrix) {
   1733             for (Allocation vecX : mMatrix) {
   1734                 if (!validateVecInput(vecX)) {
   1735                     continue;
   1736                 }
   1737                 Element elemA = matA.getType().getElement();
   1738                 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) {
   1739                     try {
   1740                         if (elemA.isCompatible(Element.F32(mRS))) {
   1741                             mBLAS.STRMV(Uplo, TransA, Diag, matA, vecX, incX);
   1742                         } else if (elemA.isCompatible(Element.F64(mRS))) {
   1743                             mBLAS.DTRMV(Uplo, TransA, Diag, matA, vecX, incX);
   1744                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
   1745                             mBLAS.CTRMV(Uplo, TransA, Diag, matA, vecX, incX);
   1746                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   1747                             mBLAS.ZTRMV(Uplo, TransA, Diag, matA, vecX, incX);
   1748                         }
   1749                     } catch (RSRuntimeException e) {
   1750                         fail("should NOT throw RSRuntimeException");
   1751                     }
   1752                 } else {
   1753                     try {
   1754                         mBLAS.STRMV(Uplo, TransA, Diag, matA, vecX, incX);
   1755                         fail("should throw RSRuntimeException for STRMV");
   1756                     } catch (RSRuntimeException e) {
   1757                     }
   1758                     try {
   1759                         mBLAS.DTRMV(Uplo, TransA, Diag, matA, vecX, incX);
   1760                         fail("should throw RSRuntimeException for DTRMV");
   1761                     } catch (RSRuntimeException e) {
   1762                     }
   1763                     try {
   1764                         mBLAS.CTRMV(Uplo, TransA, Diag, matA, vecX, incX);
   1765                         fail("should throw RSRuntimeException for CTRMV");
   1766                     } catch (RSRuntimeException e) {
   1767                     }
   1768                     try {
   1769                         mBLAS.ZTRMV(Uplo, TransA, Diag, matA, vecX, incX);
   1770                         fail("should throw RSRuntimeException for ZTRMV");
   1771                     } catch (RSRuntimeException e) {
   1772                     }
   1773                 }
   1774             }
   1775         }
   1776     }
   1777 
   1778     public void L2_xTRMV_API(ArrayList<Allocation> mMatrix) {
   1779         for (int Uplo : mUplo) {
   1780             for (int TransA : mTranspose) {
   1781                 for (int Diag : mDiag) {
   1782                     for (int incX : mInc) {
   1783                         xTRMV_API_test(Uplo, TransA, Diag, incX, mMatrix);
   1784                     }
   1785                 }
   1786             }
   1787         }
   1788     }
   1789 
   1790     public void test_L2_STRMV_API() {
   1791         L2_xTRMV_API(mMatrixS);
   1792     }
   1793 
   1794     public void test_L2_DTRMV_API() {
   1795         L2_xTRMV_API(mMatrixD);
   1796     }
   1797 
   1798     public void test_L2_CTRMV_API() {
   1799         L2_xTRMV_API(mMatrixC);
   1800     }
   1801 
   1802     public void test_L2_ZTRMV_API() {
   1803         L2_xTRMV_API(mMatrixZ);
   1804     }
   1805 
   1806     public void test_L2_STRMV_Correctness() {
   1807         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   1808         int uplo = ScriptIntrinsicBLAS.UPPER;
   1809         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   1810         int incX = 1;
   1811 
   1812         // Populate input allocations
   1813         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   1814         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   1815         matrixAS.copyFrom(mBLASData.L2_sTRMV_A_nn);
   1816         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1);
   1817 
   1818         // Test for the default case: NO_TRANS
   1819         mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX);
   1820         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   1821         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN);
   1822         verifyMatrix(vectorXRef, vectorXS);
   1823 
   1824         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   1825         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   1826         // Reload vector X, since it was overwritten by BLAS.
   1827         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1);
   1828         mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX);
   1829         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UT);
   1830         verifyMatrix(vectorXRef, vectorXS);
   1831 
   1832         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   1833         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1);
   1834         mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX);
   1835         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UH);
   1836         verifyMatrix(vectorXRef, vectorXS);
   1837 
   1838         // Test for incX = 2;
   1839         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   1840         incX = 2;
   1841         int dimX = 1 + (mBLASData.dN - 1) * incX;
   1842         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   1843         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n2);
   1844 
   1845         mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX);
   1846         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   1847         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN2);
   1848         verifyMatrix(vectorXRef, vectorXS);
   1849 
   1850         mRS.finish();
   1851         checkError();
   1852     }
   1853 
   1854     public void test_L2_DTRMV_Correctness() {
   1855         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   1856         int uplo = ScriptIntrinsicBLAS.UPPER;
   1857         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   1858         int incX = 1;
   1859 
   1860         // Populate input allocations
   1861         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   1862         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   1863         matrixAD.copyFrom(mBLASData.L2_dTRMV_A_nn);
   1864         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1);
   1865 
   1866         // Test for the default case: NO_TRANS
   1867         mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX);
   1868         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   1869         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN);
   1870         verifyMatrix(vectorXRef, vectorXD);
   1871 
   1872         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   1873         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   1874         // Reload vector X, since it was overwritten by BLAS.
   1875         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1);
   1876         mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX);
   1877         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UT);
   1878         verifyMatrix(vectorXRef, vectorXD);
   1879 
   1880         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   1881         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1);
   1882         mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX);
   1883         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UH);
   1884         verifyMatrix(vectorXRef, vectorXD);
   1885 
   1886         // Test for incX = 2;
   1887         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   1888         incX = 2;
   1889         int dimX = 1 + (mBLASData.dN - 1) * incX;
   1890         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   1891         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n2);
   1892 
   1893         mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX);
   1894         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   1895         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN2);
   1896         verifyMatrix(vectorXRef, vectorXD);
   1897 
   1898         mRS.finish();
   1899         checkError();
   1900     }
   1901 
   1902     public void test_L2_CTRMV_Correctness() {
   1903         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   1904         int uplo = ScriptIntrinsicBLAS.UPPER;
   1905         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   1906         int incX = 1;
   1907 
   1908         // Populate input allocations
   1909         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   1910         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   1911         matrixAC.copyFrom(mBLASData.L2_cTRMV_A_nn);
   1912         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1);
   1913 
   1914         // Test for the default case: NO_TRANS
   1915         mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX);
   1916         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   1917         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN);
   1918         verifyMatrix(vectorXRef, vectorXC);
   1919 
   1920         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   1921         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   1922         // Reload vector X, since it was overwritten by BLAS.
   1923         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1);
   1924         mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX);
   1925         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UT);
   1926         verifyMatrix(vectorXRef, vectorXC);
   1927 
   1928         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   1929         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1);
   1930         mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX);
   1931         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UH);
   1932         verifyMatrix(vectorXRef, vectorXC);
   1933 
   1934         // Test for incX = 2;
   1935         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   1936         incX = 2;
   1937         int dimX = 1 + (mBLASData.dN - 1) * incX;
   1938         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   1939         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n2);
   1940 
   1941         mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX);
   1942         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   1943         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN2);
   1944         verifyMatrix(vectorXRef, vectorXC);
   1945 
   1946         mRS.finish();
   1947         checkError();
   1948     }
   1949 
   1950     public void test_L2_ZTRMV_Correctness() {
   1951         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   1952         int uplo = ScriptIntrinsicBLAS.UPPER;
   1953         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   1954         int incX = 1;
   1955 
   1956         // Populate input allocations
   1957         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   1958         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   1959         matrixAZ.copyFrom(mBLASData.L2_zTRMV_A_nn);
   1960         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1);
   1961 
   1962         // Test for the default case: NO_TRANS
   1963         mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   1964         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   1965         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN);
   1966         verifyMatrix(vectorXRef, vectorXZ);
   1967 
   1968         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   1969         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   1970         // Reload vector X, since it was overwritten by BLAS.
   1971         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1);
   1972         mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   1973         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UT);
   1974         verifyMatrix(vectorXRef, vectorXZ);
   1975 
   1976         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   1977         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1);
   1978         mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   1979         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UH);
   1980         verifyMatrix(vectorXRef, vectorXZ);
   1981 
   1982         // Test for incX = 2;
   1983         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   1984         incX = 2;
   1985         int dimX = 1 + (mBLASData.dN - 1) * incX;
   1986         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   1987         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n2);
   1988 
   1989         mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   1990         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   1991         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN2);
   1992         verifyMatrix(vectorXRef, vectorXZ);
   1993 
   1994         mRS.finish();
   1995         checkError();
   1996     }
   1997 
   1998 
   1999 
   2000     private void xTBMV_API_test(int Uplo, int TransA, int Diag, int K, int incX, ArrayList<Allocation> mMatrix) {
   2001         for (Allocation matA : mMatrix) {
   2002             for (Allocation vecX : mMatrix) {
   2003                 Element elemA = matA.getType().getElement();
   2004                 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX) && K >= 0) {
   2005                     try {
   2006                         if (elemA.isCompatible(Element.F32(mRS))) {
   2007                             mBLAS.STBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2008                         } else if (elemA.isCompatible(Element.F64(mRS))) {
   2009                             mBLAS.DTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2010                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
   2011                             mBLAS.CTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2012                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   2013                             mBLAS.ZTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2014                         }
   2015                     } catch (RSRuntimeException e) {
   2016                         fail("should NOT throw RSRuntimeException");
   2017                     }
   2018                 } else {
   2019                     try {
   2020                         mBLAS.STBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2021                         fail("should throw RSRuntimeException for STBMV");
   2022                     } catch (RSRuntimeException e) {
   2023                     }
   2024                     try {
   2025                         mBLAS.DTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2026                         fail("should throw RSRuntimeException for DTBMV");
   2027                     } catch (RSRuntimeException e) {
   2028                     }
   2029                     try {
   2030                         mBLAS.CTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2031                         fail("should throw RSRuntimeException for CTBMV");
   2032                     } catch (RSRuntimeException e) {
   2033                     }
   2034                     try {
   2035                         mBLAS.ZTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2036                         fail("should throw RSRuntimeException for ZTBMV");
   2037                     } catch (RSRuntimeException e) {
   2038                     }
   2039                 }
   2040             }
   2041         }
   2042     }
   2043 
   2044     public void L2_xTBMV_API(ArrayList<Allocation> mMatrix) {
   2045         for (int Uplo : mUplo) {
   2046             for (int TransA : mTranspose) {
   2047                 for (int Diag : mDiag) {
   2048                     for (int K : mK) {
   2049                         for (int incX : mInc) {
   2050                             xTBMV_API_test(Uplo, TransA, Diag, K, incX, mMatrix);
   2051                         }
   2052                     }
   2053                 }
   2054             }
   2055         }
   2056     }
   2057 
   2058     public void test_L2_STBMV_API() {
   2059         L2_xTBMV_API(mMatrixS);
   2060     }
   2061 
   2062     public void test_L2_DTBMV_API() {
   2063         L2_xTBMV_API(mMatrixD);
   2064     }
   2065 
   2066     public void test_L2_CTBMV_API() {
   2067         L2_xTBMV_API(mMatrixC);
   2068     }
   2069 
   2070     public void test_L2_ZTBMV_API() {
   2071         L2_xTBMV_API(mMatrixZ);
   2072     }
   2073 
   2074     public void test_L2_STBMV_Correctness() {
   2075         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2076         int uplo = ScriptIntrinsicBLAS.UPPER;
   2077         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   2078         int incX = 1;
   2079 
   2080         // Populate input allocations
   2081         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   2082         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   2083         matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_sTBMV_A_nn);
   2084         vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n1);
   2085 
   2086         // Test for the default case: NO_TRANS
   2087         mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
   2088         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   2089         vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UN);
   2090         verifyMatrix(vectorXRef, vectorXS);
   2091 
   2092         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   2093         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   2094         // Reload vector X, since it was overwritten by BLAS.
   2095         vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n1);
   2096         mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
   2097         vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UT);
   2098         verifyMatrix(vectorXRef, vectorXS);
   2099 
   2100         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   2101         vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n1);
   2102         mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
   2103         vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UH);
   2104         verifyMatrix(vectorXRef, vectorXS);
   2105 
   2106         // Test for incX = 2;
   2107         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2108         incX = 2;
   2109         int dimX = 1 + (mBLASData.dN - 1) * incX;
   2110         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   2111         vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n2);
   2112 
   2113         mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
   2114         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   2115         vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UN2);
   2116         verifyMatrix(vectorXRef, vectorXS);
   2117 
   2118         mRS.finish();
   2119         checkError();
   2120     }
   2121 
   2122     public void test_L2_DTBMV_Correctness() {
   2123         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2124         int uplo = ScriptIntrinsicBLAS.UPPER;
   2125         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   2126         int incX = 1;
   2127 
   2128         // Populate input allocations
   2129         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   2130         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   2131         matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_dTBMV_A_nn);
   2132         vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n1);
   2133 
   2134         // Test for the default case: NO_TRANS
   2135         mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
   2136         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   2137         vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UN);
   2138         verifyMatrix(vectorXRef, vectorXD);
   2139 
   2140         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   2141         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   2142         // Reload vector X, since it was overwritten by BLAS.
   2143         vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n1);
   2144         mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
   2145         vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UT);
   2146         verifyMatrix(vectorXRef, vectorXD);
   2147 
   2148         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   2149         vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n1);
   2150         mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
   2151         vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UH);
   2152         verifyMatrix(vectorXRef, vectorXD);
   2153 
   2154         // Test for incX = 2;
   2155         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2156         incX = 2;
   2157         int dimX = 1 + (mBLASData.dN - 1) * incX;
   2158         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   2159         vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n2);
   2160 
   2161         mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
   2162         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   2163         vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UN2);
   2164         verifyMatrix(vectorXRef, vectorXD);
   2165 
   2166         mRS.finish();
   2167         checkError();
   2168     }
   2169 
   2170     public void test_L2_CTBMV_Correctness() {
   2171         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2172         int uplo = ScriptIntrinsicBLAS.UPPER;
   2173         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   2174         int incX = 1;
   2175 
   2176         // Populate input allocations
   2177         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   2178         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   2179         matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_cTBMV_A_nn);
   2180         vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n1);
   2181 
   2182         // Test for the default case: NO_TRANS
   2183         mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
   2184         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   2185         vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UN);
   2186         verifyMatrix(vectorXRef, vectorXC);
   2187 
   2188         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   2189         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   2190         // Reload vector X, since it was overwritten by BLAS.
   2191         vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n1);
   2192         mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
   2193         vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UT);
   2194         verifyMatrix(vectorXRef, vectorXC);
   2195 
   2196         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   2197         vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n1);
   2198         mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
   2199         vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UH);
   2200         verifyMatrix(vectorXRef, vectorXC);
   2201 
   2202         // Test for incX = 2;
   2203         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2204         incX = 2;
   2205         int dimX = 1 + (mBLASData.dN - 1) * incX;
   2206         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   2207         vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n2);
   2208 
   2209         mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
   2210         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   2211         vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UN2);
   2212         verifyMatrix(vectorXRef, vectorXC);
   2213 
   2214         mRS.finish();
   2215         checkError();
   2216     }
   2217 
   2218     public void test_L2_ZTBMV_Correctness() {
   2219         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2220         int uplo = ScriptIntrinsicBLAS.UPPER;
   2221         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   2222         int incX = 1;
   2223 
   2224         // Populate input allocations
   2225         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   2226         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   2227         matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_zTBMV_A_nn);
   2228         vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n1);
   2229 
   2230         // Test for the default case: NO_TRANS
   2231         mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
   2232         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   2233         vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UN);
   2234         verifyMatrix(vectorXRef, vectorXZ);
   2235 
   2236         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   2237         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   2238         // Reload vector X, since it was overwritten by BLAS.
   2239         vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n1);
   2240         mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
   2241         vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UT);
   2242         verifyMatrix(vectorXRef, vectorXZ);
   2243 
   2244         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   2245         vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n1);
   2246         mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
   2247         vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UH);
   2248         verifyMatrix(vectorXRef, vectorXZ);
   2249 
   2250         // Test for incX = 2;
   2251         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2252         incX = 2;
   2253         int dimX = 1 + (mBLASData.dN - 1) * incX;
   2254         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   2255         vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n2);
   2256 
   2257         mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
   2258         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   2259         vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UN2);
   2260         verifyMatrix(vectorXRef, vectorXZ);
   2261 
   2262         mRS.finish();
   2263         checkError();
   2264     }
   2265 
   2266 
   2267     private boolean validateTPMV(Element e, int Uplo, int TransA, int Diag, Allocation Ap, Allocation X, int incX) {
   2268         if (!validateUplo(Uplo)) {
   2269             return false;
   2270         }
   2271         if (!validateTranspose(TransA)) {
   2272             return false;
   2273         }
   2274         if (!validateDiag(Diag)) {
   2275             return false;
   2276         }
   2277         if (!Ap.getType().getElement().isCompatible(e) ||
   2278             !X.getType().getElement().isCompatible(e)) {
   2279             return false;
   2280         }
   2281         if (X.getType().getY() > 1) {
   2282             return false;
   2283         }
   2284 
   2285         if (Ap.getType().getY() > 1) {
   2286             return false;
   2287         }
   2288 
   2289         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
   2290         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
   2291             return false;
   2292         }
   2293         if (incX <= 0) {
   2294             return false;
   2295         }
   2296         int expectedXDim = 1 + (N - 1) * incX;
   2297         if (X.getType().getX() != expectedXDim) {
   2298             return false;
   2299         }
   2300 
   2301         return true;
   2302     }
   2303 
   2304     private void xTPMV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) {
   2305         for (Allocation matA : mMatrix) {
   2306             for (Allocation vecX : mMatrix) {
   2307                 if (!validateVecInput(vecX)) {
   2308                     continue;
   2309                 }
   2310                 Element elemA = matA.getType().getElement();
   2311                 if (validateTPMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) {
   2312                     try {
   2313                         if (elemA.isCompatible(Element.F32(mRS))) {
   2314                             mBLAS.STPMV(Uplo, TransA, Diag, matA, vecX, incX);
   2315                         } else if (elemA.isCompatible(Element.F64(mRS))) {
   2316                             mBLAS.DTPMV(Uplo, TransA, Diag, matA, vecX, incX);
   2317                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
   2318                             mBLAS.CTPMV(Uplo, TransA, Diag, matA, vecX, incX);
   2319                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   2320                             mBLAS.ZTPMV(Uplo, TransA, Diag, matA, vecX, incX);
   2321                         }
   2322                     } catch (RSRuntimeException e) {
   2323                         fail("should NOT throw RSRuntimeException");
   2324                     }
   2325                 } else {
   2326                     try {
   2327                         mBLAS.STPMV(Uplo, TransA, Diag, matA, vecX, incX);
   2328                         fail("should throw RSRuntimeException for STPMV");
   2329                     } catch (RSRuntimeException e) {
   2330                     }
   2331                     try {
   2332                         mBLAS.DTPMV(Uplo, TransA, Diag, matA, vecX, incX);
   2333                         fail("should throw RSRuntimeException for DTPMV");
   2334                     } catch (RSRuntimeException e) {
   2335                     }
   2336                     try {
   2337                         mBLAS.CTPMV(Uplo, TransA, Diag, matA, vecX, incX);
   2338                         fail("should throw RSRuntimeException for CTPMV");
   2339                     } catch (RSRuntimeException e) {
   2340                     }
   2341                     try {
   2342                         mBLAS.ZTPMV(Uplo, TransA, Diag, matA, vecX, incX);
   2343                         fail("should throw RSRuntimeException for ZTPMV");
   2344                     } catch (RSRuntimeException e) {
   2345                     }
   2346                 }
   2347             }
   2348         }
   2349     }
   2350 
   2351     public void L2_xTPMV_API(ArrayList<Allocation> mMatrix) {
   2352         for (int Uplo : mUplo) {
   2353             for (int TransA : mTranspose) {
   2354                 for (int Diag : mDiag) {
   2355                     for (int incX : mInc) {
   2356                         xTPMV_API_test(Uplo, TransA, Diag, incX, mMatrix);
   2357                     }
   2358                 }
   2359             }
   2360         }
   2361     }
   2362 
   2363     public void test_L2_STPMV_API() {
   2364         L2_xTPMV_API(mMatrixS);
   2365     }
   2366 
   2367     public void test_L2_DTPMV_API() {
   2368         L2_xTPMV_API(mMatrixD);
   2369     }
   2370 
   2371     public void test_L2_CTPMV_API() {
   2372         L2_xTPMV_API(mMatrixC);
   2373     }
   2374 
   2375     public void test_L2_ZTPMV_API() {
   2376         L2_xTPMV_API(mMatrixZ);
   2377     }
   2378 
   2379     public void test_L2_STPMV_Correctness() {
   2380         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2381         int uplo = ScriptIntrinsicBLAS.UPPER;
   2382         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   2383         int incX = 1;
   2384 
   2385         // Populate input allocations
   2386         int N = mBLASData.dN;
   2387         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
   2388         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
   2389         matrixAS.copyFrom(mBLASData.L2_sTRMV_A_nn_pu);
   2390         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1);
   2391 
   2392         // Test for the default case: NO_TRANS
   2393         mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX);
   2394         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
   2395         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN);
   2396         verifyMatrix(vectorXRef, vectorXS);
   2397 
   2398         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   2399         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   2400         // Reload vector X, since it was overwritten by BLAS.
   2401         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1);
   2402         mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX);
   2403         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UT);
   2404         verifyMatrix(vectorXRef, vectorXS);
   2405 
   2406         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   2407         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1);
   2408         mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX);
   2409         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UH);
   2410         verifyMatrix(vectorXRef, vectorXS);
   2411 
   2412         // Test for incX = 2;
   2413         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2414         incX = 2;
   2415         int dimX = 1 + (N - 1) * incX;
   2416         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   2417         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n2);
   2418 
   2419         mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX);
   2420         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   2421         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN2);
   2422         verifyMatrix(vectorXRef, vectorXS);
   2423 
   2424         mRS.finish();
   2425         checkError();
   2426     }
   2427 
   2428     public void test_L2_DTPMV_Correctness() {
   2429         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2430         int uplo = ScriptIntrinsicBLAS.UPPER;
   2431         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   2432         int incX = 1;
   2433 
   2434         // Populate input allocations
   2435         int N = mBLASData.dN;
   2436         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
   2437         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
   2438         matrixAD.copyFrom(mBLASData.L2_dTRMV_A_nn_pu);
   2439         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1);
   2440 
   2441         // Test for the default case: NO_TRANS
   2442         mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX);
   2443         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
   2444         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN);
   2445         verifyMatrix(vectorXRef, vectorXD);
   2446 
   2447         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   2448         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   2449         // Reload vector X, since it was overwritten by BLAS.
   2450         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1);
   2451         mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX);
   2452         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UT);
   2453         verifyMatrix(vectorXRef, vectorXD);
   2454 
   2455         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   2456         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1);
   2457         mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX);
   2458         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UH);
   2459         verifyMatrix(vectorXRef, vectorXD);
   2460 
   2461         // Test for incX = 2;
   2462         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2463         incX = 2;
   2464         int dimX = 1 + (N - 1) * incX;
   2465         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   2466         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n2);
   2467 
   2468         mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX);
   2469         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   2470         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN2);
   2471         verifyMatrix(vectorXRef, vectorXD);
   2472 
   2473         mRS.finish();
   2474         checkError();
   2475     }
   2476 
   2477     public void test_L2_CTPMV_Correctness() {
   2478         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2479         int uplo = ScriptIntrinsicBLAS.UPPER;
   2480         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   2481         int incX = 1;
   2482 
   2483         // Populate input allocations
   2484         int N = mBLASData.dN;
   2485         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
   2486         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
   2487         matrixAC.copyFrom(mBLASData.L2_cTRMV_A_nn_pu);
   2488         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1);
   2489 
   2490         // Test for the default case: NO_TRANS
   2491         mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX);
   2492         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
   2493         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN);
   2494         verifyMatrix(vectorXRef, vectorXC);
   2495 
   2496         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   2497         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   2498         // Reload vector X, since it was overwritten by BLAS.
   2499         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1);
   2500         mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX);
   2501         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UT);
   2502         verifyMatrix(vectorXRef, vectorXC);
   2503 
   2504         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   2505         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1);
   2506         mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX);
   2507         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UH);
   2508         verifyMatrix(vectorXRef, vectorXC);
   2509 
   2510         // Test for incX = 2;
   2511         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2512         incX = 2;
   2513         int dimX = 1 + (N - 1) * incX;
   2514         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   2515         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n2);
   2516 
   2517         mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX);
   2518         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   2519         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN2);
   2520         verifyMatrix(vectorXRef, vectorXC);
   2521 
   2522         mRS.finish();
   2523         checkError();
   2524     }
   2525 
   2526     public void test_L2_ZTPMV_Correctness() {
   2527         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2528         int uplo = ScriptIntrinsicBLAS.UPPER;
   2529         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   2530         int incX = 1;
   2531 
   2532         // Populate input allocations
   2533         int N = mBLASData.dN;
   2534         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
   2535         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
   2536         matrixAZ.copyFrom(mBLASData.L2_zTRMV_A_nn_pu);
   2537         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1);
   2538 
   2539         // Test for the default case: NO_TRANS
   2540         mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   2541         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
   2542         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN);
   2543         verifyMatrix(vectorXRef, vectorXZ);
   2544 
   2545         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   2546         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   2547         // Reload vector X, since it was overwritten by BLAS.
   2548         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1);
   2549         mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   2550         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UT);
   2551         verifyMatrix(vectorXRef, vectorXZ);
   2552 
   2553         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   2554         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1);
   2555         mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   2556         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UH);
   2557         verifyMatrix(vectorXRef, vectorXZ);
   2558 
   2559         // Test for incX = 2;
   2560         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2561         incX = 2;
   2562         int dimX = 1 + (N - 1) * incX;
   2563         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   2564         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n2);
   2565 
   2566         mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   2567         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   2568         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN2);
   2569         verifyMatrix(vectorXRef, vectorXZ);
   2570 
   2571         mRS.finish();
   2572         checkError();
   2573     }
   2574 
   2575 
   2576     private void xTRSV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) {
   2577         for (Allocation matA : mMatrix) {
   2578             for (Allocation vecX : mMatrix) {
   2579                 if (!validateVecInput(vecX)) {
   2580                     continue;
   2581                 }
   2582                 Element elemA = matA.getType().getElement();
   2583                 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) {
   2584                     try {
   2585                         if (elemA.isCompatible(Element.F32(mRS))) {
   2586                             mBLAS.STRSV(Uplo, TransA, Diag, matA, vecX, incX);
   2587                         } else if (elemA.isCompatible(Element.F64(mRS))) {
   2588                             mBLAS.DTRSV(Uplo, TransA, Diag, matA, vecX, incX);
   2589                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
   2590                             mBLAS.CTRSV(Uplo, TransA, Diag, matA, vecX, incX);
   2591                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   2592                             mBLAS.ZTRSV(Uplo, TransA, Diag, matA, vecX, incX);
   2593                         }
   2594                     } catch (RSRuntimeException e) {
   2595                         fail("should NOT throw RSRuntimeException");
   2596                     }
   2597                 } else {
   2598                     try {
   2599                         mBLAS.STRSV(Uplo, TransA, Diag, matA, vecX, incX);
   2600                         fail("should throw RSRuntimeException for STRSV");
   2601                     } catch (RSRuntimeException e) {
   2602                     }
   2603                     try {
   2604                         mBLAS.DTRSV(Uplo, TransA, Diag, matA, vecX, incX);
   2605                         fail("should throw RSRuntimeException for DTRSV");
   2606                     } catch (RSRuntimeException e) {
   2607                     }
   2608                     try {
   2609                         mBLAS.CTRSV(Uplo, TransA, Diag, matA, vecX, incX);
   2610                         fail("should throw RSRuntimeException for CTRSV");
   2611                     } catch (RSRuntimeException e) {
   2612                     }
   2613                     try {
   2614                         mBLAS.ZTRSV(Uplo, TransA, Diag, matA, vecX, incX);
   2615                         fail("should throw RSRuntimeException for ZTRSV");
   2616                     } catch (RSRuntimeException e) {
   2617                     }
   2618                 }
   2619             }
   2620         }
   2621     }
   2622 
   2623     public void L2_xTRSV_API(ArrayList<Allocation> mMatrix) {
   2624         for (int Uplo : mUplo) {
   2625             for (int TransA : mTranspose) {
   2626                 for (int Diag : mDiag) {
   2627                     for (int incX : mInc) {
   2628                         xTRSV_API_test(Uplo, TransA, Diag, incX, mMatrix);
   2629                     }
   2630                 }
   2631             }
   2632         }
   2633     }
   2634 
   2635     public void test_L2_STRSV_API() {
   2636         L2_xTRSV_API(mMatrixS);
   2637     }
   2638 
   2639     public void test_L2_DTRSV_API() {
   2640         L2_xTRSV_API(mMatrixD);
   2641     }
   2642 
   2643     public void test_L2_CTRSV_API() {
   2644         L2_xTRSV_API(mMatrixC);
   2645     }
   2646 
   2647     public void test_L2_ZTRSV_API() {
   2648         L2_xTRSV_API(mMatrixZ);
   2649     }
   2650 
   2651     public void test_L2_STRSV_Correctness() {
   2652         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2653         int uplo = ScriptIntrinsicBLAS.UPPER;
   2654         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   2655         int incX = 1;
   2656 
   2657         // Populate input allocations
   2658         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   2659         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   2660         matrixAS.copyFrom(mBLASData.L2_sTRSV_A_nn);
   2661         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1);
   2662 
   2663         // Test for the default case: NO_TRANS
   2664         mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX);
   2665         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   2666         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN);
   2667         verifyMatrix(vectorXRef, vectorXS);
   2668 
   2669         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   2670         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   2671         // Reload vector X, since it was overwritten by BLAS.
   2672         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1);
   2673         mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX);
   2674         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UT);
   2675         verifyMatrix(vectorXRef, vectorXS);
   2676 
   2677         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   2678         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1);
   2679         mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX);
   2680         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UH);
   2681         verifyMatrix(vectorXRef, vectorXS);
   2682 
   2683         // Test for incX = 2;
   2684         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2685         incX = 2;
   2686         int dimX = 1 + (mBLASData.dN - 1) * incX;
   2687         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   2688         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n2);
   2689 
   2690         mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX);
   2691         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   2692         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN2);
   2693         verifyMatrix(vectorXRef, vectorXS);
   2694 
   2695         mRS.finish();
   2696         checkError();
   2697     }
   2698 
   2699     public void test_L2_DTRSV_Correctness() {
   2700         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2701         int uplo = ScriptIntrinsicBLAS.UPPER;
   2702         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   2703         int incX = 1;
   2704 
   2705         // Populate input allocations
   2706         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   2707         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   2708         matrixAD.copyFrom(mBLASData.L2_dTRSV_A_nn);
   2709         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1);
   2710 
   2711         // Test for the default case: NO_TRANS
   2712         mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX);
   2713         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   2714         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN);
   2715         verifyMatrix(vectorXRef, vectorXD);
   2716 
   2717         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   2718         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   2719         // Reload vector X, since it was overwritten by BLAS.
   2720         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1);
   2721         mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX);
   2722         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UT);
   2723         verifyMatrix(vectorXRef, vectorXD);
   2724 
   2725         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   2726         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1);
   2727         mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX);
   2728         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UH);
   2729         verifyMatrix(vectorXRef, vectorXD);
   2730 
   2731         // Test for incX = 2;
   2732         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2733         incX = 2;
   2734         int dimX = 1 + (mBLASData.dN - 1) * incX;
   2735         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   2736         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n2);
   2737 
   2738         mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX);
   2739         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   2740         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN2);
   2741         verifyMatrix(vectorXRef, vectorXD);
   2742 
   2743         mRS.finish();
   2744         checkError();
   2745     }
   2746 
   2747     public void test_L2_CTRSV_Correctness() {
   2748         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2749         int uplo = ScriptIntrinsicBLAS.UPPER;
   2750         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   2751         int incX = 1;
   2752 
   2753         // Populate input allocations
   2754         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   2755         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   2756         matrixAC.copyFrom(mBLASData.L2_cTRSV_A_nn);
   2757         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1);
   2758 
   2759         // Test for the default case: NO_TRANS
   2760         mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX);
   2761         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   2762         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN);
   2763         verifyMatrix(vectorXRef, vectorXC);
   2764 
   2765         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   2766         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   2767         // Reload vector X, since it was overwritten by BLAS.
   2768         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1);
   2769         mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX);
   2770         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UT);
   2771         verifyMatrix(vectorXRef, vectorXC);
   2772 
   2773         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   2774         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1);
   2775         mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX);
   2776         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UH);
   2777         verifyMatrix(vectorXRef, vectorXC);
   2778 
   2779         // Test for incX = 2;
   2780         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2781         incX = 2;
   2782         int dimX = 1 + (mBLASData.dN - 1) * incX;
   2783         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   2784         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n2);
   2785 
   2786         mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX);
   2787         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   2788         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN2);
   2789         verifyMatrix(vectorXRef, vectorXC);
   2790 
   2791         mRS.finish();
   2792         checkError();
   2793     }
   2794 
   2795     public void test_L2_ZTRSV_Correctness() {
   2796         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2797         int uplo = ScriptIntrinsicBLAS.UPPER;
   2798         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   2799         int incX = 1;
   2800 
   2801         // Populate input allocations
   2802         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   2803         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   2804         matrixAZ.copyFrom(mBLASData.L2_zTRSV_A_nn);
   2805         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1);
   2806 
   2807         // Test for the default case: NO_TRANS
   2808         mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   2809         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   2810         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN);
   2811         verifyMatrix(vectorXRef, vectorXZ);
   2812 
   2813         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   2814         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   2815         // Reload vector X, since it was overwritten by BLAS.
   2816         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1);
   2817         mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   2818         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UT);
   2819         verifyMatrix(vectorXRef, vectorXZ);
   2820 
   2821         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   2822         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1);
   2823         mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   2824         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UH);
   2825         verifyMatrix(vectorXRef, vectorXZ);
   2826 
   2827         // Test for incX = 2;
   2828         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2829         incX = 2;
   2830         int dimX = 1 + (mBLASData.dN - 1) * incX;
   2831         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   2832         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n2);
   2833 
   2834         mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   2835         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   2836         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN2);
   2837         verifyMatrix(vectorXRef, vectorXZ);
   2838 
   2839         mRS.finish();
   2840         checkError();
   2841     }
   2842 
   2843 
   2844     private void xTBSV_API_test(int Uplo, int TransA, int Diag, int K, int incX, ArrayList<Allocation> mMatrix) {
   2845         for (Allocation matA : mMatrix) {
   2846             for (Allocation vecX : mMatrix) {
   2847                 if (!validateVecInput(vecX)) {
   2848                     continue;
   2849                 }
   2850                 Element elemA = matA.getType().getElement();
   2851                 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX) && K >= 0) {
   2852                     try {
   2853                         if (elemA.isCompatible(Element.F32(mRS))) {
   2854                             mBLAS.STBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2855                         } else if (elemA.isCompatible(Element.F64(mRS))) {
   2856                             mBLAS.DTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2857                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
   2858                             mBLAS.CTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2859                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   2860                             mBLAS.ZTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2861                         }
   2862                     } catch (RSRuntimeException e) {
   2863                         fail("should NOT throw RSRuntimeException");
   2864                     }
   2865                 } else {
   2866                     try {
   2867                         mBLAS.STBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2868                         fail("should throw RSRuntimeException for STBSV");
   2869                     } catch (RSRuntimeException e) {
   2870                     }
   2871                     try {
   2872                         mBLAS.DTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2873                         fail("should throw RSRuntimeException for DTBSV");
   2874                     } catch (RSRuntimeException e) {
   2875                     }
   2876                     try {
   2877                         mBLAS.CTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2878                         fail("should throw RSRuntimeException for CTBSV");
   2879                     } catch (RSRuntimeException e) {
   2880                     }
   2881                     try {
   2882                         mBLAS.ZTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
   2883                         fail("should throw RSRuntimeException for ZTBSV");
   2884                     } catch (RSRuntimeException e) {
   2885                     }
   2886                 }
   2887             }
   2888         }
   2889     }
   2890 
   2891     public void L2_xTBSV_API(ArrayList<Allocation> mMatrix) {
   2892         for (int Uplo : mUplo) {
   2893             for (int TransA : mTranspose) {
   2894                 for (int Diag : mDiag) {
   2895                     for (int K : mK) {
   2896                         for (int incX : mInc) {
   2897                             xTBSV_API_test(Uplo, TransA, Diag, K, incX, mMatrix);
   2898                         }
   2899                     }
   2900                 }
   2901             }
   2902         }
   2903     }
   2904 
   2905     public void test_L2_STBSV_API() {
   2906         L2_xTBSV_API(mMatrixS);
   2907     }
   2908 
   2909     public void test_L2_DTBSV_API() {
   2910         L2_xTBSV_API(mMatrixD);
   2911     }
   2912 
   2913     public void test_L2_CTBSV_API() {
   2914         L2_xTBSV_API(mMatrixC);
   2915     }
   2916 
   2917     public void test_L2_ZTBSV_API() {
   2918         L2_xTBSV_API(mMatrixZ);
   2919     }
   2920 
   2921     public void test_L2_STBSV_Correctness() {
   2922         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2923         int uplo = ScriptIntrinsicBLAS.UPPER;
   2924         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   2925         int incX = 1;
   2926 
   2927         // Populate input allocations
   2928         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   2929         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   2930         matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_sTBSV_A_nn);
   2931         vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n1);
   2932 
   2933         // Test for the default case: NO_TRANS
   2934         mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
   2935         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   2936         vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UN);
   2937         verifyMatrix(vectorXRef, vectorXS);
   2938 
   2939         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   2940         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   2941         // Reload vector X, since it was overwritten by BLAS.
   2942         vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n1);
   2943         mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
   2944         vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UT);
   2945         verifyMatrix(vectorXRef, vectorXS);
   2946 
   2947         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   2948         vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n1);
   2949         mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
   2950         vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UH);
   2951         verifyMatrix(vectorXRef, vectorXS);
   2952 
   2953         // Test for incX = 2;
   2954         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2955         incX = 2;
   2956         int dimX = 1 + (mBLASData.dN - 1) * incX;
   2957         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   2958         vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n2);
   2959 
   2960         mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
   2961         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   2962         vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UN2);
   2963         verifyMatrix(vectorXRef, vectorXS);
   2964 
   2965         mRS.finish();
   2966         checkError();
   2967     }
   2968 
   2969     public void test_L2_DTBSV_Correctness() {
   2970         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   2971         int uplo = ScriptIntrinsicBLAS.UPPER;
   2972         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   2973         int incX = 1;
   2974 
   2975         // Populate input allocations
   2976         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   2977         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   2978         matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_dTBSV_A_nn);
   2979         vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n1);
   2980 
   2981         // Test for the default case: NO_TRANS
   2982         mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
   2983         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   2984         vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UN);
   2985         verifyMatrix(vectorXRef, vectorXD);
   2986 
   2987         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   2988         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   2989         // Reload vector X, since it was overwritten by BLAS.
   2990         vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n1);
   2991         mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
   2992         vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UT);
   2993         verifyMatrix(vectorXRef, vectorXD);
   2994 
   2995         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   2996         vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n1);
   2997         mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
   2998         vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UH);
   2999         verifyMatrix(vectorXRef, vectorXD);
   3000 
   3001         // Test for incX = 2;
   3002         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   3003         incX = 2;
   3004         int dimX = 1 + (mBLASData.dN - 1) * incX;
   3005         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   3006         vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n2);
   3007 
   3008         mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
   3009         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   3010         vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UN2);
   3011         verifyMatrix(vectorXRef, vectorXD);
   3012 
   3013         mRS.finish();
   3014         checkError();
   3015     }
   3016 
   3017     public void test_L2_CTBSV_Correctness() {
   3018         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   3019         int uplo = ScriptIntrinsicBLAS.UPPER;
   3020         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   3021         int incX = 1;
   3022 
   3023         // Populate input allocations
   3024         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   3025         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   3026         matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_cTBSV_A_nn);
   3027         vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n1);
   3028 
   3029         // Test for the default case: NO_TRANS
   3030         mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
   3031         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   3032         vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UN);
   3033         verifyMatrix(vectorXRef, vectorXC);
   3034 
   3035         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   3036         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   3037         // Reload vector X, since it was overwritten by BLAS.
   3038         vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n1);
   3039         mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
   3040         vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UT);
   3041         verifyMatrix(vectorXRef, vectorXC);
   3042 
   3043         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   3044         vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n1);
   3045         mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
   3046         vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UH);
   3047         verifyMatrix(vectorXRef, vectorXC);
   3048 
   3049         // Test for incX = 2;
   3050         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   3051         incX = 2;
   3052         int dimX = 1 + (mBLASData.dN - 1) * incX;
   3053         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   3054         vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n2);
   3055 
   3056         mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
   3057         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   3058         vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UN2);
   3059         verifyMatrix(vectorXRef, vectorXC);
   3060 
   3061         mRS.finish();
   3062         checkError();
   3063     }
   3064 
   3065     public void test_L2_ZTBSV_Correctness() {
   3066         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   3067         int uplo = ScriptIntrinsicBLAS.UPPER;
   3068         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   3069         int incX = 1;
   3070 
   3071         // Populate input allocations
   3072         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   3073         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   3074         matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_zTBSV_A_nn);
   3075         vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n1);
   3076 
   3077         // Test for the default case: NO_TRANS
   3078         mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
   3079         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   3080         vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UN);
   3081         verifyMatrix(vectorXRef, vectorXZ);
   3082 
   3083         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   3084         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   3085         // Reload vector X, since it was overwritten by BLAS.
   3086         vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n1);
   3087         mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
   3088         vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UT);
   3089         verifyMatrix(vectorXRef, vectorXZ);
   3090 
   3091         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   3092         vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n1);
   3093         mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
   3094         vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UH);
   3095         verifyMatrix(vectorXRef, vectorXZ);
   3096 
   3097         // Test for incX = 2;
   3098         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   3099         incX = 2;
   3100         int dimX = 1 + (mBLASData.dN - 1) * incX;
   3101         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   3102         vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n2);
   3103 
   3104         mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
   3105         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   3106         vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UN2);
   3107         verifyMatrix(vectorXRef, vectorXZ);
   3108 
   3109         mRS.finish();
   3110         checkError();
   3111     }
   3112 
   3113 
   3114     private void xTPSV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) {
   3115         for (Allocation matA : mMatrix) {
   3116             for (Allocation vecX : mMatrix) {
   3117                 if (!validateVecInput(vecX)) {
   3118                     continue;
   3119                 }
   3120                 Element elemA = matA.getType().getElement();
   3121                 if (validateTPMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) {
   3122                     try {
   3123                         if (elemA.isCompatible(Element.F32(mRS))) {
   3124                             mBLAS.STPSV(Uplo, TransA, Diag, matA, vecX, incX);
   3125                         } else if (elemA.isCompatible(Element.F64(mRS))) {
   3126                             mBLAS.DTPSV(Uplo, TransA, Diag, matA, vecX, incX);
   3127                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
   3128                             mBLAS.CTPSV(Uplo, TransA, Diag, matA, vecX, incX);
   3129                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   3130                             mBLAS.ZTPSV(Uplo, TransA, Diag, matA, vecX, incX);
   3131                         }
   3132                     } catch (RSRuntimeException e) {
   3133                         fail("should NOT throw RSRuntimeException");
   3134                     }
   3135                 } else {
   3136                     try {
   3137                         mBLAS.STPSV(Uplo, TransA, Diag, matA, vecX, incX);
   3138                         fail("should throw RSRuntimeException for STPSV");
   3139                     } catch (RSRuntimeException e) {
   3140                     }
   3141                     try {
   3142                         mBLAS.DTPSV(Uplo, TransA, Diag, matA, vecX, incX);
   3143                         fail("should throw RSRuntimeException for DTPSV");
   3144                     } catch (RSRuntimeException e) {
   3145                     }
   3146                     try {
   3147                         mBLAS.CTPSV(Uplo, TransA, Diag, matA, vecX, incX);
   3148                         fail("should throw RSRuntimeException for CTPSV");
   3149                     } catch (RSRuntimeException e) {
   3150                     }
   3151                     try {
   3152                         mBLAS.ZTPSV(Uplo, TransA, Diag, matA, vecX, incX);
   3153                         fail("should throw RSRuntimeException for ZTPSV");
   3154                     } catch (RSRuntimeException e) {
   3155                     }
   3156                 }
   3157             }
   3158         }
   3159     }
   3160 
   3161     public void L2_xTPSV_API(ArrayList<Allocation> mMatrix) {
   3162         for (int Uplo : mUplo) {
   3163             for (int TransA : mTranspose) {
   3164                 for (int Diag : mDiag) {
   3165                     for (int incX : mInc) {
   3166                         xTPSV_API_test(Uplo, TransA, Diag, incX, mMatrix);
   3167                     }
   3168                 }
   3169             }
   3170         }
   3171     }
   3172 
   3173     public void test_L2_STPSV_API() {
   3174         L2_xTPSV_API(mMatrixS);
   3175     }
   3176 
   3177     public void test_L2_DTPSV_API() {
   3178         L2_xTPSV_API(mMatrixD);
   3179     }
   3180 
   3181     public void test_L2_CTPSV_API() {
   3182         L2_xTPSV_API(mMatrixC);
   3183     }
   3184 
   3185     public void test_L2_ZTPSV_API() {
   3186         L2_xTPSV_API(mMatrixZ);
   3187     }
   3188 
   3189     public void test_L2_STPSV_Correctness() {
   3190         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   3191         int uplo = ScriptIntrinsicBLAS.UPPER;
   3192         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   3193         int incX = 1;
   3194 
   3195         // Populate input allocations
   3196         int N = mBLASData.dN;
   3197         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
   3198         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
   3199         matrixAS.copyFrom(mBLASData.L2_sTRSV_A_nn_pu);
   3200         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1);
   3201 
   3202         // Test for the default case: NO_TRANS
   3203         mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX);
   3204         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
   3205         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN);
   3206         verifyMatrix(vectorXRef, vectorXS);
   3207 
   3208         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   3209         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   3210         // Reload vector X, since it was overwritten by BLAS.
   3211         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1);
   3212         mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX);
   3213         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UT);
   3214         verifyMatrix(vectorXRef, vectorXS);
   3215 
   3216         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   3217         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1);
   3218         mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX);
   3219         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UH);
   3220         verifyMatrix(vectorXRef, vectorXS);
   3221 
   3222         // Test for incX = 2;
   3223         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   3224         incX = 2;
   3225         int dimX = 1 + (N - 1) * incX;
   3226         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   3227         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n2);
   3228 
   3229         mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX);
   3230         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   3231         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN2);
   3232         verifyMatrix(vectorXRef, vectorXS);
   3233 
   3234         mRS.finish();
   3235         checkError();
   3236     }
   3237 
   3238     public void test_L2_DTPSV_Correctness() {
   3239         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   3240         int uplo = ScriptIntrinsicBLAS.UPPER;
   3241         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   3242         int incX = 1;
   3243 
   3244         // Populate input allocations
   3245         int N = mBLASData.dN;
   3246         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
   3247         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
   3248         matrixAD.copyFrom(mBLASData.L2_dTRSV_A_nn_pu);
   3249         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1);
   3250 
   3251         // Test for the default case: NO_TRANS
   3252         mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX);
   3253         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
   3254         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN);
   3255         verifyMatrix(vectorXRef, vectorXD);
   3256 
   3257         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   3258         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   3259         // Reload vector X, since it was overwritten by BLAS.
   3260         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1);
   3261         mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX);
   3262         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UT);
   3263         verifyMatrix(vectorXRef, vectorXD);
   3264 
   3265         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   3266         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1);
   3267         mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX);
   3268         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UH);
   3269         verifyMatrix(vectorXRef, vectorXD);
   3270 
   3271         // Test for incX = 2;
   3272         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   3273         incX = 2;
   3274         int dimX = 1 + (N - 1) * incX;
   3275         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   3276         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n2);
   3277 
   3278         mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX);
   3279         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   3280         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN2);
   3281         verifyMatrix(vectorXRef, vectorXD);
   3282 
   3283         mRS.finish();
   3284         checkError();
   3285     }
   3286 
   3287     public void test_L2_CTPSV_Correctness() {
   3288         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   3289         int uplo = ScriptIntrinsicBLAS.UPPER;
   3290         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   3291         int incX = 1;
   3292 
   3293         // Populate input allocations
   3294         int N = mBLASData.dN;
   3295         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
   3296         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
   3297         matrixAC.copyFrom(mBLASData.L2_cTRSV_A_nn_pu);
   3298         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1);
   3299 
   3300         // Test for the default case: NO_TRANS
   3301         mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX);
   3302         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
   3303         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN);
   3304         verifyMatrix(vectorXRef, vectorXC);
   3305 
   3306         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   3307         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   3308         // Reload vector X, since it was overwritten by BLAS.
   3309         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1);
   3310         mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX);
   3311         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UT);
   3312         verifyMatrix(vectorXRef, vectorXC);
   3313 
   3314         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   3315         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1);
   3316         mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX);
   3317         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UH);
   3318         verifyMatrix(vectorXRef, vectorXC);
   3319 
   3320         // Test for incX = 2;
   3321         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   3322         incX = 2;
   3323         int dimX = 1 + (N - 1) * incX;
   3324         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   3325         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n2);
   3326 
   3327         mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX);
   3328         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   3329         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN2);
   3330         verifyMatrix(vectorXRef, vectorXC);
   3331 
   3332         mRS.finish();
   3333         checkError();
   3334     }
   3335 
   3336     public void test_L2_ZTPSV_Correctness() {
   3337         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   3338         int uplo = ScriptIntrinsicBLAS.UPPER;
   3339         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   3340         int incX = 1;
   3341 
   3342         // Populate input allocations
   3343         int N = mBLASData.dN;
   3344         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
   3345         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
   3346         matrixAZ.copyFrom(mBLASData.L2_zTRSV_A_nn_pu);
   3347         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1);
   3348 
   3349         // Test for the default case: NO_TRANS
   3350         mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   3351         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
   3352         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN);
   3353         verifyMatrix(vectorXRef, vectorXZ);
   3354 
   3355         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   3356         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   3357         // Reload vector X, since it was overwritten by BLAS.
   3358         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1);
   3359         mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   3360         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UT);
   3361         verifyMatrix(vectorXRef, vectorXZ);
   3362 
   3363         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   3364         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1);
   3365         mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   3366         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UH);
   3367         verifyMatrix(vectorXRef, vectorXZ);
   3368 
   3369         // Test for incX = 2;
   3370         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   3371         incX = 2;
   3372         int dimX = 1 + (N - 1) * incX;
   3373         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   3374         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n2);
   3375 
   3376         mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
   3377         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   3378         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN2);
   3379         verifyMatrix(vectorXRef, vectorXZ);
   3380 
   3381         mRS.finish();
   3382         checkError();
   3383     }
   3384 
   3385 
   3386     private boolean validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   3387         if (!A.getType().getElement().isCompatible(e) ||
   3388             !X.getType().getElement().isCompatible(e) ||
   3389             !Y.getType().getElement().isCompatible(e) ) {
   3390             return false;
   3391         }
   3392 
   3393         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
   3394             return false;
   3395         }
   3396 
   3397         int M = A.getType().getY();
   3398         int N = A.getType().getX();
   3399 
   3400         if (N < 1 || M < 1) {
   3401             return false;
   3402         }
   3403         if (incX <= 0 || incY <= 0) {
   3404             return false;
   3405         }
   3406         int expectedXDim = 1 + (M - 1) * incX;
   3407         if (X.getType().getX() != expectedXDim) {
   3408             return false;
   3409         }
   3410         int expectedYDim = 1 + (N - 1) * incY;
   3411         if (Y.getType().getX() != expectedYDim) {
   3412             return false;
   3413         }
   3414         return true;
   3415     }
   3416 
   3417 
   3418     private void xGER_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) {
   3419         for (Allocation matA : mMatrix) {
   3420             for (Allocation vecX : mMatrix) {
   3421                 if (!validateVecInput(vecX)) {
   3422                     continue;
   3423                 }
   3424                 for (Allocation vecY : mMatrix) {
   3425                     if (!validateVecInput(vecY)) {
   3426                         continue;
   3427                     }
   3428                     Element elemA = matA.getType().getElement();
   3429                     if (validateGER(elemA, vecX, incX, vecY, incY, matA)) {
   3430                         try {
   3431                             if (elemA.isCompatible(Element.F32(mRS))) {
   3432                                 mBLAS.SGER(alphaS, vecX, incX, vecY, incY, matA);
   3433                             } else if (elemA.isCompatible(Element.F64(mRS))) {
   3434                                 mBLAS.DGER(alphaD, vecX, incX, vecY, incY, matA);
   3435                             }
   3436                         } catch (RSRuntimeException e) {
   3437                             fail("should NOT throw RSRuntimeException");
   3438                         }
   3439                     } else {
   3440                         try {
   3441                             mBLAS.SGER(alphaS, vecX, incX, vecY, incY, matA);
   3442                             fail("should throw RSRuntimeException for SGER");
   3443                         } catch (RSRuntimeException e) {
   3444                         }
   3445                         try {
   3446                             mBLAS.DGER(alphaD, vecX, incX, vecY, incY, matA);
   3447                             fail("should throw RSRuntimeException for DGER");
   3448                         } catch (RSRuntimeException e) {
   3449                         }
   3450                     }
   3451                 }
   3452             }
   3453         }
   3454     }
   3455 
   3456     private void L2_xGER_API(ArrayList<Allocation> mMatrix) {
   3457         for (int incX : mInc) {
   3458             for (int incY : mInc) {
   3459                 xGERU_API_test(incX, incY, mMatrix);
   3460             }
   3461         }
   3462     }
   3463 
   3464     public void test_L2_SGER_API() {
   3465         L2_xGER_API(mMatrixS);
   3466     }
   3467 
   3468     public void test_L2_DGER_API() {
   3469         L2_xGER_API(mMatrixD);
   3470     }
   3471 
   3472     public void test_L2_SGER_Correctness() {
   3473         int incX = 1;
   3474         int incY = 1;
   3475 
   3476         // Populate input allocations
   3477         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
   3478         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1));
   3479         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   3480         matrixAS.copyFrom(mBLASData.L2_sGER_A_mn);
   3481         vectorXS.copyFrom(mBLASData.L2_sGER_x_m1);
   3482         vectorYS.copyFrom(mBLASData.L2_sGER_y_n1);
   3483 
   3484         // Test for the default case: NO_TRANS
   3485         mBLAS.SGER(alphaS, vectorXS, incX, vectorYS, incY, matrixAS);
   3486         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
   3487         matrixARef.copyFrom(mBLASData.L2_sGER_o_N);
   3488         verifyMatrix(matrixARef, matrixAS);
   3489 
   3490         // Test for incX = 2 & incY = 3;
   3491         incX = 2;
   3492         incY = 3;
   3493         int dimX = 1 + (mBLASData.dM - 1) * incX;
   3494         int dimY = 1 + (mBLASData.dN - 1) * incY;
   3495         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   3496         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
   3497         vectorXS.copyFrom(mBLASData.L2_sGER_x_m2);
   3498         vectorYS.copyFrom(mBLASData.L2_sGER_y_n2);
   3499         matrixAS.copyFrom(mBLASData.L2_sGER_A_mn);
   3500 
   3501         mBLAS.SGER(alphaS, vectorXS, incX, vectorYS, incY, matrixAS);
   3502         verifyMatrix(matrixARef, matrixAS);
   3503 
   3504         mRS.finish();
   3505         checkError();
   3506     }
   3507 
   3508     public void test_L2_DGER_Correctness() {
   3509         int incX = 1;
   3510         int incY = 1;
   3511 
   3512         // Populate input allocations
   3513         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
   3514         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1));
   3515         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   3516         matrixAD.copyFrom(mBLASData.L2_dGER_A_mn);
   3517         vectorXD.copyFrom(mBLASData.L2_dGER_x_m1);
   3518         vectorYD.copyFrom(mBLASData.L2_dGER_y_n1);
   3519 
   3520         // Test for the default case: NO_TRANS
   3521         mBLAS.DGER(alphaD, vectorXD, incX, vectorYD, incY, matrixAD);
   3522         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
   3523         matrixARef.copyFrom(mBLASData.L2_dGER_o_N);
   3524         verifyMatrix(matrixARef, matrixAD);
   3525 
   3526         // Test for incX = 2 & incY = 3;
   3527         incX = 2;
   3528         incY = 3;
   3529         int dimX = 1 + (mBLASData.dM - 1) * incX;
   3530         int dimY = 1 + (mBLASData.dN - 1) * incY;
   3531         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   3532         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
   3533         vectorXD.copyFrom(mBLASData.L2_dGER_x_m2);
   3534         vectorYD.copyFrom(mBLASData.L2_dGER_y_n2);
   3535         matrixAD.copyFrom(mBLASData.L2_dGER_A_mn);
   3536 
   3537         mBLAS.DGER(alphaD, vectorXD, incX, vectorYD, incY, matrixAD);
   3538         verifyMatrix(matrixARef, matrixAD);
   3539 
   3540         mRS.finish();
   3541         checkError();
   3542     }
   3543 
   3544 
   3545     private boolean validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   3546         if (!A.getType().getElement().isCompatible(e) ||
   3547             !X.getType().getElement().isCompatible(e) ||
   3548             !Y.getType().getElement().isCompatible(e)) {
   3549             return false;
   3550         }
   3551         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
   3552             return false;
   3553         }
   3554 
   3555         int M = A.getType().getY();
   3556         int N = A.getType().getX();
   3557         if (incX <= 0 || incY <= 0) {
   3558             return false;
   3559         }
   3560         int expectedXDim = 1 + (M - 1) * incX;
   3561         if (X.getType().getX() != expectedXDim) {
   3562             return false;
   3563         }
   3564         int expectedYDim = 1 + (N - 1) * incY;
   3565         if (Y.getType().getX() != expectedYDim) {
   3566             return false;
   3567         }
   3568         return true;
   3569     }
   3570 
   3571     private void xGERU_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) {
   3572         for (Allocation matA : mMatrix) {
   3573             for (Allocation vecX : mMatrix) {
   3574                 if (!validateVecInput(vecX)) {
   3575                     continue;
   3576                 }
   3577                 for (Allocation vecY : mMatrix) {
   3578                     if (!validateVecInput(vecY)) {
   3579                         continue;
   3580                     }
   3581                     Element elemA = matA.getType().getElement();
   3582                     if (validateGERU(elemA, vecX, incX, vecY, incY, matA)) {
   3583                         try {
   3584                             if (elemA.isCompatible(Element.F32_2(mRS))) {
   3585                                 mBLAS.CGERU(alphaC, vecX, incX, vecY, incY, matA);
   3586                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   3587                                 mBLAS.ZGERU(alphaZ, vecX, incX, vecY, incY, matA);
   3588                             }
   3589                         } catch (RSRuntimeException e) {
   3590                             fail("should NOT throw RSRuntimeException");
   3591                         }
   3592                     } else {
   3593                         try {
   3594                             mBLAS.CGERU(alphaC, vecX, incX, vecY, incY, matA);
   3595                             fail("should throw RSRuntimeException for CGERU");
   3596                         } catch (RSRuntimeException e) {
   3597                         }
   3598                         try {
   3599                             mBLAS.ZGERU(alphaZ, vecX, incX, vecY, incY, matA);
   3600                             fail("should throw RSRuntimeException for ZGERU");
   3601                         } catch (RSRuntimeException e) {
   3602                         }
   3603                     }
   3604                 }
   3605             }
   3606         }
   3607     }
   3608 
   3609     private void L2_xGERU_API(ArrayList<Allocation> mMatrix) {
   3610         for (int incX : mInc) {
   3611             for (int incY : mInc) {
   3612                 xGERU_API_test(incX, incY, mMatrix);
   3613             }
   3614         }
   3615     }
   3616 
   3617     public void test_L2_CGERU_API() {
   3618         L2_xGERU_API(mMatrixC);
   3619     }
   3620 
   3621     public void test_L2_ZGERU_API() {
   3622         L2_xGERU_API(mMatrixZ);
   3623     }
   3624 
   3625     public void test_L2_CGERU_Correctness() {
   3626         int incX = 1;
   3627         int incY = 1;
   3628 
   3629         // Populate input allocations
   3630         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   3631         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1));
   3632         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   3633         matrixAC.copyFrom(mBLASData.L2_cGERU_A_mn);
   3634         vectorXC.copyFrom(mBLASData.L2_cGERU_x_m1);
   3635         vectorYC.copyFrom(mBLASData.L2_cGERU_y_n1);
   3636 
   3637         // Test for the default case: NO_TRANS
   3638         mBLAS.CGERU(alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
   3639         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   3640         matrixARef.copyFrom(mBLASData.L2_cGERU_o_N);
   3641         verifyMatrix(matrixARef, matrixAC);
   3642 
   3643         // Test for incX = 2 & incY = 3;
   3644         incX = 2;
   3645         incY = 3;
   3646         int dimX = 1 + (mBLASData.dM - 1) * incX;
   3647         int dimY = 1 + (mBLASData.dN - 1) * incY;
   3648         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   3649         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
   3650         vectorXC.copyFrom(mBLASData.L2_cGERU_x_m2);
   3651         vectorYC.copyFrom(mBLASData.L2_cGERU_y_n2);
   3652         matrixAC.copyFrom(mBLASData.L2_cGERU_A_mn);
   3653 
   3654         mBLAS.CGERU(alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
   3655         verifyMatrix(matrixARef, matrixAC);
   3656 
   3657         mRS.finish();
   3658         checkError();
   3659     }
   3660 
   3661     public void test_L2_ZGERU_Correctness() {
   3662         int incX = 1;
   3663         int incY = 1;
   3664 
   3665         // Populate input allocations
   3666         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   3667         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1));
   3668         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   3669         matrixAZ.copyFrom(mBLASData.L2_zGERU_A_mn);
   3670         vectorXZ.copyFrom(mBLASData.L2_zGERU_x_m1);
   3671         vectorYZ.copyFrom(mBLASData.L2_zGERU_y_n1);
   3672 
   3673         // Test for the default case: NO_TRANS
   3674         mBLAS.ZGERU(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
   3675         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   3676         matrixARef.copyFrom(mBLASData.L2_zGERU_o_N);
   3677         verifyMatrix(matrixARef, matrixAZ);
   3678 
   3679         // Test for incX = 2 & incY = 3;
   3680         incX = 2;
   3681         incY = 3;
   3682         int dimX = 1 + (mBLASData.dM - 1) * incX;
   3683         int dimY = 1 + (mBLASData.dN - 1) * incY;
   3684         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   3685         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
   3686         vectorXZ.copyFrom(mBLASData.L2_zGERU_x_m2);
   3687         vectorYZ.copyFrom(mBLASData.L2_zGERU_y_n2);
   3688         matrixAZ.copyFrom(mBLASData.L2_zGERU_A_mn);
   3689 
   3690         mBLAS.ZGERU(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
   3691         verifyMatrix(matrixARef, matrixAZ);
   3692 
   3693         mRS.finish();
   3694         checkError();
   3695     }
   3696 
   3697 
   3698 
   3699     private void xGERC_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) {
   3700         for (Allocation matA : mMatrix) {
   3701             for (Allocation vecX : mMatrix) {
   3702                 if (!validateVecInput(vecX)) {
   3703                     continue;
   3704                 }
   3705                 for (Allocation vecY : mMatrix) {
   3706                     if (!validateVecInput(vecY)) {
   3707                         continue;
   3708                     }
   3709                     Element elemA = matA.getType().getElement();
   3710                     if (validateGERU(elemA, vecX, incX, vecY, incY, matA)) {
   3711                         try {
   3712                             if (elemA.isCompatible(Element.F32_2(mRS))) {
   3713                                 mBLAS.CGERC(alphaC, vecX, incX, vecY, incY, matA);
   3714                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   3715                                 mBLAS.ZGERC(alphaZ, vecX, incX, vecY, incY, matA);
   3716                             }
   3717                         } catch (RSRuntimeException e) {
   3718                             fail("should NOT throw RSRuntimeException");
   3719                         }
   3720                     } else {
   3721                         try {
   3722                             mBLAS.CGERC(alphaC, vecX, incX, vecY, incY, matA);
   3723                             fail("should throw RSRuntimeException for CGERC");
   3724                         } catch (RSRuntimeException e) {
   3725                         }
   3726                         try {
   3727                             mBLAS.ZGERC(alphaZ, vecX, incX, vecY, incY, matA);
   3728                             fail("should throw RSRuntimeException for ZGERC");
   3729                         } catch (RSRuntimeException e) {
   3730                         }
   3731                     }
   3732                 }
   3733             }
   3734         }
   3735     }
   3736 
   3737     private void L2_xGERC_API(ArrayList<Allocation> mMatrix) {
   3738         for (int incX : mInc) {
   3739             for (int incY : mInc) {
   3740                 xGERC_API_test(incX, incY, mMatrix);
   3741             }
   3742         }
   3743     }
   3744 
   3745     public void test_L2_CGERC_API() {
   3746         L2_xGERC_API(mMatrixC);
   3747     }
   3748 
   3749     public void test_L2_ZGERC_API() {
   3750         L2_xGERC_API(mMatrixZ);
   3751     }
   3752 
   3753     public void test_L2_CGERC_Correctness() {
   3754         int incX = 1;
   3755         int incY = 1;
   3756 
   3757         // Populate input allocations
   3758         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   3759         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1));
   3760         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   3761         matrixAC.copyFrom(mBLASData.L2_cGERC_A_mn);
   3762         vectorXC.copyFrom(mBLASData.L2_cGERC_x_m1);
   3763         vectorYC.copyFrom(mBLASData.L2_cGERC_y_n1);
   3764 
   3765         // Test for the default case: NO_TRANS
   3766         mBLAS.CGERC(alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
   3767         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   3768         matrixARef.copyFrom(mBLASData.L2_cGERC_o_N);
   3769         verifyMatrix(matrixARef, matrixAC);
   3770 
   3771         // Test for incX = 2 & incY = 3;
   3772         incX = 2;
   3773         incY = 3;
   3774         int dimX = 1 + (mBLASData.dM - 1) * incX;
   3775         int dimY = 1 + (mBLASData.dN - 1) * incY;
   3776         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   3777         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
   3778         vectorXC.copyFrom(mBLASData.L2_cGERC_x_m2);
   3779         vectorYC.copyFrom(mBLASData.L2_cGERC_y_n2);
   3780         matrixAC.copyFrom(mBLASData.L2_cGERC_A_mn);
   3781 
   3782         mBLAS.CGERC(alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
   3783         verifyMatrix(matrixARef, matrixAC);
   3784 
   3785         mRS.finish();
   3786         checkError();
   3787     }
   3788 
   3789     public void test_L2_ZGERC_Correctness() {
   3790         int incX = 1;
   3791         int incY = 1;
   3792 
   3793         // Populate input allocations
   3794         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   3795         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1));
   3796         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   3797         matrixAZ.copyFrom(mBLASData.L2_zGERC_A_mn);
   3798         vectorXZ.copyFrom(mBLASData.L2_zGERC_x_m1);
   3799         vectorYZ.copyFrom(mBLASData.L2_zGERC_y_n1);
   3800 
   3801         // Test for the default case: NO_TRANS
   3802         mBLAS.ZGERC(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
   3803         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   3804         matrixARef.copyFrom(mBLASData.L2_zGERC_o_N);
   3805         verifyMatrix(matrixARef, matrixAZ);
   3806 
   3807         // Test for incX = 2 & incY = 3;
   3808         incX = 2;
   3809         incY = 3;
   3810         int dimX = 1 + (mBLASData.dM - 1) * incX;
   3811         int dimY = 1 + (mBLASData.dN - 1) * incY;
   3812         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   3813         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
   3814         vectorXZ.copyFrom(mBLASData.L2_zGERC_x_m2);
   3815         vectorYZ.copyFrom(mBLASData.L2_zGERC_y_n2);
   3816         matrixAZ.copyFrom(mBLASData.L2_zGERC_A_mn);
   3817 
   3818         mBLAS.ZGERC(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
   3819         verifyMatrix(matrixARef, matrixAZ);
   3820 
   3821         mRS.finish();
   3822         checkError();
   3823     }
   3824 
   3825 
   3826     private void xHER_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) {
   3827         for (Allocation matA : mMatrix) {
   3828             for (Allocation vecX : mMatrix) {
   3829                 if (!validateVecInput(vecX)) {
   3830                     continue;
   3831                 }
   3832                 Element elemA = matA.getType().getElement();
   3833                 if (validateSYR(elemA, Uplo, vecX, incX, matA)) {
   3834                     try {
   3835                         if (elemA.isCompatible(Element.F32_2(mRS))) {
   3836                             mBLAS.CHER(Uplo, alphaS, vecX, incX, matA);
   3837                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   3838                             mBLAS.ZHER(Uplo, alphaD, vecX, incX, matA);
   3839                         }
   3840                     } catch (RSRuntimeException e) {
   3841                         fail("should NOT throw RSRuntimeException");
   3842                     }
   3843                 } else {
   3844                     try {
   3845                         mBLAS.CHER(Uplo, alphaS, vecX, incX, matA);
   3846                         fail("should throw RSRuntimeException for CHER");
   3847                     } catch (RSRuntimeException e) {
   3848                     }
   3849                     try {
   3850                         mBLAS.ZHER(Uplo, alphaD, vecX, incX, matA);
   3851                         fail("should throw RSRuntimeException for ZHER");
   3852                     } catch (RSRuntimeException e) {
   3853                     }
   3854                 }
   3855             }
   3856         }
   3857     }
   3858 
   3859     public void L2_xHER_API(ArrayList<Allocation> mMatrix) {
   3860         for (int Uplo : mUplo) {
   3861             for (int incX : mInc) {
   3862                 xHER_API_test(Uplo, incX, mMatrix);
   3863             }
   3864         }
   3865     }
   3866 
   3867     public void test_L2_CHER_API() {
   3868         L2_xHER_API(mMatrixC);
   3869     }
   3870 
   3871     public void test_L2_ZHER_API() {
   3872         L2_xHER_API(mMatrixZ);
   3873     }
   3874 
   3875     public void test_L2_CHER_Correctness() {
   3876         int uplo = ScriptIntrinsicBLAS.UPPER;
   3877         int incX = 1;
   3878 
   3879         // Populate input allocations
   3880         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   3881         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   3882         matrixAC.copyFrom(mBLASData.L2_cHER_A_nn);
   3883         vectorXC.copyFrom(mBLASData.L2_cHER_x_n1);
   3884 
   3885         // Test for the default case: NO_TRANS
   3886         mBLAS.CHER(uplo, alphaS, vectorXC, incX, matrixAC);
   3887         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   3888         matrixARef.copyFrom(mBLASData.L2_cHER_o_N);
   3889         verifyMatrix(matrixARef, matrixAC, true);
   3890 
   3891         // Test for incX = 2;
   3892         incX = 2;
   3893         int dimX = 1 + (mBLASData.dN - 1) * incX;
   3894         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   3895         vectorXC.copyFrom(mBLASData.L2_cHER_x_n2);
   3896         matrixAC.copyFrom(mBLASData.L2_cHER_A_nn);
   3897 
   3898         mBLAS.CHER(uplo, alphaS, vectorXC, incX, matrixAC);
   3899         verifyMatrix(matrixARef, matrixAC, true);
   3900 
   3901         mRS.finish();
   3902         checkError();
   3903     }
   3904 
   3905     public void test_L2_ZHER_Correctness() {
   3906         int uplo = ScriptIntrinsicBLAS.UPPER;
   3907         int incX = 1;
   3908 
   3909         // Populate input allocations
   3910         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   3911         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   3912         matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn);
   3913         vectorXZ.copyFrom(mBLASData.L2_zHER_x_n1);
   3914 
   3915         // Test for the default case: NO_TRANS
   3916         mBLAS.ZHER(uplo, alphaD, vectorXZ, incX, matrixAZ);
   3917         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   3918         matrixARef.copyFrom(mBLASData.L2_zHER_o_N);
   3919         verifyMatrix(matrixARef, matrixAZ, true);
   3920 
   3921         // Test for incX = 2;
   3922         incX = 2;
   3923         int dimX = 1 + (mBLASData.dN - 1) * incX;
   3924         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   3925         vectorXZ.copyFrom(mBLASData.L2_zHER_x_n2);
   3926         matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn);
   3927 
   3928         mBLAS.ZHER(uplo, alphaD, vectorXZ, incX, matrixAZ);
   3929         verifyMatrix(matrixARef, matrixAZ, true);
   3930 
   3931         mRS.finish();
   3932         checkError();
   3933     }
   3934 
   3935 
   3936     private void xHPR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) {
   3937         for (Allocation matA : mMatrix) {
   3938             for (Allocation vecX : mMatrix) {
   3939                 if (!validateVecInput(vecX)) {
   3940                     continue;
   3941                 }
   3942                 Element elemA = matA.getType().getElement();
   3943                 if (validateSPR(elemA, Uplo, vecX, incX, matA)) {
   3944                     try {
   3945                         if (elemA.isCompatible(Element.F32_2(mRS))) {
   3946                             mBLAS.CHPR(Uplo, alphaS, vecX, incX, matA);
   3947                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   3948                             mBLAS.ZHPR(Uplo, alphaD, vecX, incX, matA);
   3949                         }
   3950                     } catch (RSRuntimeException e) {
   3951                         fail("should NOT throw RSRuntimeException");
   3952                     }
   3953                 } else {
   3954                     try {
   3955                         mBLAS.CHPR(Uplo, alphaS, vecX, incX, matA);
   3956                         fail("should throw RSRuntimeException for CHPR");
   3957                     } catch (RSRuntimeException e) {
   3958                     }
   3959                     try {
   3960                         mBLAS.ZHPR(Uplo, alphaD, vecX, incX, matA);
   3961                         fail("should throw RSRuntimeException for ZHPR");
   3962                     } catch (RSRuntimeException e) {
   3963                     }
   3964                 }
   3965             }
   3966         }
   3967     }
   3968 
   3969     public void L2_xHPR_API(ArrayList<Allocation> mMatrix) {
   3970         for (int Uplo : mUplo) {
   3971             for (int incX : mInc) {
   3972                 xHPR_API_test(Uplo, incX, mMatrix);
   3973             }
   3974         }
   3975     }
   3976 
   3977     public void test_L2_CHPR_API() {
   3978         L2_xHPR_API(mMatrixC);
   3979     }
   3980 
   3981     public void test_L2_ZHPR_API() {
   3982         L2_xHPR_API(mMatrixZ);
   3983     }
   3984 
   3985     public void test_L2_CHPR_Correctness() {
   3986         int uplo = ScriptIntrinsicBLAS.UPPER;
   3987         int incX = 1;
   3988 
   3989         // Populate input allocations
   3990         int N = mBLASData.dN;
   3991         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
   3992         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
   3993         matrixAC.copyFrom(mBLASData.L2_cHER_A_nn_pu);
   3994         vectorXC.copyFrom(mBLASData.L2_cHER_x_n1);
   3995 
   3996         // Test for the default case: NO_TRANS
   3997         mBLAS.CHPR(uplo, alphaS, vectorXC, incX, matrixAC);
   3998         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
   3999         matrixARef.copyFrom(mBLASData.L2_cHER_o_N_pu);
   4000         verifyMatrix(matrixARef, matrixAC, true);
   4001 
   4002         // Test for incX = 2;
   4003         incX = 2;
   4004         int dimX = 1 + (N - 1) * incX;
   4005         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   4006         vectorXC.copyFrom(mBLASData.L2_cHER_x_n2);
   4007         matrixAC.copyFrom(mBLASData.L2_cHER_A_nn_pu);
   4008 
   4009         mBLAS.CHPR(uplo, alphaS, vectorXC, incX, matrixAC);
   4010         verifyMatrix(matrixARef, matrixAC, true);
   4011 
   4012         mRS.finish();
   4013         checkError();
   4014     }
   4015 
   4016     public void test_L2_ZHPR_Correctness() {
   4017         int uplo = ScriptIntrinsicBLAS.UPPER;
   4018         int incX = 1;
   4019 
   4020         // Populate input allocations
   4021         int N = mBLASData.dN;
   4022         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
   4023         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
   4024         matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn_pu);
   4025         vectorXZ.copyFrom(mBLASData.L2_zHER_x_n1);
   4026 
   4027         // Test for the default case: NO_TRANS
   4028         mBLAS.ZHPR(uplo, alphaD, vectorXZ, incX, matrixAZ);
   4029         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
   4030         matrixARef.copyFrom(mBLASData.L2_zHER_o_N_pu);
   4031         verifyMatrix(matrixARef, matrixAZ, true);
   4032 
   4033         // Test for incX = 2;
   4034         incX = 2;
   4035         int dimX = 1 + (N - 1) * incX;
   4036         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   4037         vectorXZ.copyFrom(mBLASData.L2_zHER_x_n2);
   4038         matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn_pu);
   4039 
   4040         mBLAS.ZHPR(uplo, alphaD, vectorXZ, incX, matrixAZ);
   4041         verifyMatrix(matrixARef, matrixAZ, true);
   4042 
   4043         mRS.finish();
   4044         checkError();
   4045     }
   4046 
   4047 
   4048     private void xHER2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
   4049         for (Allocation matA : mMatrix) {
   4050             for (Allocation vecX : mMatrix) {
   4051                 if (!validateVecInput(vecX)) {
   4052                     continue;
   4053                 }
   4054                 for (Allocation vecY : mMatrix) {
   4055                     if (!validateVecInput(vecY)) {
   4056                         continue;
   4057                     }
   4058                     Element elemA = matA.getType().getElement();
   4059                     if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
   4060                         try {
   4061                             if (elemA.isCompatible(Element.F32_2(mRS))) {
   4062                                 mBLAS.CHER2(Uplo, alphaC, vecX, incX, vecY, incY, matA);
   4063                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   4064                                 mBLAS.ZHER2(Uplo, alphaZ, vecX, incX, vecY, incY, matA);
   4065                             }
   4066                         } catch (RSRuntimeException e) {
   4067                             fail("should NOT throw RSRuntimeException");
   4068                         }
   4069                     } else {
   4070                         try {
   4071                             mBLAS.CHER2(Uplo, alphaC, vecX, incX, vecY, incY, matA);
   4072                             fail("should throw RSRuntimeException for CHER2");
   4073                         } catch (RSRuntimeException e) {
   4074                         }
   4075                         try {
   4076                             mBLAS.ZHER2(Uplo, alphaZ, vecX, incX, vecY, incY, matA);
   4077                             fail("should throw RSRuntimeException for ZHER2");
   4078                         } catch (RSRuntimeException e) {
   4079                         }
   4080                     }
   4081                 }
   4082             }
   4083         }
   4084     }
   4085 
   4086     public void L2_xHER2_API(ArrayList<Allocation> mMatrix) {
   4087         for (int Uplo : mUplo) {
   4088             for (int incX : mInc) {
   4089                 xHER2_API_test(Uplo, incX, incX, mMatrix);
   4090             }
   4091         }
   4092     }
   4093 
   4094     public void test_L2_CHER2_API() {
   4095         L2_xHER2_API(mMatrixC);
   4096     }
   4097 
   4098     public void test_L2_ZHER2_API() {
   4099         L2_xHER2_API(mMatrixZ);
   4100     }
   4101 
   4102     public void test_L2_CHER2_Correctness() {
   4103         int uplo = ScriptIntrinsicBLAS.UPPER;
   4104         int incX = 1;
   4105         int incY = 1;
   4106 
   4107         // Populate input allocations
   4108         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   4109         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   4110         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
   4111         matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn);
   4112         vectorXC.copyFrom(mBLASData.L2_cHER2_x_n1);
   4113         vectorYC.copyFrom(mBLASData.L2_cHER2_y_n1);
   4114 
   4115         // Test for the default case: NO_TRANS
   4116         mBLAS.CHER2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
   4117         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   4118         matrixARef.copyFrom(mBLASData.L2_cHER2_o_N);
   4119         verifyMatrix(matrixARef, matrixAC, true);
   4120 
   4121         // Test for incX = 2 & incY = 3;
   4122         incX = 2;
   4123         incY = 3;
   4124         int dimX = 1 + (mBLASData.dN - 1) * incX;
   4125         int dimY = 1 + (mBLASData.dN - 1) * incY;
   4126         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   4127         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
   4128         vectorXC.copyFrom(mBLASData.L2_cHER2_x_n2);
   4129         vectorYC.copyFrom(mBLASData.L2_cHER2_y_n2);
   4130         matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn);
   4131 
   4132         mBLAS.CHER2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
   4133         verifyMatrix(matrixARef, matrixAC, true);
   4134 
   4135         mRS.finish();
   4136         checkError();
   4137     }
   4138 
   4139     public void test_L2_ZHER2_Correctness() {
   4140         int uplo = ScriptIntrinsicBLAS.UPPER;
   4141         int incX = 1;
   4142         int incY = 1;
   4143 
   4144         // Populate input allocations
   4145         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   4146         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   4147         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
   4148         matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn);
   4149         vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n1);
   4150         vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n1);
   4151 
   4152         // Test for the default case: NO_TRANS
   4153         mBLAS.ZHER2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
   4154         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   4155         matrixARef.copyFrom(mBLASData.L2_zHER2_o_N);
   4156         verifyMatrix(matrixARef, matrixAZ, true);
   4157 
   4158         // Test for incX = 2 & incY = 3;
   4159         incX = 2;
   4160         incY = 3;
   4161         int dimX = 1 + (mBLASData.dN - 1) * incX;
   4162         int dimY = 1 + (mBLASData.dN - 1) * incY;
   4163         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   4164         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
   4165         vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n2);
   4166         vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n2);
   4167         matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn);
   4168 
   4169         mBLAS.ZHER2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
   4170         verifyMatrix(matrixARef, matrixAZ, true);
   4171 
   4172         mRS.finish();
   4173         checkError();
   4174     }
   4175 
   4176 
   4177 
   4178     private void xHPR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
   4179         for (Allocation matA : mMatrix) {
   4180             for (Allocation vecX : mMatrix) {
   4181                 if (!validateVecInput(vecX)) {
   4182                     continue;
   4183                 }
   4184                 for (Allocation vecY : mMatrix) {
   4185                     if (!validateVecInput(vecY)) {
   4186                         continue;
   4187                     }
   4188                     Element elemA = matA.getType().getElement();
   4189                     if (validateSPR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
   4190                         try {
   4191                             if (elemA.isCompatible(Element.F32_2(mRS))) {
   4192                                 mBLAS.CHPR2(Uplo, alphaC, vecX, incX, vecY, incY, matA);
   4193                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   4194                                 mBLAS.ZHPR2(Uplo, alphaZ, vecX, incX, vecY, incY, matA);
   4195                             }
   4196                         } catch (RSRuntimeException e) {
   4197                             fail("should NOT throw RSRuntimeException");
   4198                         }
   4199                     } else {
   4200                         try {
   4201                             mBLAS.CHPR2(Uplo, alphaC, vecX, incX, vecY, incY, matA);
   4202                             fail("should throw RSRuntimeException for CHPR2");
   4203                         } catch (RSRuntimeException e) {
   4204                         }
   4205                         try {
   4206                             mBLAS.ZHPR2(Uplo, alphaZ, vecX, incX, vecY, incY, matA);
   4207                             fail("should throw RSRuntimeException for ZHPR2");
   4208                         } catch (RSRuntimeException e) {
   4209                         }
   4210                     }
   4211                 }
   4212             }
   4213         }
   4214     }
   4215 
   4216     public void L2_xHPR2_API(ArrayList<Allocation> mMatrix) {
   4217         for (int Uplo : mUplo) {
   4218             for (int incX : mInc) {
   4219                 xHPR2_API_test(Uplo, incX, incX, mMatrix);
   4220             }
   4221         }
   4222     }
   4223 
   4224     public void test_L2_CHPR2_API() {
   4225         L2_xHPR2_API(mMatrixC);
   4226     }
   4227 
   4228     public void test_L2_ZHPR2_API() {
   4229         L2_xHPR2_API(mMatrixZ);
   4230     }
   4231 
   4232     public void test_L2_CHPR2_Correctness() {
   4233         int uplo = ScriptIntrinsicBLAS.UPPER;
   4234         int incX = 1;
   4235         int incY = 1;
   4236 
   4237         // Populate input allocations
   4238         int N = mBLASData.dN;
   4239         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
   4240         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
   4241         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
   4242         matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn_pu);
   4243         vectorXC.copyFrom(mBLASData.L2_cHER2_x_n1);
   4244         vectorYC.copyFrom(mBLASData.L2_cHER2_y_n1);
   4245 
   4246         // Test for the default case: NO_TRANS
   4247         mBLAS.CHPR2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
   4248         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
   4249         matrixARef.copyFrom(mBLASData.L2_cHER2_o_N_pu);
   4250         verifyMatrix(matrixARef, matrixAC, true);
   4251 
   4252         // Test for incX = 2 & incY = 3;
   4253         incX = 2;
   4254         incY = 3;
   4255         int dimX = 1 + (N - 1) * incX;
   4256         int dimY = 1 + (N - 1) * incY;
   4257         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
   4258         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
   4259         vectorXC.copyFrom(mBLASData.L2_cHER2_x_n2);
   4260         vectorYC.copyFrom(mBLASData.L2_cHER2_y_n2);
   4261         matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn_pu);
   4262 
   4263         mBLAS.CHPR2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
   4264         verifyMatrix(matrixARef, matrixAC, true);
   4265 
   4266         mRS.finish();
   4267         checkError();
   4268     }
   4269 
   4270     public void test_L2_ZHPR2_Correctness() {
   4271         int uplo = ScriptIntrinsicBLAS.UPPER;
   4272         int incX = 1;
   4273         int incY = 1;
   4274 
   4275         // Populate input allocations
   4276         int N = mBLASData.dN;
   4277         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
   4278         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
   4279         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
   4280         matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn_pu);
   4281         vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n1);
   4282         vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n1);
   4283 
   4284         // Test for the default case: NO_TRANS
   4285         mBLAS.ZHPR2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
   4286         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
   4287         matrixARef.copyFrom(mBLASData.L2_zHER2_o_N_pu);
   4288         verifyMatrix(matrixARef, matrixAZ, true);
   4289 
   4290         // Test for incX = 2 & incY = 3;
   4291         incX = 2;
   4292         incY = 3;
   4293         int dimX = 1 + (N - 1) * incX;
   4294         int dimY = 1 + (N - 1) * incY;
   4295         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
   4296         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
   4297         vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n2);
   4298         vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n2);
   4299         matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn_pu);
   4300 
   4301         mBLAS.ZHPR2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
   4302         verifyMatrix(matrixARef, matrixAZ, true);
   4303 
   4304         mRS.finish();
   4305         checkError();
   4306     }
   4307 
   4308 
   4309 
   4310     private boolean validateSYR(Element e, int Uplo, Allocation X, int incX, Allocation A) {
   4311         if (!validateUplo(Uplo)) {
   4312             return false;
   4313         }
   4314         if (!A.getType().getElement().isCompatible(e) ||
   4315             !X.getType().getElement().isCompatible(e)) {
   4316             return false;
   4317         }
   4318 
   4319         int N = A.getType().getX();
   4320 
   4321         if (X.getType().getY() > 1) {
   4322             return false;
   4323         }
   4324         if (N != A.getType().getY()) {
   4325             return false;
   4326         }
   4327         if (incX <= 0) {
   4328             return false;
   4329         }
   4330         int expectedXDim = 1 + (N - 1) * incX;
   4331         if (X.getType().getX() != expectedXDim) {
   4332             return false;
   4333         }
   4334         return true;
   4335     }
   4336 
   4337     private void xSYR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) {
   4338         for (Allocation matA : mMatrix) {
   4339             for (Allocation vecX : mMatrix) {
   4340                 if (!validateVecInput(vecX)) {
   4341                     continue;
   4342                 }
   4343                 Element elemA = matA.getType().getElement();
   4344                 if (validateSYR(elemA, Uplo, vecX, incX, matA)) {
   4345                     try {
   4346                         if (elemA.isCompatible(Element.F32(mRS))) {
   4347                             mBLAS.SSYR(Uplo, alphaS, vecX, incX, matA);
   4348                         } else if (elemA.isCompatible(Element.F64(mRS))) {
   4349                             mBLAS.DSYR(Uplo, alphaD, vecX, incX, matA);
   4350                         }
   4351                     } catch (RSRuntimeException e) {
   4352                         fail("should NOT throw RSRuntimeException");
   4353                     }
   4354                 } else {
   4355                     try {
   4356                         mBLAS.SSYR(Uplo, alphaS, vecX, incX, matA);
   4357                         fail("should throw RSRuntimeException for SSYR");
   4358                     } catch (RSRuntimeException e) {
   4359                     }
   4360                     try {
   4361                         mBLAS.DSYR(Uplo, alphaD, vecX, incX, matA);
   4362                         fail("should throw RSRuntimeException for DSYR");
   4363                     } catch (RSRuntimeException e) {
   4364                     }
   4365                 }
   4366             }
   4367         }
   4368     }
   4369 
   4370     public void L2_xSYR_API(ArrayList<Allocation> mMatrix) {
   4371         for (int Uplo : mUplo) {
   4372             for (int incX : mInc) {
   4373                 xSYR_API_test(Uplo, incX, mMatrix);
   4374             }
   4375         }
   4376     }
   4377 
   4378     public void test_L2_SSYR_API() {
   4379         L2_xSYR_API(mMatrixS);
   4380     }
   4381 
   4382     public void test_L2_DSYR_API() {
   4383         L2_xSYR_API(mMatrixD);
   4384     }
   4385 
   4386     public void test_L2_SSYR_Correctness() {
   4387         int uplo = ScriptIntrinsicBLAS.UPPER;
   4388         int incX = 1;
   4389 
   4390         // Populate input allocations
   4391         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   4392         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   4393         matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn);
   4394         vectorXS.copyFrom(mBLASData.L2_sSYR_x_n1);
   4395 
   4396         // Test for the default case: NO_TRANS
   4397         mBLAS.SSYR(uplo, alphaS, vectorXS, incX, matrixAS);
   4398         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   4399         matrixARef.copyFrom(mBLASData.L2_sSYR_o_N);
   4400         verifyMatrix(matrixARef, matrixAS, true);
   4401 
   4402         // Test for incX = 2;
   4403         incX = 2;
   4404         int dimX = 1 + (mBLASData.dN - 1) * incX;
   4405         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   4406         vectorXS.copyFrom(mBLASData.L2_sSYR_x_n2);
   4407         matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn);
   4408 
   4409         mBLAS.SSYR(uplo, alphaS, vectorXS, incX, matrixAS);
   4410         verifyMatrix(matrixARef, matrixAS, true);
   4411 
   4412         mRS.finish();
   4413         checkError();
   4414     }
   4415 
   4416     public void test_L2_DSYR_Correctness() {
   4417         int uplo = ScriptIntrinsicBLAS.UPPER;
   4418         int incX = 1;
   4419 
   4420         // Populate input allocations
   4421         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   4422         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   4423         matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn);
   4424         vectorXD.copyFrom(mBLASData.L2_dSYR_x_n1);
   4425 
   4426         // Test for the default case: NO_TRANS
   4427         mBLAS.DSYR(uplo, alphaD, vectorXD, incX, matrixAD);
   4428         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   4429         matrixARef.copyFrom(mBLASData.L2_dSYR_o_N);
   4430         verifyMatrix(matrixARef, matrixAD, true);
   4431 
   4432         // Test for incX = 2;
   4433         incX = 2;
   4434         int dimX = 1 + (mBLASData.dN - 1) * incX;
   4435         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   4436         vectorXD.copyFrom(mBLASData.L2_dSYR_x_n2);
   4437         matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn);
   4438 
   4439         mBLAS.DSYR(uplo, alphaD, vectorXD, incX, matrixAD);
   4440         verifyMatrix(matrixARef, matrixAD, true);
   4441 
   4442         mRS.finish();
   4443         checkError();
   4444     }
   4445 
   4446 
   4447     private boolean validateSPR(Element e, int Uplo, Allocation X, int incX, Allocation Ap) {
   4448         if (!validateUplo(Uplo)) {
   4449             return false;
   4450         }
   4451         if (!Ap.getType().getElement().isCompatible(e) ||
   4452             !X.getType().getElement().isCompatible(e)) {
   4453             return false;
   4454         }
   4455         if (X.getType().getY() > 1) {
   4456             return false;
   4457         }
   4458 
   4459         if (Ap.getType().getY() > 1) {
   4460             return false;
   4461         }
   4462 
   4463         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
   4464         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
   4465             return false;
   4466         }
   4467         if (incX <= 0) {
   4468             return false;
   4469         }
   4470         int expectedXDim = 1 + (N - 1) * incX;
   4471         if (X.getType().getX() != expectedXDim) {
   4472             return false;
   4473         }
   4474 
   4475         return true;
   4476     }
   4477 
   4478     private void xSPR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) {
   4479         for (Allocation matA : mMatrix) {
   4480             for (Allocation vecX : mMatrix) {
   4481                 if (!validateVecInput(vecX)) {
   4482                     continue;
   4483                 }
   4484                 Element elemA = matA.getType().getElement();
   4485                 if (validateSPR(elemA, Uplo, vecX, incX, matA)) {
   4486                     try {
   4487                         if (elemA.isCompatible(Element.F32(mRS))) {
   4488                             mBLAS.SSPR(Uplo, alphaS, vecX, incX, matA);
   4489                         } else if (elemA.isCompatible(Element.F64(mRS))) {
   4490                             mBLAS.DSPR(Uplo, alphaD, vecX, incX, matA);
   4491                         }
   4492                     } catch (RSRuntimeException e) {
   4493                         fail("should NOT throw RSRuntimeException");
   4494                     }
   4495                 } else {
   4496                     try {
   4497                         mBLAS.SSPR(Uplo, alphaS, vecX, incX, matA);
   4498                         fail("should throw RSRuntimeException for SSPR");
   4499                     } catch (RSRuntimeException e) {
   4500                     }
   4501                     try {
   4502                         mBLAS.DSPR(Uplo, alphaD, vecX, incX, matA);
   4503                         fail("should throw RSRuntimeException for DSPR");
   4504                     } catch (RSRuntimeException e) {
   4505                     }
   4506                 }
   4507             }
   4508         }
   4509     }
   4510 
   4511     public void L2_xSPR_API(ArrayList<Allocation> mMatrix) {
   4512         for (int Uplo : mUplo) {
   4513             for (int incX : mInc) {
   4514                 xSPR_API_test(Uplo, incX, mMatrix);
   4515             }
   4516         }
   4517     }
   4518 
   4519     public void test_L2_SSPR_API() {
   4520         L2_xSPR_API(mMatrixS);
   4521     }
   4522 
   4523     public void test_L2_DSPR_API() {
   4524         L2_xSPR_API(mMatrixD);
   4525     }
   4526 
   4527     public void test_L2_SSPR_Correctness() {
   4528         int uplo = ScriptIntrinsicBLAS.UPPER;
   4529         int incX = 1;
   4530 
   4531         // Populate input allocations
   4532         int N = mBLASData.dN;
   4533         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
   4534         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
   4535         matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn_pu);
   4536         vectorXS.copyFrom(mBLASData.L2_sSYR_x_n1);
   4537 
   4538         // Test for the default case: NO_TRANS
   4539         mBLAS.SSPR(uplo, alphaS, vectorXS, incX, matrixAS);
   4540         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
   4541         matrixARef.copyFrom(mBLASData.L2_sSYR_o_N_pu);
   4542         verifyMatrix(matrixARef, matrixAS, true);
   4543 
   4544         // Test for incX = 2;
   4545         incX = 2;
   4546         int dimX = 1 + (N - 1) * incX;
   4547         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   4548         vectorXS.copyFrom(mBLASData.L2_sSYR_x_n2);
   4549         matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn_pu);
   4550 
   4551         mBLAS.SSPR(uplo, alphaS, vectorXS, incX, matrixAS);
   4552         verifyMatrix(matrixARef, matrixAS, true);
   4553 
   4554         mRS.finish();
   4555         checkError();
   4556     }
   4557 
   4558     public void test_L2_DSPR_Correctness() {
   4559         int uplo = ScriptIntrinsicBLAS.UPPER;
   4560         int incX = 1;
   4561 
   4562         // Populate input allocations
   4563         int N = mBLASData.dN;
   4564         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
   4565         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
   4566         matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn_pu);
   4567         vectorXD.copyFrom(mBLASData.L2_dSYR_x_n1);
   4568 
   4569         // Test for the default case: NO_TRANS
   4570         mBLAS.DSPR(uplo, alphaD, vectorXD, incX, matrixAD);
   4571         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
   4572         matrixARef.copyFrom(mBLASData.L2_dSYR_o_N_pu);
   4573         verifyMatrix(matrixARef, matrixAD, true);
   4574 
   4575         // Test for incX = 2;
   4576         incX = 2;
   4577         int dimX = 1 + (N - 1) * incX;
   4578         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   4579         vectorXD.copyFrom(mBLASData.L2_dSYR_x_n2);
   4580         matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn_pu);
   4581 
   4582         mBLAS.DSPR(uplo, alphaD, vectorXD, incX, matrixAD);
   4583         verifyMatrix(matrixARef, matrixAD, true);
   4584 
   4585         mRS.finish();
   4586         checkError();
   4587     }
   4588 
   4589 
   4590     private boolean validateSYR2(Element e, int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   4591         if (!validateUplo(Uplo)) {
   4592             return false;
   4593         }
   4594         if (!A.getType().getElement().isCompatible(e) ||
   4595             !X.getType().getElement().isCompatible(e) ||
   4596             !Y.getType().getElement().isCompatible(e)) {
   4597             return false;
   4598         }
   4599 
   4600         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
   4601             return false;
   4602         }
   4603 
   4604         int N = A.getType().getX();
   4605 
   4606         if (N != A.getType().getY()) {
   4607             return false;
   4608         }
   4609         if (incX <= 0 || incY <= 0) {
   4610             return false;
   4611         }
   4612         int expectedXDim = 1 + (N - 1) * incX;
   4613         int expectedYDim = 1 + (N - 1) * incY;
   4614         if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
   4615             return false;
   4616         }
   4617         return true;
   4618     }
   4619 
   4620     private void xSYR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
   4621         for (Allocation matA : mMatrix) {
   4622             for (Allocation vecX : mMatrix) {
   4623                 if (!validateVecInput(vecX)) {
   4624                     continue;
   4625                 }
   4626                 for (Allocation vecY : mMatrix) {
   4627                     if (!validateVecInput(vecY)) {
   4628                         continue;
   4629                     }
   4630                     Element elemA = matA.getType().getElement();
   4631                     if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
   4632                         try {
   4633                             if (elemA.isCompatible(Element.F32(mRS))) {
   4634                                 mBLAS.SSYR2(Uplo, alphaS, vecX, incX, vecY, incY, matA);
   4635                             } else if (elemA.isCompatible(Element.F64(mRS))) {
   4636                                 mBLAS.DSYR2(Uplo, alphaD, vecX, incX, vecY, incY, matA);
   4637                             }
   4638                         } catch (RSRuntimeException e) {
   4639                             fail("should NOT throw RSRuntimeException");
   4640                         }
   4641                     } else {
   4642                         try {
   4643                             mBLAS.SSYR2(Uplo, alphaS, vecX, incX, vecY, incY, matA);
   4644                             fail("should throw RSRuntimeException for SSYR2");
   4645                         } catch (RSRuntimeException e) {
   4646                         }
   4647                         try {
   4648                             mBLAS.DSYR2(Uplo, alphaD, vecX, incX, vecY, incY, matA);
   4649                             fail("should throw RSRuntimeException for DSYR2");
   4650                         } catch (RSRuntimeException e) {
   4651                         }
   4652                     }
   4653                 }
   4654             }
   4655         }
   4656     }
   4657 
   4658     public void L2_xSYR2_API(ArrayList<Allocation> mMatrix) {
   4659         for (int Uplo : mUplo) {
   4660             for (int incX : mInc) {
   4661                 xSYR2_API_test(Uplo, incX, incX, mMatrix);
   4662             }
   4663         }
   4664     }
   4665 
   4666     public void test_L2_SSYR2_API() {
   4667         L2_xSYR2_API(mMatrixS);
   4668     }
   4669 
   4670     public void test_L2_DSYR2_API() {
   4671         L2_xSYR2_API(mMatrixD);
   4672     }
   4673 
   4674     public void test_L2_SSYR2_Correctness() {
   4675         int uplo = ScriptIntrinsicBLAS.UPPER;
   4676         int incX = 1;
   4677         int incY = 1;
   4678 
   4679         // Populate input allocations
   4680         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   4681         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   4682         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
   4683         matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn);
   4684         vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n1);
   4685         vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n1);
   4686 
   4687         // Test for the default case: NO_TRANS
   4688         mBLAS.SSYR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS);
   4689         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   4690         matrixARef.copyFrom(mBLASData.L2_sSYR2_o_N);
   4691         verifyMatrix(matrixARef, matrixAS, true);
   4692 
   4693         // Test for incX = 2 & incY = 3;
   4694         incX = 2;
   4695         incY = 3;
   4696         int dimX = 1 + (mBLASData.dN - 1) * incX;
   4697         int dimY = 1 + (mBLASData.dN - 1) * incY;
   4698         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   4699         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
   4700         vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n2);
   4701         vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n2);
   4702         matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn);
   4703 
   4704         mBLAS.SSYR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS);
   4705         verifyMatrix(matrixARef, matrixAS, true);
   4706 
   4707         mRS.finish();
   4708         checkError();
   4709     }
   4710 
   4711     public void test_L2_DSYR2_Correctness() {
   4712         int uplo = ScriptIntrinsicBLAS.UPPER;
   4713         int incX = 1;
   4714         int incY = 1;
   4715 
   4716         // Populate input allocations
   4717         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   4718         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   4719         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
   4720         matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn);
   4721         vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n1);
   4722         vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n1);
   4723 
   4724         // Test for the default case: NO_TRANS
   4725         mBLAS.DSYR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD);
   4726         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   4727         matrixARef.copyFrom(mBLASData.L2_dSYR2_o_N);
   4728         verifyMatrix(matrixARef, matrixAD, true);
   4729 
   4730         // Test for incX = 2 & incY = 3;
   4731         incX = 2;
   4732         incY = 3;
   4733         int dimX = 1 + (mBLASData.dN - 1) * incX;
   4734         int dimY = 1 + (mBLASData.dN - 1) * incY;
   4735         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   4736         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
   4737         vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n2);
   4738         vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n2);
   4739         matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn);
   4740 
   4741         mBLAS.DSYR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD);
   4742         verifyMatrix(matrixARef, matrixAD, true);
   4743 
   4744         mRS.finish();
   4745         checkError();
   4746     }
   4747 
   4748 
   4749     private boolean validateSPR2(Element e, int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
   4750         if (!validateUplo(Uplo)) {
   4751             return false;
   4752         }
   4753         if (!Ap.getType().getElement().isCompatible(e) ||
   4754             !X.getType().getElement().isCompatible(e) ||
   4755             !Y.getType().getElement().isCompatible(e)) {
   4756             return false;
   4757         }
   4758         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
   4759             return false;
   4760         }
   4761 
   4762         if (Ap.getType().getY() > 1) {
   4763             return false;
   4764         }
   4765 
   4766         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
   4767         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
   4768             return false;
   4769         }
   4770         if (incX <= 0 || incY <= 0) {
   4771             return false;
   4772         }
   4773         int expectedXDim = 1 + (N - 1) * incX;
   4774         int expectedYDim = 1 + (N - 1) * incY;
   4775         if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
   4776             return false;
   4777         }
   4778 
   4779         return true;
   4780     }
   4781 
   4782     private void xSPR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
   4783         for (Allocation matA : mMatrix) {
   4784             for (Allocation vecX : mMatrix) {
   4785                 if (!validateVecInput(vecX)) {
   4786                     continue;
   4787                 }
   4788                 for (Allocation vecY : mMatrix) {
   4789                     if (!validateVecInput(vecY)) {
   4790                         continue;
   4791                     }
   4792                     Element elemA = matA.getType().getElement();
   4793                     if (validateSPR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
   4794                         try {
   4795                             if (elemA.isCompatible(Element.F32(mRS))) {
   4796                                 mBLAS.SSPR2(Uplo, alphaS, vecX, incX, vecY, incY, matA);
   4797                             } else if (elemA.isCompatible(Element.F64(mRS))) {
   4798                                 mBLAS.DSPR2(Uplo, alphaD, vecX, incX, vecY, incY, matA);
   4799                             }
   4800                         } catch (RSRuntimeException e) {
   4801                             fail("should NOT throw RSRuntimeException");
   4802                         }
   4803                     } else {
   4804                         try {
   4805                             mBLAS.SSPR2(Uplo, alphaS, vecX, incX, vecY, incY, matA);
   4806                             fail("should throw RSRuntimeException for SSPR2");
   4807                         } catch (RSRuntimeException e) {
   4808                         }
   4809                         try {
   4810                             mBLAS.DSPR2(Uplo, alphaD, vecX, incX, vecY, incY, matA);
   4811                             fail("should throw RSRuntimeException for DSPR2");
   4812                         } catch (RSRuntimeException e) {
   4813                         }
   4814                     }
   4815                 }
   4816             }
   4817         }
   4818     }
   4819 
   4820     public void L2_xSPR2_API(ArrayList<Allocation> mMatrix) {
   4821         for (int Uplo : mUplo) {
   4822             for (int incX : mInc) {
   4823                 xSPR2_API_test(Uplo, incX, incX, mMatrix);
   4824             }
   4825         }
   4826     }
   4827 
   4828     public void test_L2_SSPR2_API() {
   4829         L2_xSPR2_API(mMatrixS);
   4830     }
   4831 
   4832     public void test_L2_DSPR2_API() {
   4833         L2_xSPR2_API(mMatrixD);
   4834     }
   4835 
   4836     public void test_L2_SSPR2_Correctness() {
   4837         int uplo = ScriptIntrinsicBLAS.UPPER;
   4838         int incX = 1;
   4839         int incY = 1;
   4840 
   4841         // Populate input allocations
   4842         int N = mBLASData.dN;
   4843         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
   4844         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
   4845         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
   4846         matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn_pu);
   4847         vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n1);
   4848         vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n1);
   4849 
   4850         // Test for the default case: NO_TRANS
   4851         mBLAS.SSPR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS);
   4852         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
   4853         matrixARef.copyFrom(mBLASData.L2_sSYR2_o_N_pu);
   4854         verifyMatrix(matrixARef, matrixAS, true);
   4855 
   4856         // Test for incX = 2 & incY = 3;
   4857         incX = 2;
   4858         incY = 3;
   4859         int dimX = 1 + (N - 1) * incX;
   4860         int dimY = 1 + (N - 1) * incY;
   4861         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
   4862         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
   4863         vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n2);
   4864         vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n2);
   4865         matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn_pu);
   4866 
   4867         mBLAS.SSPR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS);
   4868         verifyMatrix(matrixARef, matrixAS, true);
   4869 
   4870         mRS.finish();
   4871         checkError();
   4872     }
   4873 
   4874     public void test_L2_DSPR2_Correctness() {
   4875         int uplo = ScriptIntrinsicBLAS.UPPER;
   4876         int incX = 1;
   4877         int incY = 1;
   4878 
   4879         // Populate input allocations
   4880         int N = mBLASData.dN;
   4881         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
   4882         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
   4883         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
   4884         matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn_pu);
   4885         vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n1);
   4886         vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n1);
   4887 
   4888         // Test for the default case: NO_TRANS
   4889         mBLAS.DSPR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD);
   4890         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
   4891         matrixARef.copyFrom(mBLASData.L2_dSYR2_o_N_pu);
   4892         verifyMatrix(matrixARef, matrixAD, true);
   4893 
   4894         // Test for incX = 2 & incY = 3;
   4895         incX = 2;
   4896         incY = 3;
   4897         int dimX = 1 + (N - 1) * incX;
   4898         int dimY = 1 + (N - 1) * incY;
   4899         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
   4900         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
   4901         vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n2);
   4902         vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n2);
   4903         matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn_pu);
   4904 
   4905         mBLAS.DSPR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD);
   4906         verifyMatrix(matrixARef, matrixAD, true);
   4907 
   4908         mRS.finish();
   4909         checkError();
   4910     }
   4911 
   4912 
   4913 
   4914     private boolean validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {
   4915         int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1;
   4916         if ((A != null && !A.getType().getElement().isCompatible(e)) ||
   4917             (B != null && !B.getType().getElement().isCompatible(e)) ||
   4918             (C != null && !C.getType().getElement().isCompatible(e))) {
   4919             return false;
   4920         }
   4921         if (C == null) {
   4922             //since matrix C is used to store the result, it cannot be null.
   4923             return false;
   4924         }
   4925         cM = C.getType().getY();
   4926         cN = C.getType().getX();
   4927 
   4928         if (Side == ScriptIntrinsicBLAS.RIGHT) {
   4929             if ((A == null && B != null) || (A != null && B == null)) {
   4930                 return false;
   4931             }
   4932             if (B != null) {
   4933                 bM = A.getType().getY();
   4934                 bN = A.getType().getX();
   4935             }
   4936             if (A != null) {
   4937                 aM = B.getType().getY();
   4938                 aN = B.getType().getX();
   4939             }
   4940         } else {
   4941             if (A != null) {
   4942                 if (TransA == ScriptIntrinsicBLAS.TRANSPOSE ||
   4943                     TransA == ScriptIntrinsicBLAS.CONJ_TRANSPOSE ) {
   4944                     aN = A.getType().getY();
   4945                     aM = A.getType().getX();
   4946                 } else {
   4947                     aM = A.getType().getY();
   4948                     aN = A.getType().getX();
   4949                 }
   4950             }
   4951             if (B != null) {
   4952                 if (TransB == ScriptIntrinsicBLAS.TRANSPOSE ||
   4953                     TransB == ScriptIntrinsicBLAS.CONJ_TRANSPOSE ) {
   4954                     bN = B.getType().getY();
   4955                     bM = B.getType().getX();
   4956                 } else {
   4957                     bM = B.getType().getY();
   4958                     bN = B.getType().getX();
   4959                 }
   4960             }
   4961         }
   4962         if (A != null && B != null && C != null) {
   4963             if (aN != bM || aM != cM || bN != cN) {
   4964                 return false;
   4965             }
   4966         } else if (A != null && C != null) {
   4967             // A and C only, for SYRK
   4968             if (cM != cN) {
   4969                 return false;
   4970             }
   4971             if (aM != cM) {
   4972                 return false;
   4973             }
   4974         } else if (A != null && B != null) {
   4975             // A and B only
   4976             if (aN != bM) {
   4977                 return false;
   4978             }
   4979         }
   4980 
   4981         return true;
   4982     }
   4983 
   4984     private boolean validateL3_xGEMM(Element e, int TransA, int TransB, Allocation A, Allocation B, Allocation C) {
   4985         boolean result = true;
   4986         result &= validateTranspose(TransA);
   4987         result &= validateTranspose(TransB);
   4988         result &= validateL3(e, TransA, TransB, 0, A, B, C);
   4989 
   4990         return result;
   4991     }
   4992 
   4993     private void xGEMM_API_test(int transA, int transB, ArrayList<Allocation> mMatrix) {
   4994         for (Allocation matA : mMatrix) {
   4995             for (Allocation matB : mMatrix) {
   4996                 for (Allocation matC : mMatrix) {
   4997                     Element elemA = matA.getType().getElement();
   4998                     if (validateL3_xGEMM(elemA, transA, transB, matA, matB, matC)) {
   4999                         try {
   5000                             if (elemA.isCompatible(Element.F32(mRS))) {
   5001                                 mBLAS.SGEMM(transA, transB, alphaS, matA, matB, betaS, matC);
   5002                             } else if (elemA.isCompatible(Element.F64(mRS))) {
   5003                                 mBLAS.DGEMM(transA, transB, alphaD, matA, matB, betaD, matC);
   5004                             } else if (elemA.isCompatible(Element.F32_2(mRS))) {
   5005                                 mBLAS.CGEMM(transA, transB, alphaC, matA, matB, betaC, matC);
   5006                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   5007                                 mBLAS.ZGEMM(transA, transB, alphaZ, matA, matB, betaZ, matC);
   5008                             }
   5009                         } catch (RSRuntimeException e) {
   5010                             fail("should NOT throw RSRuntimeException");
   5011                         }
   5012                     } else {
   5013                         try {
   5014                             mBLAS.SGEMM(transA, transB, alphaS, matA, matB, betaS, matC);
   5015                             fail("should throw RSRuntimeException for SGEMM");
   5016                         } catch (RSRuntimeException e) {
   5017                         }
   5018                         try {
   5019                             mBLAS.DGEMM(transA, transB, alphaD, matA, matB, betaD, matC);
   5020                             fail("should throw RSRuntimeException for DGEMM");
   5021                         } catch (RSRuntimeException e) {
   5022                         }
   5023                         try {
   5024                             mBLAS.CGEMM(transA, transB, alphaC, matA, matB, betaC, matC);
   5025                             fail("should throw RSRuntimeException for CGEMM");
   5026                         } catch (RSRuntimeException e) {
   5027                         }
   5028                         try {
   5029                             mBLAS.ZGEMM(transA, transB, alphaZ, matA, matB, betaZ, matC);
   5030                             fail("should throw RSRuntimeException for ZGEMM");
   5031                         } catch (RSRuntimeException e) {
   5032                         }
   5033                     }
   5034                 }
   5035             }
   5036         }
   5037     }
   5038 
   5039     private void L3_xGEMM_API(ArrayList<Allocation> mMatrix) {
   5040         for (int transA : mTranspose) {
   5041             for (int transB : mTranspose) {
   5042                 xGEMM_API_test(transA, transB, mMatrix);
   5043             }
   5044         }
   5045     }
   5046 
   5047     public void test_L3_SGEMM_API() {
   5048         L3_xGEMM_API(mMatrixS);
   5049     }
   5050 
   5051     public void test_L3_DGEMM_API() {
   5052         L3_xGEMM_API(mMatrixD);
   5053     }
   5054 
   5055     public void test_L3_CGEMM_API() {
   5056         L3_xGEMM_API(mMatrixC);
   5057     }
   5058 
   5059     public void test_L3_ZGEMM_API() {
   5060         L3_xGEMM_API(mMatrixZ);
   5061     }
   5062 
   5063 
   5064     public void test_L3_SGEMM_Correctness() {
   5065         int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   5066         int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   5067 
   5068         // Populate input allocations
   5069         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dM));
   5070         Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK));
   5071         Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
   5072         matrixAS.copyFrom(mBLASData.L3_sGEMM_A_mk);
   5073         matrixBS.copyFrom(mBLASData.L3_sGEMM_B_kn);
   5074         matrixCS.copyFrom(mBLASData.L3_sGEMM_C_mn);
   5075 
   5076         // Test for the default case: NO_TRANS
   5077         mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS);
   5078         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
   5079         matrixCRef.copyFrom(mBLASData.L3_sGEMM_o_NN);
   5080         verifyMatrix(matrixCRef, matrixCS);
   5081 
   5082         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   5083         matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dK));
   5084         matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN));
   5085         matrixAS.copyFrom(mBLASData.L3_sGEMM_A_km);
   5086         matrixBS.copyFrom(mBLASData.L3_sGEMM_B_nk);
   5087 
   5088         transA = ScriptIntrinsicBLAS.TRANSPOSE;
   5089         transB = ScriptIntrinsicBLAS.TRANSPOSE;
   5090         // Reload matrix C, since it was overwritten by BLAS.
   5091         matrixCS.copyFrom(mBLASData.L3_sGEMM_C_mn);
   5092         mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS);
   5093         matrixCRef.copyFrom(mBLASData.L3_sGEMM_o_TT);
   5094         verifyMatrix(matrixCRef, matrixCS);
   5095 
   5096         transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   5097         transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   5098         matrixCS.copyFrom(mBLASData.L3_sGEMM_C_mn);
   5099         mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS);
   5100         matrixCRef.copyFrom(mBLASData.L3_sGEMM_o_HH);
   5101         verifyMatrix(matrixCRef, matrixCS);
   5102 
   5103         mRS.finish();
   5104         checkError();
   5105     }
   5106 
   5107     public void test_L3_DGEMM_Correctness() {
   5108         int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   5109         int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   5110 
   5111         // Populate input allocations
   5112         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dM));
   5113         Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK));
   5114         Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
   5115         matrixAD.copyFrom(mBLASData.L3_dGEMM_A_mk);
   5116         matrixBD.copyFrom(mBLASData.L3_dGEMM_B_kn);
   5117         matrixCD.copyFrom(mBLASData.L3_dGEMM_C_mn);
   5118         // Test for the default case: NO_TRANS
   5119         mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD);
   5120         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
   5121         matrixCRef.copyFrom(mBLASData.L3_dGEMM_o_NN);
   5122         verifyMatrix(matrixCRef, matrixCD);
   5123 
   5124         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   5125         matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dK));
   5126         matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN));
   5127         matrixAD.copyFrom(mBLASData.L3_dGEMM_A_km);
   5128         matrixBD.copyFrom(mBLASData.L3_dGEMM_B_nk);
   5129 
   5130         transA = ScriptIntrinsicBLAS.TRANSPOSE;
   5131         transB = ScriptIntrinsicBLAS.TRANSPOSE;
   5132         // Reload matrix C, since it was overwritten by BLAS.
   5133         matrixCD.copyFrom(mBLASData.L3_dGEMM_C_mn);
   5134         mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD);
   5135         matrixCRef.copyFrom(mBLASData.L3_dGEMM_o_TT);
   5136         verifyMatrix(matrixCRef, matrixCD);
   5137 
   5138         transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   5139         transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   5140         matrixCD.copyFrom(mBLASData.L3_dGEMM_C_mn);
   5141         mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD);
   5142         matrixCRef.copyFrom(mBLASData.L3_dGEMM_o_HH);
   5143         verifyMatrix(matrixCRef, matrixCD);
   5144 
   5145         mRS.finish();
   5146         checkError();
   5147     }
   5148 
   5149     public void test_L3_CGEMM_Correctness() {
   5150         int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   5151         int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   5152 
   5153         // Populate input allocations
   5154         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dM));
   5155         Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
   5156         Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   5157         matrixAC.copyFrom(mBLASData.L3_cGEMM_A_mk);
   5158         matrixBC.copyFrom(mBLASData.L3_cGEMM_B_kn);
   5159         matrixCC.copyFrom(mBLASData.L3_cGEMM_C_mn);
   5160 
   5161         // Test for the default case: NO_TRANS
   5162         mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC);
   5163         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   5164         matrixCRef.copyFrom(mBLASData.L3_cGEMM_o_NN);
   5165         verifyMatrix(matrixCRef, matrixCC);
   5166 
   5167         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   5168         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dK));
   5169         matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
   5170         matrixAC.copyFrom(mBLASData.L3_cGEMM_A_km);
   5171         matrixBC.copyFrom(mBLASData.L3_cGEMM_B_nk);
   5172 
   5173         transA = ScriptIntrinsicBLAS.TRANSPOSE;
   5174         transB = ScriptIntrinsicBLAS.TRANSPOSE;
   5175         // Reload matrix C, since it was overwritten by BLAS.
   5176         matrixCC.copyFrom(mBLASData.L3_cGEMM_C_mn);
   5177         mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC);
   5178         matrixCRef.copyFrom(mBLASData.L3_cGEMM_o_TT);
   5179         verifyMatrix(matrixCRef, matrixCC);
   5180 
   5181         transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   5182         transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   5183         matrixCC.copyFrom(mBLASData.L3_cGEMM_C_mn);
   5184         mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC);
   5185         matrixCRef.copyFrom(mBLASData.L3_cGEMM_o_HH);
   5186         verifyMatrix(matrixCRef, matrixCC);
   5187 
   5188         mRS.finish();
   5189         checkError();
   5190     }
   5191 
   5192     public void test_L3_ZGEMM_Correctness() {
   5193         int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   5194         int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   5195 
   5196         // Populate input allocations
   5197         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dM));
   5198         Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
   5199         Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   5200         matrixAZ.copyFrom(mBLASData.L3_zGEMM_A_mk);
   5201         matrixBZ.copyFrom(mBLASData.L3_zGEMM_B_kn);
   5202         matrixCZ.copyFrom(mBLASData.L3_zGEMM_C_mn);
   5203 
   5204         // Test for the default case: NO_TRANS
   5205         mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
   5206         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   5207         matrixCRef.copyFrom(mBLASData.L3_zGEMM_o_NN);
   5208         verifyMatrix(matrixCRef, matrixCZ);
   5209 
   5210         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
   5211         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dK));
   5212         matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
   5213         matrixAZ.copyFrom(mBLASData.L3_zGEMM_A_km);
   5214         matrixBZ.copyFrom(mBLASData.L3_zGEMM_B_nk);
   5215 
   5216         transA = ScriptIntrinsicBLAS.TRANSPOSE;
   5217         transB = ScriptIntrinsicBLAS.TRANSPOSE;
   5218         // Reload matrix C, since it was overwritten by BLAS.
   5219         matrixCZ.copyFrom(mBLASData.L3_zGEMM_C_mn);
   5220         mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
   5221         matrixCRef.copyFrom(mBLASData.L3_zGEMM_o_TT);
   5222         verifyMatrix(matrixCRef, matrixCZ);
   5223 
   5224         transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   5225         transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   5226         matrixCZ.copyFrom(mBLASData.L3_zGEMM_C_mn);
   5227         mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
   5228         matrixCRef.copyFrom(mBLASData.L3_zGEMM_o_HH);
   5229         verifyMatrix(matrixCRef, matrixCZ);
   5230 
   5231         mRS.finish();
   5232         checkError();
   5233     }
   5234 
   5235 
   5236 
   5237     private boolean validateL3_xSYMM(Element e, int Side, int Uplo, Allocation A, Allocation B, Allocation C) {
   5238         boolean result = true;
   5239         result &= validateSide(Side);
   5240         result &= validateUplo(Uplo);
   5241         result &= validateL3(e, 0, 0, Side, A, B, C);
   5242         result &= (A.getType().getX() == A.getType().getY());
   5243         return result;
   5244     }
   5245 
   5246     private void xSYMM_API_test(int Side, int Uplo, ArrayList<Allocation> mMatrix) {
   5247         for (Allocation matA : mMatrix) {
   5248             for (Allocation matB : mMatrix) {
   5249                 for (Allocation matC : mMatrix) {
   5250                     Element elemA = matA.getType().getElement();
   5251                     if (validateL3_xSYMM(elemA, Side, Uplo, matA, matB, matC)) {
   5252                         try {
   5253                             if (elemA.isCompatible(Element.F32(mRS))) {
   5254                                 mBLAS.SSYMM(Side, Uplo, alphaS, matA, matB, betaS, matC);
   5255                             } else if (elemA.isCompatible(Element.F64(mRS))) {
   5256                                 mBLAS.DSYMM(Side, Uplo, alphaD, matA, matB, betaD, matC);
   5257                             } else if (elemA.isCompatible(Element.F32_2(mRS))) {
   5258                                 mBLAS.CSYMM(Side, Uplo, alphaC, matA, matB, betaC, matC);
   5259                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   5260                                 mBLAS.ZSYMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC);
   5261                             }
   5262                         } catch (RSRuntimeException e) {
   5263                             fail("should NOT throw RSRuntimeException");
   5264                         }
   5265                     } else {
   5266                         try {
   5267                             mBLAS.SSYMM(Side, Uplo, alphaS, matA, matB, betaS, matC);
   5268                             fail("should throw RSRuntimeException for SSYMM");
   5269                         } catch (RSRuntimeException e) {
   5270                         }
   5271                         try {
   5272                             mBLAS.DSYMM(Side, Uplo, alphaD, matA, matB, betaD, matC);
   5273                             fail("should throw RSRuntimeException for DSYMM");
   5274                         } catch (RSRuntimeException e) {
   5275                         }
   5276                         try {
   5277                             mBLAS.CSYMM(Side, Uplo, alphaC, matA, matB, betaC, matC);
   5278                             fail("should throw RSRuntimeException for CSYMM");
   5279                         } catch (RSRuntimeException e) {
   5280                         }
   5281                         try {
   5282                             mBLAS.ZSYMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC);
   5283                             fail("should throw RSRuntimeException for ZSYMM");
   5284                         } catch (RSRuntimeException e) {
   5285                         }
   5286                     }
   5287                 }
   5288             }
   5289         }
   5290     }
   5291 
   5292     private void L3_xSYMM_API(ArrayList<Allocation> mMatrix) {
   5293         for (int Side : mSide) {
   5294             for (int Uplo : mUplo) {
   5295                 xSYMM_API_test(Side, Uplo, mMatrix);
   5296             }
   5297         }
   5298     }
   5299 
   5300     public void test_L3_SSYMM_API() {
   5301         L3_xSYMM_API(mMatrixS);
   5302     }
   5303 
   5304     public void test_L3_DSYMM_API() {
   5305         L3_xSYMM_API(mMatrixD);
   5306     }
   5307 
   5308     public void test_L3_CSYMM_API() {
   5309         L3_xSYMM_API(mMatrixC);
   5310     }
   5311 
   5312     public void test_L3_ZSYMM_API() {
   5313         L3_xSYMM_API(mMatrixZ);
   5314     }
   5315 
   5316 
   5317     public void test_L3_SSYMM_Correctness() {
   5318         int side = ScriptIntrinsicBLAS.LEFT;
   5319         int uplo = ScriptIntrinsicBLAS.UPPER;
   5320 
   5321         // Populate input allocations
   5322         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dM));
   5323         Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
   5324         Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
   5325         matrixAS.copyFrom(mBLASData.L3_sSYMM_A_mm);
   5326         matrixBS.copyFrom(mBLASData.L3_sSYMM_B_mn);
   5327         matrixCS.copyFrom(mBLASData.L3_sSYMM_C_mn);
   5328 
   5329         // Default case: SIDE = LEFT
   5330         mBLAS.SSYMM(side, uplo, alphaS, matrixAS, matrixBS, betaS, matrixCS);
   5331         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
   5332         matrixCRef.copyFrom(mBLASData.L3_sSYMM_o_L);
   5333         verifyMatrix(matrixCRef, matrixCS);
   5334 
   5335         // SIDE = RIGHT
   5336         side = ScriptIntrinsicBLAS.RIGHT;
   5337         matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   5338         matrixAS.copyFrom(mBLASData.L3_sSYMM_A_nn);
   5339         // Reload matrix C, since it was overwritten by BLAS.
   5340         matrixCS.copyFrom(mBLASData.L3_sSYMM_C_mn);
   5341         mBLAS.SSYMM(side, uplo, alphaS, matrixAS, matrixBS, betaS, matrixCS);
   5342         matrixCRef.copyFrom(mBLASData.L3_sSYMM_o_R);
   5343         verifyMatrix(matrixCRef, matrixCS);
   5344 
   5345         mRS.finish();
   5346         checkError();
   5347     }
   5348 
   5349     public void test_L3_DSYMM_Correctness() {
   5350         int side = ScriptIntrinsicBLAS.LEFT;
   5351         int uplo = ScriptIntrinsicBLAS.UPPER;
   5352 
   5353         // Populate input allocations
   5354         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dM));
   5355         Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
   5356         Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
   5357         matrixAD.copyFrom(mBLASData.L3_dSYMM_A_mm);
   5358         matrixBD.copyFrom(mBLASData.L3_dSYMM_B_mn);
   5359         matrixCD.copyFrom(mBLASData.L3_dSYMM_C_mn);
   5360 
   5361         // Default case: SIDE = LEFT
   5362         mBLAS.DSYMM(side, uplo, alphaD, matrixAD, matrixBD, betaD, matrixCD);
   5363         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
   5364         matrixCRef.copyFrom(mBLASData.L3_dSYMM_o_L);
   5365         verifyMatrix(matrixCRef, matrixCD);
   5366 
   5367         // SIDE = RIGHT
   5368         side = ScriptIntrinsicBLAS.RIGHT;
   5369         matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   5370         matrixAD.copyFrom(mBLASData.L3_dSYMM_A_nn);
   5371         // Reload matrix C, since it was overwritten by BLAS.
   5372         matrixCD.copyFrom(mBLASData.L3_dSYMM_C_mn);
   5373         mBLAS.DSYMM(side, uplo, alphaD, matrixAD, matrixBD, betaD, matrixCD);
   5374         matrixCRef.copyFrom(mBLASData.L3_dSYMM_o_R);
   5375         verifyMatrix(matrixCRef, matrixCD);
   5376 
   5377         mRS.finish();
   5378         checkError();
   5379     }
   5380 
   5381     public void test_L3_CSYMM_Correctness() {
   5382         int side = ScriptIntrinsicBLAS.LEFT;
   5383         int uplo = ScriptIntrinsicBLAS.UPPER;
   5384 
   5385         // Populate input allocations
   5386         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM));
   5387         Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   5388         Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   5389         matrixAC.copyFrom(mBLASData.L3_cSYMM_A_mm);
   5390         matrixBC.copyFrom(mBLASData.L3_cSYMM_B_mn);
   5391         matrixCC.copyFrom(mBLASData.L3_cSYMM_C_mn);
   5392 
   5393         // Default case: SIDE = LEFT
   5394         mBLAS.CSYMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC);
   5395         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   5396         matrixCRef.copyFrom(mBLASData.L3_cSYMM_o_L);
   5397         verifyMatrix(matrixCRef, matrixCC);
   5398 
   5399         // SIDE = RIGHT
   5400         side = ScriptIntrinsicBLAS.RIGHT;
   5401         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   5402         matrixAC.copyFrom(mBLASData.L3_cSYMM_A_nn);
   5403         // Reload matrix C, since it was overwritten by BLAS.
   5404         matrixCC.copyFrom(mBLASData.L3_cSYMM_C_mn);
   5405         mBLAS.CSYMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC);
   5406         matrixCRef.copyFrom(mBLASData.L3_cSYMM_o_R);
   5407         verifyMatrix(matrixCRef, matrixCC);
   5408 
   5409         mRS.finish();
   5410         checkError();
   5411     }
   5412 
   5413     public void test_L3_ZSYMM_Correctness() {
   5414         int side = ScriptIntrinsicBLAS.LEFT;
   5415         int uplo = ScriptIntrinsicBLAS.UPPER;
   5416 
   5417         // Populate input allocations
   5418         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM));
   5419         Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   5420         Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   5421         matrixAZ.copyFrom(mBLASData.L3_zSYMM_A_mm);
   5422         matrixBZ.copyFrom(mBLASData.L3_zSYMM_B_mn);
   5423         matrixCZ.copyFrom(mBLASData.L3_zSYMM_C_mn);
   5424 
   5425         // Default case: SIDE = LEFT
   5426         mBLAS.ZSYMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
   5427         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   5428         matrixCRef.copyFrom(mBLASData.L3_zSYMM_o_L);
   5429         verifyMatrix(matrixCRef, matrixCZ);
   5430 
   5431         // SIDE = RIGHT
   5432         side = ScriptIntrinsicBLAS.RIGHT;
   5433         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   5434         matrixAZ.copyFrom(mBLASData.L3_zSYMM_A_nn);
   5435         // Reload matrix C, since it was overwritten by BLAS.
   5436         matrixCZ.copyFrom(mBLASData.L3_zSYMM_C_mn);
   5437         mBLAS.ZSYMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
   5438         matrixCRef.copyFrom(mBLASData.L3_zSYMM_o_R);
   5439         verifyMatrix(matrixCRef, matrixCZ);
   5440 
   5441         mRS.finish();
   5442         checkError();
   5443     }
   5444 
   5445 
   5446     private boolean validateHEMM(Element e, int Side, int Uplo, Allocation A, Allocation B, Allocation C) {
   5447         if (!validateSide(Side)) {
   5448             return false;
   5449         }
   5450 
   5451         if (!validateUplo(Uplo)) {
   5452             return false;
   5453         }
   5454 
   5455         if (!A.getType().getElement().isCompatible(e) ||
   5456             !B.getType().getElement().isCompatible(e) ||
   5457             !C.getType().getElement().isCompatible(e)) {
   5458             return false;
   5459         }
   5460 
   5461         // A must be square; can potentially be relaxed similar to TRSM
   5462         int adim = A.getType().getX();
   5463         if (adim != A.getType().getY()) {
   5464             return false;
   5465         }
   5466         if ((Side == ScriptIntrinsicBLAS.LEFT && adim != B.getType().getY()) ||
   5467             (Side == ScriptIntrinsicBLAS.RIGHT && adim != B.getType().getX())) {
   5468             return false;
   5469         }
   5470         if (B.getType().getX() != C.getType().getX() ||
   5471             B.getType().getY() != C.getType().getY()) {
   5472             return false;
   5473         }
   5474 
   5475         return true;
   5476     }
   5477 
   5478     private void xHEMM_API_test(int Side, int Uplo, ArrayList<Allocation> mMatrix) {
   5479         for (Allocation matA : mMatrix) {
   5480             for (Allocation matB : mMatrix) {
   5481                 for (Allocation matC : mMatrix) {
   5482                     Element elemA = matA.getType().getElement();
   5483                     if (validateHEMM(elemA, Side, Uplo, matA, matB, matC)) {
   5484                         try {
   5485                             if (elemA.isCompatible(Element.F32_2(mRS))) {
   5486                                 mBLAS.CHEMM(Side, Uplo, alphaC, matA, matB, betaC, matC);
   5487                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   5488                                 mBLAS.ZHEMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC);
   5489                             }
   5490                         } catch (RSRuntimeException e) {
   5491                             fail("should NOT throw RSRuntimeException");
   5492                         }
   5493                     } else {
   5494                         try {
   5495                             mBLAS.CHEMM(Side, Uplo, alphaC, matA, matB, betaC, matC);
   5496                             fail("should throw RSRuntimeException for CHEMM");
   5497                         } catch (RSRuntimeException e) {
   5498                         }
   5499                         try {
   5500                             mBLAS.ZHEMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC);
   5501                             fail("should throw RSRuntimeException for ZHEMM");
   5502                         } catch (RSRuntimeException e) {
   5503                         }
   5504                     }
   5505                 }
   5506             }
   5507         }
   5508     }
   5509 
   5510     public void L3_xHEMM_API(ArrayList<Allocation> mMatrix) {
   5511         for (int Side : mSide) {
   5512             for (int Uplo : mUplo) {
   5513                 xHEMM_API_test(Side, Uplo, mMatrix);
   5514             }
   5515         }
   5516     }
   5517 
   5518     public void test_L3_CHEMM_API() {
   5519         L3_xHEMM_API(mMatrixC);
   5520     }
   5521 
   5522     public void test_L3_ZHEMM_API() {
   5523         L3_xHEMM_API(mMatrixZ);
   5524     }
   5525 
   5526     public void test_L3_CHEMM_Correctness() {
   5527         int side = ScriptIntrinsicBLAS.LEFT;
   5528         int uplo = ScriptIntrinsicBLAS.UPPER;
   5529 
   5530         // Populate input allocations
   5531         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM));
   5532         Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   5533         Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   5534         matrixAC.copyFrom(mBLASData.L3_cHEMM_A_mm);
   5535         matrixBC.copyFrom(mBLASData.L3_cHEMM_B_mn);
   5536         matrixCC.copyFrom(mBLASData.L3_cHEMM_C_mn);
   5537 
   5538         // Default case: SIDE = LEFT
   5539         mBLAS.CHEMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC);
   5540         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   5541         matrixCRef.copyFrom(mBLASData.L3_cHEMM_o_L);
   5542         verifyMatrix(matrixCRef, matrixCC);
   5543 
   5544         // SIDE = RIGHT
   5545         side = ScriptIntrinsicBLAS.RIGHT;
   5546         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   5547         matrixAC.copyFrom(mBLASData.L3_cHEMM_A_nn);
   5548         // Reload matrix C, since it was overwritten by BLAS.
   5549         matrixCC.copyFrom(mBLASData.L3_cHEMM_C_mn);
   5550         mBLAS.CHEMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC);
   5551         matrixCRef.copyFrom(mBLASData.L3_cHEMM_o_R);
   5552         verifyMatrix(matrixCRef, matrixCC);
   5553 
   5554         mRS.finish();
   5555         checkError();
   5556     }
   5557 
   5558     public void test_L3_ZHEMM_Correctness() {
   5559         int side = ScriptIntrinsicBLAS.LEFT;
   5560         int uplo = ScriptIntrinsicBLAS.UPPER;
   5561 
   5562         // Populate input allocations
   5563         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM));
   5564         Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   5565         Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   5566         matrixAZ.copyFrom(mBLASData.L3_zHEMM_A_mm);
   5567         matrixBZ.copyFrom(mBLASData.L3_zHEMM_B_mn);
   5568         matrixCZ.copyFrom(mBLASData.L3_zHEMM_C_mn);
   5569 
   5570         // Default case: SIDE = LEFT
   5571         mBLAS.ZHEMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
   5572         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   5573         matrixCRef.copyFrom(mBLASData.L3_zHEMM_o_L);
   5574         verifyMatrix(matrixCRef, matrixCZ);
   5575 
   5576         // SIDE = RIGHT
   5577         side = ScriptIntrinsicBLAS.RIGHT;
   5578         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   5579         matrixAZ.copyFrom(mBLASData.L3_zHEMM_A_nn);
   5580         // Reload matrix C, since it was overwritten by BLAS.
   5581         matrixCZ.copyFrom(mBLASData.L3_zHEMM_C_mn);
   5582         mBLAS.ZHEMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
   5583         matrixCRef.copyFrom(mBLASData.L3_zHEMM_o_R);
   5584         verifyMatrix(matrixCRef, matrixCZ);
   5585 
   5586         mRS.finish();
   5587         checkError();
   5588     }
   5589 
   5590 
   5591 
   5592     private boolean validateL3_xSYRK(Element e, int Uplo, int Trans, Allocation A, Allocation C) {
   5593         boolean result = true;
   5594         result &= validateTranspose(Trans);
   5595         result &= validateUplo(Uplo);
   5596         result &= validateL3(e, Trans, 0, 0, A, null, C);
   5597 
   5598         return result;
   5599     }
   5600 
   5601     private void xSYRK_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) {
   5602         for (Allocation matA : mMatrix) {
   5603             for (Allocation matC : mMatrix) {
   5604                 Element elemA = matA.getType().getElement();
   5605                 if (validateL3_xSYRK(elemA, Uplo, Trans, matA, matC)) {
   5606                     try {
   5607                         if (elemA.isCompatible(Element.F32(mRS))) {
   5608                             mBLAS.SSYRK(Uplo, Trans, alphaS, matA, betaS, matC);
   5609                         } else if (elemA.isCompatible(Element.F64(mRS))) {
   5610                             mBLAS.DSYRK(Uplo, Trans, alphaD, matA, betaD, matC);
   5611                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
   5612                             mBLAS.CSYRK(Uplo, Trans, alphaC, matA, betaC, matC);
   5613                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   5614                             mBLAS.ZSYRK(Uplo, Trans, alphaZ, matA, betaZ, matC);
   5615                         }
   5616                     } catch (RSRuntimeException e) {
   5617                         fail("should NOT throw RSRuntimeException");
   5618                     }
   5619                 } else {
   5620                     try {
   5621                         mBLAS.SSYRK(Uplo, Trans, alphaS, matA, betaS, matC);
   5622                         fail("should throw RSRuntimeException for SSYRK");
   5623                     } catch (RSRuntimeException e) {
   5624                     }
   5625                     try {
   5626                         mBLAS.DSYRK(Uplo, Trans, alphaD, matA, betaD, matC);
   5627                         fail("should throw RSRuntimeException for DSYRK");
   5628                     } catch (RSRuntimeException e) {
   5629                     }
   5630                     try {
   5631                         mBLAS.CSYRK(Uplo, Trans, alphaC, matA, betaC, matC);
   5632                         fail("should throw RSRuntimeException for CSYRK");
   5633                     } catch (RSRuntimeException e) {
   5634                     }
   5635                     try {
   5636                         mBLAS.ZSYRK(Uplo, Trans, alphaZ, matA, betaZ, matC);
   5637                         fail("should throw RSRuntimeException for ZSYRK");
   5638                     } catch (RSRuntimeException e) {
   5639                     }
   5640                 }
   5641             }
   5642         }
   5643     }
   5644 
   5645     public void L3_xSYRK_API(ArrayList<Allocation> mMatrix) {
   5646         for (int Uplo : mUplo) {
   5647             for (int Trans : mTranspose) {
   5648                 xSYRK_API_test(Uplo, Trans, mMatrix);
   5649             }
   5650         }
   5651     }
   5652 
   5653     public void test_L3_SSYRK_API() {
   5654         L3_xSYRK_API(mMatrixS);
   5655     }
   5656 
   5657     public void test_L3_DSYRK_API() {
   5658         L3_xSYRK_API(mMatrixD);
   5659     }
   5660 
   5661     public void test_L3_CSYRK_API() {
   5662         L3_xSYRK_API(mMatrixC);
   5663     }
   5664 
   5665     public void test_L3_ZSYRK_API() {
   5666         L3_xSYRK_API(mMatrixZ);
   5667     }
   5668 
   5669 
   5670     public void test_L3_SSYRK_Correctness() {
   5671         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   5672         int uplo = ScriptIntrinsicBLAS.UPPER;
   5673 
   5674         // Populate input allocations
   5675         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN));
   5676         Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   5677         matrixAS.copyFrom(mBLASData.L3_sSYRK_A_nk);
   5678         matrixCS.copyFrom(mBLASData.L3_sSYRK_C_nn);
   5679 
   5680         // Default case: NO_TRANSPOSE
   5681         mBLAS.SSYRK(uplo, trans, alphaS, matrixAS, betaS, matrixCS);
   5682         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   5683         matrixCRef.copyFrom(mBLASData.L3_sSYRK_o_N);
   5684         verifyMatrix(matrixCRef, matrixCS, true);
   5685 
   5686         // Case: TRANSPOSE
   5687         matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK));
   5688         matrixAS.copyFrom(mBLASData.L3_sSYRK_A_kn);
   5689         // Reload matrix C, since it was overwritten by BLAS.
   5690         matrixCS.copyFrom(mBLASData.L3_sSYRK_C_nn);
   5691 
   5692         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   5693         mBLAS.SSYRK(uplo, trans, alphaS, matrixAS, betaS, matrixCS);
   5694         matrixCRef.copyFrom(mBLASData.L3_sSYRK_o_T);
   5695         verifyMatrix(matrixCRef, matrixCS, true);
   5696 
   5697         mRS.finish();
   5698         checkError();
   5699     }
   5700 
   5701     public void test_L3_DSYRK_Correctness() {
   5702         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   5703         int uplo = ScriptIntrinsicBLAS.UPPER;
   5704 
   5705         // Populate input allocations
   5706         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN));
   5707         Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   5708         matrixAD.copyFrom(mBLASData.L3_dSYRK_A_nk);
   5709         matrixCD.copyFrom(mBLASData.L3_dSYRK_C_nn);
   5710 
   5711         // Default case: NO_TRANSPOSE
   5712         mBLAS.DSYRK(uplo, trans, alphaD, matrixAD, betaD, matrixCD);
   5713         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   5714         matrixCRef.copyFrom(mBLASData.L3_dSYRK_o_N);
   5715         verifyMatrix(matrixCRef, matrixCD, true);
   5716 
   5717         // Case: TRANSPOSE
   5718         matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK));
   5719         matrixAD.copyFrom(mBLASData.L3_dSYRK_A_kn);
   5720         // Reload matrix C, since it was overwritten by BLAS.
   5721         matrixCD.copyFrom(mBLASData.L3_dSYRK_C_nn);
   5722 
   5723         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   5724         mBLAS.DSYRK(uplo, trans, alphaD, matrixAD, betaD, matrixCD);
   5725         matrixCRef.copyFrom(mBLASData.L3_dSYRK_o_T);
   5726         verifyMatrix(matrixCRef, matrixCD, true);
   5727 
   5728         mRS.finish();
   5729         checkError();
   5730     }
   5731 
   5732     public void test_L3_CSYRK_Correctness() {
   5733         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   5734         int uplo = ScriptIntrinsicBLAS.UPPER;
   5735 
   5736         // Populate input allocations
   5737         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
   5738         Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   5739         matrixAC.copyFrom(mBLASData.L3_cSYRK_A_nk);
   5740         matrixCC.copyFrom(mBLASData.L3_cSYRK_C_nn);
   5741 
   5742         // Default case: NO_TRANSPOSE
   5743         mBLAS.CSYRK(uplo, trans, alphaC, matrixAC, betaC, matrixCC);
   5744         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   5745         matrixCRef.copyFrom(mBLASData.L3_cSYRK_o_N);
   5746         verifyMatrix(matrixCRef, matrixCC, true);
   5747 
   5748         // Case: TRANSPOSE
   5749         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
   5750         matrixAC.copyFrom(mBLASData.L3_cSYRK_A_kn);
   5751         // Reload matrix C, since it was overwritten by BLAS.
   5752         matrixCC.copyFrom(mBLASData.L3_cSYRK_C_nn);
   5753 
   5754         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   5755         mBLAS.CSYRK(uplo, trans, alphaC, matrixAC, betaC, matrixCC);
   5756         matrixCRef.copyFrom(mBLASData.L3_cSYRK_o_T);
   5757         verifyMatrix(matrixCRef, matrixCC, true);
   5758 
   5759         mRS.finish();
   5760         checkError();
   5761     }
   5762 
   5763     public void test_L3_ZSYRK_Correctness() {
   5764         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   5765         int uplo = ScriptIntrinsicBLAS.UPPER;
   5766 
   5767         // Populate input allocations
   5768         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
   5769         Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   5770         matrixAZ.copyFrom(mBLASData.L3_zSYRK_A_nk);
   5771         matrixCZ.copyFrom(mBLASData.L3_zSYRK_C_nn);
   5772 
   5773         // Default case: NO_TRANSPOSE
   5774         mBLAS.ZSYRK(uplo, trans, alphaZ, matrixAZ, betaZ, matrixCZ);
   5775         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   5776         matrixCRef.copyFrom(mBLASData.L3_zSYRK_o_N);
   5777         verifyMatrix(matrixCRef, matrixCZ, true);
   5778 
   5779         // Case: TRANSPOSE
   5780         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
   5781         matrixAZ.copyFrom(mBLASData.L3_zSYRK_A_kn);
   5782         // Reload matrix C, since it was overwritten by BLAS.
   5783         matrixCZ.copyFrom(mBLASData.L3_zSYRK_C_nn);
   5784 
   5785         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   5786         mBLAS.ZSYRK(uplo, trans, alphaZ, matrixAZ, betaZ, matrixCZ);
   5787         matrixCRef.copyFrom(mBLASData.L3_zSYRK_o_T);
   5788         verifyMatrix(matrixCRef, matrixCZ, true);
   5789 
   5790         mRS.finish();
   5791         checkError();
   5792     }
   5793 
   5794 
   5795     private boolean validateHERK(Element e, int Uplo, int Trans, Allocation A, Allocation C) {
   5796         if (!validateUplo(Uplo)) {
   5797             return false;
   5798         }
   5799         if (!A.getType().getElement().isCompatible(e) ||
   5800             !C.getType().getElement().isCompatible(e)) {
   5801             return false;
   5802         }
   5803         if (!validateConjTranspose(Trans)) {
   5804             return false;
   5805         }
   5806         int cdim = C.getType().getX();
   5807         if (cdim != C.getType().getY()) {
   5808             return false;
   5809         }
   5810         if (Trans == ScriptIntrinsicBLAS.NO_TRANSPOSE) {
   5811             if (cdim != A.getType().getY()) {
   5812                 return false;
   5813             }
   5814         } else {
   5815             if (cdim != A.getType().getX()) {
   5816                 return false;
   5817             }
   5818         }
   5819         return true;
   5820     }
   5821 
   5822     private void xHERK_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) {
   5823         for (Allocation matA : mMatrix) {
   5824             for (Allocation matC : mMatrix) {
   5825                 Element elemA = matA.getType().getElement();
   5826                 if (validateHERK(elemA, Uplo, Trans, matA, matC)) {
   5827                     try {
   5828                         if (elemA.isCompatible(Element.F32_2(mRS))) {
   5829                             mBLAS.CHERK(Uplo, Trans, alphaS, matA, betaS, matC);
   5830                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   5831                             mBLAS.ZHERK(Uplo, Trans, alphaD, matA, betaD, matC);
   5832                         }
   5833                     } catch (RSRuntimeException e) {
   5834                         fail("should NOT throw RSRuntimeException");
   5835                     }
   5836                 } else {
   5837                     try {
   5838                         mBLAS.CHERK(Uplo, Trans, alphaS, matA, betaS, matC);
   5839                         fail("should throw RSRuntimeException for CHERK");
   5840                     } catch (RSRuntimeException e) {
   5841                     }
   5842                     try {
   5843                         mBLAS.ZHERK(Uplo, Trans, alphaD, matA, betaD, matC);
   5844                         fail("should throw RSRuntimeException for ZHERK");
   5845                     } catch (RSRuntimeException e) {
   5846                     }
   5847                 }
   5848             }
   5849         }
   5850     }
   5851 
   5852     public void L3_xHERK_API(ArrayList<Allocation> mMatrix) {
   5853         for (int Uplo : mUplo) {
   5854             for (int Trans : mTranspose) {
   5855                 xHERK_API_test(Uplo, Trans, mMatrix);
   5856             }
   5857         }
   5858     }
   5859 
   5860     public void test_L3_CHERK_API() {
   5861         L3_xHERK_API(mMatrixC);
   5862     }
   5863 
   5864     public void test_L3_ZHERK_API() {
   5865         L3_xHERK_API(mMatrixZ);
   5866     }
   5867 
   5868     public void test_L3_CHERK_Correctness() {
   5869         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   5870         int uplo = ScriptIntrinsicBLAS.UPPER;
   5871 
   5872         // Populate input allocations
   5873         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
   5874         Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   5875         matrixAC.copyFrom(mBLASData.L3_cHERK_A_nk);
   5876         matrixCC.copyFrom(mBLASData.L3_cHERK_C_nn);
   5877 
   5878         // Default case: NO_TRANSPOSE
   5879         mBLAS.CHERK(uplo, trans, alphaS, matrixAC, betaS, matrixCC);
   5880         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   5881         matrixCRef.copyFrom(mBLASData.L3_cHERK_o_N);
   5882         verifyMatrix(matrixCRef, matrixCC, true);
   5883 
   5884         // Case: TRANSPOSE
   5885         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
   5886         matrixAC.copyFrom(mBLASData.L3_cHERK_A_kn);
   5887         // Reload matrix C, since it was overwritten by BLAS.
   5888         matrixCC.copyFrom(mBLASData.L3_cHERK_C_nn);
   5889 
   5890         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   5891         mBLAS.CHERK(uplo, trans, alphaS, matrixAC, betaS, matrixCC);
   5892         matrixCRef.copyFrom(mBLASData.L3_cHERK_o_H);
   5893         verifyMatrix(matrixCRef, matrixCC, true);
   5894 
   5895         mRS.finish();
   5896         checkError();
   5897     }
   5898 
   5899     public void test_L3_ZHERK_Correctness() {
   5900         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   5901         int uplo = ScriptIntrinsicBLAS.UPPER;
   5902 
   5903         // Populate input allocations
   5904         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
   5905         Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   5906         matrixAZ.copyFrom(mBLASData.L3_zHERK_A_nk);
   5907         matrixCZ.copyFrom(mBLASData.L3_zHERK_C_nn);
   5908 
   5909         // Default case: NO_TRANSPOSE
   5910         mBLAS.ZHERK(uplo, trans, alphaD, matrixAZ, betaD, matrixCZ);
   5911         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   5912         matrixCRef.copyFrom(mBLASData.L3_zHERK_o_N);
   5913         verifyMatrix(matrixCRef, matrixCZ, true);
   5914 
   5915         // Case: TRANSPOSE
   5916         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
   5917         matrixAZ.copyFrom(mBLASData.L3_zHERK_A_kn);
   5918         // Reload matrix C, since it was overwritten by BLAS.
   5919         matrixCZ.copyFrom(mBLASData.L3_zHERK_C_nn);
   5920 
   5921         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   5922         mBLAS.ZHERK(uplo, trans, alphaD, matrixAZ, betaD, matrixCZ);
   5923         matrixCRef.copyFrom(mBLASData.L3_zHERK_o_H);
   5924         verifyMatrix(matrixCRef, matrixCZ, true);
   5925 
   5926         mRS.finish();
   5927         checkError();
   5928     }
   5929 
   5930 
   5931     private boolean validateSYR2K(Element e, int Uplo, int Trans, Allocation A, Allocation B, Allocation C) {
   5932         if (!validateTranspose(Trans)) {
   5933             return false;
   5934         }
   5935         if (!validateUplo(Uplo)) {
   5936             return false;
   5937         }
   5938 
   5939         if (!A.getType().getElement().isCompatible(e) ||
   5940             !B.getType().getElement().isCompatible(e) ||
   5941             !C.getType().getElement().isCompatible(e)) {
   5942             return false;
   5943         }
   5944         int Cdim = -1;
   5945         // A is n x k if no transpose, k x n if transpose
   5946         // C is n x n
   5947         if (Trans == ScriptIntrinsicBLAS.TRANSPOSE) {
   5948             // check columns versus C
   5949             Cdim = A.getType().getX();
   5950         } else {
   5951             // check rows versus C
   5952             Cdim = A.getType().getY();
   5953         }
   5954         if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) {
   5955             return false;
   5956         }
   5957         // A dims == B dims
   5958         if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
   5959             return false;
   5960         }
   5961         return true;
   5962     }
   5963 
   5964     private void xSYR2K_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) {
   5965         for (Allocation matA : mMatrix) {
   5966             for (Allocation matB : mMatrix) {
   5967                 for (Allocation matC : mMatrix) {
   5968                     Element elemA = matA.getType().getElement();
   5969                     if (validateSYR2K(elemA, Uplo, Trans, matA, matB, matC)) {
   5970                         try {
   5971                             if (elemA.isCompatible(Element.F32(mRS))) {
   5972                                 mBLAS.SSYR2K(Uplo, Trans, alphaS, matA, matB, betaS, matC);
   5973                             } else if (elemA.isCompatible(Element.F64(mRS))) {
   5974                                 mBLAS.DSYR2K(Uplo, Trans, alphaD, matA, matB, betaD, matC);
   5975                             } else if (elemA.isCompatible(Element.F32_2(mRS))) {
   5976                                 mBLAS.CSYR2K(Uplo, Trans, alphaC, matA, matB, betaC, matC);
   5977                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   5978                                 mBLAS.ZSYR2K(Uplo, Trans, alphaZ, matA, matB, betaZ, matC);
   5979                             }
   5980                         } catch (RSRuntimeException e) {
   5981                             fail("should NOT throw RSRuntimeException");
   5982                         }
   5983                     } else {
   5984                         try {
   5985                             mBLAS.SSYR2K(Uplo, Trans, alphaS, matA, matB, betaS, matC);
   5986                             fail("should throw RSRuntimeException for SSYR2K");
   5987                         } catch (RSRuntimeException e) {
   5988                         }
   5989                         try {
   5990                             mBLAS.DSYR2K(Uplo, Trans, alphaD, matA, matB, betaD, matC);
   5991                             fail("should throw RSRuntimeException for DSYR2K");
   5992                         } catch (RSRuntimeException e) {
   5993                         }
   5994                         try {
   5995                             mBLAS.CSYR2K(Uplo, Trans, alphaC, matA, matB, betaC, matC);
   5996                             fail("should throw RSRuntimeException for CSYR2K");
   5997                         } catch (RSRuntimeException e) {
   5998                         }
   5999                         try {
   6000                             mBLAS.ZSYR2K(Uplo, Trans, alphaZ, matA, matB, betaZ, matC);
   6001                             fail("should throw RSRuntimeException for ZSYR2K");
   6002                         } catch (RSRuntimeException e) {
   6003                         }
   6004                     }
   6005                 }
   6006             }
   6007         }
   6008     }
   6009 
   6010     public void L3_xSYR2K_API(ArrayList<Allocation> mMatrix) {
   6011         for (int Uplo : mUplo) {
   6012             for (int Trans : mTranspose) {
   6013                 xSYR2K_API_test(Uplo, Trans, mMatrix);
   6014             }
   6015         }
   6016     }
   6017 
   6018     public void test_L3_SSYR2K_API() {
   6019         L3_xSYR2K_API(mMatrixS);
   6020     }
   6021 
   6022     public void test_L3_DSYR2K_API() {
   6023         L3_xSYR2K_API(mMatrixD);
   6024     }
   6025 
   6026     public void test_L3_CSYR2K_API() {
   6027         L3_xSYR2K_API(mMatrixC);
   6028     }
   6029 
   6030     public void test_L3_ZSYR2K_API() {
   6031         L3_xSYR2K_API(mMatrixZ);
   6032     }
   6033 
   6034 
   6035     public void test_L3_SSYR2K_Correctness() {
   6036         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   6037         int uplo = ScriptIntrinsicBLAS.UPPER;
   6038 
   6039         // Populate input allocations
   6040         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN));
   6041         Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN));
   6042         Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   6043         matrixAS.copyFrom(mBLASData.L3_sSYR2K_A_nk);
   6044         matrixBS.copyFrom(mBLASData.L3_sSYR2K_B_nk);
   6045         matrixCS.copyFrom(mBLASData.L3_sSYR2K_C_nn);
   6046 
   6047         // Default case: NO_TRANSPOSE
   6048         mBLAS.SSYR2K(uplo, trans, alphaS, matrixAS, matrixBS, betaS, matrixCS);
   6049         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   6050         matrixCRef.copyFrom(mBLASData.L3_sSYR2K_o_N);
   6051         verifyMatrix(matrixCRef, matrixCS, true);
   6052 
   6053         // Case: TRANSPOSE
   6054         matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK));
   6055         matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK));
   6056         matrixAS.copyFrom(mBLASData.L3_sSYR2K_A_kn);
   6057         matrixBS.copyFrom(mBLASData.L3_sSYR2K_B_kn);
   6058         // Reload matrix C, since it was overwritten by BLAS.
   6059         matrixCS.copyFrom(mBLASData.L3_sSYR2K_C_nn);
   6060 
   6061         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   6062         mBLAS.SSYR2K(uplo, trans, alphaS, matrixAS, matrixBS, betaS, matrixCS);
   6063         matrixCRef.copyFrom(mBLASData.L3_sSYR2K_o_T);
   6064         verifyMatrix(matrixCRef, matrixCS, true);
   6065 
   6066         mRS.finish();
   6067         checkError();
   6068     }
   6069 
   6070     public void test_L3_DSYR2K_Correctness() {
   6071         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   6072         int uplo = ScriptIntrinsicBLAS.UPPER;
   6073 
   6074         // Populate input allocations
   6075         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN));
   6076         Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN));
   6077         Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   6078         matrixAD.copyFrom(mBLASData.L3_dSYR2K_A_nk);
   6079         matrixBD.copyFrom(mBLASData.L3_dSYR2K_B_nk);
   6080         matrixCD.copyFrom(mBLASData.L3_dSYR2K_C_nn);
   6081 
   6082         // Default case: NO_TRANSPOSE
   6083         mBLAS.DSYR2K(uplo, trans, alphaD, matrixAD, matrixBD, betaD, matrixCD);
   6084         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   6085         matrixCRef.copyFrom(mBLASData.L3_dSYR2K_o_N);
   6086         verifyMatrix(matrixCRef, matrixCD, true);
   6087 
   6088         // Case: TRANSPOSE
   6089         matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK));
   6090         matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK));
   6091         matrixAD.copyFrom(mBLASData.L3_dSYR2K_A_kn);
   6092         matrixBD.copyFrom(mBLASData.L3_dSYR2K_B_kn);
   6093         // Reload matrix C, since it was overwritten by BLAS.
   6094         matrixCD.copyFrom(mBLASData.L3_dSYR2K_C_nn);
   6095 
   6096         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   6097         mBLAS.DSYR2K(uplo, trans, alphaD, matrixAD, matrixBD, betaD, matrixCD);
   6098         matrixCRef.copyFrom(mBLASData.L3_dSYR2K_o_T);
   6099         verifyMatrix(matrixCRef, matrixCD, true);
   6100 
   6101         mRS.finish();
   6102         checkError();
   6103     }
   6104 
   6105     public void test_L3_CSYR2K_Correctness() {
   6106         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   6107         int uplo = ScriptIntrinsicBLAS.UPPER;
   6108 
   6109         // Populate input allocations
   6110         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
   6111         Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
   6112         Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   6113         matrixAC.copyFrom(mBLASData.L3_cSYR2K_A_nk);
   6114         matrixBC.copyFrom(mBLASData.L3_cSYR2K_B_nk);
   6115         matrixCC.copyFrom(mBLASData.L3_cSYR2K_C_nn);
   6116 
   6117         // Default case: NO_TRANSPOSE
   6118         mBLAS.CSYR2K(uplo, trans, alphaC, matrixAC, matrixBC, betaC, matrixCC);
   6119         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   6120         matrixCRef.copyFrom(mBLASData.L3_cSYR2K_o_N);
   6121         verifyMatrix(matrixCRef, matrixCC, true);
   6122 
   6123         // Case: TRANSPOSE
   6124         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
   6125         matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
   6126         matrixAC.copyFrom(mBLASData.L3_cSYR2K_A_kn);
   6127         matrixBC.copyFrom(mBLASData.L3_cSYR2K_B_kn);
   6128         // Reload matrix C, since it was overwritten by BLAS.
   6129         matrixCC.copyFrom(mBLASData.L3_cSYR2K_C_nn);
   6130 
   6131         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   6132         mBLAS.CSYR2K(uplo, trans, alphaC, matrixAC, matrixBC, betaC, matrixCC);
   6133         matrixCRef.copyFrom(mBLASData.L3_cSYR2K_o_T);
   6134         verifyMatrix(matrixCRef, matrixCC, true);
   6135 
   6136         mRS.finish();
   6137         checkError();
   6138     }
   6139 
   6140     public void test_L3_ZSYR2K_Correctness() {
   6141         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   6142         int uplo = ScriptIntrinsicBLAS.UPPER;
   6143 
   6144         // Populate input allocations
   6145         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
   6146         Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
   6147         Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   6148         matrixAZ.copyFrom(mBLASData.L3_zSYR2K_A_nk);
   6149         matrixBZ.copyFrom(mBLASData.L3_zSYR2K_B_nk);
   6150         matrixCZ.copyFrom(mBLASData.L3_zSYR2K_C_nn);
   6151 
   6152         // Default case: NO_TRANSPOSE
   6153         mBLAS.ZSYR2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
   6154         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   6155         matrixCRef.copyFrom(mBLASData.L3_zSYR2K_o_N);
   6156         verifyMatrix(matrixCRef, matrixCZ, true);
   6157 
   6158         // Case: TRANSPOSE
   6159         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
   6160         matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
   6161         matrixAZ.copyFrom(mBLASData.L3_zSYR2K_A_kn);
   6162         matrixBZ.copyFrom(mBLASData.L3_zSYR2K_B_kn);
   6163         // Reload matrix C, since it was overwritten by BLAS.
   6164         matrixCZ.copyFrom(mBLASData.L3_zSYR2K_C_nn);
   6165 
   6166         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   6167         mBLAS.ZSYR2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
   6168         matrixCRef.copyFrom(mBLASData.L3_zSYR2K_o_T);
   6169         verifyMatrix(matrixCRef, matrixCZ, true);
   6170 
   6171         mRS.finish();
   6172         checkError();
   6173     }
   6174 
   6175 
   6176     private boolean validateHER2K(Element e, int Uplo, int Trans, Allocation A, Allocation B, Allocation C) {
   6177         if (!validateUplo(Uplo)) {
   6178             return false;
   6179         }
   6180         if (!A.getType().getElement().isCompatible(e) ||
   6181             !B.getType().getElement().isCompatible(e) ||
   6182             !C.getType().getElement().isCompatible(e)) {
   6183             return false;
   6184         }
   6185         if (!validateConjTranspose(Trans)) {
   6186             return false;
   6187         }
   6188         int cdim = C.getType().getX();
   6189         if (cdim != C.getType().getY()) {
   6190             return false;
   6191         }
   6192         if (Trans == ScriptIntrinsicBLAS.NO_TRANSPOSE) {
   6193             if (A.getType().getY() != cdim) {
   6194                 return false;
   6195             }
   6196         } else {
   6197             if (A.getType().getX() != cdim) {
   6198                 return false;
   6199             }
   6200         }
   6201         if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
   6202             return false;
   6203         }
   6204         return true;
   6205     }
   6206 
   6207     private void xHER2K_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) {
   6208         for (Allocation matA : mMatrix) {
   6209             for (Allocation matB : mMatrix) {
   6210                 for (Allocation matC : mMatrix) {
   6211                     Element elemA = matA.getType().getElement();
   6212                     if (validateHER2K(elemA, Uplo, Trans, matA, matB, matC)) {
   6213                         try {
   6214                             if (elemA.isCompatible(Element.F32_2(mRS))) {
   6215                                 mBLAS.CHER2K(Uplo, Trans, alphaC, matA, matB, betaS, matC);
   6216                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   6217                                 mBLAS.ZHER2K(Uplo, Trans, alphaZ, matA, matB, betaD, matC);
   6218                             }
   6219                         } catch (RSRuntimeException e) {
   6220                             fail("should NOT throw RSRuntimeException");
   6221                         }
   6222                     } else {
   6223                         try {
   6224                             mBLAS.CHER2K(Uplo, Trans, alphaC, matA, matB, betaS, matC);
   6225                             fail("should throw RSRuntimeException for CHER2K");
   6226                         } catch (RSRuntimeException e) {
   6227                         }
   6228                         try {
   6229                             mBLAS.ZHER2K(Uplo, Trans, alphaZ, matA, matB, betaD, matC);
   6230                             fail("should throw RSRuntimeException for ZHER2K");
   6231                         } catch (RSRuntimeException e) {
   6232                         }
   6233                     }
   6234                 }
   6235             }
   6236         }
   6237     }
   6238 
   6239     public void L3_xHER2K_API(ArrayList<Allocation> mMatrix) {
   6240         for (int Uplo : mUplo) {
   6241             for (int Trans : mTranspose) {
   6242                 xHER2K_API_test(Uplo, Trans, mMatrix);
   6243             }
   6244         }
   6245     }
   6246 
   6247     public void test_L3_CHER2K_API() {
   6248         L3_xHER2K_API(mMatrixC);
   6249     }
   6250 
   6251     public void test_L3_ZHER2K_API() {
   6252         L3_xHER2K_API(mMatrixZ);
   6253     }
   6254 
   6255     public void test_L3_CHER2K_Correctness() {
   6256         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   6257         int uplo = ScriptIntrinsicBLAS.UPPER;
   6258 
   6259         // Populate input allocations
   6260         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
   6261         Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
   6262         Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   6263         matrixAC.copyFrom(mBLASData.L3_cHER2K_A_nk);
   6264         matrixBC.copyFrom(mBLASData.L3_cHER2K_B_nk);
   6265         matrixCC.copyFrom(mBLASData.L3_cHER2K_C_nn);
   6266 
   6267         // Default case: NO_TRANSPOSE
   6268         mBLAS.CHER2K(uplo, trans, alphaC, matrixAC, matrixBC, betaS, matrixCC);
   6269         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   6270         matrixCRef.copyFrom(mBLASData.L3_cHER2K_o_N);
   6271         verifyMatrix(matrixCRef, matrixCC, true);
   6272 
   6273         // Case: TRANSPOSE
   6274         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
   6275         matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
   6276         matrixAC.copyFrom(mBLASData.L3_cHER2K_A_kn);
   6277         matrixBC.copyFrom(mBLASData.L3_cHER2K_B_kn);
   6278         // Reload matrix C, since it was overwritten by BLAS.
   6279         matrixCC.copyFrom(mBLASData.L3_cHER2K_C_nn);
   6280 
   6281         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   6282         mBLAS.CHER2K(uplo, trans, alphaC, matrixAC, matrixBC, betaS, matrixCC);
   6283         matrixCRef.copyFrom(mBLASData.L3_cHER2K_o_H);
   6284         verifyMatrix(matrixCRef, matrixCC, true);
   6285 
   6286         mRS.finish();
   6287         checkError();
   6288     }
   6289 
   6290     public void test_L3_ZHER2K_Correctness() {
   6291         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   6292         int uplo = ScriptIntrinsicBLAS.UPPER;
   6293 
   6294         // Populate input allocations
   6295         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
   6296         Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
   6297         Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   6298         matrixAZ.copyFrom(mBLASData.L3_zHER2K_A_nk);
   6299         matrixBZ.copyFrom(mBLASData.L3_zHER2K_B_nk);
   6300         matrixCZ.copyFrom(mBLASData.L3_zHER2K_C_nn);
   6301 
   6302         // Default case: NO_TRANSPOSE
   6303         mBLAS.ZHER2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaD, matrixCZ);
   6304         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   6305         matrixCRef.copyFrom(mBLASData.L3_zHER2K_o_N);
   6306         verifyMatrix(matrixCRef, matrixCZ, true);
   6307 
   6308         // Case: TRANSPOSE
   6309         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
   6310         matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
   6311         matrixAZ.copyFrom(mBLASData.L3_zHER2K_A_kn);
   6312         matrixBZ.copyFrom(mBLASData.L3_zHER2K_B_kn);
   6313         // Reload matrix C, since it was overwritten by BLAS.
   6314         matrixCZ.copyFrom(mBLASData.L3_zHER2K_C_nn);
   6315 
   6316         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
   6317         mBLAS.ZHER2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaD, matrixCZ);
   6318         matrixCRef.copyFrom(mBLASData.L3_zHER2K_o_H);
   6319         verifyMatrix(matrixCRef, matrixCZ, true);
   6320 
   6321         mRS.finish();
   6322         checkError();
   6323     }
   6324 
   6325 
   6326     private boolean validateTRMM(Element e, int Side, int Uplo, int TransA, int Diag, Allocation A, Allocation B) {
   6327         if (!validateSide(Side)) {
   6328             return false;
   6329         }
   6330         if (!validateUplo(Uplo)) {
   6331             return false;
   6332         }
   6333         if (!validateTranspose(TransA)) {
   6334             return false;
   6335         }
   6336         if (!validateDiag(Diag)) {
   6337             return false;
   6338         }
   6339         int aM = -1, aN = -1, bM = -1, bN = -1;
   6340         if (!A.getType().getElement().isCompatible(e) ||
   6341             !B.getType().getElement().isCompatible(e)) {
   6342             return false;
   6343         }
   6344 
   6345         aM = A.getType().getY();
   6346         aN = A.getType().getX();
   6347         if (aM != aN) {
   6348             return false;
   6349         }
   6350 
   6351         bM = B.getType().getY();
   6352         bN = B.getType().getX();
   6353         if (Side == ScriptIntrinsicBLAS.LEFT) {
   6354             if (aN != bM) {
   6355                 return false;
   6356             }
   6357         } else {
   6358             if (bN != aM) {
   6359                 return false;
   6360             }
   6361         }
   6362         return true;
   6363     }
   6364 
   6365     private void xTRMM_API_test(int Side, int Uplo, int TransA, int Diag, ArrayList<Allocation> mMatrix) {
   6366         for (Allocation matA : mMatrix) {
   6367             for (Allocation matB : mMatrix) {
   6368                 Element elemA = matA.getType().getElement();
   6369                 if (validateTRMM(elemA, Side, Uplo, TransA, Diag, matA, matB)) {
   6370                     try {
   6371                         if (elemA.isCompatible(Element.F32(mRS))) {
   6372                             mBLAS.STRMM(Side, Uplo, TransA, Diag, alphaS, matA, matB);
   6373                         } else if (elemA.isCompatible(Element.F64(mRS))) {
   6374                             mBLAS.DTRMM(Side, Uplo, TransA, Diag, alphaD, matA, matB);
   6375                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
   6376                             mBLAS.CTRMM(Side, Uplo, TransA, Diag, alphaC, matA, matB);
   6377                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   6378                             mBLAS.ZTRMM(Side, Uplo, TransA, Diag, alphaZ, matA, matB);
   6379                         }
   6380                     } catch (RSRuntimeException e) {
   6381                         fail("should NOT throw RSRuntimeException");
   6382                     }
   6383                 } else {
   6384                     try {
   6385                         mBLAS.STRMM(Side, Uplo, TransA, Diag, alphaS, matA, matB);
   6386                         fail("should throw RSRuntimeException for STRMM");
   6387                     } catch (RSRuntimeException e) {
   6388                     }
   6389                     try {
   6390                         mBLAS.DTRMM(Side, Uplo, TransA, Diag, alphaD, matA, matB);
   6391                         fail("should throw RSRuntimeException for DTRMM");
   6392                     } catch (RSRuntimeException e) {
   6393                     }
   6394                     try {
   6395                         mBLAS.CTRMM(Side, Uplo, TransA, Diag, alphaC, matA, matB);
   6396                         fail("should throw RSRuntimeException for CTRMM");
   6397                     } catch (RSRuntimeException e) {
   6398                     }
   6399                     try {
   6400                         mBLAS.ZTRMM(Side, Uplo, TransA, Diag, alphaZ, matA, matB);
   6401                         fail("should throw RSRuntimeException for ZTRMM");
   6402                     } catch (RSRuntimeException e) {
   6403                     }
   6404                 }
   6405             }
   6406         }
   6407     }
   6408 
   6409     public void L3_xTRMM_API(ArrayList<Allocation> mMatrix) {
   6410         for (int Side : mSide) {
   6411             for (int Uplo : mUplo) {
   6412                 for (int TransA : mTranspose) {
   6413                     for (int Diag : mDiag) {
   6414                         xTRMM_API_test(Side, Uplo, TransA, Diag, mMatrix);
   6415                     }
   6416                 }
   6417             }
   6418         }
   6419     }
   6420 
   6421     public void test_L3_STRMM_API() {
   6422         L3_xTRMM_API(mMatrixS);
   6423     }
   6424 
   6425     public void test_L3_DTRMM_API() {
   6426         L3_xTRMM_API(mMatrixD);
   6427     }
   6428 
   6429     public void test_L3_CTRMM_API() {
   6430         L3_xTRMM_API(mMatrixC);
   6431     }
   6432 
   6433     public void test_L3_ZTRMM_API() {
   6434         L3_xTRMM_API(mMatrixZ);
   6435     }
   6436 
   6437 
   6438     public void test_L3_STRMM_Correctness() {
   6439         int side = ScriptIntrinsicBLAS.LEFT;
   6440         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   6441         int uplo = ScriptIntrinsicBLAS.UPPER;
   6442         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   6443 
   6444         // Populate input allocations
   6445         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dM));
   6446         Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
   6447         matrixAS.copyFrom(mBLASData.L3_sTRMM_A_mm);
   6448         matrixBS.copyFrom(mBLASData.L3_sTRMM_B_mn);
   6449 
   6450         // Default case: LEFT, UPPER, NO_TRANSPOSE
   6451         mBLAS.STRMM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS);
   6452         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
   6453         matrixBRef.copyFrom(mBLASData.L3_sTRMM_o_LUN);
   6454         verifyMatrix(matrixBRef, matrixBS);
   6455 
   6456         // Case: RIGHT, LOWER, TRANSPOSE
   6457         matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   6458         matrixAS.copyFrom(mBLASData.L3_sTRMM_A_nn);
   6459         // Reload matrix B, since it was overwritten by BLAS.
   6460         matrixBS.copyFrom(mBLASData.L3_sTRMM_B_mn);
   6461 
   6462         side = ScriptIntrinsicBLAS.RIGHT;
   6463         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   6464         uplo = ScriptIntrinsicBLAS.LOWER;
   6465         mBLAS.STRMM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS);
   6466         matrixBRef.copyFrom(mBLASData.L3_sTRMM_o_RLT);
   6467         verifyMatrix(matrixBRef, matrixBS);
   6468 
   6469         mRS.finish();
   6470         checkError();
   6471     }
   6472 
   6473     public void test_L3_DTRMM_Correctness() {
   6474         int side = ScriptIntrinsicBLAS.LEFT;
   6475         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   6476         int uplo = ScriptIntrinsicBLAS.UPPER;
   6477         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   6478 
   6479         // Populate input allocations
   6480         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dM));
   6481         Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
   6482         matrixAD.copyFrom(mBLASData.L3_dTRMM_A_mm);
   6483         matrixBD.copyFrom(mBLASData.L3_dTRMM_B_mn);
   6484 
   6485         // Default case: LEFT, UPPER, NO_TRANSPOSE
   6486         mBLAS.DTRMM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD);
   6487         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
   6488         matrixBRef.copyFrom(mBLASData.L3_dTRMM_o_LUN);
   6489         verifyMatrix(matrixBRef, matrixBD);
   6490 
   6491         // Case: RIGHT, LOWER, TRANSPOSE
   6492         matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   6493         matrixAD.copyFrom(mBLASData.L3_dTRMM_A_nn);
   6494         // Reload matrix B, since it was overwritten by BLAS.
   6495         matrixBD.copyFrom(mBLASData.L3_dTRMM_B_mn);
   6496 
   6497         side = ScriptIntrinsicBLAS.RIGHT;
   6498         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   6499         uplo = ScriptIntrinsicBLAS.LOWER;
   6500         mBLAS.DTRMM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD);
   6501         matrixBRef.copyFrom(mBLASData.L3_dTRMM_o_RLT);
   6502         verifyMatrix(matrixBRef, matrixBD);
   6503 
   6504         mRS.finish();
   6505         checkError();
   6506     }
   6507 
   6508     public void test_L3_CTRMM_Correctness() {
   6509         int side = ScriptIntrinsicBLAS.LEFT;
   6510         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   6511         int uplo = ScriptIntrinsicBLAS.UPPER;
   6512         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   6513 
   6514         // Populate input allocations
   6515         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM));
   6516         Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   6517         matrixAC.copyFrom(mBLASData.L3_cTRMM_A_mm);
   6518         matrixBC.copyFrom(mBLASData.L3_cTRMM_B_mn);
   6519 
   6520         // Default case: LEFT, UPPER, NO_TRANSPOSE
   6521         mBLAS.CTRMM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC);
   6522         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   6523         matrixBRef.copyFrom(mBLASData.L3_cTRMM_o_LUN);
   6524         verifyMatrix(matrixBRef, matrixBC);
   6525 
   6526         // Case: RIGHT, LOWER, TRANSPOSE
   6527         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   6528         matrixAC.copyFrom(mBLASData.L3_cTRMM_A_nn);
   6529         // Reload matrix B, since it was overwritten by BLAS.
   6530         matrixBC.copyFrom(mBLASData.L3_cTRMM_B_mn);
   6531 
   6532         side = ScriptIntrinsicBLAS.RIGHT;
   6533         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   6534         uplo = ScriptIntrinsicBLAS.LOWER;
   6535         mBLAS.CTRMM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC);
   6536         matrixBRef.copyFrom(mBLASData.L3_cTRMM_o_RLT);
   6537         verifyMatrix(matrixBRef, matrixBC);
   6538 
   6539         mRS.finish();
   6540         checkError();
   6541     }
   6542 
   6543     public void test_L3_ZTRMM_Correctness() {
   6544         int side = ScriptIntrinsicBLAS.LEFT;
   6545         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   6546         int uplo = ScriptIntrinsicBLAS.UPPER;
   6547         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   6548 
   6549         // Populate input allocations
   6550         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM));
   6551         Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   6552         matrixAZ.copyFrom(mBLASData.L3_zTRMM_A_mm);
   6553         matrixBZ.copyFrom(mBLASData.L3_zTRMM_B_mn);
   6554 
   6555         // Default case: LEFT, UPPER, NO_TRANSPOSE
   6556         mBLAS.ZTRMM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ);
   6557         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   6558         matrixBRef.copyFrom(mBLASData.L3_zTRMM_o_LUN);
   6559         verifyMatrix(matrixBRef, matrixBZ);
   6560 
   6561         // Case: RIGHT, LOWER, TRANSPOSE
   6562         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   6563         matrixAZ.copyFrom(mBLASData.L3_zTRMM_A_nn);
   6564         // Reload matrix B, since it was overwritten by BLAS.
   6565         matrixBZ.copyFrom(mBLASData.L3_zTRMM_B_mn);
   6566 
   6567         side = ScriptIntrinsicBLAS.RIGHT;
   6568         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   6569         uplo = ScriptIntrinsicBLAS.LOWER;
   6570         mBLAS.ZTRMM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ);
   6571         matrixBRef.copyFrom(mBLASData.L3_zTRMM_o_RLT);
   6572         verifyMatrix(matrixBRef, matrixBZ);
   6573 
   6574         mRS.finish();
   6575         checkError();
   6576     }
   6577 
   6578 
   6579     private boolean validateTRSM(Element e, int Side, int Uplo, int TransA, int Diag, Allocation A, Allocation B) {
   6580         int adim = -1, bM = -1, bN = -1;
   6581         if (!validateSide(Side)) {
   6582             return false;
   6583         }
   6584         if (!validateTranspose(TransA)) {
   6585             return false;
   6586         }
   6587         if (!validateUplo(Uplo)) {
   6588             return false;
   6589         }
   6590         if (!validateDiag(Diag)) {
   6591             return false;
   6592         }
   6593         if (!A.getType().getElement().isCompatible(e) ||
   6594             !B.getType().getElement().isCompatible(e)) {
   6595             return false;
   6596         }
   6597         adim = A.getType().getX();
   6598         if (adim != A.getType().getY()) {
   6599             // this may be unnecessary, the restriction could potentially be relaxed
   6600             // A needs to contain at least that symmetric matrix but could theoretically be larger
   6601             // for now we assume adapters are sufficient, will reevaluate in the future
   6602             return false;
   6603         }
   6604         bM = B.getType().getY();
   6605         bN = B.getType().getX();
   6606         if (Side == ScriptIntrinsicBLAS.LEFT) {
   6607             // A is M*M
   6608             if (adim != bM) {
   6609                 return false;
   6610             }
   6611         } else {
   6612             // A is N*N
   6613             if (adim != bN) {
   6614                 return false;
   6615             }
   6616         }
   6617         return true;
   6618     }
   6619 
   6620     private void xTRSM_API_test(int Side, int Uplo, int TransA, int Diag, ArrayList<Allocation> mMatrix) {
   6621         for (Allocation matA : mMatrix) {
   6622             for (Allocation matB : mMatrix) {
   6623                 Element elemA = matA.getType().getElement();
   6624                 if (validateTRSM(elemA, Side, Uplo, TransA, Diag, matA, matB)) {
   6625                     try {
   6626                         if (elemA.isCompatible(Element.F32(mRS))) {
   6627                             mBLAS.STRSM(Side, Uplo, TransA, Diag, alphaS, matA, matB);
   6628                         } else if (elemA.isCompatible(Element.F64(mRS))) {
   6629                             mBLAS.DTRSM(Side, Uplo, TransA, Diag, alphaD, matA, matB);
   6630                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
   6631                             mBLAS.CTRSM(Side, Uplo, TransA, Diag, alphaC, matA, matB);
   6632                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
   6633                             mBLAS.ZTRSM(Side, Uplo, TransA, Diag, alphaZ, matA, matB);
   6634                         }
   6635                     } catch (RSRuntimeException e) {
   6636                         fail("should NOT throw RSRuntimeException");
   6637                     }
   6638                 } else {
   6639                     try {
   6640                         mBLAS.STRSM(Side, Uplo, TransA, Diag, alphaS, matA, matB);
   6641                         fail("should throw RSRuntimeException for STRSM");
   6642                     } catch (RSRuntimeException e) {
   6643                     }
   6644                     try {
   6645                         mBLAS.DTRSM(Side, Uplo, TransA, Diag, alphaD, matA, matB);
   6646                         fail("should throw RSRuntimeException for DTRSM");
   6647                     } catch (RSRuntimeException e) {
   6648                     }
   6649                     try {
   6650                         mBLAS.CTRSM(Side, Uplo, TransA, Diag, alphaC, matA, matB);
   6651                         fail("should throw RSRuntimeException for CTRSM");
   6652                     } catch (RSRuntimeException e) {
   6653                     }
   6654                     try {
   6655                         mBLAS.ZTRSM(Side, Uplo, TransA, Diag, alphaZ, matA, matB);
   6656                         fail("should throw RSRuntimeException for ZTRSM");
   6657                     } catch (RSRuntimeException e) {
   6658                     }
   6659                 }
   6660             }
   6661         }
   6662     }
   6663 
   6664     public void L3_xTRSM_API(ArrayList<Allocation> mMatrix) {
   6665         for (int Side : mSide) {
   6666             for (int Uplo : mUplo) {
   6667                 for (int TransA : mTranspose) {
   6668                     for (int Diag : mDiag) {
   6669                         xTRSM_API_test(Side, Uplo, TransA, Diag, mMatrix);
   6670                     }
   6671                 }
   6672             }
   6673         }
   6674     }
   6675 
   6676     public void test_L3_STRSM_API() {
   6677         L3_xTRSM_API(mMatrixS);
   6678     }
   6679 
   6680     public void test_L3_DTRSM_API() {
   6681         L3_xTRSM_API(mMatrixD);
   6682     }
   6683 
   6684     public void test_L3_CTRSM_API() {
   6685         L3_xTRSM_API(mMatrixC);
   6686     }
   6687 
   6688     public void test_L3_ZTRSM_API() {
   6689         L3_xTRSM_API(mMatrixZ);
   6690     }
   6691 
   6692     public void test_L3_STRSM_Correctness() {
   6693         int side = ScriptIntrinsicBLAS.LEFT;
   6694         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   6695         int uplo = ScriptIntrinsicBLAS.UPPER;
   6696         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   6697 
   6698         // Populate input allocations
   6699         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dM));
   6700         Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
   6701         matrixAS.copyFrom(mBLASData.L3_sTRSM_A_mm);
   6702         matrixBS.copyFrom(mBLASData.L3_sTRSM_B_mn);
   6703 
   6704         // Default case: LEFT, UPPER, NO_TRANSPOSE
   6705         mBLAS.STRSM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS);
   6706         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
   6707         matrixBRef.copyFrom(mBLASData.L3_sTRSM_o_LUN);
   6708         verifyMatrix(matrixBRef, matrixBS);
   6709 
   6710         // Case: RIGHT, LOWER, TRANSPOSE
   6711         matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
   6712         matrixAS.copyFrom(mBLASData.L3_sTRSM_A_nn);
   6713         // Reload matrix B, since it was overwritten by BLAS.
   6714         matrixBS.copyFrom(mBLASData.L3_sTRSM_B_mn);
   6715 
   6716         side = ScriptIntrinsicBLAS.RIGHT;
   6717         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   6718         uplo = ScriptIntrinsicBLAS.LOWER;
   6719         mBLAS.STRSM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS);
   6720         matrixBRef.copyFrom(mBLASData.L3_sTRSM_o_RLT);
   6721         verifyMatrix(matrixBRef, matrixBS);
   6722 
   6723         mRS.finish();
   6724         checkError();
   6725     }
   6726 
   6727     public void test_L3_DTRSM_Correctness() {
   6728         int side = ScriptIntrinsicBLAS.LEFT;
   6729         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   6730         int uplo = ScriptIntrinsicBLAS.UPPER;
   6731         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   6732 
   6733         // Populate input allocations
   6734         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dM));
   6735         Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
   6736         matrixAD.copyFrom(mBLASData.L3_dTRSM_A_mm);
   6737         matrixBD.copyFrom(mBLASData.L3_dTRSM_B_mn);
   6738 
   6739         // Default case: LEFT, UPPER, NO_TRANSPOSE
   6740         mBLAS.DTRSM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD);
   6741         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
   6742         matrixBRef.copyFrom(mBLASData.L3_dTRSM_o_LUN);
   6743         verifyMatrix(matrixBRef, matrixBD);
   6744 
   6745         // Case: RIGHT, LOWER, TRANSPOSE
   6746         matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
   6747         matrixAD.copyFrom(mBLASData.L3_dTRSM_A_nn);
   6748         // Reload matrix B, since it was overwritten by BLAS.
   6749         matrixBD.copyFrom(mBLASData.L3_dTRSM_B_mn);
   6750 
   6751         side = ScriptIntrinsicBLAS.RIGHT;
   6752         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   6753         uplo = ScriptIntrinsicBLAS.LOWER;
   6754         mBLAS.DTRSM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD);
   6755         matrixBRef.copyFrom(mBLASData.L3_dTRSM_o_RLT);
   6756         verifyMatrix(matrixBRef, matrixBD);
   6757 
   6758         mRS.finish();
   6759         checkError();
   6760     }
   6761 
   6762     public void test_L3_CTRSM_Correctness() {
   6763         int side = ScriptIntrinsicBLAS.LEFT;
   6764         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   6765         int uplo = ScriptIntrinsicBLAS.UPPER;
   6766         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   6767 
   6768         // Populate input allocations
   6769         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM));
   6770         Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   6771         matrixAC.copyFrom(mBLASData.L3_cTRSM_A_mm);
   6772         matrixBC.copyFrom(mBLASData.L3_cTRSM_B_mn);
   6773 
   6774         // Default case: LEFT, UPPER, NO_TRANSPOSE
   6775         mBLAS.CTRSM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC);
   6776         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
   6777         matrixBRef.copyFrom(mBLASData.L3_cTRSM_o_LUN);
   6778         verifyMatrix(matrixBRef, matrixBC);
   6779 
   6780         // Case: RIGHT, LOWER, TRANSPOSE
   6781         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
   6782         matrixAC.copyFrom(mBLASData.L3_cTRSM_A_nn);
   6783         // Reload matrix B, since it was overwritten by BLAS.
   6784         matrixBC.copyFrom(mBLASData.L3_cTRSM_B_mn);
   6785 
   6786         side = ScriptIntrinsicBLAS.RIGHT;
   6787         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   6788         uplo = ScriptIntrinsicBLAS.LOWER;
   6789         mBLAS.CTRSM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC);
   6790         matrixBRef.copyFrom(mBLASData.L3_cTRSM_o_RLT);
   6791         verifyMatrix(matrixBRef, matrixBC);
   6792 
   6793         mRS.finish();
   6794         checkError();
   6795     }
   6796 
   6797     public void test_L3_ZTRSM_Correctness() {
   6798         int side = ScriptIntrinsicBLAS.LEFT;
   6799         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
   6800         int uplo = ScriptIntrinsicBLAS.UPPER;
   6801         int diag = ScriptIntrinsicBLAS.NON_UNIT;
   6802 
   6803         // Populate input allocations
   6804         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM));
   6805         Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   6806         matrixAZ.copyFrom(mBLASData.L3_zTRSM_A_mm);
   6807         matrixBZ.copyFrom(mBLASData.L3_zTRSM_B_mn);
   6808 
   6809         // Default case: LEFT, UPPER, NO_TRANSPOSE
   6810         mBLAS.ZTRSM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ);
   6811         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
   6812         matrixBRef.copyFrom(mBLASData.L3_zTRSM_o_LUN);
   6813         verifyMatrix(matrixBRef, matrixBZ);
   6814 
   6815         // Case: RIGHT, LOWER, TRANSPOSE
   6816         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
   6817         matrixAZ.copyFrom(mBLASData.L3_zTRSM_A_nn);
   6818         // Reload matrix B, since it was overwritten by BLAS.
   6819         matrixBZ.copyFrom(mBLASData.L3_zTRSM_B_mn);
   6820 
   6821         side = ScriptIntrinsicBLAS.RIGHT;
   6822         trans = ScriptIntrinsicBLAS.TRANSPOSE;
   6823         uplo = ScriptIntrinsicBLAS.LOWER;
   6824         mBLAS.ZTRSM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ);
   6825         matrixBRef.copyFrom(mBLASData.L3_zTRSM_o_RLT);
   6826         verifyMatrix(matrixBRef, matrixBZ);
   6827 
   6828         mRS.finish();
   6829         checkError();
   6830     }
   6831 }
   6832