Home | History | Annotate | Download | only in renderscript
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package androidx.renderscript;
     18 
     19 import android.support.annotation.IntDef;
     20 import java.lang.annotation.Retention;
     21 import java.lang.annotation.RetentionPolicy;
     22 
     23 /**
     24  *
     25  * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS.
     26  *
     27  * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
     28  * building blocks for performing basic vector and matrix operations.
     29  *
     30  * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
     31  *
     32  **/
     33 public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
     34     private Allocation mLUT;
     35     private static final int INTRINSIC_API_LEVEL = 23;
     36 
     37     private ScriptIntrinsicBLAS(long id, RenderScript rs) {
     38         super(id, rs);
     39     }
     40 
     41     private static final int RsBlas_sdsdot = 1;
     42     private static final int RsBlas_dsdot = 2;
     43     private static final int RsBlas_sdot = 3;
     44     private static final int RsBlas_ddot = 4;
     45     private static final int RsBlas_cdotu_sub = 5;
     46     private static final int RsBlas_cdotc_sub = 6;
     47     private static final int RsBlas_zdotu_sub = 7;
     48     private static final int RsBlas_zdotc_sub = 8;
     49     private static final int RsBlas_snrm2 = 9;
     50     private static final int RsBlas_sasum = 10;
     51     private static final int RsBlas_dnrm2 = 11;
     52     private static final int RsBlas_dasum = 12;
     53     private static final int RsBlas_scnrm2 = 13;
     54     private static final int RsBlas_scasum = 14;
     55     private static final int RsBlas_dznrm2 = 15;
     56     private static final int RsBlas_dzasum = 16;
     57     private static final int RsBlas_isamax = 17;
     58     private static final int RsBlas_idamax = 18;
     59     private static final int RsBlas_icamax = 19;
     60     private static final int RsBlas_izamax = 20;
     61     private static final int RsBlas_sswap = 21;
     62     private static final int RsBlas_scopy = 22;
     63     private static final int RsBlas_saxpy = 23;
     64     private static final int RsBlas_dswap = 24;
     65     private static final int RsBlas_dcopy = 25;
     66     private static final int RsBlas_daxpy = 26;
     67     private static final int RsBlas_cswap = 27;
     68     private static final int RsBlas_ccopy = 28;
     69     private static final int RsBlas_caxpy = 29;
     70     private static final int RsBlas_zswap = 30;
     71     private static final int RsBlas_zcopy = 31;
     72     private static final int RsBlas_zaxpy = 32;
     73     private static final int RsBlas_srotg = 33;
     74     private static final int RsBlas_srotmg = 34;
     75     private static final int RsBlas_srot = 35;
     76     private static final int RsBlas_srotm = 36;
     77     private static final int RsBlas_drotg = 37;
     78     private static final int RsBlas_drotmg = 38;
     79     private static final int RsBlas_drot = 39;
     80     private static final int RsBlas_drotm = 40;
     81     private static final int RsBlas_sscal = 41;
     82     private static final int RsBlas_dscal = 42;
     83     private static final int RsBlas_cscal = 43;
     84     private static final int RsBlas_zscal = 44;
     85     private static final int RsBlas_csscal = 45;
     86     private static final int RsBlas_zdscal = 46;
     87     private static final int RsBlas_sgemv = 47;
     88     private static final int RsBlas_sgbmv = 48;
     89     private static final int RsBlas_strmv = 49;
     90     private static final int RsBlas_stbmv = 50;
     91     private static final int RsBlas_stpmv = 51;
     92     private static final int RsBlas_strsv = 52;
     93     private static final int RsBlas_stbsv = 53;
     94     private static final int RsBlas_stpsv = 54;
     95     private static final int RsBlas_dgemv = 55;
     96     private static final int RsBlas_dgbmv = 56;
     97     private static final int RsBlas_dtrmv = 57;
     98     private static final int RsBlas_dtbmv = 58;
     99     private static final int RsBlas_dtpmv = 59;
    100     private static final int RsBlas_dtrsv = 60;
    101     private static final int RsBlas_dtbsv = 61;
    102     private static final int RsBlas_dtpsv = 62;
    103     private static final int RsBlas_cgemv = 63;
    104     private static final int RsBlas_cgbmv = 64;
    105     private static final int RsBlas_ctrmv = 65;
    106     private static final int RsBlas_ctbmv = 66;
    107     private static final int RsBlas_ctpmv = 67;
    108     private static final int RsBlas_ctrsv = 68;
    109     private static final int RsBlas_ctbsv = 69;
    110     private static final int RsBlas_ctpsv = 70;
    111     private static final int RsBlas_zgemv = 71;
    112     private static final int RsBlas_zgbmv = 72;
    113     private static final int RsBlas_ztrmv = 73;
    114     private static final int RsBlas_ztbmv = 74;
    115     private static final int RsBlas_ztpmv = 75;
    116     private static final int RsBlas_ztrsv = 76;
    117     private static final int RsBlas_ztbsv = 77;
    118     private static final int RsBlas_ztpsv = 78;
    119     private static final int RsBlas_ssymv = 79;
    120     private static final int RsBlas_ssbmv = 80;
    121     private static final int RsBlas_sspmv = 81;
    122     private static final int RsBlas_sger = 82;
    123     private static final int RsBlas_ssyr = 83;
    124     private static final int RsBlas_sspr = 84;
    125     private static final int RsBlas_ssyr2 = 85;
    126     private static final int RsBlas_sspr2 = 86;
    127     private static final int RsBlas_dsymv = 87;
    128     private static final int RsBlas_dsbmv = 88;
    129     private static final int RsBlas_dspmv = 89;
    130     private static final int RsBlas_dger = 90;
    131     private static final int RsBlas_dsyr = 91;
    132     private static final int RsBlas_dspr = 92;
    133     private static final int RsBlas_dsyr2 = 93;
    134     private static final int RsBlas_dspr2 = 94;
    135     private static final int RsBlas_chemv = 95;
    136     private static final int RsBlas_chbmv = 96;
    137     private static final int RsBlas_chpmv = 97;
    138     private static final int RsBlas_cgeru = 98;
    139     private static final int RsBlas_cgerc = 99;
    140     private static final int RsBlas_cher = 100;
    141     private static final int RsBlas_chpr = 101;
    142     private static final int RsBlas_cher2 = 102;
    143     private static final int RsBlas_chpr2 = 103;
    144     private static final int RsBlas_zhemv = 104;
    145     private static final int RsBlas_zhbmv = 105;
    146     private static final int RsBlas_zhpmv = 106;
    147     private static final int RsBlas_zgeru = 107;
    148     private static final int RsBlas_zgerc = 108;
    149     private static final int RsBlas_zher = 109;
    150     private static final int RsBlas_zhpr = 110;
    151     private static final int RsBlas_zher2 = 111;
    152     private static final int RsBlas_zhpr2 = 112;
    153     private static final int RsBlas_sgemm = 113;
    154     private static final int RsBlas_ssymm = 114;
    155     private static final int RsBlas_ssyrk = 115;
    156     private static final int RsBlas_ssyr2k = 116;
    157     private static final int RsBlas_strmm = 117;
    158     private static final int RsBlas_strsm = 118;
    159     private static final int RsBlas_dgemm = 119;
    160     private static final int RsBlas_dsymm = 120;
    161     private static final int RsBlas_dsyrk = 121;
    162     private static final int RsBlas_dsyr2k = 122;
    163     private static final int RsBlas_dtrmm = 123;
    164     private static final int RsBlas_dtrsm = 124;
    165     private static final int RsBlas_cgemm = 125;
    166     private static final int RsBlas_csymm = 126;
    167     private static final int RsBlas_csyrk = 127;
    168     private static final int RsBlas_csyr2k = 128;
    169     private static final int RsBlas_ctrmm = 129;
    170     private static final int RsBlas_ctrsm = 130;
    171     private static final int RsBlas_zgemm = 131;
    172     private static final int RsBlas_zsymm = 132;
    173     private static final int RsBlas_zsyrk = 133;
    174     private static final int RsBlas_zsyr2k = 134;
    175     private static final int RsBlas_ztrmm = 135;
    176     private static final int RsBlas_ztrsm = 136;
    177     private static final int RsBlas_chemm = 137;
    178     private static final int RsBlas_cherk = 138;
    179     private static final int RsBlas_cher2k = 139;
    180     private static final int RsBlas_zhemm = 140;
    181     private static final int RsBlas_zherk = 141;
    182     private static final int RsBlas_zher2k = 142;
    183 
    184     // BLAS extensions start here
    185     private static final int RsBlas_bnnm = 1000;
    186 
    187     /**
    188      * Create an intrinsic to access BLAS subroutines.
    189      *
    190      * @param rs The RenderScript context
    191      * @return ScriptIntrinsicBLAS
    192      */
    193     public static ScriptIntrinsicBLAS create(RenderScript rs) {
    194         long id;
    195         boolean mUseIncSupp = rs.isUseNative() &&
    196                               android.os.Build.VERSION.SDK_INT < INTRINSIC_API_LEVEL;
    197 
    198         id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs), mUseIncSupp);
    199         ScriptIntrinsicBLAS si = new ScriptIntrinsicBLAS(id, rs);
    200         si.setIncSupp(mUseIncSupp);
    201         return si;
    202     }
    203 
    204     /**
    205      * @hide
    206      */
    207     @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE})
    208     @Retention(RetentionPolicy.SOURCE)
    209     public @interface Transpose {}
    210 
    211     /**
    212      * @hide
    213      */
    214     @IntDef({UPPER, LOWER})
    215     @Retention(RetentionPolicy.SOURCE)
    216     public @interface Uplo {}
    217 
    218     /**
    219      * @hide
    220      */
    221     @IntDef({NON_UNIT, UNIT})
    222     @Retention(RetentionPolicy.SOURCE)
    223     public @interface Diag {}
    224 
    225     /**
    226      * @hide
    227      */
    228     @IntDef({LEFT, RIGHT})
    229     @Retention(RetentionPolicy.SOURCE)
    230     public @interface Side {}
    231 
    232     public static final int NO_TRANSPOSE = 111;
    233     public static final int TRANSPOSE = 112;
    234     public static final int CONJ_TRANSPOSE = 113;
    235 
    236     public static final int UPPER = 121;
    237     public static final int LOWER = 122;
    238 
    239     public static final int NON_UNIT = 131;
    240     public static final int UNIT = 132;
    241 
    242     public static final int LEFT = 141;
    243     public static final int RIGHT = 142;
    244 
    245     static void validateSide(@Side int Side) {
    246         if (Side != LEFT && Side != RIGHT) {
    247             throw new RSRuntimeException("Invalid side passed to BLAS");
    248         }
    249     }
    250 
    251     static void validateTranspose(@Transpose int Trans) {
    252         if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE &&
    253             Trans != CONJ_TRANSPOSE) {
    254             throw new RSRuntimeException("Invalid transpose passed to BLAS");
    255         }
    256     }
    257 
    258     static void validateConjTranspose(@Transpose int Trans) {
    259         if (Trans != NO_TRANSPOSE &&
    260             Trans != CONJ_TRANSPOSE) {
    261             throw new RSRuntimeException("Invalid transpose passed to BLAS");
    262         }
    263     }
    264 
    265     static void validateDiag(@Diag int Diag) {
    266         if (Diag != NON_UNIT && Diag != UNIT) {
    267             throw new RSRuntimeException("Invalid diag passed to BLAS");
    268         }
    269     }
    270 
    271     static void validateUplo(@Uplo int Uplo) {
    272         if (Uplo != UPPER && Uplo != LOWER) {
    273             throw new RSRuntimeException("Invalid uplo passed to BLAS");
    274         }
    275     }
    276 
    277 
    278     /**
    279      * Level 2 BLAS
    280      */
    281 
    282     static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
    283         validateTranspose(TransA);
    284         int M = A.getType().getY();
    285         int N = A.getType().getX();
    286         if (!A.getType().getElement().isCompatible(e) ||
    287             !X.getType().getElement().isCompatible(e) ||
    288             !Y.getType().getElement().isCompatible(e)) {
    289             throw new RSRuntimeException("Called BLAS with wrong Element type");
    290         }
    291         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
    292             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
    293         }
    294 
    295         if (incX <= 0 || incY <= 0) {
    296             throw new RSRuntimeException("Vector increments must be greater than 0");
    297         }
    298         int expectedXDim = -1, expectedYDim = -1;
    299         if (TransA == NO_TRANSPOSE) {
    300             expectedXDim = 1 + (N - 1) * incX;
    301             expectedYDim = 1 + (M - 1) * incY;
    302         } else {
    303             expectedXDim = 1 + (M - 1) * incX;
    304             expectedYDim = 1 + (N - 1) * incY;
    305         }
    306         if (X.getType().getX() != expectedXDim ||
    307             Y.getType().getX() != expectedYDim) {
    308             throw new RSRuntimeException("Incorrect vector dimensions for GEMV");
    309         }
    310     }
    311 
    312     /**
    313      * SGEMV performs one of the matrix-vector operations
    314      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
    315      *
    316      * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
    317      *
    318      * @param TransA The type of transpose applied to matrix A.
    319      * @param alpha The scalar alpha.
    320      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
    321      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
    322      * @param incX The increment for the elements of vector x, must be larger than zero.
    323      * @param beta The scalar beta.
    324      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
    325      * @param incY The increment for the elements of vector y, must be larger than zero.
    326      */
    327     public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
    328         validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
    329         int M = A.getType().getY();
    330         int N = A.getType().getX();
    331 
    332         boolean mUseIncSupp = isIncSupp();
    333         long aID = A.getID(mRS);
    334         long xID = X.getID(mRS);
    335         long yID = Y.getID(mRS);
    336         if (mUseIncSupp) {
    337             aID = getDummyAlloc(A);
    338             xID = getDummyAlloc(X);
    339             yID = getDummyAlloc(Y);
    340         }
    341         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
    342     }
    343 
    344     /**
    345      * DGEMV performs one of the matrix-vector operations
    346      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
    347      *
    348      * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
    349      *
    350      * @param TransA The type of transpose applied to matrix A.
    351      * @param alpha The scalar alpha.
    352      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
    353      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
    354      * @param incX The increment for the elements of vector x, must be larger than zero.
    355      * @param beta The scalar beta.
    356      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
    357      * @param incY The increment for the elements of vector y, must be larger than zero.
    358      */
    359     public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
    360         validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
    361         int M = A.getType().getY();
    362         int N = A.getType().getX();
    363 
    364         boolean mUseIncSupp = isIncSupp();
    365         long aID = A.getID(mRS);
    366         long xID = X.getID(mRS);
    367         long yID = Y.getID(mRS);
    368         if (mUseIncSupp) {
    369             aID = getDummyAlloc(A);
    370             xID = getDummyAlloc(X);
    371             yID = getDummyAlloc(Y);
    372         }
    373         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
    374     }
    375 
    376     /**
    377      * CGEMV performs one of the matrix-vector operations
    378      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
    379      *
    380      * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
    381      *
    382      * @param TransA The type of transpose applied to matrix A.
    383      * @param alpha The scalar alpha.
    384      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
    385      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
    386      * @param incX The increment for the elements of vector x, must be larger than zero.
    387      * @param beta The scalar beta.
    388      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
    389      * @param incY The increment for the elements of vector y, must be larger than zero.
    390      */
    391     public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
    392         validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
    393         int M = A.getType().getY();
    394         int N = A.getType().getX();
    395 
    396         boolean mUseIncSupp = isIncSupp();
    397         long aID = A.getID(mRS);
    398         long xID = X.getID(mRS);
    399         long yID = Y.getID(mRS);
    400         if (mUseIncSupp) {
    401             aID = getDummyAlloc(A);
    402             xID = getDummyAlloc(X);
    403             yID = getDummyAlloc(Y);
    404         }
    405         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
    406     }
    407 
    408     /**
    409      * ZGEMV performs one of the matrix-vector operations
    410      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
    411      *
    412      * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
    413      *
    414      * @param TransA The type of transpose applied to matrix A.
    415      * @param alpha The scalar alpha.
    416      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
    417      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
    418      * @param incX The increment for the elements of vector x, must be larger than zero.
    419      * @param beta The scalar beta.
    420      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
    421      * @param incY The increment for the elements of vector y, must be larger than zero.
    422      */
    423     public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
    424         validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
    425         int M = A.getType().getY();
    426         int N = A.getType().getX();
    427 
    428         boolean mUseIncSupp = isIncSupp();
    429         long aID = A.getID(mRS);
    430         long xID = X.getID(mRS);
    431         long yID = Y.getID(mRS);
    432         if (mUseIncSupp) {
    433             aID = getDummyAlloc(A);
    434             xID = getDummyAlloc(X);
    435             yID = getDummyAlloc(Y);
    436         }
    437         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
    438     }
    439 
    440     /**
    441      * SGBMV performs one of the matrix-vector operations
    442      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
    443      *
    444      * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
    445      *
    446      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
    447      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
    448      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
    449      *           for i in range(0, m):
    450      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
    451      *                  b[i, j-i+kl] = a[i, j]
    452      *
    453      * @param TransA The type of transpose applied to matrix A.
    454      * @param KL The number of sub-diagonals of the matrix A.
    455      * @param KU The number of super-diagonals of the matrix A.
    456      * @param alpha The scalar alpha.
    457      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}.
    458      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
    459      * @param incX The increment for the elements of vector x, must be larger than zero.
    460      * @param beta The scalar beta.
    461      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
    462      * @param incY The increment for the elements of vector y, must be larger than zero.
    463      */
    464     public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
    465         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
    466         validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
    467         if (KL < 0 || KU < 0) {
    468             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
    469         }
    470         int M = A.getType().getY();
    471         int N = A.getType().getX();
    472 
    473         boolean mUseIncSupp = isIncSupp();
    474         long aID = A.getID(mRS);
    475         long xID = X.getID(mRS);
    476         long yID = Y.getID(mRS);
    477         if (mUseIncSupp) {
    478             aID = getDummyAlloc(A);
    479             xID = getDummyAlloc(X);
    480             yID = getDummyAlloc(Y);
    481         }
    482         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp);
    483     }
    484 
    485     /**
    486      * DGBMV performs one of the matrix-vector operations
    487      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
    488      *
    489      * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
    490      *
    491      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
    492      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
    493      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
    494      *           for i in range(0, m):
    495      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
    496      *                  b[i, j-i+kl] = a[i, j]
    497      *
    498      * @param TransA The type of transpose applied to matrix A.
    499      * @param KL The number of sub-diagonals of the matrix A.
    500      * @param KU The number of super-diagonals of the matrix A.
    501      * @param alpha The scalar alpha.
    502      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}.
    503      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
    504      * @param incX The increment for the elements of vector x, must be larger than zero.
    505      * @param beta The scalar beta.
    506      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
    507      * @param incY The increment for the elements of vector y, must be larger than zero.
    508      */
    509     public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
    510         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
    511         validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
    512         if (KL < 0 || KU < 0) {
    513             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
    514         }
    515         int M = A.getType().getY();
    516         int N = A.getType().getX();
    517 
    518         boolean mUseIncSupp = isIncSupp();
    519         long aID = A.getID(mRS);
    520         long xID = X.getID(mRS);
    521         long yID = Y.getID(mRS);
    522         if (mUseIncSupp) {
    523             aID = getDummyAlloc(A);
    524             xID = getDummyAlloc(X);
    525             yID = getDummyAlloc(Y);
    526         }
    527         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp);
    528     }
    529 
    530     /**
    531      * CGBMV performs one of the matrix-vector operations
    532      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
    533      *
    534      * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
    535      *
    536      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
    537      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
    538      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
    539      *           for i in range(0, m):
    540      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
    541      *                  b[i, j-i+kl] = a[i, j]
    542      *
    543      * @param TransA The type of transpose applied to matrix A.
    544      * @param KL The number of sub-diagonals of the matrix A.
    545      * @param KU The number of super-diagonals of the matrix A.
    546      * @param alpha The scalar alpha.
    547      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}.
    548      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
    549      * @param incX The increment for the elements of vector x, must be larger than zero.
    550      * @param beta The scalar beta.
    551      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
    552      * @param incY The increment for the elements of vector y, must be larger than zero.
    553      */
    554     public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
    555         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
    556         validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
    557         if (KL < 0 || KU < 0) {
    558             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
    559         }
    560         int M = A.getType().getY();
    561         int N = A.getType().getX();
    562 
    563         boolean mUseIncSupp = isIncSupp();
    564         long aID = A.getID(mRS);
    565         long xID = X.getID(mRS);
    566         long yID = Y.getID(mRS);
    567         if (mUseIncSupp) {
    568             aID = getDummyAlloc(A);
    569             xID = getDummyAlloc(X);
    570             yID = getDummyAlloc(Y);
    571         }
    572         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp);
    573     }
    574 
    575     /**
    576      * ZGBMV performs one of the matrix-vector operations
    577      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
    578      *
    579      * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
    580      *
    581      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
    582      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
    583      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
    584      *           for i in range(0, m):
    585      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
    586      *                  b[i, j-i+kl] = a[i, j]
    587      *
    588      * @param TransA The type of transpose applied to matrix A.
    589      * @param KL The number of sub-diagonals of the matrix A.
    590      * @param KU The number of super-diagonals of the matrix A.
    591      * @param alpha The scalar alpha.
    592      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}.
    593      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
    594      * @param incX The increment for the elements of vector x, must be larger than zero.
    595      * @param beta The scalar beta.
    596      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
    597      * @param incY The increment for the elements of vector y, must be larger than zero.
    598      */
    599     public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
    600         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
    601         validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
    602         if (KL < 0 || KU < 0) {
    603             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
    604         }
    605         int M = A.getType().getY();
    606         int N = A.getType().getX();
    607 
    608         boolean mUseIncSupp = isIncSupp();
    609         long aID = A.getID(mRS);
    610         long xID = X.getID(mRS);
    611         long yID = Y.getID(mRS);
    612         if (mUseIncSupp) {
    613             aID = getDummyAlloc(A);
    614             xID = getDummyAlloc(X);
    615             yID = getDummyAlloc(Y);
    616         }
    617         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp);
    618     }
    619 
    620     static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
    621         validateTranspose(TransA);
    622         validateUplo(Uplo);
    623         validateDiag(Diag);
    624         int N = A.getType().getY();
    625         if (A.getType().getX() != N) {
    626             throw new RSRuntimeException("A must be a square matrix for TRMV");
    627         }
    628         if (!A.getType().getElement().isCompatible(e) ||
    629             !X.getType().getElement().isCompatible(e)) {
    630             throw new RSRuntimeException("Called BLAS with wrong Element type");
    631         }
    632         if (X.getType().getY() > 1) {
    633             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
    634         }
    635 
    636         if (incX <= 0) {
    637             throw new RSRuntimeException("Vector increments must be greater than 0");
    638         }
    639         int expectedXDim = 1 + (N - 1) * incX;
    640         if (X.getType().getX() != expectedXDim) {
    641             throw new RSRuntimeException("Incorrect vector dimensions for TRMV");
    642         }
    643     }
    644 
    645     static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
    646         validateTranspose(TransA);
    647         validateUplo(Uplo);
    648         validateDiag(Diag);
    649         if (!Ap.getType().getElement().isCompatible(e) ||
    650             !X.getType().getElement().isCompatible(e)) {
    651             throw new RSRuntimeException("Called BLAS with wrong Element type");
    652         }
    653         if (X.getType().getY() > 1) {
    654             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
    655         }
    656 
    657         if (Ap.getType().getY() > 1) {
    658             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
    659         }
    660 
    661         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
    662         //is it really doing anything?
    663         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
    664             throw new RSRuntimeException("Invalid dimension for Ap");
    665         }
    666         if (incX <= 0) {
    667             throw new RSRuntimeException("Vector increments must be greater than 0");
    668         }
    669         int expectedXDim = 1 + (N - 1) * incX;
    670         if (X.getType().getX() != expectedXDim) {
    671             throw new RSRuntimeException("Incorrect vector dimensions for TPMV");
    672         }
    673 
    674         return N;
    675     }
    676 
    677     /**
    678      * STRMV performs one of the matrix-vector operations
    679      * x := A*x   or   x := A**T*x
    680      *
    681      * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
    682      *
    683      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
    684      * @param TransA The type of transpose applied to matrix A.
    685      * @param Diag Specifies whether or not A is unit triangular.
    686      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
    687      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
    688      * @param incX The increment for the elements of vector x, must be larger than zero.
    689      */
    690     public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
    691         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
    692         int N = A.getType().getY();
    693 
    694         boolean mUseIncSupp = isIncSupp();
    695         long aID = A.getID(mRS);
    696         long xID = X.getID(mRS);
    697         if (mUseIncSupp) {
    698             aID = getDummyAlloc(A);
    699             xID = getDummyAlloc(X);
    700         }
    701         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
    702     }
    703 
    704     /**
    705      * DTRMV performs one of the matrix-vector operations
    706      * x := A*x   or   x := A**T*x
    707      *
    708      * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
    709      *
    710      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
    711      * @param TransA The type of transpose applied to matrix A.
    712      * @param Diag Specifies whether or not A is unit triangular.
    713      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
    714      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
    715      * @param incX The increment for the elements of vector x, must be larger than zero.
    716      */
    717     public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
    718         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
    719         int N = A.getType().getY();
    720 
    721         boolean mUseIncSupp = isIncSupp();
    722         long aID = A.getID(mRS);
    723         long xID = X.getID(mRS);
    724         if (mUseIncSupp) {
    725             aID = getDummyAlloc(A);
    726             xID = getDummyAlloc(X);
    727         }
    728         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
    729     }
    730 
    731     /**
    732      * CTRMV performs one of the matrix-vector operations
    733      * x := A*x   or   x := A**T*x   or   x := A**H*x
    734      *
    735      * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
    736      *
    737      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
    738      * @param TransA The type of transpose applied to matrix A.
    739      * @param Diag Specifies whether or not A is unit triangular.
    740      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
    741      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
    742      * @param incX The increment for the elements of vector x, must be larger than zero.
    743      */
    744     public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
    745         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
    746         int N = A.getType().getY();
    747 
    748         boolean mUseIncSupp = isIncSupp();
    749         long aID = A.getID(mRS);
    750         long xID = X.getID(mRS);
    751         if (mUseIncSupp) {
    752             aID = getDummyAlloc(A);
    753             xID = getDummyAlloc(X);
    754         }
    755         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
    756     }
    757 
    758     /**
    759      * ZTRMV performs one of the matrix-vector operations
    760      * x := A*x   or   x := A**T*x   or   x := A**H*x
    761      *
    762      * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
    763      *
    764      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
    765      * @param TransA The type of transpose applied to matrix A.
    766      * @param Diag Specifies whether or not A is unit triangular.
    767      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
    768      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
    769      * @param incX The increment for the elements of vector x, must be larger than zero.
    770      */
    771     public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
    772         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
    773         int N = A.getType().getY();
    774 
    775         boolean mUseIncSupp = isIncSupp();
    776         long aID = A.getID(mRS);
    777         long xID = X.getID(mRS);
    778         if (mUseIncSupp) {
    779             aID = getDummyAlloc(A);
    780             xID = getDummyAlloc(X);
    781         }
    782         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
    783     }
    784 
    785     /**
    786      * STBMV performs one of the matrix-vector operations
    787      * x := A*x   or   x := A**T*x
    788      *
    789      * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
    790      *
    791      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
    792      *       but only the region N*(K+1) will be referenced. The following subroutine is an
    793      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
    794      *           for i in range(0, n):
    795      *              for j in range(i, min(i+k+1, n)):
    796      *                  b[i, j-i] = a[i, j]
    797      *
    798      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
    799      * @param TransA The type of transpose applied to matrix A.
    800      * @param Diag Specifies whether or not A is unit triangular.
    801      * @param K The number of off-diagonals of the matrix A
    802      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
    803      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
    804      * @param incX The increment for the elements of vector x, must be larger than zero.
    805      */
    806     public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
    807         // TBMV has the same requirements as TRMV + K >= 0
    808         if (K < 0) {
    809             throw new RSRuntimeException("K must be greater than or equal to 0");
    810         }
    811         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
    812         int N = A.getType().getY();
    813 
    814         boolean mUseIncSupp = isIncSupp();
    815         long aID = A.getID(mRS);
    816         long xID = X.getID(mRS);
    817         if (mUseIncSupp) {
    818             aID = getDummyAlloc(A);
    819             xID = getDummyAlloc(X);
    820         }
    821         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
    822     }
    823 
    824     /**
    825      * DTBMV performs one of the matrix-vector operations
    826      * x := A*x   or   x := A**T*x
    827      *
    828      * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
    829      *
    830      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
    831      *       but only the region N*(K+1) will be referenced. The following subroutine is an
    832      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
    833      *           for i in range(0, n):
    834      *              for j in range(i, min(i+k+1, n)):
    835      *                  b[i, j-i] = a[i, j]
    836      *
    837      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
    838      * @param TransA The type of transpose applied to matrix A.
    839      * @param Diag Specifies whether or not A is unit triangular.
    840      * @param K The number of off-diagonals of the matrix A
    841      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
    842      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
    843      * @param incX The increment for the elements of vector x, must be larger than zero.
    844      */
    845     public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
    846         // TBMV has the same requirements as TRMV + K >= 0
    847         if (K < 0) {
    848             throw new RSRuntimeException("K must be greater than or equal to 0");
    849         }
    850         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
    851         int N = A.getType().getY();
    852 
    853         boolean mUseIncSupp = isIncSupp();
    854         long aID = A.getID(mRS);
    855         long xID = X.getID(mRS);
    856         if (mUseIncSupp) {
    857             aID = getDummyAlloc(A);
    858             xID = getDummyAlloc(X);
    859         }
    860         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
    861     }
    862 
    863     /**
    864      * CTBMV performs one of the matrix-vector operations
    865      * x := A*x   or   x := A**T*x   or   x := A**H*x
    866      *
    867      * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
    868      *
    869      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
    870      *       but only the region N*(K+1) will be referenced. The following subroutine is an
    871      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
    872      *           for i in range(0, n):
    873      *              for j in range(i, min(i+k+1, n)):
    874      *                  b[i, j-i] = a[i, j]
    875      *
    876      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
    877      * @param TransA The type of transpose applied to matrix A.
    878      * @param Diag Specifies whether or not A is unit triangular.
    879      * @param K The number of off-diagonals of the matrix A
    880      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
    881      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
    882      * @param incX The increment for the elements of vector x, must be larger than zero.
    883      */
    884     public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
    885         // TBMV has the same requirements as TRMV + K >= 0
    886         if (K < 0) {
    887             throw new RSRuntimeException("K must be greater than or equal to 0");
    888         }
    889         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
    890         int N = A.getType().getY();
    891 
    892         boolean mUseIncSupp = isIncSupp();
    893         long aID = A.getID(mRS);
    894         long xID = X.getID(mRS);
    895         if (mUseIncSupp) {
    896             aID = getDummyAlloc(A);
    897             xID = getDummyAlloc(X);
    898         }
    899         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
    900     }
    901 
    902     /**
    903      * ZTBMV performs one of the matrix-vector operations
    904      * x := A*x   or   x := A**T*x   or   x := A**H*x
    905      *
    906      * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
    907      *
    908      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
    909      *       but only the region N*(K+1) will be referenced. The following subroutine is an
    910      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
    911      *           for i in range(0, n):
    912      *              for j in range(i, min(i+k+1, n)):
    913      *                  b[i, j-i] = a[i, j]
    914      *
    915      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
    916      * @param TransA The type of transpose applied to matrix A.
    917      * @param Diag Specifies whether or not A is unit triangular.
    918      * @param K The number of off-diagonals of the matrix A
    919      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
    920      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
    921      * @param incX The increment for the elements of vector x, must be larger than zero.
    922      */
    923     public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
    924         // TBMV has the same requirements as TRMV + K >= 0
    925         if (K < 0) {
    926             throw new RSRuntimeException("K must be greater than or equal to 0");
    927         }
    928         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
    929         int N = A.getType().getY();
    930 
    931         boolean mUseIncSupp = isIncSupp();
    932         long aID = A.getID(mRS);
    933         long xID = X.getID(mRS);
    934         if (mUseIncSupp) {
    935             aID = getDummyAlloc(A);
    936             xID = getDummyAlloc(X);
    937         }
    938         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
    939     }
    940 
    941     /**
    942      * STPMV performs one of the matrix-vector operations
    943      * x := A*x   or   x := A**T*x
    944      *
    945      * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
    946      *
    947      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
    948      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
    949      *       'a' to packed matrix 'b'.
    950      *           k = 0
    951      *           for i in range(0, n):
    952      *              for j in range(i, n):
    953      *                  b[k++] = a[i, j]
    954      *
    955      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
    956      * @param TransA The type of transpose applied to matrix A.
    957      * @param Diag Specifies whether or not A is unit triangular.
    958      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
    959      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
    960      * @param incX The increment for the elements of vector x, must be larger than zero.
    961      */
    962     public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
    963         int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
    964 
    965         boolean mUseIncSupp = isIncSupp();
    966         long apID = Ap.getID(mRS);
    967         long xID = X.getID(mRS);
    968         if (mUseIncSupp) {
    969             apID = getDummyAlloc(Ap);
    970             xID = getDummyAlloc(X);
    971         }
    972         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
    973     }
    974 
    975     /**
    976      * DTPMV performs one of the matrix-vector operations
    977      * x := A*x   or   x := A**T*x
    978      *
    979      * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
    980      *
    981      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
    982      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
    983      *       'a' to packed matrix 'b'.
    984      *           k = 0
    985      *           for i in range(0, n):
    986      *              for j in range(i, n):
    987      *                  b[k++] = a[i, j]
    988      *
    989      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
    990      * @param TransA The type of transpose applied to matrix A.
    991      * @param Diag Specifies whether or not A is unit triangular.
    992      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
    993      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
    994      * @param incX The increment for the elements of vector x, must be larger than zero.
    995      */
    996     public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
    997         int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
    998 
    999         boolean mUseIncSupp = isIncSupp();
   1000         long apID = Ap.getID(mRS);
   1001         long xID = X.getID(mRS);
   1002         if (mUseIncSupp) {
   1003             apID = getDummyAlloc(Ap);
   1004             xID = getDummyAlloc(X);
   1005         }
   1006         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1007     }
   1008 
   1009     /**
   1010      * CTPMV performs one of the matrix-vector operations
   1011      * x := A*x   or   x := A**T*x   or   x := A**H*x
   1012      *
   1013      * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
   1014      *
   1015      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   1016      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
   1017      *       'a' to packed matrix 'b'.
   1018      *           k = 0
   1019      *           for i in range(0, n):
   1020      *              for j in range(i, n):
   1021      *                  b[k++] = a[i, j]
   1022      *
   1023      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
   1024      * @param TransA The type of transpose applied to matrix A.
   1025      * @param Diag Specifies whether or not A is unit triangular.
   1026      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
   1027      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
   1028      * @param incX The increment for the elements of vector x, must be larger than zero.
   1029      */
   1030     public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
   1031         int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
   1032 
   1033         boolean mUseIncSupp = isIncSupp();
   1034         long apID = Ap.getID(mRS);
   1035         long xID = X.getID(mRS);
   1036         if (mUseIncSupp) {
   1037             apID = getDummyAlloc(Ap);
   1038             xID = getDummyAlloc(X);
   1039         }
   1040         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1041     }
   1042 
   1043     /**
   1044      * ZTPMV performs one of the matrix-vector operations
   1045      * x := A*x   or   x := A**T*x   or   x := A**H*x
   1046      *
   1047      * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
   1048      *
   1049      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   1050      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
   1051      *       'a' to packed matrix 'b'.
   1052      *           k = 0
   1053      *           for i in range(0, n):
   1054      *              for j in range(i, n):
   1055      *                  b[k++] = a[i, j]
   1056      *
   1057      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
   1058      * @param TransA The type of transpose applied to matrix A.
   1059      * @param Diag Specifies whether or not A is unit triangular.
   1060      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
   1061      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
   1062      * @param incX The increment for the elements of vector x, must be larger than zero.
   1063      */
   1064     public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
   1065         int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
   1066 
   1067         boolean mUseIncSupp = isIncSupp();
   1068         long apID = Ap.getID(mRS);
   1069         long xID = X.getID(mRS);
   1070         if (mUseIncSupp) {
   1071             apID = getDummyAlloc(Ap);
   1072             xID = getDummyAlloc(X);
   1073         }
   1074         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1075     }
   1076 
   1077     /**
   1078      * STRSV solves one of the systems of equations
   1079      * A*x = b   or   A**T*x = b
   1080      *
   1081      * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
   1082      *
   1083      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
   1084      * @param TransA The type of transpose applied to matrix A.
   1085      * @param Diag Specifies whether or not A is unit triangular.
   1086      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
   1087      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
   1088      * @param incX The increment for the elements of vector x, must be larger than zero.
   1089      */
   1090     public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
   1091         // TRSV is the same as TRMV
   1092         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
   1093         int N = A.getType().getY();
   1094 
   1095         boolean mUseIncSupp = isIncSupp();
   1096         long aID = A.getID(mRS);
   1097         long xID = X.getID(mRS);
   1098         if (mUseIncSupp) {
   1099             aID = getDummyAlloc(A);
   1100             xID = getDummyAlloc(X);
   1101         }
   1102         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1103 
   1104     }
   1105 
   1106     /**
   1107      * DTRSV solves one of the systems of equations
   1108      * A*x = b   or   A**T*x = b
   1109      *
   1110      * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
   1111      *
   1112      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
   1113      * @param TransA The type of transpose applied to matrix A.
   1114      * @param Diag Specifies whether or not A is unit triangular.
   1115      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
   1116      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
   1117      * @param incX The increment for the elements of vector x, must be larger than zero.
   1118      */
   1119     public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
   1120         // TRSV is the same as TRMV
   1121         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
   1122         int N = A.getType().getY();
   1123 
   1124         boolean mUseIncSupp = isIncSupp();
   1125         long aID = A.getID(mRS);
   1126         long xID = X.getID(mRS);
   1127         if (mUseIncSupp) {
   1128             aID = getDummyAlloc(A);
   1129             xID = getDummyAlloc(X);
   1130         }
   1131         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1132 
   1133     }
   1134 
   1135     /**
   1136      * CTRSV solves one of the systems of equations
   1137      * A*x = b   or   A**T*x = b   or   A**H*x = b
   1138      *
   1139      * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
   1140      *
   1141      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
   1142      * @param TransA The type of transpose applied to matrix A.
   1143      * @param Diag Specifies whether or not A is unit triangular.
   1144      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   1145      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
   1146      * @param incX The increment for the elements of vector x, must be larger than zero.
   1147      */
   1148     public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
   1149         // TRSV is the same as TRMV
   1150         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
   1151         int N = A.getType().getY();
   1152 
   1153         boolean mUseIncSupp = isIncSupp();
   1154         long aID = A.getID(mRS);
   1155         long xID = X.getID(mRS);
   1156         if (mUseIncSupp) {
   1157             aID = getDummyAlloc(A);
   1158             xID = getDummyAlloc(X);
   1159         }
   1160         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1161 
   1162     }
   1163 
   1164     /**
   1165      * ZTRSV solves one of the systems of equations
   1166      * A*x = b   or   A**T*x = b   or   A**H*x = b
   1167      *
   1168      * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
   1169      *
   1170      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
   1171      * @param TransA The type of transpose applied to matrix A.
   1172      * @param Diag Specifies whether or not A is unit triangular.
   1173      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   1174      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
   1175      * @param incX The increment for the elements of vector x, must be larger than zero.
   1176      */
   1177     public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
   1178         // TRSV is the same as TRMV
   1179         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
   1180         int N = A.getType().getY();
   1181 
   1182         boolean mUseIncSupp = isIncSupp();
   1183         long aID = A.getID(mRS);
   1184         long xID = X.getID(mRS);
   1185         if (mUseIncSupp) {
   1186             aID = getDummyAlloc(A);
   1187             xID = getDummyAlloc(X);
   1188         }
   1189         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1190 
   1191     }
   1192 
   1193     /**
   1194      * STBSV solves one of the systems of equations
   1195      * A*x = b   or   A**T*x = b
   1196      *
   1197      * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
   1198      *
   1199      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
   1200      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
   1201      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
   1202      *           for i in range(0, n):
   1203      *              for j in range(i, min(i+k+1, n)):
   1204      *                  b[i, j-i] = a[i, j]
   1205      *
   1206      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
   1207      * @param TransA The type of transpose applied to matrix A.
   1208      * @param Diag Specifies whether or not A is unit triangular.
   1209      * @param K The number of off-diagonals of the matrix A
   1210      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
   1211      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
   1212      * @param incX The increment for the elements of vector x, must be larger than zero.
   1213      */
   1214     public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
   1215         // TBSV is the same as TRMV + K >= 0
   1216         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
   1217         int N = A.getType().getY();
   1218         if (K < 0) {
   1219             throw new RSRuntimeException("Number of diagonals must be positive");
   1220         }
   1221 
   1222         boolean mUseIncSupp = isIncSupp();
   1223         long aID = A.getID(mRS);
   1224         long xID = X.getID(mRS);
   1225         if (mUseIncSupp) {
   1226             aID = getDummyAlloc(A);
   1227             xID = getDummyAlloc(X);
   1228         }
   1229         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1230     }
   1231 
   1232     /**
   1233      * DTBSV solves one of the systems of equations
   1234      * A*x = b   or   A**T*x = b
   1235      *
   1236      * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
   1237      *
   1238      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
   1239      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
   1240      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
   1241      *           for i in range(0, n):
   1242      *              for j in range(i, min(i+k+1, n)):
   1243      *                  b[i, j-i] = a[i, j]
   1244      *
   1245      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
   1246      * @param TransA The type of transpose applied to matrix A.
   1247      * @param Diag Specifies whether or not A is unit triangular.
   1248      * @param K The number of off-diagonals of the matrix A
   1249      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
   1250      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
   1251      * @param incX The increment for the elements of vector x, must be larger than zero.
   1252      */
   1253     public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
   1254         // TBSV is the same as TRMV + K >= 0
   1255         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
   1256         int N = A.getType().getY();
   1257         if (K < 0) {
   1258             throw new RSRuntimeException("Number of diagonals must be positive");
   1259         }
   1260 
   1261         boolean mUseIncSupp = isIncSupp();
   1262         long aID = A.getID(mRS);
   1263         long xID = X.getID(mRS);
   1264         if (mUseIncSupp) {
   1265             aID = getDummyAlloc(A);
   1266             xID = getDummyAlloc(X);
   1267         }
   1268         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1269     }
   1270 
   1271     /**
   1272      * CTBSV solves one of the systems of equations
   1273      * A*x = b   or   A**T*x = b   or   A**H*x = b
   1274      *
   1275      * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
   1276      *
   1277      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
   1278      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
   1279      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
   1280      *           for i in range(0, n):
   1281      *              for j in range(i, min(i+k+1, n)):
   1282      *                  b[i, j-i] = a[i, j]
   1283      *
   1284      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
   1285      * @param TransA The type of transpose applied to matrix A.
   1286      * @param Diag Specifies whether or not A is unit triangular.
   1287      * @param K The number of off-diagonals of the matrix A
   1288      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   1289      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
   1290      * @param incX The increment for the elements of vector x, must be larger than zero.
   1291      */
   1292     public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
   1293         // TBSV is the same as TRMV + K >= 0
   1294         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
   1295         int N = A.getType().getY();
   1296         if (K < 0) {
   1297             throw new RSRuntimeException("Number of diagonals must be positive");
   1298         }
   1299 
   1300         boolean mUseIncSupp = isIncSupp();
   1301         long aID = A.getID(mRS);
   1302         long xID = X.getID(mRS);
   1303         if (mUseIncSupp) {
   1304             aID = getDummyAlloc(A);
   1305             xID = getDummyAlloc(X);
   1306         }
   1307         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1308     }
   1309 
   1310     /**
   1311      * ZTBSV solves one of the systems of equations
   1312      * A*x = b   or   A**T*x = b   or   A**H*x = b
   1313      *
   1314      * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
   1315      *
   1316      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
   1317      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
   1318      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
   1319      *           for i in range(0, n):
   1320      *              for j in range(i, min(i+k+1, n)):
   1321      *                  b[i, j-i] = a[i, j]
   1322      *
   1323      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
   1324      * @param TransA The type of transpose applied to matrix A.
   1325      * @param Diag Specifies whether or not A is unit triangular.
   1326      * @param K The number of off-diagonals of the matrix A
   1327      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   1328      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
   1329      * @param incX The increment for the elements of vector x, must be larger than zero.
   1330      */
   1331     public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
   1332         // TBSV is the same as TRMV + K >= 0
   1333         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
   1334         int N = A.getType().getY();
   1335         if (K < 0) {
   1336             throw new RSRuntimeException("Number of diagonals must be positive");
   1337         }
   1338 
   1339         boolean mUseIncSupp = isIncSupp();
   1340         long aID = A.getID(mRS);
   1341         long xID = X.getID(mRS);
   1342         if (mUseIncSupp) {
   1343             aID = getDummyAlloc(A);
   1344             xID = getDummyAlloc(X);
   1345         }
   1346         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1347     }
   1348 
   1349     /**
   1350      * STPSV solves one of the systems of equations
   1351      * A*x = b   or   A**T*x = b
   1352      *
   1353      * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
   1354      *
   1355      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   1356      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   1357      *       'a' to packed matrix 'b'.
   1358      *           k = 0
   1359      *           for i in range(0, n):
   1360      *              for j in range(i, n):
   1361      *                  b[k++] = a[i, j]
   1362      *
   1363      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
   1364      * @param TransA The type of transpose applied to matrix A.
   1365      * @param Diag Specifies whether or not A is unit triangular.
   1366      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
   1367      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
   1368      * @param incX The increment for the elements of vector x, must be larger than zero.
   1369      */
   1370     public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
   1371         // TPSV is same as TPMV
   1372         int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
   1373 
   1374         boolean mUseIncSupp = isIncSupp();
   1375         long apID = Ap.getID(mRS);
   1376         long xID = X.getID(mRS);
   1377         if (mUseIncSupp) {
   1378             apID = getDummyAlloc(Ap);
   1379             xID = getDummyAlloc(X);
   1380         }
   1381         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1382     }
   1383 
   1384     /**
   1385      * DTPSV solves one of the systems of equations
   1386      * A*x = b   or   A**T*x = b
   1387      *
   1388      * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
   1389      *
   1390      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   1391      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   1392      *       'a' to packed matrix 'b'.
   1393      *           k = 0
   1394      *           for i in range(0, n):
   1395      *              for j in range(i, n):
   1396      *                  b[k++] = a[i, j]
   1397      *
   1398      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
   1399      * @param TransA The type of transpose applied to matrix A.
   1400      * @param Diag Specifies whether or not A is unit triangular.
   1401      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
   1402      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
   1403      * @param incX The increment for the elements of vector x, must be larger than zero.
   1404      */
   1405     public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
   1406         // TPSV is same as TPMV
   1407         int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
   1408 
   1409         boolean mUseIncSupp = isIncSupp();
   1410         long apID = Ap.getID(mRS);
   1411         long xID = X.getID(mRS);
   1412         if (mUseIncSupp) {
   1413             apID = getDummyAlloc(Ap);
   1414             xID = getDummyAlloc(X);
   1415         }
   1416         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1417     }
   1418 
   1419     /**
   1420      * CTPSV solves one of the systems of equations
   1421      * A*x = b   or   A**T*x = b   or   A**H*x = b
   1422      *
   1423      * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
   1424      *
   1425      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   1426      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   1427      *       'a' to packed matrix 'b'.
   1428      *           k = 0
   1429      *           for i in range(0, n):
   1430      *              for j in range(i, n):
   1431      *                  b[k++] = a[i, j]
   1432      *
   1433      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
   1434      * @param TransA The type of transpose applied to matrix A.
   1435      * @param Diag Specifies whether or not A is unit triangular.
   1436      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
   1437      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
   1438      * @param incX The increment for the elements of vector x, must be larger than zero.
   1439      */
   1440     public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
   1441         // TPSV is same as TPMV
   1442         int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
   1443 
   1444         boolean mUseIncSupp = isIncSupp();
   1445         long apID = Ap.getID(mRS);
   1446         long xID = X.getID(mRS);
   1447         if (mUseIncSupp) {
   1448             apID = getDummyAlloc(Ap);
   1449             xID = getDummyAlloc(X);
   1450         }
   1451         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1452     }
   1453 
   1454     /**
   1455      * ZTPSV solves one of the systems of equations
   1456      * A*x = b   or   A**T*x = b   or   A**H*x = b
   1457      *
   1458      * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
   1459      *
   1460      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   1461      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   1462      *       'a' to packed matrix 'b'.
   1463      *           k = 0
   1464      *           for i in range(0, n):
   1465      *              for j in range(i, n):
   1466      *                  b[k++] = a[i, j]
   1467      *
   1468      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
   1469      * @param TransA The type of transpose applied to matrix A.
   1470      * @param Diag Specifies whether or not A is unit triangular.
   1471      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
   1472      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
   1473      * @param incX The increment for the elements of vector x, must be larger than zero.
   1474      */
   1475     public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
   1476         // TPSV is same as TPMV
   1477         int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
   1478 
   1479         boolean mUseIncSupp = isIncSupp();
   1480         long apID = Ap.getID(mRS);
   1481         long xID = X.getID(mRS);
   1482         if (mUseIncSupp) {
   1483             apID = getDummyAlloc(Ap);
   1484             xID = getDummyAlloc(X);
   1485         }
   1486         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
   1487     }
   1488 
   1489     /**
   1490      * Level 2, S and D only
   1491      */
   1492     static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) {
   1493         validateUplo(Uplo);
   1494         int N = A.getType().getY();
   1495         if (A.getType().getX() != N) {
   1496             throw new RSRuntimeException("A must be a square matrix for SYMV");
   1497         }
   1498         if (!A.getType().getElement().isCompatible(e) ||
   1499             !X.getType().getElement().isCompatible(e) ||
   1500             !Y.getType().getElement().isCompatible(e) ) {
   1501             throw new RSRuntimeException("Called BLAS with wrong Element type");
   1502         }
   1503         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
   1504             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
   1505         }
   1506 
   1507         if (incX <= 0 || incY <= 0) {
   1508             throw new RSRuntimeException("Vector increments must be greater than 0");
   1509         }
   1510         int expectedXDim = 1 + (N - 1) * incX;
   1511         if (X.getType().getX() != expectedXDim) {
   1512             throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
   1513         }
   1514         int expectedYDim = 1 + (N - 1) * incY;
   1515         if (Y.getType().getX() != expectedYDim) {
   1516             throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
   1517         }
   1518         return N;
   1519     }
   1520     static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {
   1521         validateUplo(Uplo);
   1522         if (!Ap.getType().getElement().isCompatible(e) ||
   1523             !X.getType().getElement().isCompatible(e) ||
   1524             !Y.getType().getElement().isCompatible(e)) {
   1525             throw new RSRuntimeException("Called BLAS with wrong Element type");
   1526         }
   1527         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
   1528             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
   1529         }
   1530 
   1531         if (Ap.getType().getY() > 1) {
   1532             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
   1533         }
   1534 
   1535         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
   1536         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
   1537             throw new RSRuntimeException("Invalid dimension for Ap");
   1538         }
   1539         if (incX <= 0 || incY <= 0) {
   1540             throw new RSRuntimeException("Vector increments must be greater than 0");
   1541         }
   1542         int expectedXDim = 1 + (N - 1) * incX;
   1543         if (X.getType().getX() != expectedXDim) {
   1544             throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
   1545         }
   1546         int expectedYDim = 1 + (N - 1) * incY;
   1547         if (Y.getType().getX() != expectedYDim) {
   1548             throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
   1549         }
   1550 
   1551         return N;
   1552     }
   1553     static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   1554         if (!A.getType().getElement().isCompatible(e) ||
   1555             !X.getType().getElement().isCompatible(e) ||
   1556             !Y.getType().getElement().isCompatible(e) ) {
   1557             throw new RSRuntimeException("Called BLAS with wrong Element type");
   1558         }
   1559 
   1560         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
   1561             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
   1562         }
   1563 
   1564         int M = A.getType().getY();
   1565         int N = A.getType().getX();
   1566 
   1567         if (N < 1 || M < 1) {
   1568             throw new RSRuntimeException("M and N must be 1 or greater for GER");
   1569         }
   1570         if (incX <= 0 || incY <= 0) {
   1571             throw new RSRuntimeException("Vector increments must be greater than 0");
   1572         }
   1573         int expectedXDim = 1 + (M - 1) * incX;
   1574         if (X.getType().getX() != expectedXDim) {
   1575             throw new RSRuntimeException("Incorrect vector dimensions for GER");
   1576         }
   1577         int expectedYDim = 1 + (N - 1) * incY;
   1578         if (Y.getType().getX() != expectedYDim) {
   1579             throw new RSRuntimeException("Incorrect vector dimensions for GER");
   1580         }
   1581 
   1582 
   1583     }
   1584     static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) {
   1585         validateUplo(Uplo);
   1586         if (!A.getType().getElement().isCompatible(e) ||
   1587             !X.getType().getElement().isCompatible(e)) {
   1588             throw new RSRuntimeException("Called BLAS with wrong Element type");
   1589         }
   1590 
   1591         int N = A.getType().getX();
   1592 
   1593         if (X.getType().getY() > 1) {
   1594             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
   1595         }
   1596         if (N != A.getType().getY()) {
   1597             throw new RSRuntimeException("A must be a symmetric matrix");
   1598         }
   1599         if (incX <= 0) {
   1600             throw new RSRuntimeException("Vector increments must be greater than 0");
   1601         }
   1602         int expectedXDim = 1 + (N - 1) * incX;
   1603         if (X.getType().getX() != expectedXDim) {
   1604             throw new RSRuntimeException("Incorrect vector dimensions for SYR");
   1605         }
   1606         return N;
   1607     }
   1608     static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) {
   1609         validateUplo(Uplo);
   1610         if (!Ap.getType().getElement().isCompatible(e) ||
   1611             !X.getType().getElement().isCompatible(e)) {
   1612             throw new RSRuntimeException("Called BLAS with wrong Element type");
   1613         }
   1614         if (X.getType().getY() > 1) {
   1615             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
   1616         }
   1617 
   1618         if (Ap.getType().getY() > 1) {
   1619             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
   1620         }
   1621 
   1622         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
   1623         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
   1624             throw new RSRuntimeException("Invalid dimension for Ap");
   1625         }
   1626         if (incX <= 0) {
   1627             throw new RSRuntimeException("Vector increments must be greater than 0");
   1628         }
   1629         int expectedXDim = 1 + (N - 1) * incX;
   1630         if (X.getType().getX() != expectedXDim) {
   1631             throw new RSRuntimeException("Incorrect vector dimensions for SPR");
   1632         }
   1633 
   1634         return N;
   1635     }
   1636 
   1637     static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   1638         validateUplo(Uplo);
   1639         if (!A.getType().getElement().isCompatible(e) ||
   1640             !X.getType().getElement().isCompatible(e) ||
   1641             !Y.getType().getElement().isCompatible(e)) {
   1642             throw new RSRuntimeException("Called BLAS with wrong Element type");
   1643         }
   1644 
   1645         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
   1646             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
   1647         }
   1648 
   1649         int N = A.getType().getX();
   1650 
   1651         if (N != A.getType().getY()) {
   1652             throw new RSRuntimeException("A must be a symmetric matrix");
   1653         }
   1654         if (incX <= 0 || incY <= 0) {
   1655             throw new RSRuntimeException("Vector increments must be greater than 0");
   1656         }
   1657         int expectedXDim = 1 + (N - 1) * incX;
   1658         int expectedYDim = 1 + (N - 1) * incY;
   1659         if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
   1660             throw new RSRuntimeException("Incorrect vector dimensions for SYR");
   1661         }
   1662         return N;
   1663 
   1664     }
   1665     static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
   1666         validateUplo(Uplo);
   1667         if (!Ap.getType().getElement().isCompatible(e) ||
   1668             !X.getType().getElement().isCompatible(e) ||
   1669             !Y.getType().getElement().isCompatible(e)) {
   1670             throw new RSRuntimeException("Called BLAS with wrong Element type");
   1671         }
   1672         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
   1673             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
   1674         }
   1675 
   1676         if (Ap.getType().getY() > 1) {
   1677             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
   1678         }
   1679 
   1680         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
   1681         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
   1682             throw new RSRuntimeException("Invalid dimension for Ap");
   1683         }
   1684         if (incX <= 0 || incY <= 0) {
   1685             throw new RSRuntimeException("Vector increments must be greater than 0");
   1686         }
   1687         int expectedXDim = 1 + (N - 1) * incX;
   1688         int expectedYDim = 1 + (N - 1) * incY;
   1689         if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
   1690             throw new RSRuntimeException("Incorrect vector dimensions for SPR2");
   1691         }
   1692 
   1693         return N;
   1694     }
   1695 
   1696     /**
   1697      * SSYMV performs the matrix-vector operation
   1698      * y := alpha*A*x + beta*y
   1699      *
   1700      * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
   1701      *
   1702      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   1703      * @param alpha The scalar alpha.
   1704      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
   1705      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
   1706      * @param incX The increment for the elements of vector x, must be larger than zero.
   1707      * @param beta The scalar beta.
   1708      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
   1709      * @param incY The increment for the elements of vector y, must be larger than zero.
   1710      */
   1711     public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
   1712         int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
   1713 
   1714         boolean mUseIncSupp = isIncSupp();
   1715         long aID = A.getID(mRS);
   1716         long xID = X.getID(mRS);
   1717         long yID = Y.getID(mRS);
   1718         if (mUseIncSupp) {
   1719             aID = getDummyAlloc(A);
   1720             xID = getDummyAlloc(X);
   1721             yID = getDummyAlloc(Y);
   1722         }
   1723         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
   1724     }
   1725 
   1726     /**
   1727      * SSBMV performs the matrix-vector operation
   1728      * y := alpha*A*x + beta*y
   1729      *
   1730      * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
   1731      *
   1732      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
   1733      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
   1734      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
   1735      *           for i in range(0, n):
   1736      *              for j in range(i, min(i+k+1, n)):
   1737      *                  b[i, j-i] = a[i, j]
   1738      *
   1739      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
   1740      * @param K The number of off-diagonals of the matrix A
   1741      * @param alpha The scalar alpha.
   1742      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
   1743      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
   1744      * @param incX The increment for the elements of vector x, must be larger than zero.
   1745      * @param beta The scalar beta.
   1746      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
   1747      * @param incY The increment for the elements of vector y, must be larger than zero.
   1748      */
   1749     public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
   1750         // SBMV is the same as SYMV + K >= 0
   1751         if (K < 0) {
   1752             throw new RSRuntimeException("K must be greater than or equal to 0");
   1753         }
   1754         int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
   1755 
   1756         boolean mUseIncSupp = isIncSupp();
   1757         long aID = A.getID(mRS);
   1758         long xID = X.getID(mRS);
   1759         long yID = Y.getID(mRS);
   1760         if (mUseIncSupp) {
   1761             aID = getDummyAlloc(A);
   1762             xID = getDummyAlloc(X);
   1763             yID = getDummyAlloc(Y);
   1764         }
   1765         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
   1766     }
   1767 
   1768     /**
   1769      * SSPMV performs the matrix-vector operation
   1770      * y := alpha*A*x + beta*y
   1771      *
   1772      * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
   1773      *
   1774      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   1775      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   1776      *       'a' to packed matrix 'b'.
   1777      *           k = 0
   1778      *           for i in range(0, n):
   1779      *              for j in range(i, n):
   1780      *                  b[k++] = a[i, j]
   1781      *
   1782      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
   1783      * @param alpha The scalar alpha.
   1784      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
   1785      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
   1786      * @param incX The increment for the elements of vector x, must be larger than zero.
   1787      * @param beta The scalar beta.
   1788      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
   1789      * @param incY The increment for the elements of vector y, must be larger than zero.
   1790      */
   1791     public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) {
   1792         int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY);
   1793 
   1794         boolean mUseIncSupp = isIncSupp();
   1795         long apID = Ap.getID(mRS);
   1796         long xID = X.getID(mRS);
   1797         long yID = Y.getID(mRS);
   1798         if (mUseIncSupp) {
   1799             apID = getDummyAlloc(Ap);
   1800             xID = getDummyAlloc(X);
   1801             yID = getDummyAlloc(Y);
   1802         }
   1803         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
   1804     }
   1805 
   1806     /**
   1807      * SGER performs the rank 1 operation
   1808      * A := alpha*x*y**T + A
   1809      *
   1810      * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
   1811      *
   1812      * @param alpha The scalar alpha.
   1813      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
   1814      * @param incX The increment for the elements of vector x, must be larger than zero.
   1815      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
   1816      * @param incY The increment for the elements of vector y, must be larger than zero.
   1817      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
   1818      */
   1819     public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   1820         int M = A.getType().getY();
   1821         int N = A.getType().getX();
   1822         validateGER(Element.F32(mRS), X, incX, Y, incY, A);
   1823 
   1824         boolean mUseIncSupp = isIncSupp();
   1825         long aID = A.getID(mRS);
   1826         long xID = X.getID(mRS);
   1827         long yID = Y.getID(mRS);
   1828         if (mUseIncSupp) {
   1829             aID = getDummyAlloc(A);
   1830             xID = getDummyAlloc(X);
   1831             yID = getDummyAlloc(Y);
   1832         }
   1833         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp);
   1834     }
   1835 
   1836     /**
   1837      * SSYR performs the rank 1 operation
   1838      * A := alpha*x*x**T + A
   1839      *
   1840      * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
   1841      *
   1842      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   1843      * @param alpha The scalar alpha.
   1844      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
   1845      * @param incX The increment for the elements of vector x, must be larger than zero.
   1846      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
   1847      */
   1848     public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
   1849         int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A);
   1850 
   1851         boolean mUseIncSupp = isIncSupp();
   1852         long aID = A.getID(mRS);
   1853         long xID = X.getID(mRS);
   1854         if (mUseIncSupp) {
   1855             aID = getDummyAlloc(A);
   1856             xID = getDummyAlloc(X);
   1857         }
   1858         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
   1859     }
   1860 
   1861     /**
   1862      * SSPR performs the rank 1 operation
   1863      * A := alpha*x*x**T + A
   1864      *
   1865      * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
   1866      *
   1867      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   1868      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   1869      *       'a' to packed matrix 'b'.
   1870      *           k = 0
   1871      *           for i in range(0, n):
   1872      *              for j in range(i, n):
   1873      *                  b[k++] = a[i, j]
   1874      *
   1875      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
   1876      * @param alpha The scalar alpha.
   1877      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
   1878      * @param incX The increment for the elements of vector x, must be larger than zero.
   1879      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
   1880      */
   1881     public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
   1882         int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap);
   1883 
   1884         boolean mUseIncSupp = isIncSupp();
   1885         long apID = Ap.getID(mRS);
   1886         long xID = X.getID(mRS);
   1887         if (mUseIncSupp) {
   1888             apID = getDummyAlloc(Ap);
   1889             xID = getDummyAlloc(X);
   1890         }
   1891         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
   1892     }
   1893 
   1894     /**
   1895      * SSYR2 performs the symmetric rank 2 operation
   1896      * A := alpha*x*y**T + alpha*y*x**T + A
   1897      *
   1898      * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
   1899      *
   1900      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   1901      * @param alpha The scalar alpha.
   1902      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
   1903      * @param incX The increment for the elements of vector x, must be larger than zero.
   1904      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
   1905      * @param incY The increment for the elements of vector y, must be larger than zero.
   1906      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
   1907      */
   1908     public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   1909         int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A);
   1910 
   1911         boolean mUseIncSupp = isIncSupp();
   1912         long aID = A.getID(mRS);
   1913         long xID = X.getID(mRS);
   1914         long yID = Y.getID(mRS);
   1915         if (mUseIncSupp) {
   1916             aID = getDummyAlloc(A);
   1917             xID = getDummyAlloc(X);
   1918             yID = getDummyAlloc(Y);
   1919         }
   1920         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp);
   1921     }
   1922 
   1923     /**
   1924      * SSPR2 performs the symmetric rank 2 operation
   1925      * A := alpha*x*y**T + alpha*y*x**T + A
   1926      *
   1927      * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
   1928      *
   1929      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   1930      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   1931      *       'a' to packed matrix 'b'.
   1932      *           k = 0
   1933      *           for i in range(0, n):
   1934      *              for j in range(i, n):
   1935      *                  b[k++] = a[i, j]
   1936      *
   1937      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
   1938      * @param alpha The scalar alpha.
   1939      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
   1940      * @param incX The increment for the elements of vector x, must be larger than zero.
   1941      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
   1942      * @param incY The increment for the elements of vector y, must be larger than zero.
   1943      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
   1944      */
   1945     public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
   1946         int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap);
   1947 
   1948         boolean mUseIncSupp = isIncSupp();
   1949         long apID = Ap.getID(mRS);
   1950         long xID = X.getID(mRS);
   1951         long yID = Y.getID(mRS);
   1952         if (mUseIncSupp) {
   1953             apID = getDummyAlloc(Ap);
   1954             xID = getDummyAlloc(X);
   1955             yID = getDummyAlloc(Y);
   1956         }
   1957         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp);
   1958     }
   1959 
   1960     /**
   1961      * DSYMV performs the matrix-vector operation
   1962      * y := alpha*A*x + beta*y
   1963      *
   1964      * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
   1965      *
   1966      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   1967      * @param alpha The scalar alpha.
   1968      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
   1969      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
   1970      * @param incX The increment for the elements of vector x, must be larger than zero.
   1971      * @param beta The scalar beta.
   1972      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
   1973      * @param incY The increment for the elements of vector y, must be larger than zero.
   1974      */
   1975     public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
   1976         int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
   1977 
   1978         boolean mUseIncSupp = isIncSupp();
   1979         long aID = A.getID(mRS);
   1980         long xID = X.getID(mRS);
   1981         long yID = Y.getID(mRS);
   1982         if (mUseIncSupp) {
   1983             aID = getDummyAlloc(A);
   1984             xID = getDummyAlloc(X);
   1985             yID = getDummyAlloc(Y);
   1986         }
   1987         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
   1988     }
   1989 
   1990     /**
   1991      * DSBMV performs the matrix-vector operation
   1992      * y := alpha*A*x + beta*y
   1993      *
   1994      * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
   1995      *
   1996      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
   1997      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
   1998      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
   1999      *           for i in range(0, n):
   2000      *              for j in range(i, min(i+k+1, n)):
   2001      *                  b[i, j-i] = a[i, j]
   2002      *
   2003      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
   2004      * @param K The number of off-diagonals of the matrix A
   2005      * @param alpha The scalar alpha.
   2006      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
   2007      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
   2008      * @param incX The increment for the elements of vector x, must be larger than zero.
   2009      * @param beta The scalar beta.
   2010      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
   2011      * @param incY The increment for the elements of vector y, must be larger than zero.
   2012      */
   2013     public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
   2014         // SBMV is the same as SYMV + K >= 0
   2015         if (K < 0) {
   2016             throw new RSRuntimeException("K must be greater than or equal to 0");
   2017         }
   2018         int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
   2019 
   2020         boolean mUseIncSupp = isIncSupp();
   2021         long aID = A.getID(mRS);
   2022         long xID = X.getID(mRS);
   2023         long yID = Y.getID(mRS);
   2024         if (mUseIncSupp) {
   2025             aID = getDummyAlloc(A);
   2026             xID = getDummyAlloc(X);
   2027             yID = getDummyAlloc(Y);
   2028         }
   2029         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
   2030     }
   2031 
   2032     /**
   2033      * DSPMV performs the matrix-vector operation
   2034      * y := alpha*A*x + beta*y
   2035      *
   2036      * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
   2037      *
   2038      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   2039      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   2040      *       'a' to packed matrix 'b'.
   2041      *           k = 0
   2042      *           for i in range(0, n):
   2043      *              for j in range(i, n):
   2044      *                  b[k++] = a[i, j]
   2045      *
   2046      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
   2047      * @param alpha The scalar alpha.
   2048      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
   2049      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
   2050      * @param incX The increment for the elements of vector x, must be larger than zero.
   2051      * @param beta The scalar beta.
   2052      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
   2053      * @param incY The increment for the elements of vector y, must be larger than zero.
   2054      */
   2055     public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) {
   2056         int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY);
   2057 
   2058         boolean mUseIncSupp = isIncSupp();
   2059         long apID = Ap.getID(mRS);
   2060         long xID = X.getID(mRS);
   2061         long yID = Y.getID(mRS);
   2062         if (mUseIncSupp) {
   2063             apID = getDummyAlloc(Ap);
   2064             xID = getDummyAlloc(X);
   2065             yID = getDummyAlloc(Y);
   2066         }
   2067         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
   2068     }
   2069 
   2070     /**
   2071      * DGER performs the rank 1 operation
   2072      * A := alpha*x*y**T + A
   2073      *
   2074      * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
   2075      *
   2076      * @param alpha The scalar alpha.
   2077      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
   2078      * @param incX The increment for the elements of vector x, must be larger than zero.
   2079      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
   2080      * @param incY The increment for the elements of vector y, must be larger than zero.
   2081      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
   2082      */
   2083     public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   2084         int M = A.getType().getY();
   2085         int N = A.getType().getX();
   2086         validateGER(Element.F64(mRS), X, incX, Y, incY, A);
   2087 
   2088         boolean mUseIncSupp = isIncSupp();
   2089         long aID = A.getID(mRS);
   2090         long xID = X.getID(mRS);
   2091         long yID = Y.getID(mRS);
   2092         if (mUseIncSupp) {
   2093             aID = getDummyAlloc(A);
   2094             xID = getDummyAlloc(X);
   2095             yID = getDummyAlloc(Y);
   2096         }
   2097         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp);
   2098     }
   2099 
   2100     /**
   2101      * DSYR performs the rank 1 operation
   2102      * A := alpha*x*x**T + A
   2103      *
   2104      * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
   2105      *
   2106      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   2107      * @param alpha The scalar alpha.
   2108      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
   2109      * @param incX The increment for the elements of vector x, must be larger than zero.
   2110      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
   2111      */
   2112     public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
   2113         int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A);
   2114 
   2115         boolean mUseIncSupp = isIncSupp();
   2116         long aID = A.getID(mRS);
   2117         long xID = X.getID(mRS);
   2118         if (mUseIncSupp) {
   2119             aID = getDummyAlloc(A);
   2120             xID = getDummyAlloc(X);
   2121         }
   2122         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
   2123     }
   2124 
   2125     /**
   2126      * DSPR performs the rank 1 operation
   2127      * A := alpha*x*x**T + A
   2128      *
   2129      * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
   2130      *
   2131      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   2132      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   2133      *       'a' to packed matrix 'b'.
   2134      *           k = 0
   2135      *           for i in range(0, n):
   2136      *              for j in range(i, n):
   2137      *                  b[k++] = a[i, j]
   2138      *
   2139      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
   2140      * @param alpha The scalar alpha.
   2141      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
   2142      * @param incX The increment for the elements of vector x, must be larger than zero.
   2143      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
   2144      */
   2145     public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
   2146         int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap);
   2147 
   2148         boolean mUseIncSupp = isIncSupp();
   2149         long apID = Ap.getID(mRS);
   2150         long xID = X.getID(mRS);
   2151         if (mUseIncSupp) {
   2152             apID = getDummyAlloc(Ap);
   2153             xID = getDummyAlloc(X);
   2154         }
   2155         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
   2156     }
   2157 
   2158     /**
   2159      * DSYR2 performs the symmetric rank 2 operation
   2160      * A := alpha*x*y**T + alpha*y*x**T + A
   2161      *
   2162      * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
   2163      *
   2164      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   2165      * @param alpha The scalar alpha.
   2166      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
   2167      * @param incX The increment for the elements of vector x, must be larger than zero.
   2168      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
   2169      * @param incY The increment for the elements of vector y, must be larger than zero.
   2170      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
   2171      */
   2172     public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   2173         int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A);
   2174 
   2175         boolean mUseIncSupp = isIncSupp();
   2176         long aID = A.getID(mRS);
   2177         long xID = X.getID(mRS);
   2178         long yID = Y.getID(mRS);
   2179         if (mUseIncSupp) {
   2180             aID = getDummyAlloc(A);
   2181             xID = getDummyAlloc(X);
   2182             yID = getDummyAlloc(Y);
   2183         }
   2184         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp);
   2185     }
   2186 
   2187     /**
   2188      * DSPR2 performs the symmetric rank 2 operation
   2189      * A := alpha*x*y**T + alpha*y*x**T + A
   2190      *
   2191      * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
   2192      *
   2193      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   2194      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   2195      *       'a' to packed matrix 'b'.
   2196      *           k = 0
   2197      *           for i in range(0, n):
   2198      *              for j in range(i, n):
   2199      *                  b[k++] = a[i, j]
   2200      *
   2201      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
   2202      * @param alpha The scalar alpha.
   2203      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
   2204      * @param incX The increment for the elements of vector x, must be larger than zero.
   2205      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
   2206      * @param incY The increment for the elements of vector y, must be larger than zero.
   2207      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
   2208      */
   2209     public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
   2210         int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap);
   2211 
   2212         boolean mUseIncSupp = isIncSupp();
   2213         long apID = Ap.getID(mRS);
   2214         long xID = X.getID(mRS);
   2215         long yID = Y.getID(mRS);
   2216         if (mUseIncSupp) {
   2217             apID = getDummyAlloc(Ap);
   2218             xID = getDummyAlloc(X);
   2219             yID = getDummyAlloc(Y);
   2220         }
   2221         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp);
   2222     }
   2223 
   2224 
   2225     /**
   2226      * Level 2, C and Z only
   2227      */
   2228 
   2229     static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   2230         if (!A.getType().getElement().isCompatible(e) ||
   2231             !X.getType().getElement().isCompatible(e) ||
   2232             !Y.getType().getElement().isCompatible(e)) {
   2233             throw new RSRuntimeException("Called BLAS with wrong Element type");
   2234         }
   2235         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
   2236             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
   2237         }
   2238 
   2239         int M = A.getType().getY();
   2240         int N = A.getType().getX();
   2241         if (incX <= 0 || incY <= 0) {
   2242             throw new RSRuntimeException("Vector increments must be greater than 0");
   2243         }
   2244         int expectedXDim = 1 + (M - 1) * incX;
   2245         if (X.getType().getX() != expectedXDim) {
   2246             throw new RSRuntimeException("Incorrect vector dimensions for GERU");
   2247         }
   2248         int expectedYDim = 1 + (N - 1) * incY;
   2249         if (Y.getType().getX() != expectedYDim) {
   2250             throw new RSRuntimeException("Incorrect vector dimensions for GERU");
   2251         }
   2252 
   2253     }
   2254 
   2255     /**
   2256      * CHEMV performs the matrix-vector operation
   2257      * y := alpha*A*x + beta*y
   2258      *
   2259      * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
   2260      *
   2261      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   2262      * @param alpha The scalar alpha.
   2263      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   2264      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
   2265      * @param incX The increment for the elements of vector x, must be larger than zero.
   2266      * @param beta The scalar beta.
   2267      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
   2268      * @param incY The increment for the elements of vector y, must be larger than zero.
   2269      */
   2270     public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
   2271         // HEMV is the same as SYR2 validation-wise
   2272         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
   2273 
   2274         boolean mUseIncSupp = isIncSupp();
   2275         long aID = A.getID(mRS);
   2276         long xID = X.getID(mRS);
   2277         long yID = Y.getID(mRS);
   2278         if (mUseIncSupp) {
   2279             aID = getDummyAlloc(A);
   2280             xID = getDummyAlloc(X);
   2281             yID = getDummyAlloc(Y);
   2282         }
   2283         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
   2284     }
   2285 
   2286     /**
   2287      * CHBMV performs the matrix-vector operation
   2288      * y := alpha*A*x + beta*y
   2289      *
   2290      * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
   2291      *
   2292      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
   2293      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
   2294      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
   2295      *           for i in range(0, n):
   2296      *              for j in range(i, min(i+k+1, n)):
   2297      *                  b[i, j-i] = a[i, j]
   2298      *
   2299      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
   2300      * @param K The number of off-diagonals of the matrix A
   2301      * @param alpha The scalar alpha.
   2302      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   2303      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
   2304      * @param incX The increment for the elements of vector x, must be larger than zero.
   2305      * @param beta The scalar beta.
   2306      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
   2307      * @param incY The increment for the elements of vector y, must be larger than zero.
   2308      */
   2309     public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
   2310         // HBMV is the same as SYR2 validation-wise
   2311         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
   2312         if (K < 0) {
   2313             throw new RSRuntimeException("K must be 0 or greater for HBMV");
   2314         }
   2315 
   2316         boolean mUseIncSupp = isIncSupp();
   2317         long aID = A.getID(mRS);
   2318         long xID = X.getID(mRS);
   2319         long yID = Y.getID(mRS);
   2320         if (mUseIncSupp) {
   2321             aID = getDummyAlloc(A);
   2322             xID = getDummyAlloc(X);
   2323             yID = getDummyAlloc(Y);
   2324         }
   2325         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
   2326     }
   2327 
   2328     /**
   2329      * CHPMV performs the matrix-vector operation
   2330      * y := alpha*A*x + beta*y
   2331      *
   2332      * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
   2333      *
   2334      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   2335      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   2336      *       'a' to packed matrix 'b'.
   2337      *           k = 0
   2338      *           for i in range(0, n):
   2339      *              for j in range(i, n):
   2340      *                  b[k++] = a[i, j]
   2341      *
   2342      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
   2343      * @param alpha The scalar alpha.
   2344      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   2345      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
   2346      * @param incX The increment for the elements of vector x, must be larger than zero.
   2347      * @param beta The scalar beta.
   2348      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
   2349      * @param incY The increment for the elements of vector y, must be larger than zero.
   2350      */
   2351     public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
   2352         // HPMV is the same as SPR2
   2353         int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
   2354 
   2355         boolean mUseIncSupp = isIncSupp();
   2356         long apID = Ap.getID(mRS);
   2357         long xID = X.getID(mRS);
   2358         long yID = Y.getID(mRS);
   2359         if (mUseIncSupp) {
   2360             apID = getDummyAlloc(Ap);
   2361             xID = getDummyAlloc(X);
   2362             yID = getDummyAlloc(Y);
   2363         }
   2364         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
   2365     }
   2366 
   2367     /**
   2368      * CGERU performs the rank 1 operation
   2369      * A := alpha*x*y**T + A
   2370      *
   2371      * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
   2372      *
   2373      * @param alpha The scalar alpha.
   2374      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
   2375      * @param incX The increment for the elements of vector x, must be larger than zero.
   2376      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
   2377      * @param incY The increment for the elements of vector y, must be larger than zero.
   2378      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   2379      */
   2380     public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   2381         validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
   2382         int M = A.getType().getY();
   2383         int N = A.getType().getX();
   2384 
   2385         boolean mUseIncSupp = isIncSupp();
   2386         long aID = A.getID(mRS);
   2387         long xID = X.getID(mRS);
   2388         long yID = Y.getID(mRS);
   2389         if (mUseIncSupp) {
   2390             aID = getDummyAlloc(A);
   2391             xID = getDummyAlloc(X);
   2392             yID = getDummyAlloc(Y);
   2393         }
   2394         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
   2395     }
   2396 
   2397     /**
   2398      * CGERC performs the rank 1 operation
   2399      * A := alpha*x*y**H + A
   2400      *
   2401      * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
   2402      *
   2403      * @param alpha The scalar alpha.
   2404      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
   2405      * @param incX The increment for the elements of vector x, must be larger than zero.
   2406      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
   2407      * @param incY The increment for the elements of vector y, must be larger than zero.
   2408      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   2409      */
   2410     public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   2411         // same as GERU
   2412         validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
   2413         int M = A.getType().getY();
   2414         int N = A.getType().getX();
   2415 
   2416         boolean mUseIncSupp = isIncSupp();
   2417         long aID = A.getID(mRS);
   2418         long xID = X.getID(mRS);
   2419         long yID = Y.getID(mRS);
   2420         if (mUseIncSupp) {
   2421             aID = getDummyAlloc(A);
   2422             xID = getDummyAlloc(X);
   2423             yID = getDummyAlloc(Y);
   2424         }
   2425         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
   2426     }
   2427 
   2428     /**
   2429      * CHER performs the rank 1 operation
   2430      * A := alpha*x*x**H + A
   2431      *
   2432      * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
   2433      *
   2434      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   2435      * @param alpha The scalar alpha.
   2436      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
   2437      * @param incX The increment for the elements of vector x, must be larger than zero.
   2438      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   2439      */
   2440     public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
   2441         // same as SYR
   2442         int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A);
   2443 
   2444         boolean mUseIncSupp = isIncSupp();
   2445         long aID = A.getID(mRS);
   2446         long xID = X.getID(mRS);
   2447         if (mUseIncSupp) {
   2448             aID = getDummyAlloc(A);
   2449             xID = getDummyAlloc(X);
   2450         }
   2451         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp);
   2452     }
   2453 
   2454     /**
   2455      * CHPR performs the rank 1 operation
   2456      * A := alpha*x*x**H + A
   2457      *
   2458      * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
   2459      *
   2460      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   2461      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   2462      *       'a' to packed matrix 'b'.
   2463      *           k = 0
   2464      *           for i in range(0, n):
   2465      *              for j in range(i, n):
   2466      *                  b[k++] = a[i, j]
   2467      *
   2468      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
   2469      * @param alpha The scalar alpha.
   2470      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
   2471      * @param incX The increment for the elements of vector x, must be larger than zero.
   2472      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   2473      */
   2474     public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
   2475         // equivalent to SPR for validation
   2476         int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap);
   2477 
   2478         boolean mUseIncSupp = isIncSupp();
   2479         long apID = Ap.getID(mRS);
   2480         long xID = X.getID(mRS);
   2481         if (mUseIncSupp) {
   2482             apID = getDummyAlloc(Ap);
   2483             xID = getDummyAlloc(X);
   2484         }
   2485         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp);
   2486     }
   2487 
   2488     /**
   2489      * CHER2 performs the symmetric rank 2 operation
   2490      * A := alpha*x*y**H + alpha*y*x**H + A
   2491      *
   2492      * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
   2493      *
   2494      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   2495      * @param alpha The scalar alpha.
   2496      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
   2497      * @param incX The increment for the elements of vector x, must be larger than zero.
   2498      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
   2499      * @param incY The increment for the elements of vector y, must be larger than zero.
   2500      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   2501      */
   2502     public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   2503         // same as SYR2
   2504         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
   2505 
   2506         boolean mUseIncSupp = isIncSupp();
   2507         long aID = A.getID(mRS);
   2508         long xID = X.getID(mRS);
   2509         long yID = Y.getID(mRS);
   2510         if (mUseIncSupp) {
   2511             aID = getDummyAlloc(A);
   2512             xID = getDummyAlloc(X);
   2513             yID = getDummyAlloc(Y);
   2514         }
   2515         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
   2516     }
   2517 
   2518     /**
   2519      * CHPR2 performs the symmetric rank 2 operation
   2520      * A := alpha*x*y**H + alpha*y*x**H + A
   2521      *
   2522      * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
   2523      *
   2524      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   2525      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   2526      *       'a' to packed matrix 'b'.
   2527      *           k = 0
   2528      *           for i in range(0, n):
   2529      *              for j in range(i, n):
   2530      *                  b[k++] = a[i, j]
   2531      *
   2532      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
   2533      * @param alpha The scalar alpha.
   2534      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
   2535      * @param incX The increment for the elements of vector x, must be larger than zero.
   2536      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
   2537      * @param incY The increment for the elements of vector y, must be larger than zero.
   2538      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   2539      */
   2540     public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
   2541         // same as SPR2
   2542         int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
   2543 
   2544         boolean mUseIncSupp = isIncSupp();
   2545         long apID = Ap.getID(mRS);
   2546         long xID = X.getID(mRS);
   2547         long yID = Y.getID(mRS);
   2548         if (mUseIncSupp) {
   2549             apID = getDummyAlloc(Ap);
   2550             xID = getDummyAlloc(X);
   2551             yID = getDummyAlloc(Y);
   2552         }
   2553         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp);
   2554     }
   2555 
   2556     /**
   2557      * ZHEMV performs the matrix-vector operation
   2558      * y := alpha*A*x + beta*y
   2559      *
   2560      * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
   2561      *
   2562      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   2563      * @param alpha The scalar alpha.
   2564      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   2565      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
   2566      * @param incX The increment for the elements of vector x, must be larger than zero.
   2567      * @param beta The scalar beta.
   2568      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
   2569      * @param incY The increment for the elements of vector y, must be larger than zero.
   2570      */
   2571     public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
   2572         // HEMV is the same as SYR2 validation-wise
   2573         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
   2574 
   2575         boolean mUseIncSupp = isIncSupp();
   2576         long aID = A.getID(mRS);
   2577         long xID = X.getID(mRS);
   2578         long yID = Y.getID(mRS);
   2579         if (mUseIncSupp) {
   2580             aID = getDummyAlloc(A);
   2581             xID = getDummyAlloc(X);
   2582             yID = getDummyAlloc(Y);
   2583         }
   2584         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
   2585     }
   2586 
   2587     /**
   2588      * ZHBMV performs the matrix-vector operation
   2589      * y := alpha*A*x + beta*y
   2590      *
   2591      * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
   2592      *
   2593      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
   2594      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
   2595      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
   2596      *           for i in range(0, n):
   2597      *              for j in range(i, min(i+k+1, n)):
   2598      *                  b[i, j-i] = a[i, j]
   2599      *
   2600      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
   2601      * @param K The number of off-diagonals of the matrix A
   2602      * @param alpha The scalar alpha.
   2603      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   2604      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
   2605      * @param incX The increment for the elements of vector x, must be larger than zero.
   2606      * @param beta The scalar beta.
   2607      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
   2608      * @param incY The increment for the elements of vector y, must be larger than zero.
   2609      */
   2610     public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
   2611         // HBMV is the same as SYR2 validation-wise
   2612         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
   2613         if (K < 0) {
   2614             throw new RSRuntimeException("K must be 0 or greater for HBMV");
   2615         }
   2616 
   2617         boolean mUseIncSupp = isIncSupp();
   2618         long aID = A.getID(mRS);
   2619         long xID = X.getID(mRS);
   2620         long yID = Y.getID(mRS);
   2621         if (mUseIncSupp) {
   2622             aID = getDummyAlloc(A);
   2623             xID = getDummyAlloc(X);
   2624             yID = getDummyAlloc(Y);
   2625         }
   2626         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
   2627     }
   2628 
   2629     /**
   2630      * ZHPMV performs the matrix-vector operation
   2631      * y := alpha*A*x + beta*y
   2632      *
   2633      * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
   2634      *
   2635      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   2636      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   2637      *       'a' to packed matrix 'b'.
   2638      *           k = 0
   2639      *           for i in range(0, n):
   2640      *              for j in range(i, n):
   2641      *                  b[k++] = a[i, j]
   2642      *
   2643      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
   2644      * @param alpha The scalar alpha.
   2645      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   2646      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
   2647      * @param incX The increment for the elements of vector x, must be larger than zero.
   2648      * @param beta The scalar beta.
   2649      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
   2650      * @param incY The increment for the elements of vector y, must be larger than zero.
   2651      */
   2652     public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
   2653         // HPMV is the same as SPR2
   2654         int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
   2655 
   2656         boolean mUseIncSupp = isIncSupp();
   2657         long apID = Ap.getID(mRS);
   2658         long xID = X.getID(mRS);
   2659         long yID = Y.getID(mRS);
   2660         if (mUseIncSupp) {
   2661             apID = getDummyAlloc(Ap);
   2662             xID = getDummyAlloc(X);
   2663             yID = getDummyAlloc(Y);
   2664         }
   2665         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
   2666     }
   2667 
   2668     /**
   2669      * ZGERU performs the rank 1 operation
   2670      * A := alpha*x*y**T + A
   2671      *
   2672      * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
   2673      *
   2674      * @param alpha The scalar alpha.
   2675      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
   2676      * @param incX The increment for the elements of vector x, must be larger than zero.
   2677      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
   2678      * @param incY The increment for the elements of vector y, must be larger than zero.
   2679      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   2680      */
   2681     public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   2682         validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
   2683         int M = A.getType().getY();
   2684         int N = A.getType().getX();
   2685 
   2686         boolean mUseIncSupp = isIncSupp();
   2687         long aID = A.getID(mRS);
   2688         long xID = X.getID(mRS);
   2689         long yID = Y.getID(mRS);
   2690         if (mUseIncSupp) {
   2691             aID = getDummyAlloc(A);
   2692             xID = getDummyAlloc(X);
   2693             yID = getDummyAlloc(Y);
   2694         }
   2695         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
   2696     }
   2697 
   2698     /**
   2699      * ZGERC performs the rank 1 operation
   2700      * A := alpha*x*y**H + A
   2701      *
   2702      * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
   2703      *
   2704      * @param alpha The scalar alpha.
   2705      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
   2706      * @param incX The increment for the elements of vector x, must be larger than zero.
   2707      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
   2708      * @param incY The increment for the elements of vector y, must be larger than zero.
   2709      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   2710      */
   2711     public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   2712         // same as GERU
   2713         validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
   2714         int M = A.getType().getY();
   2715         int N = A.getType().getX();
   2716 
   2717         boolean mUseIncSupp = isIncSupp();
   2718         long aID = A.getID(mRS);
   2719         long xID = X.getID(mRS);
   2720         long yID = Y.getID(mRS);
   2721         if (mUseIncSupp) {
   2722             aID = getDummyAlloc(A);
   2723             xID = getDummyAlloc(X);
   2724             yID = getDummyAlloc(Y);
   2725         }
   2726         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
   2727     }
   2728 
   2729     /**
   2730      * ZHER performs the rank 1 operation
   2731      * A := alpha*x*x**H + A
   2732      *
   2733      * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
   2734      *
   2735      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   2736      * @param alpha The scalar alpha.
   2737      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
   2738      * @param incX The increment for the elements of vector x, must be larger than zero.
   2739      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   2740      */
   2741     public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
   2742         // same as SYR
   2743         int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A);
   2744 
   2745         boolean mUseIncSupp = isIncSupp();
   2746         long aID = A.getID(mRS);
   2747         long xID = X.getID(mRS);
   2748         if (mUseIncSupp) {
   2749             aID = getDummyAlloc(A);
   2750             xID = getDummyAlloc(X);
   2751         }
   2752         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp);
   2753     }
   2754 
   2755     /**
   2756      * ZHPR performs the rank 1 operation
   2757      * A := alpha*x*x**H + A
   2758      *
   2759      * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
   2760      *
   2761      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   2762      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   2763      *       'a' to packed matrix 'b'.
   2764      *           k = 0
   2765      *           for i in range(0, n):
   2766      *              for j in range(i, n):
   2767      *                  b[k++] = a[i, j]
   2768      *
   2769      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
   2770      * @param alpha The scalar alpha.
   2771      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
   2772      * @param incX The increment for the elements of vector x, must be larger than zero.
   2773      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   2774      */
   2775     public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
   2776         // equivalent to SPR for validation
   2777         int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap);
   2778 
   2779         boolean mUseIncSupp = isIncSupp();
   2780         long apID = Ap.getID(mRS);
   2781         long xID = X.getID(mRS);
   2782         if (mUseIncSupp) {
   2783             apID = getDummyAlloc(Ap);
   2784             xID = getDummyAlloc(X);
   2785         }
   2786         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp);
   2787     }
   2788 
   2789     /**
   2790      * ZHER2 performs the symmetric rank 2 operation
   2791      * A := alpha*x*y**H + alpha*y*x**H + A
   2792      *
   2793      * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
   2794      *
   2795      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   2796      * @param alpha The scalar alpha.
   2797      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
   2798      * @param incX The increment for the elements of vector x, must be larger than zero.
   2799      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
   2800      * @param incY The increment for the elements of vector y, must be larger than zero.
   2801      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   2802      */
   2803     public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
   2804         // same as SYR2
   2805         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
   2806 
   2807         boolean mUseIncSupp = isIncSupp();
   2808         long aID = A.getID(mRS);
   2809         long xID = X.getID(mRS);
   2810         long yID = Y.getID(mRS);
   2811         if (mUseIncSupp) {
   2812             aID = getDummyAlloc(A);
   2813             xID = getDummyAlloc(X);
   2814             yID = getDummyAlloc(Y);
   2815         }
   2816         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
   2817     }
   2818 
   2819     /**
   2820      * ZHPR2 performs the symmetric rank 2 operation
   2821      * A := alpha*x*y**H + alpha*y*x**H + A
   2822      *
   2823      * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
   2824      *
   2825      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
   2826      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
   2827      *       'a' to packed matrix 'b'.
   2828      *           k = 0
   2829      *           for i in range(0, n):
   2830      *              for j in range(i, n):
   2831      *                  b[k++] = a[i, j]
   2832      *
   2833      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
   2834      * @param alpha The scalar alpha.
   2835      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
   2836      * @param incX The increment for the elements of vector x, must be larger than zero.
   2837      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
   2838      * @param incY The increment for the elements of vector y, must be larger than zero.
   2839      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   2840      */
   2841     public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
   2842         // same as SPR2
   2843         int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
   2844 
   2845         boolean mUseIncSupp = isIncSupp();
   2846         long apID = Ap.getID(mRS);
   2847         long xID = X.getID(mRS);
   2848         long yID = Y.getID(mRS);
   2849         if (mUseIncSupp) {
   2850             apID = getDummyAlloc(Ap);
   2851             xID = getDummyAlloc(X);
   2852             yID = getDummyAlloc(Y);
   2853         }
   2854         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp);
   2855     }
   2856 
   2857 
   2858     /**
   2859      * Level 3 BLAS
   2860      */
   2861 
   2862     static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {
   2863         int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1;
   2864         if ((A != null && !A.getType().getElement().isCompatible(e)) ||
   2865             (B != null && !B.getType().getElement().isCompatible(e)) ||
   2866             (C != null && !C.getType().getElement().isCompatible(e))) {
   2867             throw new RSRuntimeException("Called BLAS with wrong Element type");
   2868         }
   2869         if (C == null) {
   2870             //since matrix C is used to store the result, it cannot be null.
   2871             throw new RSRuntimeException("Allocation C cannot be null");
   2872         }
   2873         cM = C.getType().getY();
   2874         cN = C.getType().getX();
   2875 
   2876         if (Side == RIGHT) {
   2877             if ((A == null && B != null) || (A != null && B == null)) {
   2878                 throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa");
   2879             }
   2880             if (B != null) {
   2881                 bM = A.getType().getY();
   2882                 bN = A.getType().getX();
   2883             }
   2884             if (A != null) {
   2885                 aM = B.getType().getY();
   2886                 aN = B.getType().getX();
   2887             }
   2888         } else {
   2889             if (A != null) {
   2890                 if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) {
   2891                     aN = A.getType().getY();
   2892                     aM = A.getType().getX();
   2893                 } else {
   2894                     aM = A.getType().getY();
   2895                     aN = A.getType().getX();
   2896                 }
   2897             }
   2898             if (B != null) {
   2899                 if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) {
   2900                     bN = B.getType().getY();
   2901                     bM = B.getType().getX();
   2902                 } else {
   2903                     bM = B.getType().getY();
   2904                     bN = B.getType().getX();
   2905                 }
   2906             }
   2907         }
   2908         if (A != null && B != null && C != null) {
   2909             if (aN != bM || aM != cM || bN != cN) {
   2910                 throw new RSRuntimeException("Called BLAS with invalid dimensions");
   2911             }
   2912         } else if (A != null && C != null) {
   2913             // A and C only, for SYRK
   2914             if (cM != cN) {
   2915                 throw new RSRuntimeException("Matrix C is not symmetric");
   2916             }
   2917             if (aM != cM) {
   2918                 throw new RSRuntimeException("Called BLAS with invalid dimensions");
   2919             }
   2920         } else if (A != null && B != null) {
   2921             // A and B only
   2922             if (aN != bM) {
   2923                 throw new RSRuntimeException("Called BLAS with invalid dimensions");
   2924             }
   2925         }
   2926 
   2927     }
   2928 
   2929     /**
   2930      * SGEMM performs one of the matrix-matrix operations
   2931      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
   2932      *
   2933      * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
   2934      *
   2935      * @param TransA The type of transpose applied to matrix A.
   2936      * @param TransB The type of transpose applied to matrix B.
   2937      * @param alpha The scalar alpha.
   2938      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
   2939      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
   2940      * @param beta The scalar beta.
   2941      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
   2942      */
   2943     public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A,
   2944                       Allocation B, float beta, Allocation C) {
   2945         validateTranspose(TransA);
   2946         validateTranspose(TransB);
   2947         validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C);
   2948 
   2949         int M = -1, N = -1, K = -1;
   2950         if (TransA != NO_TRANSPOSE) {
   2951             M = A.getType().getX();
   2952             K = A.getType().getY();
   2953         } else {
   2954             M = A.getType().getY();
   2955             K = A.getType().getX();
   2956         }
   2957         if (TransB != NO_TRANSPOSE) {
   2958             N = B.getType().getY();
   2959         } else {
   2960             N = B.getType().getX();
   2961         }
   2962 
   2963         boolean mUseIncSupp = isIncSupp();
   2964         long aID = A.getID(mRS);
   2965         long bID = B.getID(mRS);
   2966         long cID = C.getID(mRS);
   2967         if (mUseIncSupp) {
   2968             aID = getDummyAlloc(A);
   2969             bID = getDummyAlloc(B);
   2970             cID = getDummyAlloc(C);
   2971         }
   2972         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, aID, bID,
   2973                                         beta, cID, 0, 0, 0, 0, mUseIncSupp);
   2974     }
   2975 
   2976     /**
   2977      * DGEMM performs one of the matrix-matrix operations
   2978      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
   2979      *
   2980      * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
   2981      *
   2982      * @param TransA The type of transpose applied to matrix A.
   2983      * @param TransB The type of transpose applied to matrix B.
   2984      * @param alpha The scalar alpha.
   2985      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
   2986      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
   2987      * @param beta The scalar beta.
   2988      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
   2989      */
   2990     public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A,
   2991                       Allocation B, double beta, Allocation C) {
   2992         validateTranspose(TransA);
   2993         validateTranspose(TransB);
   2994         validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C);
   2995         int M = -1, N = -1, K = -1;
   2996         if (TransA != NO_TRANSPOSE) {
   2997             M = A.getType().getX();
   2998             K = A.getType().getY();
   2999         } else {
   3000             M = A.getType().getY();
   3001             K = A.getType().getX();
   3002         }
   3003         if (TransB != NO_TRANSPOSE) {
   3004             N = B.getType().getY();
   3005         } else {
   3006             N = B.getType().getX();
   3007         }
   3008 
   3009         boolean mUseIncSupp = isIncSupp();
   3010         long aID = A.getID(mRS);
   3011         long bID = B.getID(mRS);
   3012         long cID = C.getID(mRS);
   3013         if (mUseIncSupp) {
   3014             aID = getDummyAlloc(A);
   3015             bID = getDummyAlloc(B);
   3016             cID = getDummyAlloc(C);
   3017         }
   3018         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, aID, bID,
   3019                                         beta, cID, 0, 0, 0, 0, mUseIncSupp);
   3020     }
   3021 
   3022     /**
   3023      * CGEMM performs one of the matrix-matrix operations
   3024      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
   3025      *
   3026      * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
   3027      *
   3028      * @param TransA The type of transpose applied to matrix A.
   3029      * @param TransB The type of transpose applied to matrix B.
   3030      * @param alpha The scalar alpha.
   3031      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   3032      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
   3033      * @param beta The scalar beta.
   3034      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
   3035      */
   3036     public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A,
   3037                       Allocation B, Float2 beta, Allocation C) {
   3038         validateTranspose(TransA);
   3039         validateTranspose(TransB);
   3040         validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C);
   3041         int M = -1, N = -1, K = -1;
   3042         if (TransA != NO_TRANSPOSE) {
   3043             M = A.getType().getX();
   3044             K = A.getType().getY();
   3045         } else {
   3046             M = A.getType().getY();
   3047             K = A.getType().getX();
   3048         }
   3049         if (TransB != NO_TRANSPOSE) {
   3050             N = B.getType().getY();
   3051         } else {
   3052             N = B.getType().getX();
   3053         }
   3054 
   3055         boolean mUseIncSupp = isIncSupp();
   3056         long aID = A.getID(mRS);
   3057         long bID = B.getID(mRS);
   3058         long cID = C.getID(mRS);
   3059         if (mUseIncSupp) {
   3060             aID = getDummyAlloc(A);
   3061             bID = getDummyAlloc(B);
   3062             cID = getDummyAlloc(C);
   3063         }
   3064         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha.x, alpha.y, aID, bID,
   3065                                          beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
   3066     }
   3067 
   3068     /**
   3069      * ZGEMM performs one of the matrix-matrix operations
   3070      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
   3071      *
   3072      * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
   3073      *
   3074      * @param TransA The type of transpose applied to matrix A.
   3075      * @param TransB The type of transpose applied to matrix B.
   3076      * @param alpha The scalar alpha.
   3077      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2
   3078      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2
   3079      * @param beta The scalar beta.
   3080      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2
   3081      */
   3082     public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A,
   3083                       Allocation B, Double2 beta, Allocation C) {
   3084         validateTranspose(TransA);
   3085         validateTranspose(TransB);
   3086         validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C);
   3087         int M = -1, N = -1, K = -1;
   3088         if (TransA != NO_TRANSPOSE) {
   3089             M = A.getType().getX();
   3090             K = A.getType().getY();
   3091         } else {
   3092             M = A.getType().getY();
   3093             K = A.getType().getX();
   3094         }
   3095         if (TransB != NO_TRANSPOSE) {
   3096             N = B.getType().getY();
   3097         } else {
   3098             N = B.getType().getX();
   3099         }
   3100 
   3101         boolean mUseIncSupp = isIncSupp();
   3102         long aID = A.getID(mRS);
   3103         long bID = B.getID(mRS);
   3104         long cID = C.getID(mRS);
   3105         if (mUseIncSupp) {
   3106             aID = getDummyAlloc(A);
   3107             bID = getDummyAlloc(B);
   3108             cID = getDummyAlloc(C);
   3109         }
   3110         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha.x, alpha.y, aID, bID,
   3111                                    beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
   3112     }
   3113 
   3114     /**
   3115      * SSYMM performs one of the matrix-matrix operations
   3116      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
   3117      *
   3118      * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
   3119      *
   3120      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
   3121      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   3122      * @param alpha The scalar alpha.
   3123      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
   3124      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
   3125      * @param beta The scalar beta.
   3126      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
   3127      */
   3128     public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A,
   3129                       Allocation B, float beta, Allocation C) {
   3130         validateSide(Side);
   3131         validateUplo(Uplo);
   3132         //For SYMM, Matrix A should be symmetric
   3133         if (A.getType().getX() != A.getType().getY()) {
   3134             throw new RSRuntimeException("Matrix A is not symmetric");
   3135         }
   3136         validateL3(Element.F32(mRS), 0, 0, Side, A, B, C);
   3137 
   3138         boolean mUseIncSupp = isIncSupp();
   3139         long aID = A.getID(mRS);
   3140         long bID = B.getID(mRS);
   3141         long cID = C.getID(mRS);
   3142         if (mUseIncSupp) {
   3143             aID = getDummyAlloc(A);
   3144             bID = getDummyAlloc(B);
   3145             cID = getDummyAlloc(C);
   3146         }
   3147         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID,
   3148                                         beta, cID, 0, 0, 0, 0, mUseIncSupp);
   3149     }
   3150 
   3151     /**
   3152      * DSYMM performs one of the matrix-matrix operations
   3153      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
   3154      *
   3155      * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
   3156      *
   3157      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
   3158      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   3159      * @param alpha The scalar alpha.
   3160      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
   3161      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
   3162      * @param beta The scalar beta.
   3163      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
   3164      */
   3165     public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A,
   3166                       Allocation B, double beta, Allocation C) {
   3167         validateSide(Side);
   3168         validateUplo(Uplo);
   3169         if (A.getType().getX() != A.getType().getY()) {
   3170             throw new RSRuntimeException("Matrix A is not symmetric");
   3171         }
   3172         validateL3(Element.F64(mRS), 0, 0, Side, A, B, C);
   3173 
   3174         boolean mUseIncSupp = isIncSupp();
   3175         long aID = A.getID(mRS);
   3176         long bID = B.getID(mRS);
   3177         long cID = C.getID(mRS);
   3178         if (mUseIncSupp) {
   3179             aID = getDummyAlloc(A);
   3180             bID = getDummyAlloc(B);
   3181             cID = getDummyAlloc(C);
   3182         }
   3183         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID,
   3184                                         beta, cID, 0, 0, 0, 0, mUseIncSupp);
   3185     }
   3186 
   3187     /**
   3188      * CSYMM performs one of the matrix-matrix operations
   3189      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
   3190      *
   3191      * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
   3192      *
   3193      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
   3194      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   3195      * @param alpha The scalar alpha.
   3196      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   3197      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
   3198      * @param beta The scalar beta.
   3199      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
   3200      */
   3201     public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A,
   3202                       Allocation B, Float2 beta, Allocation C) {
   3203         validateSide(Side);
   3204         validateUplo(Uplo);
   3205         if (A.getType().getX() != A.getType().getY()) {
   3206             throw new RSRuntimeException("Matrix A is not symmetric");
   3207         }
   3208         validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C);
   3209 
   3210         boolean mUseIncSupp = isIncSupp();
   3211         long aID = A.getID(mRS);
   3212         long bID = B.getID(mRS);
   3213         long cID = C.getID(mRS);
   3214         if (mUseIncSupp) {
   3215             aID = getDummyAlloc(A);
   3216             bID = getDummyAlloc(B);
   3217             cID = getDummyAlloc(C);
   3218         }
   3219         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID,
   3220                                          beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
   3221     }
   3222 
   3223     /**
   3224      * ZSYMM performs one of the matrix-matrix operations
   3225      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
   3226      *
   3227      * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
   3228      *
   3229      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
   3230      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   3231      * @param alpha The scalar alpha.
   3232      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   3233      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
   3234      * @param beta The scalar beta.
   3235      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
   3236      */
   3237     public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A,
   3238                       Allocation B, Double2 beta, Allocation C) {
   3239         validateSide(Side);
   3240         validateUplo(Uplo);
   3241         if (A.getType().getX() != A.getType().getY()) {
   3242             throw new RSRuntimeException("Matrix A is not symmetric");
   3243         }
   3244         validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C);
   3245 
   3246         boolean mUseIncSupp = isIncSupp();
   3247         long aID = A.getID(mRS);
   3248         long bID = B.getID(mRS);
   3249         long cID = C.getID(mRS);
   3250         if (mUseIncSupp) {
   3251             aID = getDummyAlloc(A);
   3252             bID = getDummyAlloc(B);
   3253             cID = getDummyAlloc(C);
   3254         }
   3255         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID,
   3256                                    beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
   3257     }
   3258 
   3259     /**
   3260      * SSYRK performs one of the symmetric rank k operations
   3261      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
   3262      *
   3263      * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
   3264      *
   3265      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
   3266      * @param Trans The type of transpose applied to the operation.
   3267      * @param alpha The scalar alpha.
   3268      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
   3269      * @param beta The scalar beta.
   3270      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
   3271      */
   3272     public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
   3273         validateTranspose(Trans);
   3274         validateUplo(Uplo);
   3275         validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C);
   3276         int K = -1;
   3277         if (Trans != NO_TRANSPOSE) {
   3278             K = A.getType().getY();
   3279         } else {
   3280             K = A.getType().getX();
   3281         }
   3282 
   3283         boolean mUseIncSupp = isIncSupp();
   3284         long aID = A.getID(mRS);
   3285         long cID = C.getID(mRS);
   3286         if (mUseIncSupp) {
   3287             aID = getDummyAlloc(A);
   3288             cID = getDummyAlloc(C);
   3289         }
   3290         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp);
   3291     }
   3292 
   3293     /**
   3294      * DSYRK performs one of the symmetric rank k operations
   3295      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
   3296      *
   3297      * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
   3298      *
   3299      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
   3300      * @param Trans The type of transpose applied to the operation.
   3301      * @param alpha The scalar alpha.
   3302      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
   3303      * @param beta The scalar beta.
   3304      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
   3305      */
   3306     public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
   3307         validateTranspose(Trans);
   3308         validateUplo(Uplo);
   3309         validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C);
   3310         int K = -1;
   3311         if (Trans != NO_TRANSPOSE) {
   3312             K = A.getType().getY();
   3313         } else {
   3314             K = A.getType().getX();
   3315         }
   3316 
   3317         boolean mUseIncSupp = isIncSupp();
   3318         long aID = A.getID(mRS);
   3319         long cID = C.getID(mRS);
   3320         if (mUseIncSupp) {
   3321             aID = getDummyAlloc(A);
   3322             cID = getDummyAlloc(C);
   3323         }
   3324         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp);
   3325     }
   3326 
   3327     /**
   3328      * CSYRK performs one of the symmetric rank k operations
   3329      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
   3330      *
   3331      * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
   3332      *
   3333      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
   3334      * @param Trans The type of transpose applied to the operation.
   3335      * @param alpha The scalar alpha.
   3336      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   3337      * @param beta The scalar beta.
   3338      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
   3339      */
   3340     public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) {
   3341         validateTranspose(Trans);
   3342         validateUplo(Uplo);
   3343         validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C);
   3344         int K = -1;
   3345         if (Trans != NO_TRANSPOSE) {
   3346             K = A.getType().getY();
   3347         } else {
   3348             K = A.getType().getX();
   3349         }
   3350 
   3351         boolean mUseIncSupp = isIncSupp();
   3352         long aID = A.getID(mRS);
   3353         long cID = C.getID(mRS);
   3354         if (mUseIncSupp) {
   3355             aID = getDummyAlloc(A);
   3356             cID = getDummyAlloc(C);
   3357         }
   3358         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y,
   3359                                          C.getID(mRS), 0, 0, 0, 0, mUseIncSupp);
   3360     }
   3361 
   3362     /**
   3363      * ZSYRK performs one of the symmetric rank k operations
   3364      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
   3365      *
   3366      * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
   3367      *
   3368      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
   3369      * @param Trans The type of transpose applied to the operation.
   3370      * @param alpha The scalar alpha.
   3371      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   3372      * @param beta The scalar beta.
   3373      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
   3374      */
   3375     public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) {
   3376         validateTranspose(Trans);
   3377         validateUplo(Uplo);
   3378         validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C);
   3379         int K = -1;
   3380         if (Trans != NO_TRANSPOSE) {
   3381             K = A.getType().getY();
   3382         } else {
   3383             K = A.getType().getX();
   3384         }
   3385 
   3386         boolean mUseIncSupp = isIncSupp();
   3387         long aID = A.getID(mRS);
   3388         long cID = C.getID(mRS);
   3389         if (mUseIncSupp) {
   3390             aID = getDummyAlloc(A);
   3391             cID = getDummyAlloc(C);
   3392         }
   3393         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y,
   3394                                    C.getID(mRS), 0, 0, 0, 0, mUseIncSupp);
   3395     }
   3396 
   3397     static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
   3398         validateTranspose(Trans);
   3399         if (!A.getType().getElement().isCompatible(e) ||
   3400             !B.getType().getElement().isCompatible(e) ||
   3401             !C.getType().getElement().isCompatible(e)) {
   3402             throw new RSRuntimeException("Called BLAS with wrong Element type");
   3403         }
   3404         int Cdim = -1;
   3405         // A is n x k if no transpose, k x n if transpose
   3406         // C is n x n
   3407         if (Trans == TRANSPOSE) {
   3408             // check columns versus C
   3409             Cdim = A.getType().getX();
   3410         } else {
   3411             // check rows versus C
   3412             Cdim = A.getType().getY();
   3413         }
   3414         if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) {
   3415             throw new RSRuntimeException("Invalid symmetric matrix in SYR2K");
   3416         }
   3417         // A dims == B dims
   3418         if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
   3419             throw new RSRuntimeException("Invalid A and B in SYR2K");
   3420         }
   3421     }
   3422 
   3423     /**
   3424      * SSYR2K performs one of the symmetric rank 2k operations
   3425      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
   3426      *
   3427      * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
   3428      *
   3429      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
   3430      * @param Trans The type of transpose applied to the operation.
   3431      * @param alpha The scalar alpha.
   3432      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
   3433      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
   3434      * @param beta The scalar beta.
   3435      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
   3436      */
   3437     public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) {
   3438         validateUplo(Uplo);
   3439         validateSYR2K(Element.F32(mRS), Trans, A, B, C);
   3440         int K = -1;
   3441         if (Trans != NO_TRANSPOSE) {
   3442             K = A.getType().getY();
   3443         } else {
   3444             K = A.getType().getX();
   3445         }
   3446 
   3447         boolean mUseIncSupp = isIncSupp();
   3448         long aID = A.getID(mRS);
   3449         long bID = B.getID(mRS);
   3450         long cID = C.getID(mRS);
   3451         if (mUseIncSupp) {
   3452             aID = getDummyAlloc(A);
   3453             bID = getDummyAlloc(B);
   3454             cID = getDummyAlloc(C);
   3455         }
   3456         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp);
   3457     }
   3458 
   3459     /**
   3460      * DSYR2K performs one of the symmetric rank 2k operations
   3461      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
   3462      *
   3463      * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
   3464      *
   3465      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
   3466      * @param Trans The type of transpose applied to the operation.
   3467      * @param alpha The scalar alpha.
   3468      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
   3469      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
   3470      * @param beta The scalar beta.
   3471      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
   3472      */
   3473     public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) {
   3474         validateUplo(Uplo);
   3475         validateSYR2K(Element.F64(mRS), Trans, A, B, C);
   3476         int K = -1;
   3477         if (Trans != NO_TRANSPOSE) {
   3478             K = A.getType().getY();
   3479         } else {
   3480             K = A.getType().getX();
   3481         }
   3482 
   3483         boolean mUseIncSupp = isIncSupp();
   3484         long aID = A.getID(mRS);
   3485         long bID = B.getID(mRS);
   3486         long cID = C.getID(mRS);
   3487         if (mUseIncSupp) {
   3488             aID = getDummyAlloc(A);
   3489             bID = getDummyAlloc(B);
   3490             cID = getDummyAlloc(C);
   3491         }
   3492         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp);
   3493     }
   3494 
   3495     /**
   3496      * CSYR2K performs one of the symmetric rank 2k operations
   3497      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
   3498      *
   3499      * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
   3500      *
   3501      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
   3502      * @param Trans The type of transpose applied to the operation.
   3503      * @param alpha The scalar alpha.
   3504      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   3505      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
   3506      * @param beta The scalar beta.
   3507      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
   3508      */
   3509     public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
   3510         validateUplo(Uplo);
   3511         validateSYR2K(Element.F32_2(mRS), Trans, A, B, C);
   3512         int K = -1;
   3513         if (Trans != NO_TRANSPOSE) {
   3514             K = A.getType().getY();
   3515         } else {
   3516             K = A.getType().getX();
   3517         }
   3518 
   3519         boolean mUseIncSupp = isIncSupp();
   3520         long aID = A.getID(mRS);
   3521         long bID = B.getID(mRS);
   3522         long cID = C.getID(mRS);
   3523         if (mUseIncSupp) {
   3524             aID = getDummyAlloc(A);
   3525             bID = getDummyAlloc(B);
   3526             cID = getDummyAlloc(C);
   3527         }
   3528         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
   3529     }
   3530 
   3531     /**
   3532      * ZSYR2K performs one of the symmetric rank 2k operations
   3533      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
   3534      *
   3535      * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
   3536      *
   3537      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
   3538      * @param Trans The type of transpose applied to the operation.
   3539      * @param alpha The scalar alpha.
   3540      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   3541      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
   3542      * @param beta The scalar beta.
   3543      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
   3544      */
   3545     public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
   3546         validateUplo(Uplo);
   3547         validateSYR2K(Element.F64_2(mRS), Trans, A, B, C);
   3548         int K = -1;
   3549         if (Trans != NO_TRANSPOSE) {
   3550             K = A.getType().getY();
   3551         } else {
   3552             K = A.getType().getX();
   3553         }
   3554 
   3555         boolean mUseIncSupp = isIncSupp();
   3556         long aID = A.getID(mRS);
   3557         long bID = B.getID(mRS);
   3558         long cID = C.getID(mRS);
   3559         if (mUseIncSupp) {
   3560             aID = getDummyAlloc(A);
   3561             bID = getDummyAlloc(B);
   3562             cID = getDummyAlloc(C);
   3563         }
   3564         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
   3565     }
   3566 
   3567     static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
   3568         validateSide(Side);
   3569         validateTranspose(TransA);
   3570         int aM = -1, aN = -1, bM = -1, bN = -1;
   3571         if (!A.getType().getElement().isCompatible(e) ||
   3572             !B.getType().getElement().isCompatible(e)) {
   3573             throw new RSRuntimeException("Called BLAS with wrong Element type");
   3574         }
   3575 
   3576         aM = A.getType().getY();
   3577         aN = A.getType().getX();
   3578         if (aM != aN) {
   3579             throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A");
   3580         }
   3581 
   3582         bM = B.getType().getY();
   3583         bN = B.getType().getX();
   3584         if (Side == LEFT) {
   3585             if (aN != bM) {
   3586                 throw new RSRuntimeException("Called TRMM with invalid matrices");
   3587             }
   3588         } else {
   3589             if (bN != aM) {
   3590                 throw new RSRuntimeException("Called TRMM with invalid matrices");
   3591             }
   3592         }
   3593     }
   3594 
   3595     /**
   3596      * STRMM performs one of the matrix-matrix operations
   3597      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
   3598      * op(A) is one of  op(A) = A  or  op(A) = A**T
   3599      *
   3600      * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
   3601      *
   3602      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
   3603      * @param Uplo Specifies whether matrix A is upper or lower triangular.
   3604      * @param TransA The type of transpose applied to matrix A.
   3605      * @param Diag Specifies whether or not A is unit triangular.
   3606      * @param alpha The scalar alpha.
   3607      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
   3608      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
   3609      */
   3610     public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
   3611         validateUplo(Uplo);
   3612         validateDiag(Diag);
   3613         validateTRMM(Element.F32(mRS), Side, TransA, A, B);
   3614 
   3615         boolean mUseIncSupp = isIncSupp();
   3616         long aID = A.getID(mRS);
   3617         long bID = B.getID(mRS);
   3618         if (mUseIncSupp) {
   3619             aID = getDummyAlloc(A);
   3620             bID = getDummyAlloc(B);
   3621         }
   3622         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
   3623                                         alpha, aID, bID, 0.f, 0, 0, 0, 0, 0, mUseIncSupp);
   3624     }
   3625 
   3626     /**
   3627      * DTRMM performs one of the matrix-matrix operations
   3628      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
   3629      * op(A) is one of  op(A) = A  or  op(A) = A**T
   3630      *
   3631      * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
   3632      *
   3633      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
   3634      * @param Uplo Specifies whether matrix A is upper or lower triangular.
   3635      * @param TransA The type of transpose applied to matrix A.
   3636      * @param Diag Specifies whether or not A is unit triangular.
   3637      * @param alpha The scalar alpha.
   3638      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
   3639      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
   3640      */
   3641     public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
   3642         validateUplo(Uplo);
   3643         validateDiag(Diag);
   3644         validateTRMM(Element.F64(mRS), Side, TransA, A, B);
   3645 
   3646         boolean mUseIncSupp = isIncSupp();
   3647         long aID = A.getID(mRS);
   3648         long bID = B.getID(mRS);
   3649         if (mUseIncSupp) {
   3650             aID = getDummyAlloc(A);
   3651             bID = getDummyAlloc(B);
   3652         }
   3653         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
   3654                                         alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
   3655     }
   3656 
   3657     /**
   3658      * CTRMM performs one of the matrix-matrix operations
   3659      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
   3660      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
   3661      *
   3662      * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
   3663      *
   3664      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
   3665      * @param Uplo Specifies whether matrix A is upper or lower triangular.
   3666      * @param TransA The type of transpose applied to matrix A.
   3667      * @param Diag Specifies whether or not A is unit triangular.
   3668      * @param alpha The scalar alpha.
   3669      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   3670      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
   3671      */
   3672     public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
   3673         validateUplo(Uplo);
   3674         validateDiag(Diag);
   3675         validateTRMM(Element.F32_2(mRS), Side, TransA, A, B);
   3676 
   3677         boolean mUseIncSupp = isIncSupp();
   3678         long aID = A.getID(mRS);
   3679         long bID = B.getID(mRS);
   3680         if (mUseIncSupp) {
   3681             aID = getDummyAlloc(A);
   3682             bID = getDummyAlloc(B);
   3683         }
   3684         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
   3685                                          alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
   3686     }
   3687 
   3688     /**
   3689      * ZTRMM performs one of the matrix-matrix operations
   3690      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
   3691      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
   3692      *
   3693      * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
   3694      *
   3695      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
   3696      * @param Uplo Specifies whether matrix A is upper or lower triangular.
   3697      * @param TransA The type of transpose applied to matrix A.
   3698      * @param Diag Specifies whether or not A is unit triangular.
   3699      * @param alpha The scalar alpha.
   3700      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   3701      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
   3702      */
   3703     public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
   3704         validateUplo(Uplo);
   3705         validateDiag(Diag);
   3706         validateTRMM(Element.F64_2(mRS), Side, TransA, A, B);
   3707 
   3708         boolean mUseIncSupp = isIncSupp();
   3709         long aID = A.getID(mRS);
   3710         long bID = B.getID(mRS);
   3711         if (mUseIncSupp) {
   3712             aID = getDummyAlloc(A);
   3713             bID = getDummyAlloc(B);
   3714         }
   3715         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
   3716                                    alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
   3717     }
   3718 
   3719     static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
   3720         int adim = -1, bM = -1, bN = -1;
   3721         validateSide(Side);
   3722         validateTranspose(TransA);
   3723         if (!A.getType().getElement().isCompatible(e) ||
   3724             !B.getType().getElement().isCompatible(e)) {
   3725             throw new RSRuntimeException("Called BLAS with wrong Element type");
   3726         }
   3727         adim = A.getType().getX();
   3728         if (adim != A.getType().getY()) {
   3729             // this may be unnecessary, the restriction could potentially be relaxed
   3730             // A needs to contain at least that symmetric matrix but could theoretically be larger
   3731             // for now we assume adapters are sufficient, will reevaluate in the future
   3732             throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A");
   3733         }
   3734         bM = B.getType().getY();
   3735         bN = B.getType().getX();
   3736         if (Side == LEFT) {
   3737             // A is M*M
   3738             if (adim != bM) {
   3739                 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
   3740             }
   3741         } else {
   3742             // A is N*N
   3743             if (adim != bN) {
   3744                 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
   3745             }
   3746         }
   3747     }
   3748 
   3749     /**
   3750      * STRSM solves one of the matrix equations
   3751      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
   3752      * op(A) is one of  op(A) = A  or  op(A) = A**T
   3753      *
   3754      * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
   3755      *
   3756      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
   3757      * @param Uplo Specifies whether matrix A is upper or lower triangular.
   3758      * @param TransA The type of transpose applied to matrix A.
   3759      * @param Diag Specifies whether or not A is unit triangular.
   3760      * @param alpha The scalar alpha.
   3761      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
   3762      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
   3763      */
   3764     public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
   3765         validateUplo(Uplo);
   3766         validateDiag(Diag);
   3767         validateTRSM(Element.F32(mRS), Side, TransA, A, B);
   3768 
   3769         boolean mUseIncSupp = isIncSupp();
   3770         long aID = A.getID(mRS);
   3771         long bID = B.getID(mRS);
   3772         if (mUseIncSupp) {
   3773             aID = getDummyAlloc(A);
   3774             bID = getDummyAlloc(B);
   3775         }
   3776         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
   3777                                         alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
   3778     }
   3779 
   3780     /**
   3781      * DTRSM solves one of the matrix equations
   3782      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
   3783      * op(A) is one of  op(A) = A  or  op(A) = A**T
   3784      *
   3785      * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
   3786      *
   3787      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
   3788      * @param Uplo Specifies whether matrix A is upper or lower triangular.
   3789      * @param TransA The type of transpose applied to matrix A.
   3790      * @param Diag Specifies whether or not A is unit triangular.
   3791      * @param alpha The scalar alpha.
   3792      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
   3793      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
   3794      */
   3795     public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
   3796         validateUplo(Uplo);
   3797         validateDiag(Diag);
   3798         validateTRSM(Element.F64(mRS), Side, TransA, A, B);
   3799 
   3800         boolean mUseIncSupp = isIncSupp();
   3801         long aID = A.getID(mRS);
   3802         long bID = B.getID(mRS);
   3803         if (mUseIncSupp) {
   3804             aID = getDummyAlloc(A);
   3805             bID = getDummyAlloc(B);
   3806         }
   3807         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
   3808                                         alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
   3809     }
   3810 
   3811     /**
   3812      * CTRSM solves one of the matrix equations
   3813      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
   3814      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
   3815      *
   3816      * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
   3817      *
   3818      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
   3819      * @param Uplo Specifies whether matrix A is upper or lower triangular.
   3820      * @param TransA The type of transpose applied to matrix A.
   3821      * @param Diag Specifies whether or not A is unit triangular.
   3822      * @param alpha The scalar alpha.
   3823      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   3824      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
   3825      */
   3826     public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
   3827         validateUplo(Uplo);
   3828         validateDiag(Diag);
   3829         validateTRSM(Element.F32_2(mRS), Side, TransA, A, B);
   3830 
   3831         boolean mUseIncSupp = isIncSupp();
   3832         long aID = A.getID(mRS);
   3833         long bID = B.getID(mRS);
   3834         if (mUseIncSupp) {
   3835             aID = getDummyAlloc(A);
   3836             bID = getDummyAlloc(B);
   3837         }
   3838         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
   3839                                          alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
   3840     }
   3841 
   3842     /**
   3843      * ZTRSM solves one of the matrix equations
   3844      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
   3845      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
   3846      *
   3847      * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
   3848      *
   3849      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
   3850      * @param Uplo Specifies whether matrix A is upper or lower triangular.
   3851      * @param TransA The type of transpose applied to matrix A.
   3852      * @param Diag Specifies whether or not A is unit triangular.
   3853      * @param alpha The scalar alpha.
   3854      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   3855      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
   3856      */
   3857     public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
   3858         validateUplo(Uplo);
   3859         validateDiag(Diag);
   3860         validateTRSM(Element.F64_2(mRS), Side, TransA, A, B);
   3861 
   3862         boolean mUseIncSupp = isIncSupp();
   3863         long aID = A.getID(mRS);
   3864         long bID = B.getID(mRS);
   3865         if (mUseIncSupp) {
   3866             aID = getDummyAlloc(A);
   3867             bID = getDummyAlloc(B);
   3868         }
   3869         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
   3870                                    alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
   3871     }
   3872 
   3873     static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) {
   3874         validateSide(Side);
   3875 
   3876         if (!A.getType().getElement().isCompatible(e) ||
   3877             !B.getType().getElement().isCompatible(e) ||
   3878             !C.getType().getElement().isCompatible(e)) {
   3879             throw new RSRuntimeException("Called BLAS with wrong Element type");
   3880         }
   3881 
   3882         // A must be square; can potentially be relaxed similar to TRSM
   3883         int adim = A.getType().getX();
   3884         if (adim != A.getType().getY()) {
   3885             throw new RSRuntimeException("Called HEMM with non-square A");
   3886         }
   3887         if ((Side == LEFT && adim != B.getType().getY()) ||
   3888             (Side == RIGHT && adim != B.getType().getX())) {
   3889             throw new RSRuntimeException("Called HEMM with invalid B");
   3890         }
   3891         if (B.getType().getX() != C.getType().getX() ||
   3892             B.getType().getY() != C.getType().getY()) {
   3893             throw new RSRuntimeException("Called HEMM with mismatched B and C");
   3894         }
   3895     }
   3896 
   3897     /**
   3898      * CHEMM performs one of the matrix-matrix operations
   3899      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
   3900      *
   3901      * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
   3902      *
   3903      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
   3904      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   3905      * @param alpha The scalar alpha.
   3906      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   3907      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
   3908      * @param beta The scalar beta.
   3909      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
   3910      */
   3911     public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
   3912         validateUplo(Uplo);
   3913         validateHEMM(Element.F32_2(mRS), Side, A, B, C);
   3914 
   3915         boolean mUseIncSupp = isIncSupp();
   3916         long aID = A.getID(mRS);
   3917         long bID = B.getID(mRS);
   3918         long cID = C.getID(mRS);
   3919         if (mUseIncSupp) {
   3920             aID = getDummyAlloc(A);
   3921             bID = getDummyAlloc(B);
   3922             cID = getDummyAlloc(C);
   3923         }
   3924         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
   3925                                          alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
   3926     }
   3927 
   3928     /**
   3929      * ZHEMM performs one of the matrix-matrix operations
   3930      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
   3931      *
   3932      * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
   3933      *
   3934      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
   3935      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
   3936      * @param alpha The scalar alpha.
   3937      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   3938      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
   3939      * @param beta The scalar beta.
   3940      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
   3941      */
   3942     public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
   3943         validateUplo(Uplo);
   3944         validateHEMM(Element.F64_2(mRS), Side, A, B, C);
   3945 
   3946         boolean mUseIncSupp = isIncSupp();
   3947         long aID = A.getID(mRS);
   3948         long bID = B.getID(mRS);
   3949         long cID = C.getID(mRS);
   3950         if (mUseIncSupp) {
   3951             aID = getDummyAlloc(A);
   3952             bID = getDummyAlloc(B);
   3953             cID = getDummyAlloc(C);
   3954         }
   3955         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
   3956                                    alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
   3957     }
   3958 
   3959     static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) {
   3960         if (!A.getType().getElement().isCompatible(e) ||
   3961             !C.getType().getElement().isCompatible(e)) {
   3962             throw new RSRuntimeException("Called BLAS with wrong Element type");
   3963         }
   3964         validateConjTranspose(Trans);
   3965         int cdim = C.getType().getX();
   3966         if (cdim != C.getType().getY()) {
   3967             throw new RSRuntimeException("Called HERK with non-square C");
   3968         }
   3969         if (Trans == NO_TRANSPOSE) {
   3970             if (cdim != A.getType().getY()) {
   3971                 throw new RSRuntimeException("Called HERK with invalid A");
   3972             }
   3973         } else {
   3974             if (cdim != A.getType().getX()) {
   3975                 throw new RSRuntimeException("Called HERK with invalid A");
   3976             }
   3977         }
   3978     }
   3979 
   3980     /**
   3981      * CHERK performs one of the hermitian rank k operations
   3982      * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
   3983      *
   3984      * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
   3985      *
   3986      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
   3987      * @param Trans The type of transpose applied to the operation.
   3988      * @param alpha The scalar alpha.
   3989      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   3990      * @param beta The scalar beta.
   3991      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
   3992      */
   3993     public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
   3994         validateUplo(Uplo);
   3995         validateHERK(Element.F32_2(mRS), Trans, A, C);
   3996         int k = 0;
   3997         if (Trans == CONJ_TRANSPOSE) {
   3998             k = A.getType().getY();
   3999         } else {
   4000             k = A.getType().getX();
   4001         }
   4002 
   4003         boolean mUseIncSupp = isIncSupp();
   4004         long aID = A.getID(mRS);
   4005         long cID = C.getID(mRS);
   4006         if (mUseIncSupp) {
   4007             aID = getDummyAlloc(A);
   4008             cID = getDummyAlloc(C);
   4009         }
   4010         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
   4011                                          alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
   4012     }
   4013 
   4014     /**
   4015      * ZHERK performs one of the hermitian rank k operations
   4016      * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
   4017      *
   4018      * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
   4019      *
   4020      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
   4021      * @param Trans The type of transpose applied to the operation.
   4022      * @param alpha The scalar alpha.
   4023      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   4024      * @param beta The scalar beta.
   4025      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
   4026      */
   4027     public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
   4028         validateUplo(Uplo);
   4029         validateHERK(Element.F64_2(mRS), Trans, A, C);
   4030         int k = 0;
   4031         if (Trans == CONJ_TRANSPOSE) {
   4032             k = A.getType().getY();
   4033         } else {
   4034             k = A.getType().getX();
   4035         }
   4036 
   4037         boolean mUseIncSupp = isIncSupp();
   4038         long aID = A.getID(mRS);
   4039         long cID = C.getID(mRS);
   4040         if (mUseIncSupp) {
   4041             aID = getDummyAlloc(A);
   4042             cID = getDummyAlloc(C);
   4043         }
   4044         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
   4045                                    alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
   4046     }
   4047 
   4048     static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
   4049         if (!A.getType().getElement().isCompatible(e) ||
   4050             !B.getType().getElement().isCompatible(e) ||
   4051             !C.getType().getElement().isCompatible(e)) {
   4052             throw new RSRuntimeException("Called BLAS with wrong Element type");
   4053         }
   4054         validateConjTranspose(Trans);
   4055         int cdim = C.getType().getX();
   4056         if (cdim != C.getType().getY()) {
   4057             throw new RSRuntimeException("Called HER2K with non-square C");
   4058         }
   4059         if (Trans == NO_TRANSPOSE) {
   4060             if (A.getType().getY() != cdim) {
   4061                 throw new RSRuntimeException("Called HER2K with invalid matrices");
   4062             }
   4063         } else {
   4064             if (A.getType().getX() != cdim) {
   4065                 throw new RSRuntimeException("Called HER2K with invalid matrices");
   4066             }
   4067         }
   4068         if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
   4069             throw new RSRuntimeException("Called HER2K with invalid A and B matrices");
   4070         }
   4071     }
   4072 
   4073     /**
   4074      * CHER2K performs one of the hermitian rank 2k operations
   4075      * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
   4076      *
   4077      * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
   4078      *
   4079      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
   4080      * @param Trans The type of transpose applied to the operation.
   4081      * @param alpha The scalar alpha.
   4082      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
   4083      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
   4084      * @param beta The scalar beta.
   4085      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
   4086      */
   4087     public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) {
   4088         validateUplo(Uplo);
   4089         validateHER2K(Element.F32_2(mRS), Trans, A, B, C);
   4090         int k = 0;
   4091         if (Trans == NO_TRANSPOSE) {
   4092             k = A.getType().getX();
   4093         } else {
   4094             k = A.getType().getY();
   4095         }
   4096 
   4097         boolean mUseIncSupp = isIncSupp();
   4098         long aID = A.getID(mRS);
   4099         long bID = B.getID(mRS);
   4100         long cID = C.getID(mRS);
   4101         if (mUseIncSupp) {
   4102             aID = getDummyAlloc(A);
   4103             bID = getDummyAlloc(B);
   4104             cID = getDummyAlloc(C);
   4105         }
   4106         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
   4107                                          A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
   4108     }
   4109 
   4110     /**
   4111      * ZHER2K performs one of the hermitian rank 2k operations
   4112      * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
   4113      *
   4114      * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
   4115      *
   4116      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
   4117      * @param Trans The type of transpose applied to the operation.
   4118      * @param alpha The scalar alpha.
   4119      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
   4120      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
   4121      * @param beta The scalar beta.
   4122      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
   4123      */
   4124     public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) {
   4125         validateUplo(Uplo);
   4126         validateHER2K(Element.F64_2(mRS), Trans, A, B, C);
   4127         int k = 0;
   4128         if (Trans == NO_TRANSPOSE) {
   4129             k = A.getType().getX();
   4130         } else {
   4131             k = A.getType().getY();
   4132         }
   4133 
   4134         boolean mUseIncSupp = isIncSupp();
   4135         long aID = A.getID(mRS);
   4136         long bID = B.getID(mRS);
   4137         long cID = C.getID(mRS);
   4138         if (mUseIncSupp) {
   4139             aID = getDummyAlloc(A);
   4140             bID = getDummyAlloc(B);
   4141             cID = getDummyAlloc(C);
   4142         }
   4143         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
   4144                                    A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
   4145     }
   4146 
   4147 
   4148     /**
   4149      * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)
   4150      * Calculations are done in 1.10.21 fixed-point format for the final output,
   4151      * just before there's a shift down to drop the fractional parts. The output
   4152      * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
   4153      * gives some headroom to avoid wrapping around on small overflows.
   4154      *
   4155      * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}.
   4156      * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.
   4157      * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}.
   4158      * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.
   4159      * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}.
   4160      * @param c_offset The offset for all values in matrix C.
   4161      * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult.
   4162      **/
   4163     public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) {
   4164         validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C);
   4165 
   4166         if (a_offset < 0 || a_offset > 255) {
   4167             throw new RSRuntimeException("Invalid a_offset passed to BNNM");
   4168         }
   4169         if (b_offset < 0 || b_offset > 255) {
   4170             throw new RSRuntimeException("Invalid b_offset passed to BNNM");
   4171         }
   4172         int M = -1, N = -1, K = -1;
   4173         M = A.getType().getY();
   4174         N = B.getType().getY();
   4175         K = A.getType().getX();
   4176 
   4177         boolean mUseIncSupp = isIncSupp();
   4178         long aID = A.getID(mRS);
   4179         long bID = B.getID(mRS);
   4180         long cID = C.getID(mRS);
   4181         if (mUseIncSupp) {
   4182             aID = getDummyAlloc(A);
   4183             bID = getDummyAlloc(B);
   4184             cID = getDummyAlloc(C);
   4185         }
   4186         mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, aID, a_offset, bID, b_offset, cID, c_offset, c_mult, mUseIncSupp);
   4187 
   4188     }
   4189 
   4190 }
   4191