1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.renderscript; 18 19 import android.annotation.IntDef; 20 import java.lang.annotation.Retention; 21 import java.lang.annotation.RetentionPolicy; 22 23 /** 24 * 25 * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS. 26 * 27 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard 28 * building blocks for performing basic vector and matrix operations. 29 * 30 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/ 31 * 32 **/ 33 public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { 34 private Allocation mLUT; 35 36 private ScriptIntrinsicBLAS(long id, RenderScript rs) { 37 super(id, rs); 38 } 39 40 private static final int RsBlas_sdsdot = 1; 41 private static final int RsBlas_dsdot = 2; 42 private static final int RsBlas_sdot = 3; 43 private static final int RsBlas_ddot = 4; 44 private static final int RsBlas_cdotu_sub = 5; 45 private static final int RsBlas_cdotc_sub = 6; 46 private static final int RsBlas_zdotu_sub = 7; 47 private static final int RsBlas_zdotc_sub = 8; 48 private static final int RsBlas_snrm2 = 9; 49 private static final int RsBlas_sasum = 10; 50 private static final int RsBlas_dnrm2 = 11; 51 private static final int RsBlas_dasum = 12; 52 private static final int RsBlas_scnrm2 = 13; 53 private static final int RsBlas_scasum = 14; 54 private static final int RsBlas_dznrm2 = 15; 55 private static final int RsBlas_dzasum = 16; 56 private static final int RsBlas_isamax = 17; 57 private static final int RsBlas_idamax = 18; 58 private static final int RsBlas_icamax = 19; 59 private static final int RsBlas_izamax = 20; 60 private static final int RsBlas_sswap = 21; 61 private static final int RsBlas_scopy = 22; 62 private static final int RsBlas_saxpy = 23; 63 private static final int RsBlas_dswap = 24; 64 private static final int RsBlas_dcopy = 25; 65 private static final int RsBlas_daxpy = 26; 66 private static final int RsBlas_cswap = 27; 67 private static final int RsBlas_ccopy = 28; 68 private static final int RsBlas_caxpy = 29; 69 private static final int RsBlas_zswap = 30; 70 private static final int RsBlas_zcopy = 31; 71 private static final int RsBlas_zaxpy = 32; 72 private static final int RsBlas_srotg = 33; 73 private static final int RsBlas_srotmg = 34; 74 private static final int RsBlas_srot = 35; 75 private static final int RsBlas_srotm = 36; 76 private static final int RsBlas_drotg = 37; 77 private static final int RsBlas_drotmg = 38; 78 private static final int RsBlas_drot = 39; 79 private static final int RsBlas_drotm = 40; 80 private static final int RsBlas_sscal = 41; 81 private static final int RsBlas_dscal = 42; 82 private static final int RsBlas_cscal = 43; 83 private static final int RsBlas_zscal = 44; 84 private static final int RsBlas_csscal = 45; 85 private static final int RsBlas_zdscal = 46; 86 private static final int RsBlas_sgemv = 47; 87 private static final int RsBlas_sgbmv = 48; 88 private static final int RsBlas_strmv = 49; 89 private static final int RsBlas_stbmv = 50; 90 private static final int RsBlas_stpmv = 51; 91 private static final int RsBlas_strsv = 52; 92 private static final int RsBlas_stbsv = 53; 93 private static final int RsBlas_stpsv = 54; 94 private static final int RsBlas_dgemv = 55; 95 private static final int RsBlas_dgbmv = 56; 96 private static final int RsBlas_dtrmv = 57; 97 private static final int RsBlas_dtbmv = 58; 98 private static final int RsBlas_dtpmv = 59; 99 private static final int RsBlas_dtrsv = 60; 100 private static final int RsBlas_dtbsv = 61; 101 private static final int RsBlas_dtpsv = 62; 102 private static final int RsBlas_cgemv = 63; 103 private static final int RsBlas_cgbmv = 64; 104 private static final int RsBlas_ctrmv = 65; 105 private static final int RsBlas_ctbmv = 66; 106 private static final int RsBlas_ctpmv = 67; 107 private static final int RsBlas_ctrsv = 68; 108 private static final int RsBlas_ctbsv = 69; 109 private static final int RsBlas_ctpsv = 70; 110 private static final int RsBlas_zgemv = 71; 111 private static final int RsBlas_zgbmv = 72; 112 private static final int RsBlas_ztrmv = 73; 113 private static final int RsBlas_ztbmv = 74; 114 private static final int RsBlas_ztpmv = 75; 115 private static final int RsBlas_ztrsv = 76; 116 private static final int RsBlas_ztbsv = 77; 117 private static final int RsBlas_ztpsv = 78; 118 private static final int RsBlas_ssymv = 79; 119 private static final int RsBlas_ssbmv = 80; 120 private static final int RsBlas_sspmv = 81; 121 private static final int RsBlas_sger = 82; 122 private static final int RsBlas_ssyr = 83; 123 private static final int RsBlas_sspr = 84; 124 private static final int RsBlas_ssyr2 = 85; 125 private static final int RsBlas_sspr2 = 86; 126 private static final int RsBlas_dsymv = 87; 127 private static final int RsBlas_dsbmv = 88; 128 private static final int RsBlas_dspmv = 89; 129 private static final int RsBlas_dger = 90; 130 private static final int RsBlas_dsyr = 91; 131 private static final int RsBlas_dspr = 92; 132 private static final int RsBlas_dsyr2 = 93; 133 private static final int RsBlas_dspr2 = 94; 134 private static final int RsBlas_chemv = 95; 135 private static final int RsBlas_chbmv = 96; 136 private static final int RsBlas_chpmv = 97; 137 private static final int RsBlas_cgeru = 98; 138 private static final int RsBlas_cgerc = 99; 139 private static final int RsBlas_cher = 100; 140 private static final int RsBlas_chpr = 101; 141 private static final int RsBlas_cher2 = 102; 142 private static final int RsBlas_chpr2 = 103; 143 private static final int RsBlas_zhemv = 104; 144 private static final int RsBlas_zhbmv = 105; 145 private static final int RsBlas_zhpmv = 106; 146 private static final int RsBlas_zgeru = 107; 147 private static final int RsBlas_zgerc = 108; 148 private static final int RsBlas_zher = 109; 149 private static final int RsBlas_zhpr = 110; 150 private static final int RsBlas_zher2 = 111; 151 private static final int RsBlas_zhpr2 = 112; 152 private static final int RsBlas_sgemm = 113; 153 private static final int RsBlas_ssymm = 114; 154 private static final int RsBlas_ssyrk = 115; 155 private static final int RsBlas_ssyr2k = 116; 156 private static final int RsBlas_strmm = 117; 157 private static final int RsBlas_strsm = 118; 158 private static final int RsBlas_dgemm = 119; 159 private static final int RsBlas_dsymm = 120; 160 private static final int RsBlas_dsyrk = 121; 161 private static final int RsBlas_dsyr2k = 122; 162 private static final int RsBlas_dtrmm = 123; 163 private static final int RsBlas_dtrsm = 124; 164 private static final int RsBlas_cgemm = 125; 165 private static final int RsBlas_csymm = 126; 166 private static final int RsBlas_csyrk = 127; 167 private static final int RsBlas_csyr2k = 128; 168 private static final int RsBlas_ctrmm = 129; 169 private static final int RsBlas_ctrsm = 130; 170 private static final int RsBlas_zgemm = 131; 171 private static final int RsBlas_zsymm = 132; 172 private static final int RsBlas_zsyrk = 133; 173 private static final int RsBlas_zsyr2k = 134; 174 private static final int RsBlas_ztrmm = 135; 175 private static final int RsBlas_ztrsm = 136; 176 private static final int RsBlas_chemm = 137; 177 private static final int RsBlas_cherk = 138; 178 private static final int RsBlas_cher2k = 139; 179 private static final int RsBlas_zhemm = 140; 180 private static final int RsBlas_zherk = 141; 181 private static final int RsBlas_zher2k = 142; 182 183 // BLAS extensions start here 184 private static final int RsBlas_bnnm = 1000; 185 186 /** 187 * Create an intrinsic to access BLAS subroutines. 188 * 189 * @param rs The RenderScript context 190 * @return ScriptIntrinsicBLAS 191 */ 192 public static ScriptIntrinsicBLAS create(RenderScript rs) { 193 long id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs)); 194 return new ScriptIntrinsicBLAS(id, rs); 195 } 196 197 /** 198 * @hide 199 */ 200 @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE}) 201 @Retention(RetentionPolicy.SOURCE) 202 public @interface Transpose {} 203 204 /** 205 * @hide 206 */ 207 @IntDef({UPPER, LOWER}) 208 @Retention(RetentionPolicy.SOURCE) 209 public @interface Uplo {} 210 211 /** 212 * @hide 213 */ 214 @IntDef({NON_UNIT, UNIT}) 215 @Retention(RetentionPolicy.SOURCE) 216 public @interface Diag {} 217 218 /** 219 * @hide 220 */ 221 @IntDef({LEFT, RIGHT}) 222 @Retention(RetentionPolicy.SOURCE) 223 public @interface Side {} 224 225 public static final int NO_TRANSPOSE = 111; 226 public static final int TRANSPOSE = 112; 227 public static final int CONJ_TRANSPOSE = 113; 228 229 public static final int UPPER = 121; 230 public static final int LOWER = 122; 231 232 public static final int NON_UNIT = 131; 233 public static final int UNIT = 132; 234 235 public static final int LEFT = 141; 236 public static final int RIGHT = 142; 237 238 static void validateSide(@Side int Side) { 239 if (Side != LEFT && Side != RIGHT) { 240 throw new RSRuntimeException("Invalid side passed to BLAS"); 241 } 242 } 243 244 static void validateTranspose(@Transpose int Trans) { 245 if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE && 246 Trans != CONJ_TRANSPOSE) { 247 throw new RSRuntimeException("Invalid transpose passed to BLAS"); 248 } 249 } 250 251 static void validateConjTranspose(@Transpose int Trans) { 252 if (Trans != NO_TRANSPOSE && 253 Trans != CONJ_TRANSPOSE) { 254 throw new RSRuntimeException("Invalid transpose passed to BLAS"); 255 } 256 } 257 258 static void validateDiag(@Diag int Diag) { 259 if (Diag != NON_UNIT && Diag != UNIT) { 260 throw new RSRuntimeException("Invalid diag passed to BLAS"); 261 } 262 } 263 264 static void validateUplo(@Uplo int Uplo) { 265 if (Uplo != UPPER && Uplo != LOWER) { 266 throw new RSRuntimeException("Invalid uplo passed to BLAS"); 267 } 268 } 269 270 271 /** 272 * Level 2 BLAS 273 */ 274 275 static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) { 276 validateTranspose(TransA); 277 int M = A.getType().getY(); 278 int N = A.getType().getX(); 279 if (!A.getType().getElement().isCompatible(e) || 280 !X.getType().getElement().isCompatible(e) || 281 !Y.getType().getElement().isCompatible(e)) { 282 throw new RSRuntimeException("Called BLAS with wrong Element type"); 283 } 284 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 285 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 286 } 287 288 if (incX <= 0 || incY <= 0) { 289 throw new RSRuntimeException("Vector increments must be greater than 0"); 290 } 291 int expectedXDim = -1, expectedYDim = -1; 292 if (TransA == NO_TRANSPOSE) { 293 expectedXDim = 1 + (N - 1) * incX; 294 expectedYDim = 1 + (M - 1) * incY; 295 } else { 296 expectedXDim = 1 + (M - 1) * incX; 297 expectedYDim = 1 + (N - 1) * incY; 298 } 299 if (X.getType().getX() != expectedXDim || 300 Y.getType().getX() != expectedYDim) { 301 throw new RSRuntimeException("Incorrect vector dimensions for GEMV"); 302 } 303 } 304 305 /** 306 * SGEMV performs one of the matrix-vector operations 307 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 308 * 309 * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html 310 * 311 * @param TransA The type of transpose applied to matrix A. 312 * @param alpha The scalar alpha. 313 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 314 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 315 * @param incX The increment for the elements of vector x, must be larger than zero. 316 * @param beta The scalar beta. 317 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 318 * @param incY The increment for the elements of vector y, must be larger than zero. 319 */ 320 public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 321 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 322 int M = A.getType().getY(); 323 int N = A.getType().getX(); 324 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 325 } 326 327 /** 328 * DGEMV performs one of the matrix-vector operations 329 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 330 * 331 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html 332 * 333 * @param TransA The type of transpose applied to matrix A. 334 * @param alpha The scalar alpha. 335 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 336 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 337 * @param incX The increment for the elements of vector x, must be larger than zero. 338 * @param beta The scalar beta. 339 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 340 * @param incY The increment for the elements of vector y, must be larger than zero. 341 */ 342 public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 343 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 344 int M = A.getType().getY(); 345 int N = A.getType().getX(); 346 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 347 } 348 349 /** 350 * CGEMV performs one of the matrix-vector operations 351 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 352 * 353 * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html 354 * 355 * @param TransA The type of transpose applied to matrix A. 356 * @param alpha The scalar alpha. 357 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 358 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 359 * @param incX The increment for the elements of vector x, must be larger than zero. 360 * @param beta The scalar beta. 361 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 362 * @param incY The increment for the elements of vector y, must be larger than zero. 363 */ 364 public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 365 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 366 int M = A.getType().getY(); 367 int N = A.getType().getX(); 368 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 369 } 370 371 /** 372 * ZGEMV performs one of the matrix-vector operations 373 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 374 * 375 * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html 376 * 377 * @param TransA The type of transpose applied to matrix A. 378 * @param alpha The scalar alpha. 379 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 380 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 381 * @param incX The increment for the elements of vector x, must be larger than zero. 382 * @param beta The scalar beta. 383 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 384 * @param incY The increment for the elements of vector y, must be larger than zero. 385 */ 386 public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 387 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 388 int M = A.getType().getY(); 389 int N = A.getType().getX(); 390 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 391 } 392 393 /** 394 * SGBMV performs one of the matrix-vector operations 395 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 396 * 397 * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html 398 * 399 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 400 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 401 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 402 * for i in range(0, m): 403 * for j in range(max(0, i-kl), min(i+ku+1, n)): 404 * b[i, j-i+kl] = a[i, j] 405 * 406 * @param TransA The type of transpose applied to matrix A. 407 * @param KL The number of sub-diagonals of the matrix A. 408 * @param KU The number of super-diagonals of the matrix A. 409 * @param alpha The scalar alpha. 410 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}. 411 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 412 * @param incX The increment for the elements of vector x, must be larger than zero. 413 * @param beta The scalar beta. 414 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 415 * @param incY The increment for the elements of vector y, must be larger than zero. 416 */ 417 public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 418 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 419 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 420 if (KL < 0 || KU < 0) { 421 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 422 } 423 int M = A.getType().getY(); 424 int N = A.getType().getX(); 425 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU); 426 } 427 428 /** 429 * DGBMV performs one of the matrix-vector operations 430 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 431 * 432 * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html 433 * 434 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 435 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 436 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 437 * for i in range(0, m): 438 * for j in range(max(0, i-kl), min(i+ku+1, n)): 439 * b[i, j-i+kl] = a[i, j] 440 * 441 * @param TransA The type of transpose applied to matrix A. 442 * @param KL The number of sub-diagonals of the matrix A. 443 * @param KU The number of super-diagonals of the matrix A. 444 * @param alpha The scalar alpha. 445 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}. 446 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 447 * @param incX The increment for the elements of vector x, must be larger than zero. 448 * @param beta The scalar beta. 449 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 450 * @param incY The increment for the elements of vector y, must be larger than zero. 451 */ 452 public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 453 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 454 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 455 if (KL < 0 || KU < 0) { 456 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 457 } 458 int M = A.getType().getY(); 459 int N = A.getType().getX(); 460 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU); 461 } 462 463 /** 464 * CGBMV performs one of the matrix-vector operations 465 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 466 * 467 * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html 468 * 469 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 470 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 471 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 472 * for i in range(0, m): 473 * for j in range(max(0, i-kl), min(i+ku+1, n)): 474 * b[i, j-i+kl] = a[i, j] 475 * 476 * @param TransA The type of transpose applied to matrix A. 477 * @param KL The number of sub-diagonals of the matrix A. 478 * @param KU The number of super-diagonals of the matrix A. 479 * @param alpha The scalar alpha. 480 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}. 481 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 482 * @param incX The increment for the elements of vector x, must be larger than zero. 483 * @param beta The scalar beta. 484 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 485 * @param incY The increment for the elements of vector y, must be larger than zero. 486 */ 487 public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 488 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 489 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 490 if (KL < 0 || KU < 0) { 491 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 492 } 493 int M = A.getType().getY(); 494 int N = A.getType().getX(); 495 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU); 496 } 497 498 /** 499 * ZGBMV performs one of the matrix-vector operations 500 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 501 * 502 * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html 503 * 504 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 505 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 506 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 507 * for i in range(0, m): 508 * for j in range(max(0, i-kl), min(i+ku+1, n)): 509 * b[i, j-i+kl] = a[i, j] 510 * 511 * @param TransA The type of transpose applied to matrix A. 512 * @param KL The number of sub-diagonals of the matrix A. 513 * @param KU The number of super-diagonals of the matrix A. 514 * @param alpha The scalar alpha. 515 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}. 516 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 517 * @param incX The increment for the elements of vector x, must be larger than zero. 518 * @param beta The scalar beta. 519 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 520 * @param incY The increment for the elements of vector y, must be larger than zero. 521 */ 522 public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 523 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 524 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 525 if (KL < 0 || KU < 0) { 526 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 527 } 528 int M = A.getType().getY(); 529 int N = A.getType().getX(); 530 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU); 531 } 532 533 static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 534 validateTranspose(TransA); 535 validateUplo(Uplo); 536 validateDiag(Diag); 537 int N = A.getType().getY(); 538 if (A.getType().getX() != N) { 539 throw new RSRuntimeException("A must be a square matrix for TRMV"); 540 } 541 if (!A.getType().getElement().isCompatible(e) || 542 !X.getType().getElement().isCompatible(e)) { 543 throw new RSRuntimeException("Called BLAS with wrong Element type"); 544 } 545 if (X.getType().getY() > 1) { 546 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 547 } 548 549 if (incX <= 0) { 550 throw new RSRuntimeException("Vector increments must be greater than 0"); 551 } 552 int expectedXDim = 1 + (N - 1) * incX; 553 if (X.getType().getX() != expectedXDim) { 554 throw new RSRuntimeException("Incorrect vector dimensions for TRMV"); 555 } 556 } 557 558 static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 559 validateTranspose(TransA); 560 validateUplo(Uplo); 561 validateDiag(Diag); 562 if (!Ap.getType().getElement().isCompatible(e) || 563 !X.getType().getElement().isCompatible(e)) { 564 throw new RSRuntimeException("Called BLAS with wrong Element type"); 565 } 566 if (X.getType().getY() > 1) { 567 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 568 } 569 570 if (Ap.getType().getY() > 1) { 571 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 572 } 573 574 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 575 //is it really doing anything? 576 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 577 throw new RSRuntimeException("Invalid dimension for Ap"); 578 } 579 if (incX <= 0) { 580 throw new RSRuntimeException("Vector increments must be greater than 0"); 581 } 582 int expectedXDim = 1 + (N - 1) * incX; 583 if (X.getType().getX() != expectedXDim) { 584 throw new RSRuntimeException("Incorrect vector dimensions for TPMV"); 585 } 586 587 return N; 588 } 589 590 /** 591 * STRMV performs one of the matrix-vector operations 592 * x := A*x or x := A**T*x 593 * 594 * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html 595 * 596 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 597 * @param TransA The type of transpose applied to matrix A. 598 * @param Diag Specifies whether or not A is unit triangular. 599 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 600 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 601 * @param incX The increment for the elements of vector x, must be larger than zero. 602 */ 603 public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 604 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 605 int N = A.getType().getY(); 606 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 607 } 608 609 /** 610 * DTRMV performs one of the matrix-vector operations 611 * x := A*x or x := A**T*x 612 * 613 * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html 614 * 615 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 616 * @param TransA The type of transpose applied to matrix A. 617 * @param Diag Specifies whether or not A is unit triangular. 618 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 619 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 620 * @param incX The increment for the elements of vector x, must be larger than zero. 621 */ 622 public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 623 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 624 int N = A.getType().getY(); 625 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 626 } 627 628 /** 629 * CTRMV performs one of the matrix-vector operations 630 * x := A*x or x := A**T*x or x := A**H*x 631 * 632 * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html 633 * 634 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 635 * @param TransA The type of transpose applied to matrix A. 636 * @param Diag Specifies whether or not A is unit triangular. 637 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 638 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 639 * @param incX The increment for the elements of vector x, must be larger than zero. 640 */ 641 public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 642 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 643 int N = A.getType().getY(); 644 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 645 } 646 647 /** 648 * ZTRMV performs one of the matrix-vector operations 649 * x := A*x or x := A**T*x or x := A**H*x 650 * 651 * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html 652 * 653 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 654 * @param TransA The type of transpose applied to matrix A. 655 * @param Diag Specifies whether or not A is unit triangular. 656 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 657 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 658 * @param incX The increment for the elements of vector x, must be larger than zero. 659 */ 660 public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 661 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 662 int N = A.getType().getY(); 663 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 664 } 665 666 /** 667 * STBMV performs one of the matrix-vector operations 668 * x := A*x or x := A**T*x 669 * 670 * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html 671 * 672 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 673 * but only the region N*(K+1) will be referenced. The following subroutine can is an 674 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 675 * for i in range(0, n): 676 * for j in range(i, min(i+k+1, n)): 677 * b[i, j-i] = a[i, j] 678 * 679 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 680 * @param TransA The type of transpose applied to matrix A. 681 * @param Diag Specifies whether or not A is unit triangular. 682 * @param K The number of off-diagonals of the matrix A 683 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 684 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 685 * @param incX The increment for the elements of vector x, must be larger than zero. 686 */ 687 public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 688 // TBMV has the same requirements as TRMV + K >= 0 689 if (K < 0) { 690 throw new RSRuntimeException("K must be greater than or equal to 0"); 691 } 692 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 693 int N = A.getType().getY(); 694 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 695 } 696 697 /** 698 * DTBMV performs one of the matrix-vector operations 699 * x := A*x or x := A**T*x 700 * 701 * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html 702 * 703 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 704 * but only the region N*(K+1) will be referenced. The following subroutine can is an 705 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 706 * for i in range(0, n): 707 * for j in range(i, min(i+k+1, n)): 708 * b[i, j-i] = a[i, j] 709 * 710 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 711 * @param TransA The type of transpose applied to matrix A. 712 * @param Diag Specifies whether or not A is unit triangular. 713 * @param K The number of off-diagonals of the matrix A 714 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 715 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 716 * @param incX The increment for the elements of vector x, must be larger than zero. 717 */ 718 public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 719 // TBMV has the same requirements as TRMV + K >= 0 720 if (K < 0) { 721 throw new RSRuntimeException("K must be greater than or equal to 0"); 722 } 723 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 724 int N = A.getType().getY(); 725 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 726 } 727 728 /** 729 * CTBMV performs one of the matrix-vector operations 730 * x := A*x or x := A**T*x or x := A**H*x 731 * 732 * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html 733 * 734 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 735 * but only the region N*(K+1) will be referenced. The following subroutine can is an 736 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 737 * for i in range(0, n): 738 * for j in range(i, min(i+k+1, n)): 739 * b[i, j-i] = a[i, j] 740 * 741 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 742 * @param TransA The type of transpose applied to matrix A. 743 * @param Diag Specifies whether or not A is unit triangular. 744 * @param K The number of off-diagonals of the matrix A 745 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 746 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 747 * @param incX The increment for the elements of vector x, must be larger than zero. 748 */ 749 public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 750 // TBMV has the same requirements as TRMV + K >= 0 751 if (K < 0) { 752 throw new RSRuntimeException("K must be greater than or equal to 0"); 753 } 754 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 755 int N = A.getType().getY(); 756 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 757 } 758 759 /** 760 * ZTBMV performs one of the matrix-vector operations 761 * x := A*x or x := A**T*x or x := A**H*x 762 * 763 * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html 764 * 765 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 766 * but only the region N*(K+1) will be referenced. The following subroutine can is an 767 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 768 * for i in range(0, n): 769 * for j in range(i, min(i+k+1, n)): 770 * b[i, j-i] = a[i, j] 771 * 772 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 773 * @param TransA The type of transpose applied to matrix A. 774 * @param Diag Specifies whether or not A is unit triangular. 775 * @param K The number of off-diagonals of the matrix A 776 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 777 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 778 * @param incX The increment for the elements of vector x, must be larger than zero. 779 */ 780 public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 781 // TBMV has the same requirements as TRMV + K >= 0 782 if (K < 0) { 783 throw new RSRuntimeException("K must be greater than or equal to 0"); 784 } 785 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 786 int N = A.getType().getY(); 787 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 788 } 789 790 /** 791 * STPMV performs one of the matrix-vector operations 792 * x := A*x or x := A**T*x 793 * 794 * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html 795 * 796 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 797 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 798 * 'a' to packed matrix 'b'. 799 * k = 0 800 * for i in range(0, n): 801 * for j in range(i, n): 802 * b[k++] = a[i, j] 803 * 804 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 805 * @param TransA The type of transpose applied to matrix A. 806 * @param Diag Specifies whether or not A is unit triangular. 807 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 808 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 809 * @param incX The increment for the elements of vector x, must be larger than zero. 810 */ 811 public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 812 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 813 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 814 } 815 816 /** 817 * DTPMV performs one of the matrix-vector operations 818 * x := A*x or x := A**T*x 819 * 820 * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html 821 * 822 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 823 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 824 * 'a' to packed matrix 'b'. 825 * k = 0 826 * for i in range(0, n): 827 * for j in range(i, n): 828 * b[k++] = a[i, j] 829 * 830 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 831 * @param TransA The type of transpose applied to matrix A. 832 * @param Diag Specifies whether or not A is unit triangular. 833 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 834 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 835 * @param incX The increment for the elements of vector x, must be larger than zero. 836 */ 837 public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 838 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 839 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 840 } 841 842 /** 843 * CTPMV performs one of the matrix-vector operations 844 * x := A*x or x := A**T*x or x := A**H*x 845 * 846 * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html 847 * 848 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 849 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 850 * 'a' to packed matrix 'b'. 851 * k = 0 852 * for i in range(0, n): 853 * for j in range(i, n): 854 * b[k++] = a[i, j] 855 * 856 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 857 * @param TransA The type of transpose applied to matrix A. 858 * @param Diag Specifies whether or not A is unit triangular. 859 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 860 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 861 * @param incX The increment for the elements of vector x, must be larger than zero. 862 */ 863 public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 864 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 865 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 866 } 867 868 /** 869 * ZTPMV performs one of the matrix-vector operations 870 * x := A*x or x := A**T*x or x := A**H*x 871 * 872 * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html 873 * 874 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 875 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 876 * 'a' to packed matrix 'b'. 877 * k = 0 878 * for i in range(0, n): 879 * for j in range(i, n): 880 * b[k++] = a[i, j] 881 * 882 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 883 * @param TransA The type of transpose applied to matrix A. 884 * @param Diag Specifies whether or not A is unit triangular. 885 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 886 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 887 * @param incX The increment for the elements of vector x, must be larger than zero. 888 */ 889 public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 890 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 891 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 892 } 893 894 /** 895 * STRSV solves one of the systems of equations 896 * A*x = b or A**T*x = b 897 * 898 * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html 899 * 900 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 901 * @param TransA The type of transpose applied to matrix A. 902 * @param Diag Specifies whether or not A is unit triangular. 903 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 904 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 905 * @param incX The increment for the elements of vector x, must be larger than zero. 906 */ 907 public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 908 // TRSV is the same as TRMV 909 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 910 int N = A.getType().getY(); 911 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 912 913 } 914 915 /** 916 * DTRSV solves one of the systems of equations 917 * A*x = b or A**T*x = b 918 * 919 * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html 920 * 921 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 922 * @param TransA The type of transpose applied to matrix A. 923 * @param Diag Specifies whether or not A is unit triangular. 924 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 925 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 926 * @param incX The increment for the elements of vector x, must be larger than zero. 927 */ 928 public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 929 // TRSV is the same as TRMV 930 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 931 int N = A.getType().getY(); 932 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 933 934 } 935 936 /** 937 * CTRSV solves one of the systems of equations 938 * A*x = b or A**T*x = b or A**H*x = b 939 * 940 * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html 941 * 942 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 943 * @param TransA The type of transpose applied to matrix A. 944 * @param Diag Specifies whether or not A is unit triangular. 945 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 946 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 947 * @param incX The increment for the elements of vector x, must be larger than zero. 948 */ 949 public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 950 // TRSV is the same as TRMV 951 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 952 int N = A.getType().getY(); 953 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 954 955 } 956 957 /** 958 * ZTRSV solves one of the systems of equations 959 * A*x = b or A**T*x = b or A**H*x = b 960 * 961 * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html 962 * 963 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 964 * @param TransA The type of transpose applied to matrix A. 965 * @param Diag Specifies whether or not A is unit triangular. 966 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 967 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 968 * @param incX The increment for the elements of vector x, must be larger than zero. 969 */ 970 public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 971 // TRSV is the same as TRMV 972 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 973 int N = A.getType().getY(); 974 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 975 976 } 977 978 /** 979 * STBSV solves one of the systems of equations 980 * A*x = b or A**T*x = b 981 * 982 * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html 983 * 984 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 985 * but only the region N*(K+1) will be referenced. The following subroutine can is an 986 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 987 * for i in range(0, n): 988 * for j in range(i, min(i+k+1, n)): 989 * b[i, j-i] = a[i, j] 990 * 991 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 992 * @param TransA The type of transpose applied to matrix A. 993 * @param Diag Specifies whether or not A is unit triangular. 994 * @param K The number of off-diagonals of the matrix A 995 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 996 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 997 * @param incX The increment for the elements of vector x, must be larger than zero. 998 */ 999 public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1000 // TBSV is the same as TRMV + K >= 0 1001 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 1002 int N = A.getType().getY(); 1003 if (K < 0) { 1004 throw new RSRuntimeException("Number of diagonals must be positive"); 1005 } 1006 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 1007 } 1008 1009 /** 1010 * DTBSV solves one of the systems of equations 1011 * A*x = b or A**T*x = b 1012 * 1013 * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html 1014 * 1015 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1016 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1017 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1018 * for i in range(0, n): 1019 * for j in range(i, min(i+k+1, n)): 1020 * b[i, j-i] = a[i, j] 1021 * 1022 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1023 * @param TransA The type of transpose applied to matrix A. 1024 * @param Diag Specifies whether or not A is unit triangular. 1025 * @param K The number of off-diagonals of the matrix A 1026 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1027 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1028 * @param incX The increment for the elements of vector x, must be larger than zero. 1029 */ 1030 public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1031 // TBSV is the same as TRMV + K >= 0 1032 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 1033 int N = A.getType().getY(); 1034 if (K < 0) { 1035 throw new RSRuntimeException("Number of diagonals must be positive"); 1036 } 1037 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 1038 } 1039 1040 /** 1041 * CTBSV solves one of the systems of equations 1042 * A*x = b or A**T*x = b or A**H*x = b 1043 * 1044 * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html 1045 * 1046 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1047 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1048 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1049 * for i in range(0, n): 1050 * for j in range(i, min(i+k+1, n)): 1051 * b[i, j-i] = a[i, j] 1052 * 1053 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1054 * @param TransA The type of transpose applied to matrix A. 1055 * @param Diag Specifies whether or not A is unit triangular. 1056 * @param K The number of off-diagonals of the matrix A 1057 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1058 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1059 * @param incX The increment for the elements of vector x, must be larger than zero. 1060 */ 1061 public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1062 // TBSV is the same as TRMV + K >= 0 1063 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 1064 int N = A.getType().getY(); 1065 if (K < 0) { 1066 throw new RSRuntimeException("Number of diagonals must be positive"); 1067 } 1068 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 1069 } 1070 1071 /** 1072 * ZTBSV solves one of the systems of equations 1073 * A*x = b or A**T*x = b or A**H*x = b 1074 * 1075 * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html 1076 * 1077 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1078 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1079 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1080 * for i in range(0, n): 1081 * for j in range(i, min(i+k+1, n)): 1082 * b[i, j-i] = a[i, j] 1083 * 1084 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1085 * @param TransA The type of transpose applied to matrix A. 1086 * @param Diag Specifies whether or not A is unit triangular. 1087 * @param K The number of off-diagonals of the matrix A 1088 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 1089 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1090 * @param incX The increment for the elements of vector x, must be larger than zero. 1091 */ 1092 public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1093 // TBSV is the same as TRMV + K >= 0 1094 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 1095 int N = A.getType().getY(); 1096 if (K < 0) { 1097 throw new RSRuntimeException("Number of diagonals must be positive"); 1098 } 1099 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 1100 } 1101 1102 /** 1103 * STPSV solves one of the systems of equations 1104 * A*x = b or A**T*x = b 1105 * 1106 * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html 1107 * 1108 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1109 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1110 * 'a' to packed matrix 'b'. 1111 * k = 0 1112 * for i in range(0, n): 1113 * for j in range(i, n): 1114 * b[k++] = a[i, j] 1115 * 1116 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1117 * @param TransA The type of transpose applied to matrix A. 1118 * @param Diag Specifies whether or not A is unit triangular. 1119 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 1120 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1121 * @param incX The increment for the elements of vector x, must be larger than zero. 1122 */ 1123 public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1124 // TPSV is same as TPMV 1125 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 1126 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 1127 } 1128 1129 /** 1130 * DTPSV solves one of the systems of equations 1131 * A*x = b or A**T*x = b 1132 * 1133 * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html 1134 * 1135 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1136 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1137 * 'a' to packed matrix 'b'. 1138 * k = 0 1139 * for i in range(0, n): 1140 * for j in range(i, n): 1141 * b[k++] = a[i, j] 1142 * 1143 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1144 * @param TransA The type of transpose applied to matrix A. 1145 * @param Diag Specifies whether or not A is unit triangular. 1146 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 1147 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1148 * @param incX The increment for the elements of vector x, must be larger than zero. 1149 */ 1150 public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1151 // TPSV is same as TPMV 1152 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 1153 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 1154 } 1155 1156 /** 1157 * CTPSV solves one of the systems of equations 1158 * A*x = b or A**T*x = b or A**H*x = b 1159 * 1160 * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html 1161 * 1162 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1163 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1164 * 'a' to packed matrix 'b'. 1165 * k = 0 1166 * for i in range(0, n): 1167 * for j in range(i, n): 1168 * b[k++] = a[i, j] 1169 * 1170 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1171 * @param TransA The type of transpose applied to matrix A. 1172 * @param Diag Specifies whether or not A is unit triangular. 1173 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 1174 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1175 * @param incX The increment for the elements of vector x, must be larger than zero. 1176 */ 1177 public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1178 // TPSV is same as TPMV 1179 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1180 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 1181 } 1182 1183 /** 1184 * ZTPSV solves one of the systems of equations 1185 * A*x = b or A**T*x = b or A**H*x = b 1186 * 1187 * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html 1188 * 1189 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1190 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1191 * 'a' to packed matrix 'b'. 1192 * k = 0 1193 * for i in range(0, n): 1194 * for j in range(i, n): 1195 * b[k++] = a[i, j] 1196 * 1197 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1198 * @param TransA The type of transpose applied to matrix A. 1199 * @param Diag Specifies whether or not A is unit triangular. 1200 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 1201 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1202 * @param incX The increment for the elements of vector x, must be larger than zero. 1203 */ 1204 public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1205 // TPSV is same as TPMV 1206 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1207 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 1208 } 1209 1210 /** 1211 * Level 2, S and D only 1212 */ 1213 static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) { 1214 validateUplo(Uplo); 1215 int N = A.getType().getY(); 1216 if (A.getType().getX() != N) { 1217 throw new RSRuntimeException("A must be a square matrix for SYMV"); 1218 } 1219 if (!A.getType().getElement().isCompatible(e) || 1220 !X.getType().getElement().isCompatible(e) || 1221 !Y.getType().getElement().isCompatible(e) ) { 1222 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1223 } 1224 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1225 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1226 } 1227 1228 if (incX <= 0 || incY <= 0) { 1229 throw new RSRuntimeException("Vector increments must be greater than 0"); 1230 } 1231 int expectedXDim = 1 + (N - 1) * incX; 1232 if (X.getType().getX() != expectedXDim) { 1233 throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1234 } 1235 int expectedYDim = 1 + (N - 1) * incY; 1236 if (Y.getType().getX() != expectedYDim) { 1237 throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1238 } 1239 return N; 1240 } 1241 static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) { 1242 validateUplo(Uplo); 1243 if (!Ap.getType().getElement().isCompatible(e) || 1244 !X.getType().getElement().isCompatible(e) || 1245 !Y.getType().getElement().isCompatible(e)) { 1246 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1247 } 1248 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1249 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1250 } 1251 1252 if (Ap.getType().getY() > 1) { 1253 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1254 } 1255 1256 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1257 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1258 throw new RSRuntimeException("Invalid dimension for Ap"); 1259 } 1260 if (incX <= 0 || incY <= 0) { 1261 throw new RSRuntimeException("Vector increments must be greater than 0"); 1262 } 1263 int expectedXDim = 1 + (N - 1) * incX; 1264 if (X.getType().getX() != expectedXDim) { 1265 throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1266 } 1267 int expectedYDim = 1 + (N - 1) * incY; 1268 if (Y.getType().getX() != expectedYDim) { 1269 throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1270 } 1271 1272 return N; 1273 } 1274 static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1275 if (!A.getType().getElement().isCompatible(e) || 1276 !X.getType().getElement().isCompatible(e) || 1277 !Y.getType().getElement().isCompatible(e) ) { 1278 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1279 } 1280 1281 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1282 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1283 } 1284 1285 int M = A.getType().getY(); 1286 int N = A.getType().getX(); 1287 1288 if (N < 1 || M < 1) { 1289 throw new RSRuntimeException("M and N must be 1 or greater for GER"); 1290 } 1291 if (incX <= 0 || incY <= 0) { 1292 throw new RSRuntimeException("Vector increments must be greater than 0"); 1293 } 1294 int expectedXDim = 1 + (M - 1) * incX; 1295 if (X.getType().getX() != expectedXDim) { 1296 throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1297 } 1298 int expectedYDim = 1 + (N - 1) * incY; 1299 if (Y.getType().getX() != expectedYDim) { 1300 throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1301 } 1302 1303 1304 } 1305 static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) { 1306 validateUplo(Uplo); 1307 if (!A.getType().getElement().isCompatible(e) || 1308 !X.getType().getElement().isCompatible(e)) { 1309 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1310 } 1311 1312 int N = A.getType().getX(); 1313 1314 if (X.getType().getY() > 1) { 1315 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1316 } 1317 if (N != A.getType().getY()) { 1318 throw new RSRuntimeException("A must be a symmetric matrix"); 1319 } 1320 if (incX <= 0) { 1321 throw new RSRuntimeException("Vector increments must be greater than 0"); 1322 } 1323 int expectedXDim = 1 + (N - 1) * incX; 1324 if (X.getType().getX() != expectedXDim) { 1325 throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1326 } 1327 return N; 1328 } 1329 static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) { 1330 validateUplo(Uplo); 1331 if (!Ap.getType().getElement().isCompatible(e) || 1332 !X.getType().getElement().isCompatible(e)) { 1333 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1334 } 1335 if (X.getType().getY() > 1) { 1336 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1337 } 1338 1339 if (Ap.getType().getY() > 1) { 1340 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1341 } 1342 1343 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1344 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1345 throw new RSRuntimeException("Invalid dimension for Ap"); 1346 } 1347 if (incX <= 0) { 1348 throw new RSRuntimeException("Vector increments must be greater than 0"); 1349 } 1350 int expectedXDim = 1 + (N - 1) * incX; 1351 if (X.getType().getX() != expectedXDim) { 1352 throw new RSRuntimeException("Incorrect vector dimensions for SPR"); 1353 } 1354 1355 return N; 1356 } 1357 1358 static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1359 validateUplo(Uplo); 1360 if (!A.getType().getElement().isCompatible(e) || 1361 !X.getType().getElement().isCompatible(e) || 1362 !Y.getType().getElement().isCompatible(e)) { 1363 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1364 } 1365 1366 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1367 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1368 } 1369 1370 int N = A.getType().getX(); 1371 1372 if (N != A.getType().getY()) { 1373 throw new RSRuntimeException("A must be a symmetric matrix"); 1374 } 1375 if (incX <= 0 || incY <= 0) { 1376 throw new RSRuntimeException("Vector increments must be greater than 0"); 1377 } 1378 int expectedXDim = 1 + (N - 1) * incX; 1379 int expectedYDim = 1 + (N - 1) * incY; 1380 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1381 throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1382 } 1383 return N; 1384 1385 } 1386 static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1387 validateUplo(Uplo); 1388 if (!Ap.getType().getElement().isCompatible(e) || 1389 !X.getType().getElement().isCompatible(e) || 1390 !Y.getType().getElement().isCompatible(e)) { 1391 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1392 } 1393 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1394 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1395 } 1396 1397 if (Ap.getType().getY() > 1) { 1398 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1399 } 1400 1401 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1402 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1403 throw new RSRuntimeException("Invalid dimension for Ap"); 1404 } 1405 if (incX <= 0 || incY <= 0) { 1406 throw new RSRuntimeException("Vector increments must be greater than 0"); 1407 } 1408 int expectedXDim = 1 + (N - 1) * incX; 1409 int expectedYDim = 1 + (N - 1) * incY; 1410 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1411 throw new RSRuntimeException("Incorrect vector dimensions for SPR2"); 1412 } 1413 1414 return N; 1415 } 1416 1417 /** 1418 * SSYMV performs the matrix-vector operation 1419 * y := alpha*A*x + beta*y 1420 * 1421 * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html 1422 * 1423 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1424 * @param alpha The scalar alpha. 1425 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1426 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1427 * @param incX The increment for the elements of vector x, must be larger than zero. 1428 * @param beta The scalar beta. 1429 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1430 * @param incY The increment for the elements of vector y, must be larger than zero. 1431 */ 1432 public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1433 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1434 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1435 } 1436 1437 /** 1438 * SSBMV performs the matrix-vector operation 1439 * y := alpha*A*x + beta*y 1440 * 1441 * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html 1442 * 1443 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1444 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1445 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1446 * for i in range(0, n): 1447 * for j in range(i, min(i+k+1, n)): 1448 * b[i, j-i] = a[i, j] 1449 * 1450 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1451 * @param K The number of off-diagonals of the matrix A 1452 * @param alpha The scalar alpha. 1453 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1454 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1455 * @param incX The increment for the elements of vector x, must be larger than zero. 1456 * @param beta The scalar beta. 1457 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1458 * @param incY The increment for the elements of vector y, must be larger than zero. 1459 */ 1460 public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1461 // SBMV is the same as SYMV + K >= 0 1462 if (K < 0) { 1463 throw new RSRuntimeException("K must be greater than or equal to 0"); 1464 } 1465 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1466 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1467 } 1468 1469 /** 1470 * SSPMV performs the matrix-vector operation 1471 * y := alpha*A*x + beta*y 1472 * 1473 * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html 1474 * 1475 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1476 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1477 * 'a' to packed matrix 'b'. 1478 * k = 0 1479 * for i in range(0, n): 1480 * for j in range(i, n): 1481 * b[k++] = a[i, j] 1482 * 1483 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 1484 * @param alpha The scalar alpha. 1485 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1486 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1487 * @param incX The increment for the elements of vector x, must be larger than zero. 1488 * @param beta The scalar beta. 1489 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1490 * @param incY The increment for the elements of vector y, must be larger than zero. 1491 */ 1492 public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) { 1493 int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY); 1494 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1495 } 1496 1497 /** 1498 * SGER performs the rank 1 operation 1499 * A := alpha*x*y**T + A 1500 * 1501 * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html 1502 * 1503 * @param alpha The scalar alpha. 1504 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1505 * @param incX The increment for the elements of vector x, must be larger than zero. 1506 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1507 * @param incY The increment for the elements of vector y, must be larger than zero. 1508 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1509 */ 1510 public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1511 int M = A.getType().getY(); 1512 int N = A.getType().getX(); 1513 validateGER(Element.F32(mRS), X, incX, Y, incY, A); 1514 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0); 1515 } 1516 1517 /** 1518 * SSYR performs the rank 1 operation 1519 * A := alpha*x*x**T + A 1520 * 1521 * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html 1522 * 1523 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1524 * @param alpha The scalar alpha. 1525 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1526 * @param incX The increment for the elements of vector x, must be larger than zero. 1527 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1528 */ 1529 public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 1530 int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A); 1531 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0); 1532 } 1533 1534 /** 1535 * SSPR performs the rank 1 operation 1536 * A := alpha*x*x**T + A 1537 * 1538 * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html 1539 * 1540 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1541 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1542 * 'a' to packed matrix 'b'. 1543 * k = 0 1544 * for i in range(0, n): 1545 * for j in range(i, n): 1546 * b[k++] = a[i, j] 1547 * 1548 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1549 * @param alpha The scalar alpha. 1550 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1551 * @param incX The increment for the elements of vector x, must be larger than zero. 1552 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1553 */ 1554 public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 1555 int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap); 1556 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0); 1557 } 1558 1559 /** 1560 * SSYR2 performs the symmetric rank 2 operation 1561 * A := alpha*x*y**T + alpha*y*x**T + A 1562 * 1563 * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html 1564 * 1565 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1566 * @param alpha The scalar alpha. 1567 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1568 * @param incX The increment for the elements of vector x, must be larger than zero. 1569 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1570 * @param incY The increment for the elements of vector y, must be larger than zero. 1571 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1572 */ 1573 public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1574 int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A); 1575 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0); 1576 } 1577 1578 /** 1579 * SSPR2 performs the symmetric rank 2 operation 1580 * A := alpha*x*y**T + alpha*y*x**T + A 1581 * 1582 * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html 1583 * 1584 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1585 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1586 * 'a' to packed matrix 'b'. 1587 * k = 0 1588 * for i in range(0, n): 1589 * for j in range(i, n): 1590 * b[k++] = a[i, j] 1591 * 1592 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1593 * @param alpha The scalar alpha. 1594 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1595 * @param incX The increment for the elements of vector x, must be larger than zero. 1596 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1597 * @param incY The increment for the elements of vector y, must be larger than zero. 1598 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1599 */ 1600 public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1601 int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap); 1602 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0); 1603 } 1604 1605 /** 1606 * DSYMV performs the matrix-vector operation 1607 * y := alpha*A*x + beta*y 1608 * 1609 * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html 1610 * 1611 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1612 * @param alpha The scalar alpha. 1613 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1614 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1615 * @param incX The increment for the elements of vector x, must be larger than zero. 1616 * @param beta The scalar beta. 1617 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1618 * @param incY The increment for the elements of vector y, must be larger than zero. 1619 */ 1620 public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 1621 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 1622 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1623 } 1624 1625 /** 1626 * DSBMV performs the matrix-vector operation 1627 * y := alpha*A*x + beta*y 1628 * 1629 * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html 1630 * 1631 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1632 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1633 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1634 * for i in range(0, n): 1635 * for j in range(i, min(i+k+1, n)): 1636 * b[i, j-i] = a[i, j] 1637 * 1638 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1639 * @param K The number of off-diagonals of the matrix A 1640 * @param alpha The scalar alpha. 1641 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1642 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1643 * @param incX The increment for the elements of vector x, must be larger than zero. 1644 * @param beta The scalar beta. 1645 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1646 * @param incY The increment for the elements of vector y, must be larger than zero. 1647 */ 1648 public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 1649 // SBMV is the same as SYMV + K >= 0 1650 if (K < 0) { 1651 throw new RSRuntimeException("K must be greater than or equal to 0"); 1652 } 1653 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 1654 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1655 } 1656 1657 /** 1658 * DSPMV performs the matrix-vector operation 1659 * y := alpha*A*x + beta*y 1660 * 1661 * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html 1662 * 1663 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1664 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1665 * 'a' to packed matrix 'b'. 1666 * k = 0 1667 * for i in range(0, n): 1668 * for j in range(i, n): 1669 * b[k++] = a[i, j] 1670 * 1671 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 1672 * @param alpha The scalar alpha. 1673 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 1674 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1675 * @param incX The increment for the elements of vector x, must be larger than zero. 1676 * @param beta The scalar beta. 1677 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1678 * @param incY The increment for the elements of vector y, must be larger than zero. 1679 */ 1680 public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) { 1681 int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY); 1682 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1683 } 1684 1685 /** 1686 * DGER performs the rank 1 operation 1687 * A := alpha*x*y**T + A 1688 * 1689 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html 1690 * 1691 * @param alpha The scalar alpha. 1692 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1693 * @param incX The increment for the elements of vector x, must be larger than zero. 1694 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1695 * @param incY The increment for the elements of vector y, must be larger than zero. 1696 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1697 */ 1698 public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1699 int M = A.getType().getY(); 1700 int N = A.getType().getX(); 1701 validateGER(Element.F64(mRS), X, incX, Y, incY, A); 1702 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0); 1703 } 1704 1705 /** 1706 * DSYR performs the rank 1 operation 1707 * A := alpha*x*x**T + A 1708 * 1709 * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html 1710 * 1711 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1712 * @param alpha The scalar alpha. 1713 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1714 * @param incX The increment for the elements of vector x, must be larger than zero. 1715 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1716 */ 1717 public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 1718 int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A); 1719 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0); 1720 } 1721 1722 /** 1723 * DSPR performs the rank 1 operation 1724 * A := alpha*x*x**T + A 1725 * 1726 * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html 1727 * 1728 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1729 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1730 * 'a' to packed matrix 'b'. 1731 * k = 0 1732 * for i in range(0, n): 1733 * for j in range(i, n): 1734 * b[k++] = a[i, j] 1735 * 1736 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1737 * @param alpha The scalar alpha. 1738 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1739 * @param incX The increment for the elements of vector x, must be larger than zero. 1740 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 1741 */ 1742 public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 1743 int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap); 1744 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0); 1745 } 1746 1747 /** 1748 * DSYR2 performs the symmetric rank 2 operation 1749 * A := alpha*x*y**T + alpha*y*x**T + A 1750 * 1751 * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html 1752 * 1753 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1754 * @param alpha The scalar alpha. 1755 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1756 * @param incX The increment for the elements of vector x, must be larger than zero. 1757 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1758 * @param incY The increment for the elements of vector y, must be larger than zero. 1759 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1760 */ 1761 public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1762 int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A); 1763 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0); 1764 } 1765 1766 /** 1767 * DSPR2 performs the symmetric rank 2 operation 1768 * A := alpha*x*y**T + alpha*y*x**T + A 1769 * 1770 * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html 1771 * 1772 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1773 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1774 * 'a' to packed matrix 'b'. 1775 * k = 0 1776 * for i in range(0, n): 1777 * for j in range(i, n): 1778 * b[k++] = a[i, j] 1779 * 1780 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1781 * @param alpha The scalar alpha. 1782 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1783 * @param incX The increment for the elements of vector x, must be larger than zero. 1784 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1785 * @param incY The increment for the elements of vector y, must be larger than zero. 1786 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 1787 */ 1788 public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1789 int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap); 1790 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0); 1791 } 1792 1793 1794 /** 1795 * Level 2, C and Z only 1796 */ 1797 1798 static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1799 if (!A.getType().getElement().isCompatible(e) || 1800 !X.getType().getElement().isCompatible(e) || 1801 !Y.getType().getElement().isCompatible(e)) { 1802 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1803 } 1804 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1805 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1806 } 1807 1808 int M = A.getType().getY(); 1809 int N = A.getType().getX(); 1810 if (incX <= 0 || incY <= 0) { 1811 throw new RSRuntimeException("Vector increments must be greater than 0"); 1812 } 1813 int expectedXDim = 1 + (M - 1) * incX; 1814 if (X.getType().getX() != expectedXDim) { 1815 throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 1816 } 1817 int expectedYDim = 1 + (N - 1) * incY; 1818 if (Y.getType().getX() != expectedYDim) { 1819 throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 1820 } 1821 1822 } 1823 1824 /** 1825 * CHEMV performs the matrix-vector operation 1826 * y := alpha*A*x + beta*y 1827 * 1828 * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html 1829 * 1830 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1831 * @param alpha The scalar alpha. 1832 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1833 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1834 * @param incX The increment for the elements of vector x, must be larger than zero. 1835 * @param beta The scalar beta. 1836 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1837 * @param incY The increment for the elements of vector y, must be larger than zero. 1838 */ 1839 public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 1840 // HEMV is the same as SYR2 validation-wise 1841 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 1842 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 1843 } 1844 1845 /** 1846 * CHBMV performs the matrix-vector operation 1847 * y := alpha*A*x + beta*y 1848 * 1849 * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html 1850 * 1851 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1852 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1853 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1854 * for i in range(0, n): 1855 * for j in range(i, min(i+k+1, n)): 1856 * b[i, j-i] = a[i, j] 1857 * 1858 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1859 * @param K The number of off-diagonals of the matrix A 1860 * @param alpha The scalar alpha. 1861 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1862 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1863 * @param incX The increment for the elements of vector x, must be larger than zero. 1864 * @param beta The scalar beta. 1865 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1866 * @param incY The increment for the elements of vector y, must be larger than zero. 1867 */ 1868 public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 1869 // HBMV is the same as SYR2 validation-wise 1870 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 1871 if (K < 0) { 1872 throw new RSRuntimeException("K must be 0 or greater for HBMV"); 1873 } 1874 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 1875 } 1876 1877 /** 1878 * CHPMV performs the matrix-vector operation 1879 * y := alpha*A*x + beta*y 1880 * 1881 * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html 1882 * 1883 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1884 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1885 * 'a' to packed matrix 'b'. 1886 * k = 0 1887 * for i in range(0, n): 1888 * for j in range(i, n): 1889 * b[k++] = a[i, j] 1890 * 1891 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 1892 * @param alpha The scalar alpha. 1893 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1894 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1895 * @param incX The increment for the elements of vector x, must be larger than zero. 1896 * @param beta The scalar beta. 1897 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1898 * @param incY The increment for the elements of vector y, must be larger than zero. 1899 */ 1900 public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 1901 // HPMV is the same as SPR2 1902 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 1903 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 1904 } 1905 1906 /** 1907 * CGERU performs the rank 1 operation 1908 * A := alpha*x*y**T + A 1909 * 1910 * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html 1911 * 1912 * @param alpha The scalar alpha. 1913 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1914 * @param incX The increment for the elements of vector x, must be larger than zero. 1915 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1916 * @param incY The increment for the elements of vector y, must be larger than zero. 1917 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1918 */ 1919 public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1920 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 1921 int M = A.getType().getY(); 1922 int N = A.getType().getX(); 1923 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 1924 } 1925 1926 /** 1927 * CGERC performs the rank 1 operation 1928 * A := alpha*x*y**H + A 1929 * 1930 * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html 1931 * 1932 * @param alpha The scalar alpha. 1933 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1934 * @param incX The increment for the elements of vector x, must be larger than zero. 1935 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1936 * @param incY The increment for the elements of vector y, must be larger than zero. 1937 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1938 */ 1939 public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1940 // same as GERU 1941 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 1942 int M = A.getType().getY(); 1943 int N = A.getType().getX(); 1944 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 1945 } 1946 1947 /** 1948 * CHER performs the rank 1 operation 1949 * A := alpha*x*x**H + A 1950 * 1951 * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html 1952 * 1953 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1954 * @param alpha The scalar alpha. 1955 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1956 * @param incX The increment for the elements of vector x, must be larger than zero. 1957 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1958 */ 1959 public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 1960 // same as SYR 1961 int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A); 1962 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0); 1963 } 1964 1965 /** 1966 * CHPR performs the rank 1 operation 1967 * A := alpha*x*x**H + A 1968 * 1969 * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html 1970 * 1971 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1972 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1973 * 'a' to packed matrix 'b'. 1974 * k = 0 1975 * for i in range(0, n): 1976 * for j in range(i, n): 1977 * b[k++] = a[i, j] 1978 * 1979 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1980 * @param alpha The scalar alpha. 1981 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1982 * @param incX The increment for the elements of vector x, must be larger than zero. 1983 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1984 */ 1985 public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 1986 // equivalent to SPR for validation 1987 int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap); 1988 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0); 1989 } 1990 1991 /** 1992 * CHER2 performs the symmetric rank 2 operation 1993 * A := alpha*x*y**H + alpha*y*x**H + A 1994 * 1995 * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html 1996 * 1997 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1998 * @param alpha The scalar alpha. 1999 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2000 * @param incX The increment for the elements of vector x, must be larger than zero. 2001 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2002 * @param incY The increment for the elements of vector y, must be larger than zero. 2003 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2004 */ 2005 public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2006 // same as SYR2 2007 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2008 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 2009 } 2010 2011 /** 2012 * CHPR2 performs the symmetric rank 2 operation 2013 * A := alpha*x*y**H + alpha*y*x**H + A 2014 * 2015 * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html 2016 * 2017 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2018 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2019 * 'a' to packed matrix 'b'. 2020 * k = 0 2021 * for i in range(0, n): 2022 * for j in range(i, n): 2023 * b[k++] = a[i, j] 2024 * 2025 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2026 * @param alpha The scalar alpha. 2027 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2028 * @param incX The increment for the elements of vector x, must be larger than zero. 2029 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2030 * @param incY The increment for the elements of vector y, must be larger than zero. 2031 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2032 */ 2033 public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2034 // same as SPR2 2035 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 2036 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0); 2037 } 2038 2039 /** 2040 * ZHEMV performs the matrix-vector operation 2041 * y := alpha*A*x + beta*y 2042 * 2043 * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html 2044 * 2045 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2046 * @param alpha The scalar alpha. 2047 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2048 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2049 * @param incX The increment for the elements of vector x, must be larger than zero. 2050 * @param beta The scalar beta. 2051 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2052 * @param incY The increment for the elements of vector y, must be larger than zero. 2053 */ 2054 public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2055 // HEMV is the same as SYR2 validation-wise 2056 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2057 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 2058 } 2059 2060 /** 2061 * ZHBMV performs the matrix-vector operation 2062 * y := alpha*A*x + beta*y 2063 * 2064 * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html 2065 * 2066 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 2067 * but only the region N*(K+1) will be referenced. The following subroutine can is an 2068 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 2069 * for i in range(0, n): 2070 * for j in range(i, min(i+k+1, n)): 2071 * b[i, j-i] = a[i, j] 2072 * 2073 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2074 * @param K The number of off-diagonals of the matrix A 2075 * @param alpha The scalar alpha. 2076 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2077 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2078 * @param incX The increment for the elements of vector x, must be larger than zero. 2079 * @param beta The scalar beta. 2080 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2081 * @param incY The increment for the elements of vector y, must be larger than zero. 2082 */ 2083 public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2084 // HBMV is the same as SYR2 validation-wise 2085 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2086 if (K < 0) { 2087 throw new RSRuntimeException("K must be 0 or greater for HBMV"); 2088 } 2089 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 2090 } 2091 2092 /** 2093 * ZHPMV performs the matrix-vector operation 2094 * y := alpha*A*x + beta*y 2095 * 2096 * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html 2097 * 2098 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2099 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2100 * 'a' to packed matrix 'b'. 2101 * k = 0 2102 * for i in range(0, n): 2103 * for j in range(i, n): 2104 * b[k++] = a[i, j] 2105 * 2106 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2107 * @param alpha The scalar alpha. 2108 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2109 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2110 * @param incX The increment for the elements of vector x, must be larger than zero. 2111 * @param beta The scalar beta. 2112 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2113 * @param incY The increment for the elements of vector y, must be larger than zero. 2114 */ 2115 public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2116 // HPMV is the same as SPR2 2117 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2118 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 2119 } 2120 2121 /** 2122 * ZGERU performs the rank 1 operation 2123 * A := alpha*x*y**T + A 2124 * 2125 * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html 2126 * 2127 * @param alpha The scalar alpha. 2128 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2129 * @param incX The increment for the elements of vector x, must be larger than zero. 2130 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2131 * @param incY The increment for the elements of vector y, must be larger than zero. 2132 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2133 */ 2134 public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2135 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2136 int M = A.getType().getY(); 2137 int N = A.getType().getX(); 2138 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 2139 } 2140 2141 /** 2142 * ZGERC performs the rank 1 operation 2143 * A := alpha*x*y**H + A 2144 * 2145 * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html 2146 * 2147 * @param alpha The scalar alpha. 2148 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2149 * @param incX The increment for the elements of vector x, must be larger than zero. 2150 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2151 * @param incY The increment for the elements of vector y, must be larger than zero. 2152 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2153 */ 2154 public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2155 // same as GERU 2156 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2157 int M = A.getType().getY(); 2158 int N = A.getType().getX(); 2159 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 2160 } 2161 2162 /** 2163 * ZHER performs the rank 1 operation 2164 * A := alpha*x*x**H + A 2165 * 2166 * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html 2167 * 2168 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2169 * @param alpha The scalar alpha. 2170 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2171 * @param incX The increment for the elements of vector x, must be larger than zero. 2172 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2173 */ 2174 public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 2175 // same as SYR 2176 int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A); 2177 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0); 2178 } 2179 2180 /** 2181 * ZHPR performs the rank 1 operation 2182 * A := alpha*x*x**H + A 2183 * 2184 * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html 2185 * 2186 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2187 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2188 * 'a' to packed matrix 'b'. 2189 * k = 0 2190 * for i in range(0, n): 2191 * for j in range(i, n): 2192 * b[k++] = a[i, j] 2193 * 2194 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2195 * @param alpha The scalar alpha. 2196 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2197 * @param incX The increment for the elements of vector x, must be larger than zero. 2198 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2199 */ 2200 public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 2201 // equivalent to SPR for validation 2202 int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap); 2203 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0); 2204 } 2205 2206 /** 2207 * ZHER2 performs the symmetric rank 2 operation 2208 * A := alpha*x*y**H + alpha*y*x**H + A 2209 * 2210 * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html 2211 * 2212 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2213 * @param alpha The scalar alpha. 2214 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2215 * @param incX The increment for the elements of vector x, must be larger than zero. 2216 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2217 * @param incY The increment for the elements of vector y, must be larger than zero. 2218 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2219 */ 2220 public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2221 // same as SYR2 2222 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2223 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 2224 } 2225 2226 /** 2227 * ZHPR2 performs the symmetric rank 2 operation 2228 * A := alpha*x*y**H + alpha*y*x**H + A 2229 * 2230 * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html 2231 * 2232 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2233 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2234 * 'a' to packed matrix 'b'. 2235 * k = 0 2236 * for i in range(0, n): 2237 * for j in range(i, n): 2238 * b[k++] = a[i, j] 2239 * 2240 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2241 * @param alpha The scalar alpha. 2242 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2243 * @param incX The increment for the elements of vector x, must be larger than zero. 2244 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2245 * @param incY The increment for the elements of vector y, must be larger than zero. 2246 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2247 */ 2248 public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2249 // same as SPR2 2250 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2251 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0); 2252 } 2253 2254 2255 /** 2256 * Level 3 BLAS 2257 */ 2258 2259 static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) { 2260 int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1; 2261 if ((A != null && !A.getType().getElement().isCompatible(e)) || 2262 (B != null && !B.getType().getElement().isCompatible(e)) || 2263 (C != null && !C.getType().getElement().isCompatible(e))) { 2264 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2265 } 2266 if (C == null) { 2267 //since matrix C is used to store the result, it cannot be null. 2268 throw new RSRuntimeException("Allocation C cannot be null"); 2269 } 2270 cM = C.getType().getY(); 2271 cN = C.getType().getX(); 2272 2273 if (Side == RIGHT) { 2274 if ((A == null && B != null) || (A != null && B == null)) { 2275 throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa"); 2276 } 2277 if (B != null) { 2278 bM = A.getType().getY(); 2279 bN = A.getType().getX(); 2280 } 2281 if (A != null) { 2282 aM = B.getType().getY(); 2283 aN = B.getType().getX(); 2284 } 2285 } else { 2286 if (A != null) { 2287 if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) { 2288 aN = A.getType().getY(); 2289 aM = A.getType().getX(); 2290 } else { 2291 aM = A.getType().getY(); 2292 aN = A.getType().getX(); 2293 } 2294 } 2295 if (B != null) { 2296 if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) { 2297 bN = B.getType().getY(); 2298 bM = B.getType().getX(); 2299 } else { 2300 bM = B.getType().getY(); 2301 bN = B.getType().getX(); 2302 } 2303 } 2304 } 2305 if (A != null && B != null && C != null) { 2306 if (aN != bM || aM != cM || bN != cN) { 2307 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2308 } 2309 } else if (A != null && C != null) { 2310 // A and C only, for SYRK 2311 if (cM != cN) { 2312 throw new RSRuntimeException("Matrix C is not symmetric"); 2313 } 2314 if (aM != cM) { 2315 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2316 } 2317 } else if (A != null && B != null) { 2318 // A and B only 2319 if (aN != bM) { 2320 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2321 } 2322 } 2323 2324 } 2325 2326 /** 2327 * SGEMM performs one of the matrix-matrix operations 2328 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2329 * 2330 * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html 2331 * 2332 * @param TransA The type of transpose applied to matrix A. 2333 * @param TransB The type of transpose applied to matrix B. 2334 * @param alpha The scalar alpha. 2335 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2336 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2337 * @param beta The scalar beta. 2338 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2339 */ 2340 public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A, 2341 Allocation B, float beta, Allocation C) { 2342 validateTranspose(TransA); 2343 validateTranspose(TransB); 2344 validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C); 2345 2346 int M = -1, N = -1, K = -1; 2347 if (TransA != NO_TRANSPOSE) { 2348 M = A.getType().getX(); 2349 K = A.getType().getY(); 2350 } else { 2351 M = A.getType().getY(); 2352 K = A.getType().getX(); 2353 } 2354 if (TransB != NO_TRANSPOSE) { 2355 N = B.getType().getY(); 2356 } else { 2357 N = B.getType().getX(); 2358 } 2359 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS), 2360 beta, C.getID(mRS), 0, 0, 0, 0); 2361 } 2362 2363 /** 2364 * DGEMM performs one of the matrix-matrix operations 2365 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2366 * 2367 * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html 2368 * 2369 * @param TransA The type of transpose applied to matrix A. 2370 * @param TransB The type of transpose applied to matrix B. 2371 * @param alpha The scalar alpha. 2372 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2373 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2374 * @param beta The scalar beta. 2375 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2376 */ 2377 public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A, 2378 Allocation B, double beta, Allocation C) { 2379 validateTranspose(TransA); 2380 validateTranspose(TransB); 2381 validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C); 2382 int M = -1, N = -1, K = -1; 2383 if (TransA != NO_TRANSPOSE) { 2384 M = A.getType().getX(); 2385 K = A.getType().getY(); 2386 } else { 2387 M = A.getType().getY(); 2388 K = A.getType().getX(); 2389 } 2390 if (TransB != NO_TRANSPOSE) { 2391 N = B.getType().getY(); 2392 } else { 2393 N = B.getType().getX(); 2394 } 2395 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS), 2396 beta, C.getID(mRS), 0, 0, 0, 0); 2397 } 2398 2399 /** 2400 * CGEMM performs one of the matrix-matrix operations 2401 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 2402 * 2403 * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html 2404 * 2405 * @param TransA The type of transpose applied to matrix A. 2406 * @param TransB The type of transpose applied to matrix B. 2407 * @param alpha The scalar alpha. 2408 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2409 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 2410 * @param beta The scalar beta. 2411 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 2412 */ 2413 public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, 2414 Allocation B, Float2 beta, Allocation C) { 2415 validateTranspose(TransA); 2416 validateTranspose(TransB); 2417 validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C); 2418 int M = -1, N = -1, K = -1; 2419 if (TransA != NO_TRANSPOSE) { 2420 M = A.getType().getX(); 2421 K = A.getType().getY(); 2422 } else { 2423 M = A.getType().getY(); 2424 K = A.getType().getX(); 2425 } 2426 if (TransB != NO_TRANSPOSE) { 2427 N = B.getType().getY(); 2428 } else { 2429 N = B.getType().getX(); 2430 } 2431 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 2432 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2433 } 2434 2435 /** 2436 * ZGEMM performs one of the matrix-matrix operations 2437 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 2438 * 2439 * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html 2440 * 2441 * @param TransA The type of transpose applied to matrix A. 2442 * @param TransB The type of transpose applied to matrix B. 2443 * @param alpha The scalar alpha. 2444 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2445 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 2446 * @param beta The scalar beta. 2447 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 2448 */ 2449 public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, 2450 Allocation B, Double2 beta, Allocation C) { 2451 validateTranspose(TransA); 2452 validateTranspose(TransB); 2453 validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C); 2454 int M = -1, N = -1, K = -1; 2455 if (TransA != NO_TRANSPOSE) { 2456 M = A.getType().getX(); 2457 K = A.getType().getY(); 2458 } else { 2459 M = A.getType().getY(); 2460 K = A.getType().getX(); 2461 } 2462 if (TransB != NO_TRANSPOSE) { 2463 N = B.getType().getY(); 2464 } else { 2465 N = B.getType().getX(); 2466 } 2467 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 2468 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2469 } 2470 2471 /** 2472 * SSYMM performs one of the matrix-matrix operations 2473 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 2474 * 2475 * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html 2476 * 2477 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2478 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2479 * @param alpha The scalar alpha. 2480 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2481 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2482 * @param beta The scalar beta. 2483 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2484 */ 2485 public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A, 2486 Allocation B, float beta, Allocation C) { 2487 validateSide(Side); 2488 validateUplo(Uplo); 2489 //For SYMM, Matrix A should be symmetric 2490 if (A.getType().getX() != A.getType().getY()) { 2491 throw new RSRuntimeException("Matrix A is not symmetric"); 2492 } 2493 validateL3(Element.F32(mRS), 0, 0, Side, A, B, C); 2494 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS), 2495 beta, C.getID(mRS), 0, 0, 0, 0); 2496 } 2497 2498 /** 2499 * DSYMM performs one of the matrix-matrix operations 2500 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 2501 * 2502 * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html 2503 * 2504 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2505 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2506 * @param alpha The scalar alpha. 2507 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2508 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2509 * @param beta The scalar beta. 2510 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2511 */ 2512 public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A, 2513 Allocation B, double beta, Allocation C) { 2514 validateSide(Side); 2515 validateUplo(Uplo); 2516 if (A.getType().getX() != A.getType().getY()) { 2517 throw new RSRuntimeException("Matrix A is not symmetric"); 2518 } 2519 validateL3(Element.F64(mRS), 0, 0, Side, A, B, C); 2520 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS), 2521 beta, C.getID(mRS), 0, 0, 0, 0); 2522 } 2523 2524 /** 2525 * CSYMM performs one of the matrix-matrix operations 2526 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 2527 * 2528 * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html 2529 * 2530 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2531 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2532 * @param alpha The scalar alpha. 2533 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2534 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 2535 * @param beta The scalar beta. 2536 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 2537 */ 2538 public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, 2539 Allocation B, Float2 beta, Allocation C) { 2540 validateSide(Side); 2541 validateUplo(Uplo); 2542 if (A.getType().getX() != A.getType().getY()) { 2543 throw new RSRuntimeException("Matrix A is not symmetric"); 2544 } 2545 validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C); 2546 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 2547 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2548 } 2549 2550 /** 2551 * ZSYMM performs one of the matrix-matrix operations 2552 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 2553 * 2554 * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html 2555 * 2556 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2557 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2558 * @param alpha The scalar alpha. 2559 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2560 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 2561 * @param beta The scalar beta. 2562 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 2563 */ 2564 public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, 2565 Allocation B, Double2 beta, Allocation C) { 2566 validateSide(Side); 2567 validateUplo(Uplo); 2568 if (A.getType().getX() != A.getType().getY()) { 2569 throw new RSRuntimeException("Matrix A is not symmetric"); 2570 } 2571 validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C); 2572 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 2573 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2574 } 2575 2576 /** 2577 * SSYRK performs one of the symmetric rank k operations 2578 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 2579 * 2580 * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html 2581 * 2582 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2583 * @param Trans The type of transpose applied to the operation. 2584 * @param alpha The scalar alpha. 2585 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2586 * @param beta The scalar beta. 2587 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2588 */ 2589 public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 2590 validateTranspose(Trans); 2591 validateUplo(Uplo); 2592 validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C); 2593 int K = -1; 2594 if (Trans != NO_TRANSPOSE) { 2595 K = A.getType().getY(); 2596 } else { 2597 K = A.getType().getX(); 2598 } 2599 2600 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0); 2601 } 2602 2603 /** 2604 * DSYRK performs one of the symmetric rank k operations 2605 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 2606 * 2607 * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html 2608 * 2609 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2610 * @param Trans The type of transpose applied to the operation. 2611 * @param alpha The scalar alpha. 2612 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2613 * @param beta The scalar beta. 2614 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2615 */ 2616 public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 2617 validateTranspose(Trans); 2618 validateUplo(Uplo); 2619 validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C); 2620 int K = -1; 2621 if (Trans != NO_TRANSPOSE) { 2622 K = A.getType().getY(); 2623 } else { 2624 K = A.getType().getX(); 2625 } 2626 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0); 2627 } 2628 2629 /** 2630 * CSYRK performs one of the symmetric rank k operations 2631 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 2632 * 2633 * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html 2634 * 2635 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2636 * @param Trans The type of transpose applied to the operation. 2637 * @param alpha The scalar alpha. 2638 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2639 * @param beta The scalar beta. 2640 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 2641 */ 2642 public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) { 2643 validateTranspose(Trans); 2644 validateUplo(Uplo); 2645 validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C); 2646 int K = -1; 2647 if (Trans != NO_TRANSPOSE) { 2648 K = A.getType().getY(); 2649 } else { 2650 K = A.getType().getX(); 2651 } 2652 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y, 2653 C.getID(mRS), 0, 0, 0, 0); 2654 } 2655 2656 /** 2657 * ZSYRK performs one of the symmetric rank k operations 2658 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 2659 * 2660 * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html 2661 * 2662 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2663 * @param Trans The type of transpose applied to the operation. 2664 * @param alpha The scalar alpha. 2665 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2666 * @param beta The scalar beta. 2667 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 2668 */ 2669 public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) { 2670 validateTranspose(Trans); 2671 validateUplo(Uplo); 2672 validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C); 2673 int K = -1; 2674 if (Trans != NO_TRANSPOSE) { 2675 K = A.getType().getY(); 2676 } else { 2677 K = A.getType().getX(); 2678 } 2679 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y, 2680 C.getID(mRS), 0, 0, 0, 0); 2681 } 2682 2683 static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 2684 validateTranspose(Trans); 2685 if (!A.getType().getElement().isCompatible(e) || 2686 !B.getType().getElement().isCompatible(e) || 2687 !C.getType().getElement().isCompatible(e)) { 2688 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2689 } 2690 int Cdim = -1; 2691 // A is n x k if no transpose, k x n if transpose 2692 // C is n x n 2693 if (Trans == TRANSPOSE) { 2694 // check columns versus C 2695 Cdim = A.getType().getX(); 2696 } else { 2697 // check rows versus C 2698 Cdim = A.getType().getY(); 2699 } 2700 if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) { 2701 throw new RSRuntimeException("Invalid symmetric matrix in SYR2K"); 2702 } 2703 // A dims == B dims 2704 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 2705 throw new RSRuntimeException("Invalid A and B in SYR2K"); 2706 } 2707 } 2708 2709 /** 2710 * SSYR2K performs one of the symmetric rank 2k operations 2711 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 2712 * 2713 * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html 2714 * 2715 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2716 * @param Trans The type of transpose applied to the operation. 2717 * @param alpha The scalar alpha. 2718 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2719 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2720 * @param beta The scalar beta. 2721 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2722 */ 2723 public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) { 2724 validateUplo(Uplo); 2725 validateSYR2K(Element.F32(mRS), Trans, A, B, C); 2726 int K = -1; 2727 if (Trans != NO_TRANSPOSE) { 2728 K = A.getType().getY(); 2729 } else { 2730 K = A.getType().getX(); 2731 } 2732 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); 2733 } 2734 2735 /** 2736 * DSYR2K performs one of the symmetric rank 2k operations 2737 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 2738 * 2739 * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html 2740 * 2741 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2742 * @param Trans The type of transpose applied to the operation. 2743 * @param alpha The scalar alpha. 2744 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2745 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2746 * @param beta The scalar beta. 2747 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2748 */ 2749 public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) { 2750 validateUplo(Uplo); 2751 validateSYR2K(Element.F64(mRS), Trans, A, B, C); 2752 int K = -1; 2753 if (Trans != NO_TRANSPOSE) { 2754 K = A.getType().getY(); 2755 } else { 2756 K = A.getType().getX(); 2757 } 2758 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); 2759 } 2760 2761 /** 2762 * CSYR2K performs one of the symmetric rank 2k operations 2763 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 2764 * 2765 * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html 2766 * 2767 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2768 * @param Trans The type of transpose applied to the operation. 2769 * @param alpha The scalar alpha. 2770 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2771 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 2772 * @param beta The scalar beta. 2773 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 2774 */ 2775 public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 2776 validateUplo(Uplo); 2777 validateSYR2K(Element.F32_2(mRS), Trans, A, B, C); 2778 int K = -1; 2779 if (Trans != NO_TRANSPOSE) { 2780 K = A.getType().getY(); 2781 } else { 2782 K = A.getType().getX(); 2783 } 2784 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2785 } 2786 2787 /** 2788 * ZSYR2K performs one of the symmetric rank 2k operations 2789 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 2790 * 2791 * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html 2792 * 2793 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2794 * @param Trans The type of transpose applied to the operation. 2795 * @param alpha The scalar alpha. 2796 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2797 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 2798 * @param beta The scalar beta. 2799 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 2800 */ 2801 public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 2802 validateUplo(Uplo); 2803 validateSYR2K(Element.F64_2(mRS), Trans, A, B, C); 2804 int K = -1; 2805 if (Trans != NO_TRANSPOSE) { 2806 K = A.getType().getY(); 2807 } else { 2808 K = A.getType().getX(); 2809 } 2810 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2811 } 2812 2813 static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 2814 validateSide(Side); 2815 validateTranspose(TransA); 2816 int aM = -1, aN = -1, bM = -1, bN = -1; 2817 if (!A.getType().getElement().isCompatible(e) || 2818 !B.getType().getElement().isCompatible(e)) { 2819 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2820 } 2821 2822 aM = A.getType().getY(); 2823 aN = A.getType().getX(); 2824 if (aM != aN) { 2825 throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A"); 2826 } 2827 2828 bM = B.getType().getY(); 2829 bN = B.getType().getX(); 2830 if (Side == LEFT) { 2831 if (aN != bM) { 2832 throw new RSRuntimeException("Called TRMM with invalid matrices"); 2833 } 2834 } else { 2835 if (bN != aM) { 2836 throw new RSRuntimeException("Called TRMM with invalid matrices"); 2837 } 2838 } 2839 } 2840 2841 /** 2842 * STRMM performs one of the matrix-matrix operations 2843 * B := alpha*op(A)*B or B := alpha*B*op(A) 2844 * op(A) is one of op(A) = A or op(A) = A**T 2845 * 2846 * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html 2847 * 2848 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2849 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2850 * @param TransA The type of transpose applied to matrix A. 2851 * @param Diag Specifies whether or not A is unit triangular. 2852 * @param alpha The scalar alpha. 2853 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2854 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2855 */ 2856 public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 2857 validateUplo(Uplo); 2858 validateDiag(Diag); 2859 validateTRMM(Element.F32(mRS), Side, TransA, A, B); 2860 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2861 alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0); 2862 } 2863 2864 /** 2865 * DTRMM performs one of the matrix-matrix operations 2866 * B := alpha*op(A)*B or B := alpha*B*op(A) 2867 * op(A) is one of op(A) = A or op(A) = A**T 2868 * 2869 * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html 2870 * 2871 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2872 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2873 * @param TransA The type of transpose applied to matrix A. 2874 * @param Diag Specifies whether or not A is unit triangular. 2875 * @param alpha The scalar alpha. 2876 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2877 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2878 */ 2879 public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 2880 validateUplo(Uplo); 2881 validateDiag(Diag); 2882 validateTRMM(Element.F64(mRS), Side, TransA, A, B); 2883 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2884 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0); 2885 } 2886 2887 /** 2888 * CTRMM performs one of the matrix-matrix operations 2889 * B := alpha*op(A)*B or B := alpha*B*op(A) 2890 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 2891 * 2892 * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html 2893 * 2894 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2895 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2896 * @param TransA The type of transpose applied to matrix A. 2897 * @param Diag Specifies whether or not A is unit triangular. 2898 * @param alpha The scalar alpha. 2899 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2900 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 2901 */ 2902 public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 2903 validateUplo(Uplo); 2904 validateDiag(Diag); 2905 validateTRMM(Element.F32_2(mRS), Side, TransA, A, B); 2906 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2907 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); 2908 } 2909 2910 /** 2911 * ZTRMM performs one of the matrix-matrix operations 2912 * B := alpha*op(A)*B or B := alpha*B*op(A) 2913 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 2914 * 2915 * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html 2916 * 2917 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2918 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2919 * @param TransA The type of transpose applied to matrix A. 2920 * @param Diag Specifies whether or not A is unit triangular. 2921 * @param alpha The scalar alpha. 2922 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2923 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 2924 */ 2925 public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 2926 validateUplo(Uplo); 2927 validateDiag(Diag); 2928 validateTRMM(Element.F64_2(mRS), Side, TransA, A, B); 2929 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2930 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); 2931 } 2932 2933 static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 2934 int adim = -1, bM = -1, bN = -1; 2935 validateSide(Side); 2936 validateTranspose(TransA); 2937 if (!A.getType().getElement().isCompatible(e) || 2938 !B.getType().getElement().isCompatible(e)) { 2939 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2940 } 2941 adim = A.getType().getX(); 2942 if (adim != A.getType().getY()) { 2943 // this may be unnecessary, the restriction could potentially be relaxed 2944 // A needs to contain at least that symmetric matrix but could theoretically be larger 2945 // for now we assume adapters are sufficient, will reevaluate in the future 2946 throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A"); 2947 } 2948 bM = B.getType().getY(); 2949 bN = B.getType().getX(); 2950 if (Side == LEFT) { 2951 // A is M*M 2952 if (adim != bM) { 2953 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 2954 } 2955 } else { 2956 // A is N*N 2957 if (adim != bN) { 2958 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 2959 } 2960 } 2961 } 2962 2963 /** 2964 * STRSM solves one of the matrix equations 2965 * op(A)*X := alpha*B or X*op(A) := alpha*B 2966 * op(A) is one of op(A) = A or op(A) = A**T 2967 * 2968 * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html 2969 * 2970 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2971 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2972 * @param TransA The type of transpose applied to matrix A. 2973 * @param Diag Specifies whether or not A is unit triangular. 2974 * @param alpha The scalar alpha. 2975 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2976 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2977 */ 2978 public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 2979 validateUplo(Uplo); 2980 validateDiag(Diag); 2981 validateTRSM(Element.F32(mRS), Side, TransA, A, B); 2982 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2983 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0); 2984 } 2985 2986 /** 2987 * DTRSM solves one of the matrix equations 2988 * op(A)*X := alpha*B or X*op(A) := alpha*B 2989 * op(A) is one of op(A) = A or op(A) = A**T 2990 * 2991 * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html 2992 * 2993 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2994 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2995 * @param TransA The type of transpose applied to matrix A. 2996 * @param Diag Specifies whether or not A is unit triangular. 2997 * @param alpha The scalar alpha. 2998 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2999 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3000 */ 3001 public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 3002 validateUplo(Uplo); 3003 validateDiag(Diag); 3004 validateTRSM(Element.F64(mRS), Side, TransA, A, B); 3005 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3006 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0); 3007 } 3008 3009 /** 3010 * CTRSM solves one of the matrix equations 3011 * op(A)*X := alpha*B or X*op(A) := alpha*B 3012 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3013 * 3014 * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html 3015 * 3016 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3017 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3018 * @param TransA The type of transpose applied to matrix A. 3019 * @param Diag Specifies whether or not A is unit triangular. 3020 * @param alpha The scalar alpha. 3021 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3022 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3023 */ 3024 public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 3025 validateUplo(Uplo); 3026 validateDiag(Diag); 3027 validateTRSM(Element.F32_2(mRS), Side, TransA, A, B); 3028 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3029 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); 3030 } 3031 3032 /** 3033 * ZTRSM solves one of the matrix equations 3034 * op(A)*X := alpha*B or X*op(A) := alpha*B 3035 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3036 * 3037 * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html 3038 * 3039 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3040 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3041 * @param TransA The type of transpose applied to matrix A. 3042 * @param Diag Specifies whether or not A is unit triangular. 3043 * @param alpha The scalar alpha. 3044 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3045 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3046 */ 3047 public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 3048 validateUplo(Uplo); 3049 validateDiag(Diag); 3050 validateTRSM(Element.F64_2(mRS), Side, TransA, A, B); 3051 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3052 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); 3053 } 3054 3055 static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) { 3056 validateSide(Side); 3057 3058 if (!A.getType().getElement().isCompatible(e) || 3059 !B.getType().getElement().isCompatible(e) || 3060 !C.getType().getElement().isCompatible(e)) { 3061 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3062 } 3063 3064 // A must be square; can potentially be relaxed similar to TRSM 3065 int adim = A.getType().getX(); 3066 if (adim != A.getType().getY()) { 3067 throw new RSRuntimeException("Called HEMM with non-square A"); 3068 } 3069 if ((Side == LEFT && adim != B.getType().getY()) || 3070 (Side == RIGHT && adim != B.getType().getX())) { 3071 throw new RSRuntimeException("Called HEMM with invalid B"); 3072 } 3073 if (B.getType().getX() != C.getType().getX() || 3074 B.getType().getY() != C.getType().getY()) { 3075 throw new RSRuntimeException("Called HEMM with mismatched B and C"); 3076 } 3077 } 3078 3079 /** 3080 * CHEMM performs one of the matrix-matrix operations 3081 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3082 * 3083 * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html 3084 * 3085 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3086 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3087 * @param alpha The scalar alpha. 3088 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3089 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3090 * @param beta The scalar beta. 3091 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3092 */ 3093 public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 3094 validateUplo(Uplo); 3095 validateHEMM(Element.F32_2(mRS), Side, A, B, C); 3096 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3097 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 3098 } 3099 3100 /** 3101 * ZHEMM performs one of the matrix-matrix operations 3102 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3103 * 3104 * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html 3105 * 3106 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3107 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3108 * @param alpha The scalar alpha. 3109 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3110 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3111 * @param beta The scalar beta. 3112 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3113 */ 3114 public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 3115 validateUplo(Uplo); 3116 validateHEMM(Element.F64_2(mRS), Side, A, B, C); 3117 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3118 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 3119 } 3120 3121 static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) { 3122 if (!A.getType().getElement().isCompatible(e) || 3123 !C.getType().getElement().isCompatible(e)) { 3124 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3125 } 3126 validateConjTranspose(Trans); 3127 int cdim = C.getType().getX(); 3128 if (cdim != C.getType().getY()) { 3129 throw new RSRuntimeException("Called HERK with non-square C"); 3130 } 3131 if (Trans == NO_TRANSPOSE) { 3132 if (cdim != A.getType().getY()) { 3133 throw new RSRuntimeException("Called HERK with invalid A"); 3134 } 3135 } else { 3136 if (cdim != A.getType().getX()) { 3137 throw new RSRuntimeException("Called HERK with invalid A"); 3138 } 3139 } 3140 } 3141 3142 /** 3143 * CHERK performs one of the hermitian rank k operations 3144 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 3145 * 3146 * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html 3147 * 3148 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3149 * @param Trans The type of transpose applied to the operation. 3150 * @param alpha The scalar alpha. 3151 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3152 * @param beta The scalar beta. 3153 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3154 */ 3155 public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 3156 validateUplo(Uplo); 3157 validateHERK(Element.F32_2(mRS), Trans, A, C); 3158 int k = 0; 3159 if (Trans == CONJ_TRANSPOSE) { 3160 k = A.getType().getY(); 3161 } else { 3162 k = A.getType().getX(); 3163 } 3164 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 3165 alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0); 3166 } 3167 3168 /** 3169 * ZHERK performs one of the hermitian rank k operations 3170 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 3171 * 3172 * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html 3173 * 3174 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3175 * @param Trans The type of transpose applied to the operation. 3176 * @param alpha The scalar alpha. 3177 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3178 * @param beta The scalar beta. 3179 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3180 */ 3181 public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 3182 validateUplo(Uplo); 3183 validateHERK(Element.F64_2(mRS), Trans, A, C); 3184 int k = 0; 3185 if (Trans == CONJ_TRANSPOSE) { 3186 k = A.getType().getY(); 3187 } else { 3188 k = A.getType().getX(); 3189 } 3190 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 3191 alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0); 3192 } 3193 3194 static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 3195 if (!A.getType().getElement().isCompatible(e) || 3196 !B.getType().getElement().isCompatible(e) || 3197 !C.getType().getElement().isCompatible(e)) { 3198 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3199 } 3200 validateConjTranspose(Trans); 3201 int cdim = C.getType().getX(); 3202 if (cdim != C.getType().getY()) { 3203 throw new RSRuntimeException("Called HER2K with non-square C"); 3204 } 3205 if (Trans == NO_TRANSPOSE) { 3206 if (A.getType().getY() != cdim) { 3207 throw new RSRuntimeException("Called HER2K with invalid matrices"); 3208 } 3209 } else { 3210 if (A.getType().getX() != cdim) { 3211 throw new RSRuntimeException("Called HER2K with invalid matrices"); 3212 } 3213 } 3214 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 3215 throw new RSRuntimeException("Called HER2K with invalid A and B matrices"); 3216 } 3217 } 3218 3219 /** 3220 * CHER2K performs one of the hermitian rank 2k operations 3221 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 3222 * 3223 * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html 3224 * 3225 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3226 * @param Trans The type of transpose applied to the operation. 3227 * @param alpha The scalar alpha. 3228 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3229 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3230 * @param beta The scalar beta. 3231 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3232 */ 3233 public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) { 3234 validateUplo(Uplo); 3235 validateHER2K(Element.F32_2(mRS), Trans, A, B, C); 3236 int k = 0; 3237 if (Trans == NO_TRANSPOSE) { 3238 k = A.getType().getX(); 3239 } else { 3240 k = A.getType().getY(); 3241 } 3242 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 3243 A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0); 3244 } 3245 3246 /** 3247 * ZHER2K performs one of the hermitian rank 2k operations 3248 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 3249 * 3250 * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html 3251 * 3252 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3253 * @param Trans The type of transpose applied to the operation. 3254 * @param alpha The scalar alpha. 3255 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3256 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3257 * @param beta The scalar beta. 3258 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3259 */ 3260 public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) { 3261 validateUplo(Uplo); 3262 validateHER2K(Element.F64_2(mRS), Trans, A, B, C); 3263 int k = 0; 3264 if (Trans == NO_TRANSPOSE) { 3265 k = A.getType().getX(); 3266 } else { 3267 k = A.getType().getY(); 3268 } 3269 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 3270 A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0); 3271 } 3272 3273 3274 /** 3275 * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B) 3276 * Calculations are done in 1.10.21 fixed-point format for the final output, 3277 * just before there's a shift down to drop the fractional parts. The output 3278 * values are gated to 0 to 255 to fit in a byte, but the 10-bit format 3279 * gives some headroom to avoid wrapping around on small overflows. 3280 * 3281 * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}. 3282 * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255. 3283 * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}. 3284 * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255. 3285 * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}. 3286 * @param c_offset The offset for all values in matrix C. 3287 * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult. 3288 **/ 3289 public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) { 3290 validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C); 3291 3292 if (a_offset < 0 || a_offset > 255) { 3293 throw new RSRuntimeException("Invalid a_offset passed to BNNM"); 3294 } 3295 if (b_offset < 0 || b_offset > 255) { 3296 throw new RSRuntimeException("Invalid b_offset passed to BNNM"); 3297 } 3298 int M = -1, N = -1, K = -1; 3299 M = A.getType().getY(); 3300 N = B.getType().getY(); 3301 K = A.getType().getX(); 3302 3303 3304 mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, A.getID(mRS), a_offset, B.getID(mRS), b_offset, C.getID(mRS), c_offset, c_mult); 3305 3306 } 3307 3308 } 3309