1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud (a] inria.fr> 5 // Copyright (C) 2007-2011 Benoit Jacob <jacob.benoit.1 (a] gmail.com> 6 // 7 // This Source Code Form is subject to the terms of the Mozilla 8 // Public License v. 2.0. If a copy of the MPL was not distributed 9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10 11 #ifndef EIGEN_CORE_H 12 #define EIGEN_CORE_H 13 14 // first thing Eigen does: stop the compiler from committing suicide 15 #include "src/Core/util/DisableStupidWarnings.h" 16 17 // Handle NVCC/CUDA/SYCL 18 #if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__) 19 // Do not try asserts on CUDA and SYCL! 20 #ifndef EIGEN_NO_DEBUG 21 #define EIGEN_NO_DEBUG 22 #endif 23 24 #ifdef EIGEN_INTERNAL_DEBUGGING 25 #undef EIGEN_INTERNAL_DEBUGGING 26 #endif 27 28 #ifdef EIGEN_EXCEPTIONS 29 #undef EIGEN_EXCEPTIONS 30 #endif 31 32 // All functions callable from CUDA code must be qualified with __device__ 33 #ifdef __CUDACC__ 34 // Do not try to vectorize on CUDA and SYCL! 35 #ifndef EIGEN_DONT_VECTORIZE 36 #define EIGEN_DONT_VECTORIZE 37 #endif 38 39 #define EIGEN_DEVICE_FUNC __host__ __device__ 40 // We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro 41 // works properly on the device side 42 #include <math_functions.hpp> 43 #else 44 #define EIGEN_DEVICE_FUNC 45 #endif 46 47 #else 48 #define EIGEN_DEVICE_FUNC 49 50 #endif 51 52 // When compiling CUDA device code with NVCC, pull in math functions from the 53 // global namespace. In host mode, and when device doee with clang, use the 54 // std versions. 55 #if defined(__CUDA_ARCH__) && defined(__NVCC__) 56 #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC; 57 #else 58 #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC; 59 #endif 60 61 #if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL) 62 #define EIGEN_EXCEPTIONS 63 #endif 64 65 #ifdef EIGEN_EXCEPTIONS 66 #include <new> 67 #endif 68 69 // then include this file where all our macros are defined. It's really important to do it first because 70 // it's where we do all the alignment settings (platform detection and honoring the user's will if he 71 // defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization. 72 #include "src/Core/util/Macros.h" 73 74 // Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3) 75 // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details. 76 #if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6) 77 #pragma GCC optimize ("-fno-ipa-cp-clone") 78 #endif 79 80 #include <complex> 81 82 // this include file manages BLAS and MKL related macros 83 // and inclusion of their respective header files 84 #include "src/Core/util/MKL_support.h" 85 86 // if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into 87 // account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks 88 #if EIGEN_MAX_ALIGN_BYTES==0 89 #ifndef EIGEN_DONT_VECTORIZE 90 #define EIGEN_DONT_VECTORIZE 91 #endif 92 #endif 93 94 #if EIGEN_COMP_MSVC 95 #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled 96 #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later 97 // Remember that usage of defined() in a #define is undefined by the standard. 98 // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. 99 #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64 100 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER 101 #endif 102 #endif 103 #else 104 // Remember that usage of defined() in a #define is undefined by the standard 105 #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) ) 106 #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC 107 #endif 108 #endif 109 110 #ifndef EIGEN_DONT_VECTORIZE 111 112 #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER) 113 114 // Defines symbols for compile-time detection of which instructions are 115 // used. 116 // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used 117 #define EIGEN_VECTORIZE 118 #define EIGEN_VECTORIZE_SSE 119 #define EIGEN_VECTORIZE_SSE2 120 121 // Detect sse3/ssse3/sse4: 122 // gcc and icc defines __SSE3__, ... 123 // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you 124 // want to force the use of those instructions with msvc. 125 #ifdef __SSE3__ 126 #define EIGEN_VECTORIZE_SSE3 127 #endif 128 #ifdef __SSSE3__ 129 #define EIGEN_VECTORIZE_SSSE3 130 #endif 131 #ifdef __SSE4_1__ 132 #define EIGEN_VECTORIZE_SSE4_1 133 #endif 134 #ifdef __SSE4_2__ 135 #define EIGEN_VECTORIZE_SSE4_2 136 #endif 137 #ifdef __AVX__ 138 #define EIGEN_VECTORIZE_AVX 139 #define EIGEN_VECTORIZE_SSE3 140 #define EIGEN_VECTORIZE_SSSE3 141 #define EIGEN_VECTORIZE_SSE4_1 142 #define EIGEN_VECTORIZE_SSE4_2 143 #endif 144 #ifdef __AVX2__ 145 #define EIGEN_VECTORIZE_AVX2 146 #endif 147 #ifdef __FMA__ 148 #define EIGEN_VECTORIZE_FMA 149 #endif 150 #if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512) 151 #define EIGEN_VECTORIZE_AVX512 152 #define EIGEN_VECTORIZE_AVX2 153 #define EIGEN_VECTORIZE_AVX 154 #define EIGEN_VECTORIZE_FMA 155 #ifdef __AVX512DQ__ 156 #define EIGEN_VECTORIZE_AVX512DQ 157 #endif 158 #endif 159 160 // include files 161 162 // This extern "C" works around a MINGW-w64 compilation issue 163 // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354 164 // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do). 165 // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations 166 // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know; 167 // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too. 168 // notice that since these are C headers, the extern "C" is theoretically needed anyways. 169 extern "C" { 170 // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. 171 // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus: 172 #if EIGEN_COMP_ICC >= 1110 173 #include <immintrin.h> 174 #else 175 #include <mmintrin.h> 176 #include <emmintrin.h> 177 #include <xmmintrin.h> 178 #ifdef EIGEN_VECTORIZE_SSE3 179 #include <pmmintrin.h> 180 #endif 181 #ifdef EIGEN_VECTORIZE_SSSE3 182 #include <tmmintrin.h> 183 #endif 184 #ifdef EIGEN_VECTORIZE_SSE4_1 185 #include <smmintrin.h> 186 #endif 187 #ifdef EIGEN_VECTORIZE_SSE4_2 188 #include <nmmintrin.h> 189 #endif 190 #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512) 191 #include <immintrin.h> 192 #endif 193 #endif 194 } // end extern "C" 195 #elif defined __VSX__ 196 #define EIGEN_VECTORIZE 197 #define EIGEN_VECTORIZE_VSX 198 #include <altivec.h> 199 // We need to #undef all these ugly tokens defined in <altivec.h> 200 // => use __vector instead of vector 201 #undef bool 202 #undef vector 203 #undef pixel 204 #elif defined __ALTIVEC__ 205 #define EIGEN_VECTORIZE 206 #define EIGEN_VECTORIZE_ALTIVEC 207 #include <altivec.h> 208 // We need to #undef all these ugly tokens defined in <altivec.h> 209 // => use __vector instead of vector 210 #undef bool 211 #undef vector 212 #undef pixel 213 #elif (defined __ARM_NEON) || (defined __ARM_NEON__) 214 #define EIGEN_VECTORIZE 215 #define EIGEN_VECTORIZE_NEON 216 #include <arm_neon.h> 217 #elif (defined __s390x__ && defined __VEC__) 218 #define EIGEN_VECTORIZE 219 #define EIGEN_VECTORIZE_ZVECTOR 220 #include <vecintrin.h> 221 #endif 222 #endif 223 224 #if defined(__F16C__) && !defined(EIGEN_COMP_CLANG) 225 // We can use the optimized fp16 to float and float to fp16 conversion routines 226 #define EIGEN_HAS_FP16_C 227 #endif 228 229 #if defined __CUDACC__ 230 #define EIGEN_VECTORIZE_CUDA 231 #include <vector_types.h> 232 #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 233 #define EIGEN_HAS_CUDA_FP16 234 #endif 235 #endif 236 237 #if defined EIGEN_HAS_CUDA_FP16 238 #include <host_defines.h> 239 #include <cuda_fp16.h> 240 #endif 241 242 #if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE) 243 #define EIGEN_HAS_OPENMP 244 #endif 245 246 #ifdef EIGEN_HAS_OPENMP 247 #include <omp.h> 248 #endif 249 250 // MSVC for windows mobile does not have the errno.h file 251 #if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM 252 #define EIGEN_HAS_ERRNO 253 #endif 254 255 #ifdef EIGEN_HAS_ERRNO 256 #include <cerrno> 257 #endif 258 #include <cstddef> 259 #include <cstdlib> 260 #include <cmath> 261 #include <cassert> 262 #include <functional> 263 #include <iosfwd> 264 #include <cstring> 265 #include <string> 266 #include <limits> 267 #include <climits> // for CHAR_BIT 268 // for min/max: 269 #include <algorithm> 270 271 // for std::is_nothrow_move_assignable 272 #ifdef EIGEN_INCLUDE_TYPE_TRAITS 273 #include <type_traits> 274 #endif 275 276 // for outputting debug info 277 #ifdef EIGEN_DEBUG_ASSIGN 278 #include <iostream> 279 #endif 280 281 // required for __cpuid, needs to be included after cmath 282 #if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE 283 #include <intrin.h> 284 #endif 285 286 /** \brief Namespace containing all symbols from the %Eigen library. */ 287 namespace Eigen { 288 289 inline static const char *SimdInstructionSetsInUse(void) { 290 #if defined(EIGEN_VECTORIZE_AVX512) 291 return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; 292 #elif defined(EIGEN_VECTORIZE_AVX) 293 return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; 294 #elif defined(EIGEN_VECTORIZE_SSE4_2) 295 return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; 296 #elif defined(EIGEN_VECTORIZE_SSE4_1) 297 return "SSE, SSE2, SSE3, SSSE3, SSE4.1"; 298 #elif defined(EIGEN_VECTORIZE_SSSE3) 299 return "SSE, SSE2, SSE3, SSSE3"; 300 #elif defined(EIGEN_VECTORIZE_SSE3) 301 return "SSE, SSE2, SSE3"; 302 #elif defined(EIGEN_VECTORIZE_SSE2) 303 return "SSE, SSE2"; 304 #elif defined(EIGEN_VECTORIZE_ALTIVEC) 305 return "AltiVec"; 306 #elif defined(EIGEN_VECTORIZE_VSX) 307 return "VSX"; 308 #elif defined(EIGEN_VECTORIZE_NEON) 309 return "ARM NEON"; 310 #elif defined(EIGEN_VECTORIZE_ZVECTOR) 311 return "S390X ZVECTOR"; 312 #else 313 return "None"; 314 #endif 315 } 316 317 } // end namespace Eigen 318 319 #if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT 320 // This will generate an error message: 321 #error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information 322 #endif 323 324 namespace Eigen { 325 326 // we use size_t frequently and we'll never remember to prepend it with std:: everytime just to 327 // ensure QNX/QCC support 328 using std::size_t; 329 // gcc 4.6.0 wants std:: for ptrdiff_t 330 using std::ptrdiff_t; 331 332 } 333 334 /** \defgroup Core_Module Core module 335 * This is the main module of Eigen providing dense matrix and vector support 336 * (both fixed and dynamic size) with all the features corresponding to a BLAS library 337 * and much more... 338 * 339 * \code 340 * #include <Eigen/Core> 341 * \endcode 342 */ 343 344 #include "src/Core/util/Constants.h" 345 #include "src/Core/util/Meta.h" 346 #include "src/Core/util/ForwardDeclarations.h" 347 #include "src/Core/util/StaticAssert.h" 348 #include "src/Core/util/XprHelper.h" 349 #include "src/Core/util/Memory.h" 350 351 #include "src/Core/NumTraits.h" 352 #include "src/Core/MathFunctions.h" 353 #include "src/Core/GenericPacketMath.h" 354 #include "src/Core/MathFunctionsImpl.h" 355 356 #if defined EIGEN_VECTORIZE_AVX512 357 #include "src/Core/arch/SSE/PacketMath.h" 358 #include "src/Core/arch/AVX/PacketMath.h" 359 #include "src/Core/arch/AVX512/PacketMath.h" 360 #include "src/Core/arch/AVX512/MathFunctions.h" 361 #elif defined EIGEN_VECTORIZE_AVX 362 // Use AVX for floats and doubles, SSE for integers 363 #include "src/Core/arch/SSE/PacketMath.h" 364 #include "src/Core/arch/SSE/Complex.h" 365 #include "src/Core/arch/SSE/MathFunctions.h" 366 #include "src/Core/arch/AVX/PacketMath.h" 367 #include "src/Core/arch/AVX/MathFunctions.h" 368 #include "src/Core/arch/AVX/Complex.h" 369 #include "src/Core/arch/AVX/TypeCasting.h" 370 #elif defined EIGEN_VECTORIZE_SSE 371 #include "src/Core/arch/SSE/PacketMath.h" 372 #include "src/Core/arch/SSE/MathFunctions.h" 373 #include "src/Core/arch/SSE/Complex.h" 374 #include "src/Core/arch/SSE/TypeCasting.h" 375 #elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) 376 #include "src/Core/arch/AltiVec/PacketMath.h" 377 #include "src/Core/arch/AltiVec/MathFunctions.h" 378 #include "src/Core/arch/AltiVec/Complex.h" 379 #elif defined EIGEN_VECTORIZE_NEON 380 #include "src/Core/arch/NEON/PacketMath.h" 381 #include "src/Core/arch/NEON/MathFunctions.h" 382 #include "src/Core/arch/NEON/Complex.h" 383 #elif defined EIGEN_VECTORIZE_ZVECTOR 384 #include "src/Core/arch/ZVector/PacketMath.h" 385 #include "src/Core/arch/ZVector/MathFunctions.h" 386 #include "src/Core/arch/ZVector/Complex.h" 387 #endif 388 389 // Half float support 390 #include "src/Core/arch/CUDA/Half.h" 391 #include "src/Core/arch/CUDA/PacketMathHalf.h" 392 #include "src/Core/arch/CUDA/TypeCasting.h" 393 394 #if defined EIGEN_VECTORIZE_CUDA 395 #include "src/Core/arch/CUDA/PacketMath.h" 396 #include "src/Core/arch/CUDA/MathFunctions.h" 397 #endif 398 399 #include "src/Core/arch/Default/Settings.h" 400 401 #include "src/Core/functors/TernaryFunctors.h" 402 #include "src/Core/functors/BinaryFunctors.h" 403 #include "src/Core/functors/UnaryFunctors.h" 404 #include "src/Core/functors/NullaryFunctors.h" 405 #include "src/Core/functors/StlFunctors.h" 406 #include "src/Core/functors/AssignmentFunctors.h" 407 408 // Specialized functors to enable the processing of complex numbers 409 // on CUDA devices 410 #include "src/Core/arch/CUDA/Complex.h" 411 412 #include "src/Core/IO.h" 413 #include "src/Core/DenseCoeffsBase.h" 414 #include "src/Core/DenseBase.h" 415 #include "src/Core/MatrixBase.h" 416 #include "src/Core/EigenBase.h" 417 418 #include "src/Core/Product.h" 419 #include "src/Core/CoreEvaluators.h" 420 #include "src/Core/AssignEvaluator.h" 421 422 #ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874 423 // at least confirmed with Doxygen 1.5.5 and 1.5.6 424 #include "src/Core/Assign.h" 425 #endif 426 427 #include "src/Core/ArrayBase.h" 428 #include "src/Core/util/BlasUtil.h" 429 #include "src/Core/DenseStorage.h" 430 #include "src/Core/NestByValue.h" 431 432 // #include "src/Core/ForceAlignedAccess.h" 433 434 #include "src/Core/ReturnByValue.h" 435 #include "src/Core/NoAlias.h" 436 #include "src/Core/PlainObjectBase.h" 437 #include "src/Core/Matrix.h" 438 #include "src/Core/Array.h" 439 #include "src/Core/CwiseTernaryOp.h" 440 #include "src/Core/CwiseBinaryOp.h" 441 #include "src/Core/CwiseUnaryOp.h" 442 #include "src/Core/CwiseNullaryOp.h" 443 #include "src/Core/CwiseUnaryView.h" 444 #include "src/Core/SelfCwiseBinaryOp.h" 445 #include "src/Core/Dot.h" 446 #include "src/Core/StableNorm.h" 447 #include "src/Core/Stride.h" 448 #include "src/Core/MapBase.h" 449 #include "src/Core/Map.h" 450 #include "src/Core/Ref.h" 451 #include "src/Core/Block.h" 452 #include "src/Core/VectorBlock.h" 453 #include "src/Core/Transpose.h" 454 #include "src/Core/DiagonalMatrix.h" 455 #include "src/Core/Diagonal.h" 456 #include "src/Core/DiagonalProduct.h" 457 #include "src/Core/Redux.h" 458 #include "src/Core/Visitor.h" 459 #include "src/Core/Fuzzy.h" 460 #include "src/Core/Swap.h" 461 #include "src/Core/CommaInitializer.h" 462 #include "src/Core/GeneralProduct.h" 463 #include "src/Core/Solve.h" 464 #include "src/Core/Inverse.h" 465 #include "src/Core/SolverBase.h" 466 #include "src/Core/PermutationMatrix.h" 467 #include "src/Core/Transpositions.h" 468 #include "src/Core/TriangularMatrix.h" 469 #include "src/Core/SelfAdjointView.h" 470 #include "src/Core/products/GeneralBlockPanelKernel.h" 471 #include "src/Core/products/Parallelizer.h" 472 #include "src/Core/ProductEvaluators.h" 473 #include "src/Core/products/GeneralMatrixVector.h" 474 #include "src/Core/products/GeneralMatrixMatrix.h" 475 #include "src/Core/SolveTriangular.h" 476 #include "src/Core/products/GeneralMatrixMatrixTriangular.h" 477 #include "src/Core/products/SelfadjointMatrixVector.h" 478 #include "src/Core/products/SelfadjointMatrixMatrix.h" 479 #include "src/Core/products/SelfadjointProduct.h" 480 #include "src/Core/products/SelfadjointRank2Update.h" 481 #include "src/Core/products/TriangularMatrixVector.h" 482 #include "src/Core/products/TriangularMatrixMatrix.h" 483 #include "src/Core/products/TriangularSolverMatrix.h" 484 #include "src/Core/products/TriangularSolverVector.h" 485 #include "src/Core/BandMatrix.h" 486 #include "src/Core/CoreIterators.h" 487 #include "src/Core/ConditionEstimator.h" 488 489 #include "src/Core/BooleanRedux.h" 490 #include "src/Core/Select.h" 491 #include "src/Core/VectorwiseOp.h" 492 #include "src/Core/Random.h" 493 #include "src/Core/Replicate.h" 494 #include "src/Core/Reverse.h" 495 #include "src/Core/ArrayWrapper.h" 496 497 #ifdef EIGEN_USE_BLAS 498 #include "src/Core/products/GeneralMatrixMatrix_BLAS.h" 499 #include "src/Core/products/GeneralMatrixVector_BLAS.h" 500 #include "src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h" 501 #include "src/Core/products/SelfadjointMatrixMatrix_BLAS.h" 502 #include "src/Core/products/SelfadjointMatrixVector_BLAS.h" 503 #include "src/Core/products/TriangularMatrixMatrix_BLAS.h" 504 #include "src/Core/products/TriangularMatrixVector_BLAS.h" 505 #include "src/Core/products/TriangularSolverMatrix_BLAS.h" 506 #endif // EIGEN_USE_BLAS 507 508 #ifdef EIGEN_USE_MKL_VML 509 #include "src/Core/Assign_MKL.h" 510 #endif 511 512 #include "src/Core/GlobalFunctions.h" 513 514 #include "src/Core/util/ReenableStupidWarnings.h" 515 516 #endif // EIGEN_CORE_H 517