ILP64: 64bit integer type
LP64: 32bit integer type
mkl_avx?.dll
: avx지원 dll
mkl_blacs_???.dll
: BLACS(Basic Linear Algebra Communication Subprograms) 라이브러리. MPI 기반 통신 계층으로 ScaLAPACK/Cluster FFT에서 사용됨
mkl_cdft_core.dll
: Cluster version of FFT functions
mkl_mc.dll
: Kernel library for Intel® SSSE3 enabled processors
mkl_mc3.dll
: Kernel library for Intel® SSE4.2 enabled processors
mkl_rt.dll
: Single Dynamic Library (SDL)
mkl_scalapack_??.dll
: ScaLAPACK routine library
mkl_vml_??.dll
: VM/VS/DF (Vector Mathematics / Vector Statistics / Data Fitting) 커널 라이브러리
mkl_sequential.dll
: Sequential library
mkl_pgi_thread.dll
: OpenMP threading library for the PGI compiler
mkl_tbb_thread.dll
: Intel TBB threading library for the Intel compilers
mkl_intel_thread.dll
: OpenMP threading library for the Intel compilers
https://launchpad.net/ubuntu/+source/intel-mkl
ILP64 vs LP64
mkl_core_dll.lib
mkl_intel_lp64_dll.lib
mkl_intel_thread_dll.lib
=====
여기서 mkl_intel_thread.dll 만 추가하면 Module Not Found가 뜸.
1033폴더를 넣어야 정상적으로 동작함.
lib 파일들 중에서 mkl_core_dll.lib와 mkl_intel_lp64_dll.lib를 링크하고, 나머지 하나는 스레드와 관련된 링크 파일이다.
pgi_thread는 PGI컴파일러 전용이므로 패스하고 멀티스레딩을 사용하려면 intel_thread 또는 tbb_thread를 사용해야 한다.
#include<mkl.h>
#include<random>
#include<vector>
#include<iostream>
#include<vspring.h>
#pragma comment(lib,"mkl_core_dll.lib")
#pragma comment(lib,"mkl_intel_lp64_dll.lib")
//#pragma comment(lib,"mkl_tbb_thread_dll.lib")
//#pragma comment(lib,"mkl_intel_thread_dll.lib")
#pragma comment(lib,"mkl_sequential_dll.lib")
int main() {
VSLStreamStatePtr stream;
vslNewStream(&stream, VSL_BRNG_MCG31, 110);
std::vector<float> a, b, c;
const int N = 10;
a.assign(N*N, 0); // [100]x[100]
b.assign(N*N, 0); // [100]x[100]
c.assign(N*N, 0); // [100]x[100]
vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, N*N, a.data(), -1.f, 1.f);
vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, N*N, b.data(), -1.f, 1.f);
cblas_sgemm(CBLAS_LAYOUT::CblasRowMajor, CBLAS_TRANSPOSE::CblasNoTrans, CBLAS_TRANSPOSE::CblasTrans, N, N, N, 1.f, a.data(), N, b.data(), N, 0.f, c.data(), N);
float matmul_sum=cblas_sasum(N*N, c.data(), 1);
std::cout << matmul_sum << std::endl;
cblas_ssbmv(CBLAS_LAYOUT::CblasRowMajor, CBLAS_UPLO::CblasLower, N*N, 0, 1.f, a.data(),1, b.data(), 1, 0.f, c.data(), 1);
float vecmul_sum = cblas_sasum(N*N, c.data(), 1);
std::cout << vecmul_sum << std::endl;
vsMul(N*N, a.data(), b.data(), c.data());
vecmul_sum = cblas_sasum(N*N, c.data(), 1);
std::cout << vecmul_sum << std::endl;
return 0;
}
g++ -fopenmp -Iinclude main.cpp -Llib -lmkl_intel_lp64 -lmkl_core -lmkl_intel_thread -lpthread -liomp5 -ldl -Wl,-rpath,.
g++ main.cpp -Iinclude -Llib -lmkl_intel_lp64 -lmkl_core -lmkl_intel_thread -liomp5 -ldl -fopenmp -Wl,-rpath,.
g++ main.cpp -Iinclude -Llib -lmkl_intel_lp64 -lmkl_core -lmkl_gnu_thread -lpthread -ldl -m64 -fopenmp -Wl,-rpath,. -lmkl_vml_def
g++ main.cpp -Iinclude -Llib -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -ldl -Wl,-rpath,.