MKL

spring·2020년 11월 9일
0

ILP64: 64bit integer type
LP64: 32bit integer type

mkl_avx?.dll: avx지원 dll
mkl_blacs_???.dll: BLACS(Basic Linear Algebra Communication Subprograms) 라이브러리 — 클러스터(분산 메모리) 환경에서 ScaLAPACK 등이 사용하는 통신 루틴
mkl_cdft_core.dll: Cluster version of FFT functions

mkl_mc.dll: Kernel library for Intel® SSSE3 enabled processors
mkl_mc3.dll: Kernel library for Intel® SSE4.2 enabled processors

mkl_rt.dll: Single Dynamic Library (SDL)
mkl_scalapack_??.dll: ScaLAPACK routine library

mkl_vml_??.dll: VM(Vector Mathematics) / VS(Vector Statistics) / DF(Data Fitting) 커널 라이브러리

mkl_sequential.dll: Sequential library
mkl_pgi_thread.dll: OpenMP threading library for the PGI compiler
mkl_tbb_thread.dll: Intel TBB threading library for the Intel compilers
mkl_intel_thread.dll: OpenMP threading library for the Intel compilers

References

https://launchpad.net/ubuntu/+source/intel-mkl
ILP64 vs LP64

mkl_core_dll.lib
mkl_intel_lp64_dll.lib
mkl_intel_thread_dll.lib
=====
여기서 mkl_intel_thread.dll만 추가하면 실행 시 Module Not Found 오류가 발생한다.
1033 폴더도 함께 넣어야 정상적으로 동작한다.

MKL 스레드 옵션 선택

lib 파일들 중에서 mkl_core_dll.lib와 mkl_intel_lp64_dll.lib는 항상 링크하고, 나머지 하나는 스레드 방식에 따라 아래 중에서 선택하는 링크 파일이다.

  • mkl_sequential_dll.lib <싱글 스레드>
  • mkl_intel_thread_dll.lib <인텔 OpenMP>
  • mkl_tbb_thread_dll.lib <인텔 TBB>
  • mkl_pgi_thread_dll.lib <PGI OpenMP>

pgi_thread는 PGI컴파일러 전용이므로 패스하고 멀티스레딩을 사용하려면 intel_thread 또는 tbb_thread를 사용해야 한다.

#include<mkl.h>
#include<random>
#include<vector>
#include<iostream>
#include<vspring.h>


#pragma comment(lib,"mkl_core_dll.lib")
#pragma comment(lib,"mkl_intel_lp64_dll.lib")
//#pragma comment(lib,"mkl_tbb_thread_dll.lib")
//#pragma comment(lib,"mkl_intel_thread_dll.lib")
#pragma comment(lib,"mkl_sequential_dll.lib")
int main() {
	VSLStreamStatePtr stream;
	vslNewStream(&stream, VSL_BRNG_MCG31, 110);
	std::vector<float> a, b, c;
	const int N = 10;
	a.assign(N*N, 0);	// [100]x[100]
	b.assign(N*N, 0);	// [100]x[100]
	c.assign(N*N, 0);	// [100]x[100]
	vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, N*N, a.data(), -1.f, 1.f);
	vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, N*N, b.data(), -1.f, 1.f);


	cblas_sgemm(CBLAS_LAYOUT::CblasRowMajor, CBLAS_TRANSPOSE::CblasNoTrans, CBLAS_TRANSPOSE::CblasTrans, N, N, N, 1.f, a.data(), N, b.data(), N, 0.f, c.data(), N);
	float matmul_sum=cblas_sasum(N*N, c.data(), 1);
	std::cout << matmul_sum << std::endl;

	cblas_ssbmv(CBLAS_LAYOUT::CblasRowMajor, CBLAS_UPLO::CblasLower, N*N, 0, 1.f, a.data(),1, b.data(), 1, 0.f, c.data(), 1);
	float vecmul_sum = cblas_sasum(N*N, c.data(), 1);
	std::cout << vecmul_sum << std::endl;

	vsMul(N*N, a.data(), b.data(), c.data());
	vecmul_sum = cblas_sasum(N*N, c.data(), 1);
	std::cout << vecmul_sum << std::endl;


	return 0;
}    

Linux에서 MKL

g++ -fopenmp -Iinclude main.cpp  -Llib -lmkl_intel_lp64 -lmkl_core -lmkl_intel_thread -lpthread -liomp5 -ldl -Wl,-rpath,.    

Intel Thread

g++ main.cpp -Iinclude -Llib -lmkl_intel_lp64 -lmkl_core -lmkl_intel_thread -liomp5 -ldl -fopenmp -Wl,-rpath,. 

GNU Thread(런타임 안됨)

g++ main.cpp -Iinclude -Llib -lmkl_intel_lp64 -lmkl_core -lmkl_gnu_thread -lpthread -ldl -m64 -fopenmp -Wl,-rpath,. -lmkl_vml_def
    

Sequential

g++ main.cpp -Iinclude -Llib -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -ldl -Wl,-rpath,.    
profile
Researcher & Developer @ NAVER Corp | Designer @ HONGIK Univ.

0개의 댓글