文章目录
说明1:Intel MKL官方参考手册以及官方参考例程(位于安装目录下examples文件夹中)是最好的参考资料,本文程序大多根据官方例程改写,均可正常运行,我用的编译环境是VS2015+Intel MKL库2019;
说明2:由于本人主要是使用Intel MKL库做通信系统仿真,因此文章函数大多是复数单精度浮点形式,实数域函数参考官方手册对参数类型进行相应修改即可,也可以参考以下文章:MKL学习——基本操作C++实现,该文章主要是针对实数域的操作。
1. 随机数生成
1.1 等概率的01分布 viRngUniform
/* 生成信源(等概率的01序列)—— 等概率的伯努利分布 */
#include <stdio.h>
#include <math.h>
#include "mkl.h"
#include "errcheck.inc"
#define SEED 111
#define BRNG VSL_BRNG_MCG31
#define METHOD VSL_RNG_METHOD_UNIFORM_STD
#define N 1000 //随机数的数量
#define NN 100
int main()
{
int r[N];
VSLStreamStatePtr stream;
int i, errcode;
int a = 0, b = 2;
/***** Initialize *****/
errcode = vslNewStream(&stream, BRNG, SEED);
CheckVslError(errcode); // 检测随机数生成过程是否出错
/***** Call RNG *****/
errcode = viRngUniform(METHOD, stream, N, r, a, b);
CheckVslError(errcode);
/***** Printing results *****/
printf("Results (first 100 of 1000):\n");
printf("---------------------------\n");
for (i = 0; i<NN; i++) {
printf("r[%d]=%d\n", i, r[i]);
}
/***** Deinitialize *****/
errcode = vslDeleteStream(&stream);
CheckVslError(errcode);
return 0;
}
1.2 (复)高斯分布 vsRngGaussian
/* 生成独立同分布的准静态瑞利衰落信道(还需要将元素储存到矩阵中) */
#include <stdio.h>
#include <math.h>
#include "mkl.h"
#include "errcheck.inc"
#define SEED 777
#define BRNG VSL_BRNG_MCG31
#define METHOD VSL_RNG_METHOD_GAUSSIAN_ICDF
#define N 1000
#define NN 10
int main()
{
float real[N];
float imag[N];
MKL_Complex8 H[1000];
VSLStreamStatePtr stream;
int i, errcode1,errcode2;
float a = 0.0, sigma = 1.0;
/***** Initialize *****/
vslNewStream(&stream, BRNG, SEED);
/***** Call RNG *****/
errcode1 = vsRngGaussian(METHOD, stream, N, real, a, sigma);
CheckVslError(errcode1);
errcode2 = vsRngGaussian(METHOD, stream, N, imag, a, sigma);
CheckVslError(errcode2);
for (i = 0; i<NN; i++) {
H[i].real = real[i]/(float)sqrt(2);
H[i].imag = imag[i]/(float)sqrt(2);
}
/***** Printing results *****/
printf("Results (first 10 of 1000):\n");
printf("---------------------------\n");
for (i = 0; i<NN; i++) {
printf("real[%d]=%.4f\n", i, real[i]);
}
printf("---------------------------\n");
for (i = 0; i<NN; i++) {
printf("imag[%d]=%.4f\n", i, imag[i]);
}
printf("---------------------------\n");
for (i = 0; i<NN; i++) {
printf("H[%d]=%.4f+(%.4fi)\n", i, H[i].real, H[i].imag);
}
/***** Deinitialize *****/
vslDeleteStream(&stream);
return 0;
}
2. 向量-向量运算
2.1 向量数乘与加法 cblas_caxpby
/* 向量数乘与加法 y = alpha*x+beta*y */
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
int main( )
{
MKL_INT n, incx, incy;
MKL_Complex8 alpha, beta;
MKL_Complex8 *x, *y;
MKL_INT len_x, len_y;
/***************** 参数初始化 *****************/
n = 5;
incx = 1;
incy = 1;
alpha.imag = 0;
alpha.real = 2;
beta.imag = 0;
beta.real = 1;
len_x = 1 + (n - 1)* incx;
len_y = 1 + (n - 1)* incy;
x = (MKL_Complex8 *)mkl_calloc(len_x, sizeof(MKL_Complex8), 64);
y = (MKL_Complex8 *)mkl_calloc(len_y, sizeof(MKL_Complex8), 64);
/******************* 给复数向量x,y赋值 *********************/
for (int i = 0; i < n; i++) {
x[i].real = (float)i;
x[i].imag = (float)(i + 1);
y[i].real = (float)(i + 2);
y[i].imag = (float)(i + 3);
}
/********** Call CBLAS_CAXPBY subroutine ( C Interface )**********/
cblas_caxpby(n, &alpha, x, incx, &beta, y, incy);
/********************** Printing results *******************/
printf("y = ax + by\n");
for (int i = 0; i < n; i++) {
printf("----------------------------------------\n");
printf("y[%d] = %.0f+%.0fi\n", i, y[i].real, y[i].imag);
}
mkl_free(x);
mkl_free(y);
return 0;
}
2.2 向量内积 cblas_cdotu_sub
/* 向量点乘(内积) res = \sigma i=0 to n (x_i*y_i) */
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
int main()
{
MKL_INT n, incx, incy;
MKL_Complex8 *x, *y;
MKL_Complex8 res;
MKL_INT len_x, len_y;
/***************** 参数初始化 *****************/
n = 5;
incx = 1;
incy = 1;
len_x = 1 + (n - 1)*incx;
len_y = 1 + (n - 1)*incy;
x = (MKL_Complex8 *)mkl_calloc(len_x, sizeof(MKL_Complex8), 64);
y = (MKL_Complex8 *)mkl_calloc(len_y, sizeof(MKL_Complex8), 64);
/******************* 给复数向量x,y赋值 *********************/
for (int i = 0; i < n; i++) {
x[i].real = (float)i;
x[i].imag = (float)(i + 1);
y[i].real = (float)(i + 2);
y[i].imag = (float)(i + 3);
}
/* Call CBLAS_CDOTU_SUB subroutine ( C Interface ) */
cblas_cdotu_sub(n, x, incx, y, incy, &res);
/********************** Printing results *******************/
printf("%.0f+%.0fi\n", res.real, res.imag);
mkl_free(x);
mkl_free(y);
return 0;
}
2.3 向量范数 cblas_scnrm2
/* 向量范数 res = ||x|| */
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
int main()
{
MKL_INT n, incx;
float res;
MKL_Complex8 *x;
MKL_INT len_x;
/***************** 参数初始化 *****************/
n = 5;
incx = 1;
len_x = 1 + (n - 1)*(incx);
x = (MKL_Complex8 *)mkl_calloc(len_x, sizeof(MKL_Complex8), 64);
/******************* 给复数向量x赋值 *********************/
for (int i = 0; i < n; i++) {
x[i].real = (float)i;
x[i].imag = (float)(i + 1);
}
/************* Call CBLAS_SCNRM2 subroutine ( C Interface ) *********/
res = cblas_scnrm2(n, x, incx);
/************************* Print output data ********************/
printf("||x|| = %f\n", res);
mkl_free(x);
return 0;
}
3. 矩阵-向量运算
3.1 矩阵存储方案
/* 以“实数矩阵-向量乘法”为例说明矩阵储存方案 y = alpha*A*x + beta*y
以下代码以行优先 CblasRowMajor 为例 */
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
int main()
{
MKL_INT m, n, lda, incx, incy;
float alpha, beta;
float *a, *x, *y;
CBLAS_LAYOUT layout = CblasRowMajor; // 可变为CblasColMajor
CBLAS_TRANSPOSE trans = CblasNoTrans;
MKL_INT nx, ny, len_x, len_y;
m = 2;
n = 5;
incx = incy = 1;
alpha = 1;
beta = 0;
a = (float *)mkl_calloc(m*n, sizeof(float), 64);
if (trans == CblasNoTrans) {
nx = n;
ny = m;
}
else {
nx = m;
ny = n;
}
if (layout == CblasRowMajor)
lda = n;
else
lda = m;
len_x = 1 + (nx - 1)*(incx);
len_y = 1 + (ny - 1)*(incy);
x = (float *)mkl_calloc(len_x, sizeof(float), 64);
y = (float *)mkl_calloc(len_y, sizeof(float), 64);
/* Print input data */
printf("矩阵为\n");
for (int i = 0; i < m*n; i++) {
if (i%nx == 0 && i != 0)
printf("\n");
a[i] = (float)i;
printf("%2.0f", a[i]);
}
printf("\n");
printf("向量为\n");
for (int i = 0; i < len_x; i++)
{
x[i] =(float)(i + 1);
printf("%2.0f", x[i]);
printf("\n");
}
/* Call CBLAS_SGEMV subroutine ( C Interface ) */
cblas_sgemv(layout, trans, m, n, alpha, a, lda, x, incx, beta, y, incy);
/* Print output data */
printf("矩阵-向量乘法结果\n");
for (int i = 0; i < len_y; i++) {
printf("%2.0f ", y[i]);
printf("\n");
}
mkl_free(a);
mkl_free(x);
mkl_free(y);
return 0;
}
上述过程中,矩阵a由数组 对其赋值,具体储存过程如下:
行优先CblasRowMajor
:初始化时,将数组逐行存入矩阵,可表示如下:
列优先CblasColMajor
:行优先:初始化时,将数组逐列存入矩阵,可表示如下:
需注意,不管是行优先还是列优先,只要矩阵不转置,矩阵维度都不会发生变化。
3.2 矩阵-向量乘法 cblas_cgemv
trans参数控制矩阵A以什么形式参与运算,三个取值含义如下:
CblasNoTrans:A;CblasTrans:A的转置;CblasConjTrans:A的共轭转置
/* 复数矩阵-向量乘法 y = alpha*A*x + beta*y */
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
int main()
{
MKL_INT m, n, lda, incx, incy;
MKL_Complex8 alpha, beta;
MKL_Complex8 *a, *x, *y;
CBLAS_LAYOUT layout = CblasRowMajor;
CBLAS_TRANSPOSE trans = CblasNoTrans;
MKL_INT nx, ny, len_x, len_y;
/***************** 参数初始化 *****************/
m = 2;
n = 5;
incx = incy = 1;
alpha.real = 1;
alpha.imag = 0;
beta.real = 0;
beta.imag = 0;
a = (MKL_Complex8 *)mkl_calloc(m*n, sizeof(MKL_Complex8), 64);
if (trans == CblasNoTrans) {
nx = n;
ny = m;
} else {
nx = m;
ny = n;
}
if (layout == CblasRowMajor)
lda = n;
else
lda = m;
len_x = 1 + (nx - 1)*(incx);
len_y = 1 + (ny - 1)*(incy);
x = (MKL_Complex8 *)mkl_calloc(len_x, sizeof(MKL_Complex8), 64);
y = (MKL_Complex8 *)mkl_calloc(len_y, sizeof(MKL_Complex8), 64);
/************** 矩阵与向量赋值 *******************/
for (int i = 0; i < m*n; i++) {
a[i].real = (float)(i + 1);
a[i].imag = (float)i;
}
for (int i = 0; i < len_x; i++) {
x[i].real = (float)i;
x[i].imag = (float)(i + 1);
}
/******* Call CBLAS_CGEMV subroutine ( C Interface )*******/
cblas_cgemv(layout, trans, m, n, &alpha, a, lda, x, incx, &beta, y, incy);
/***************** Print output data *****************/
for (int i = 0; i < len_y; i++) {
printf("%f+%fi\n", y[i].real, y[i].imag);
}
mkl_free(a);
mkl_free(x);
mkl_free(y);
return 0;
}
4. 矩阵-矩阵运算
4.1 矩阵乘法 cblas_cgemm
/* 矩阵乘法 C = alpha A*B + beta*C */
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
int main()
{
MKL_INT m, n, k;
MKL_INT lda, ldb, ldc;
MKL_Complex8 alpha, beta;
MKL_Complex8 *a, *b, *c;
CBLAS_LAYOUT layout = CblasRowMajor;
CBLAS_TRANSPOSE transA = CblasNoTrans;
CBLAS_TRANSPOSE transB = CblasNoTrans;
MKL_INT ma, na, mb, nb;
/***************** 参数初始化 *****************/
m = 2;
n = 5;
k = 3;
alpha.real = 1;
alpha.imag = 0;
beta.real = beta.imag = 0;
if (transA == CblasNoTrans) {
ma = m;
na = k;
} else {
ma = k;
na = m;
}
if (transB == CblasNoTrans) {
mb = k;
nb = n;
} else {
mb = n;
nb = k;
}
a = (MKL_Complex8 *)mkl_calloc(ma*na, sizeof(MKL_Complex8), 64);
b = (MKL_Complex8 *)mkl_calloc(mb*nb, sizeof(MKL_Complex8), 64);
c = (MKL_Complex8 *)mkl_calloc(ma*nb, sizeof(MKL_Complex8), 64);
/************** 矩阵与向量赋值 *******************/
for (int i = 0; i < ma*na; i++) {
a[i].real = (float)(i + 1);
a[i].imag = (float)i;
}
for (int i = 0; i < mb*nb; i++) {
b[i].real = (float)i;
b[i].imag = (float)(i + 1);
}
if (layout == CblasRowMajor) {
lda = na;
ldb = nb;
ldc = nb;
} else {
lda = ma;
ldb = mb;
ldc = ma;
}
/* Call CGEMM subroutine ( C Interface ) */
cblas_cgemm(layout, transA, transB, m, n, k, &alpha, a, lda, b, ldb,
&beta, c, ldc);
/* Print output data */
for (int i = 0; i < ma*nb; i++) {
printf("%f+%fi\n", c[i].real, c[i].imag);
}
mkl_free(a);
mkl_free(b);
mkl_free(c);
return 0;
}
4.2 矩阵求逆 LAPACKE_cgetrf+LAPACKE_cgetri
/* 矩阵求逆 */
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
#define LAYOUT LAPACK_ROW_MAJOR
#define N 3
int main()
{
int layout = LAYOUT;
int m = N;
int n = N;
int info1, info2, i;
int lda = n;
int ipiv[3];
MKL_Complex8 *a;
a = (MKL_Complex8 *)mkl_calloc(m*n, sizeof(MKL_Complex8), 64);
/************* 矩阵赋值 ***************/
for (i=0; i<m*n; i++) {
a[i].real = (float)0;
a[i].imag = (float)0;
}
a[0].real = a[5].real = 1;
a[1].real = a[4].real = 2;
a[2].real = a[3].real = 3;
a[6].imag = 1;
a[7].imag = 3;
a[8].imag = 2;
/* LU分解 + 求逆 */
info1 = LAPACKE_cgetrf(layout, m, n, a, lda, ipiv);
info2 = LAPACKE_cgetri(layout, m, a, lda, ipiv);
/* print results*/
printf("%d\n", info2);
for (i = 0; i<N*N; i++) {
printf("%f+(%f)i ", a[i].real, a[i].imag);
}
printf("\n");
mkl_free(a);
return 0;
}
4.3 矩阵的QR分解 LAPACKE_cgeqrf+LAPACKE_cungqr
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mkl.h"
int main()
{
int nR = 4;
int nT = 3;
int iseed[4] = { 1,2,3,5 };
MKL_Complex8* H = NULL;
H = (MKL_Complex8 *)mkl_calloc((nR + nT)*nT, sizeof(MKL_Complex8), 64);
LAPACKE_clarnv(3, iseed, (nR + nT)*nT, H); // 复高斯分布CN(0,2)
printf("------- Before QR decomposition -------\n");
for (int i = 0; i < (nR + nT)*nT; i++) {
if (i%nT == 0)
printf("\n");
printf("%f+(%f)i ", H[i].real, H[i].imag);
}
printf("\n");
MKL_Complex8 tau[3]; // 注意此处数组大小应随nT变化
int info1 = LAPACKE_cgeqrf(LAPACK_ROW_MAJOR, nR + nT, nT, H, nT, tau); // QR分解
int info2 = LAPACKE_cungqr(LAPACK_ROW_MAJOR, nR + nT, nT, nT, H, nT, tau); // 仅输出Q矩阵的前nT列
printf("------- After QR decomposition -------\n");
for (int i = 0; i < (nR + nT)*nT; i++) {
if (i%nT == 0)
printf("\n");
printf("%f+(%f)i ", H[i].real, H[i].imag);
}
printf("\n");
mkl_free(H);
return 0;
}
5. 常用的子函数模块
5.1 计时函数
double start = dsecnd();
....
double stop = dsecnd();
printf("Elapsed time = %f seconds\n", stop - start);
// 首次调用时初始化会花费一些时间,如果想要更精确的结果,可在正式计时开始前先调用一次
5.2 Print matrix
void print_matrix(MKL_LAYOUT layout, MKL_INT m, MKL_INT n, MKL_Complex8* a, MKL_INT lda) {
MKL_INT i, j;
for (i = 0; i < m; i++) {
for (j = 0; j < n; j++) {
if (layout == MKL_COL_MAJOR) {
printf(" (%6.3f,%6.3f)", a[i + j * lda].real, a[i + j * lda].imag);
} else {
printf(" (%6.3f,%6.3f)", a[j + i * lda].real, a[j + i * lda].imag);
}
}
printf("\n");
}
}
5.3 生成单位矩阵
int n=500; // 矩阵维度
float *IdentityMatrix;
IdentityMatrix = (float *)mkl_calloc(n*n, sizeof(float), 64);
for (int i=0; i<n*n; i++) {
IdentityMatrix[i*(n+1)] = 1.0f;
}
mkl_free(IdentityMatrix);
博主不定期发布『保研/推免、C/C++、5G移动通信、Linux、生活随笔』系列文章,如果觉得本文对你有帮助,可以『点赞+关注』支持一下哦!