1000*1000的矩阵A、B、C,C=A*B,用C/C++实现 参考:blocking factor,http://home.iae.nl/users/mhx/mm.c 效率可以对比matlab
int m; int i, j, k;
double r; double *A, *B, *C;
m = 1000;
A = new double[m*m]; B = new double[m*m]; C = new double[m*m];
//置初值 for (i=0; i<m*m; i++) { A[i] = 1; B[i] = 1; C[i] = 0; }
int bf; // blocking factor int jj, kk, im; int minj, mink;
bf = 48; // 可以修改
for (jj=0; jj<m; jj+=bf) for (kk=0; kk<m; kk+=bf) for ( i=0; i<m; ++i) { minj = (jj+bf)<m ? (jj+bf):m; for (j=jj; j<minj; ++j) { r = 0; im = i*m; mink = (kk+bf)<m ? (kk+bf):m; for (k=kk; k<mink; ++k) { r += A[im+k]*B[k*m+j]; } C[im+j] += r; } }
delete[] A; delete[] B; delete[] C;
这个程序比matlab要慢1倍,我曾到网上搜索到一个fortran程序(分块dgemm), 其速度比matlab慢0.5倍左右,不知道有谁能写一个能与matlab媲美的程序?