高性能矩阵乘代码编写问题
我目前在做 BLAS 数学库中 dgemm 函数的 C 语言实现。虽然目前有很多高性能数学库的开源代码,但大部分都是用汇编和 Fortran 实现的。我用 C 语言实现之后,在 Intel 平台上测试,性能很低,只达到理论峰值的三分之一。希望在这方面有研究的大侠高手多多指教,比较急切,谢谢各位!
#include <climits>
#include <iostream>
#include <vector>

#define MAX 1024

using namespace std;

/**
 * Matrix-chain multiplication ordering solved by dynamic programming.
 *
 * The chain A1 * A2 * ... * An is described by `size` dimension values
 * p[0..size-1], where matrix Ai has shape p[i-1] x p[i]; the chain therefore
 * contains size-1 matrices. Matrix indices are 1-based throughout.
 *
 * Storage is held in std::vector, so copying or destroying an instance is
 * safe without any hand-written copy control.
 */
class MatrixChain {
private:
    const int size;                       // number of dimension values (never negative)
    std::vector<long long> p;             // dimension values, p[0..size-1]
    std::vector<std::vector<long long> > m;  // m[i][j]: minimal scalar multiplications for Ai..Aj
    std::vector<std::vector<int> > s;     // s[i][j]: split index k that achieves m[i][j]

public:
    /**
     * Reads `n` dimension values from standard input.
     *
     * @param n number of dimension values to read; a negative value is
     *          treated as 0.
     *
     * If an extraction fails (bad token or end of input), reading stops and
     * the remaining dimensions keep their initial value of 0.
     */
    explicit MatrixChain(int n)
        : size(n > 0 ? n : 0),
          p(size + 1, 0),
          m(size + 1, std::vector<long long>(size + 1, 0)),
          s(size + 1, std::vector<int>(size + 1, 0)) {
        for (int i = 0; i < size; i++) {
            if (!(std::cin >> p[i])) {
                break;
            }
        }
    }

    /**
     * Fills the cost table `m` and the split table `s` for every sub-chain
     * Ai..Aj with 1 <= i <= j <= size-1.
     */
    void Matrix_Chain_Order() {
        const int n = size - 1;  // number of matrices in the chain

        // A single matrix needs no multiplication.
        for (int i = 1; i <= n; i++) {
            m[i][i] = 0;
        }

        // Solve sub-chains in order of increasing length so that both halves
        // of every split are already known.
        for (int len = 2; len <= n; len++) {
            for (int i = 1; i <= n - len + 1; i++) {
                const int j = i + len - 1;
                m[i][j] = LLONG_MAX;
                s[i][j] = i;  // valid default split, replaced by the first candidate below
                for (int k = i; k <= j - 1; k++) {
                    // Costs are accumulated in long long: the product of three
                    // dimensions easily exceeds the range of int.
                    const long long q = m[i][k] + m[k + 1][j] + p[i - 1] * p[k] * p[j];
                    if (q < m[i][j]) {
                        m[i][j] = q;
                        s[i][j] = k;
                    }
                }
            }
        }
    }

    /**
     * Writes the parenthesization recorded in `s` for Ai..Aj to standard
     * output, e.g. "((A1A2)A3)".
     *
     * @param i index of the first matrix (1-based)
     * @param j index of the last matrix (1-based)
     *
     * Prints nothing when the range is empty or lies outside the chain.
     */
    void Print_Optimal_Parens(int i, int j) {
        if (i < 1 || j > size - 1 || i > j) {
            return;
        }
        if (i == j) {
            std::cout << "A" << i;
            return;
        }

        // A split must lie in [i, j-1]; fall back to i if the table has not
        // been filled in, so that the recursion always shrinks the range.
        int split = s[i][j];
        if (split < i || split >= j) {
            split = i;
        }

        std::cout << "(";
        Print_Optimal_Parens(i, split);
        Print_Optimal_Parens(split + 1, j);
        std::cout << ")";
    }
};

/**
 * Reads test cases until end of input or a non-positive count.
 * Each case is a count followed by that many dimension values; the optimal
 * parenthesization is printed on its own line.
 */
int main() {
    int s;
    // The eof() test is kept from the original: a count that is the very last
    // token of the input (nothing after it, not even whitespace) is ignored.
    while (std::cin >> s && !std::cin.eof()) {
        if (s <= 0) {
            break;  // 0 terminates the input; a negative count is not a valid chain
        }
        MatrixChain matrix(s);
        matrix.Matrix_Chain_Order();
        matrix.Print_Optimal_Parens(1, s - 1);
        std::cout << '\n';
    }
    return 0;
}