#include <itpp/itbase.h>

using namespace itpp;

//These lines are needed for use of cout and endl
using std::cout;
using std::endl;

/**
 * Naive triple-loop matrix product, used as a baseline in the benchmark.
 *
 * @param A left operand, A.rows() x A.cols()
 * @param B right operand; the inner loop reads B rows 0 .. A.cols()-1, so
 *          B.rows() must equal A.cols(). This is not checked here, and the
 *          elements are read through IT++'s _elem accessor.
 * @return a new matrix of size A.rows() x B.cols() holding A*B
 */
mat matmul ( mat &A, mat &B ) {
	const int out_rows = A.rows();
	const int out_cols = B.cols();
	const int inner = A.cols();
	mat C ( out_rows,out_cols );

	for ( int i=0;i<out_rows;i++ ) {
		// Columns of the result follow B, not A; bounding this loop by
		// A.cols() is only correct for square operands.
		for ( int j=0;j<out_cols;j++ ) {
			double sum = 0.0;
			for ( int k=0;k<inner;k++ ) {
				sum+=A._elem ( i,k ) *B._elem ( k,j );
			}
			C ( i,j ) = sum;
		}
	}
	return C;
}

/**
 * Plain-array product of two square n x n matrices.
 *
 * All three buffers are indexed as [row*n + col] and must each hold n*n
 * doubles. Every element of C is overwritten with the corresponding dot
 * product; nothing is touched when n <= 0.
 *
 * @param n  matrix dimension
 * @param A  left operand
 * @param B  right operand
 * @param C  output buffer
 */
void matmul2 ( int n,  double *A, double *B, double *C ) {
	for ( int row=0; row<n; ++row ) {
		// Start of the current row in the left operand and in the output.
		double *lhs_row = A + row*n;
		double *out_row = C + row*n;

		for ( int col=0; col<n; ++col ) {
			double acc = 0.0;
			for ( int inner=0; inner<n; ++inner ) {
				acc += lhs_row [ inner ] * B [ inner*n+col ];
			}
			out_row [ col ] = acc;
		}
	}
}

int main() {
	Real_Timer tt;
	vec exec_times ( 4 );
	vec exec_times_b ( 4 );
	vec exec_times_c ( 4 );

	mat A;
	mat B;
	mat C;

	vec vn="5 50 200 500";
	int n;

	for ( int i=0;i<vn.length();i++ ) {
		n = vn ( i );
		A = randu ( n,n );
		B = randu ( n,n );

		tt.tic();
		for ( int ii=0;ii<10;ii++ ) {C = matmul ( A,B );}
		exec_times ( i ) =tt.toc();

		tt.tic();
		for ( int ii=0;ii<10;ii++ ) {C = A*B;}
		exec_times_b ( i ) =tt.toc();

		C = zeros(n,n);
		tt.tic();
		for ( int ii=0;ii<10;ii++ ) { matmul2(n,A._data(),B._data(),C._data());}
		exec_times_c ( i ) =tt.toc();
	}
	cout << exec_times <<endl;
	cout << exec_times_b <<endl;
	cout << exec_times_c <<endl;

	it_file itf ( "blas_test.it" );
	itf << Name ( "exec_times" ) <<exec_times;
	itf << Name ( "exec_times_b" ) <<exec_times_b;
	itf << Name ( "exec_times_c" ) <<exec_times_c;

	return 0;
}
