SolverGPU_AGM Class Reference

#include <solvergpu_agm.h>

Inheritance diagram for SolverGPU_AGM:

Collaboration diagram for SolverGPU_AGM:

Public Member Functions
	SolverGPU_AGM (double tol, unsigned nIt)
virtual	~SolverGPU_AGM ()
virtual void	solve (const SparseMatrix< double > &M, VecDouble &sol, const VecDouble &rhs)
virtual void	solveAgain (const SparseMatrix< double > &M, VecDouble &sol, const VecDouble &rhs)
void	solve (const SparseMatrix< double > &A, VecDouble &sol, const VecDouble &vRHS, double tol, unsigned nIt)
void	gpu_agmSchur (std::vector< int > &Acnt, std::vector< int > &Acol, std::vector< double > &Aele, std::vector< int > &Bcnt, std::vector< int > &Bcol, std::vector< double > &Bele, std::vector< int > &tcnt, std::vector< int > &tcol, std::vector< double > &tele, std::vector< double > &_f, std::vector< double > &_g, std::vector< double > &_u, std::vector< double > &_p)
Protected Attributes
double	m_tol
unsigned	m_nIt
Private Member Functions
void	convertSparseMatrix (const SparseMatrix< double > &M, std::vector< int > &cnt, std::vector< int > &col, std::vector< double > &ele)
void	gpu_agm (std::vector< int > &cnt, std::vector< int > &col, std::vector< double > &ele, std::vector< double > &b, std::vector< double > &x, double tol, unsigned nIt)
void	add_diagonal (std::vector< int > &cnt, std::vector< int > &col, std::vector< double > &ele, const VecDouble &addElem)
void	gpu_agmII (std::vector< int > &cnt, std::vector< int > &col, std::vector< double > &ele_agm, std::vector< double > &ele_cg, std::vector< double > &b, std::vector< double > &x, double tol, unsigned nIt)
void	gpu_agm_compressible (std::vector< int > &cnt, std::vector< int > &col, std::vector< double > &ele, std::vector< double > &b, std::vector< double > &x, double tol, unsigned nIt)

Detailed Description

SolverGPU_AGM

Definition at line 10 of file solvergpu_agm.h.

Constructor & Destructor Documentation

SolverGPU_AGM::SolverGPU_AGM	(	double	tol,
		unsigned	nIt
	)

Definition at line 31 of file solvergpu_agm.cpp.

00032 :m_tol(tol),m_nIt(nIt)
00033 {
00034 
00035 }

virtual SolverGPU_AGM::~SolverGPU_AGM ( ) [inline, virtual]

Definition at line 29 of file solvergpu_agm.h.

00029 {}

Member Function Documentation

void SolverGPU_AGM::add_diagonal	(	std::vector< int > &	cnt,
		std::vector< int > &	col,
		std::vector< double > &	ele,
		const VecDouble &	addElem
	)			`[private]`

Definition at line 374 of file solvergpu_agm.cpp.

00375 {
00376   assert(cnt.size() == addElem.size());
00377   unsigned indexFirst=0;
00378   unsigned indexSent;
00379   unsigned j;
00380   for (unsigned i=0;i<cnt.size();i++)
00381   {
00382     indexSent=indexFirst+cnt[i];
00383     for (j=indexFirst;j<indexSent;j++)
00384     {
00385       if (col[j] == i)
00386       {
00387         ele[j]+=addElem(j);
00388         break;
00389       }
00390     }
00391     assert(j < indexSent);
00392     indexFirst=indexSent;
00393   }
00394 }

void SolverGPU_AGM::convertSparseMatrix	(	const SparseMatrix< double > &	M,
		std::vector< int > &	cnt,
		std::vector< int > &	col,
		std::vector< double > &	ele
	)			`[private]`

Definition at line 513 of file solvergpu_agm.cpp.

00514 {
00515   vElem.resize(A.n_nonzero_elements());
00516   vCols.resize(A.n_nonzero_elements());
00517   vCnt.resize(A.m());
00518   
00519 
00520   const SparsityPattern &pattern = A.get_sparsity_pattern();
00521   for (unsigned i=0;i<vCnt.size();i++)
00522   {
00523     vCnt[i] = pattern.row_length(i); 
00524   }
00525   
00526   std::vector<double>::iterator itElem = vElem.begin();
00527   std::vector<int>::iterator itCols = vCols.begin();
00528   for (SparseMatrix<double>::const_iterator it  = A.begin();it!=A.end();it++,itElem++,itCols++)
00529   {
00530     *itElem = it->value();
00531     *itCols = it->column();
00532   }
00533 
00534   
00535   if (pattern.optimize_diagonal())
00536   {
00537     itElem = vElem.begin();
00538     itCols = vCols.begin();
00539     unsigned nRows = A.m();
00540     //for each row do
00541     for (unsigned i=0;i<nRows;i++)
00542     {
00543       //number of non zero entries in row i excluding the diagonal
00544       int nNonDiagColsPerRow = pattern.row_length(i) - 1; 
00545       assert(nNonDiagColsPerRow >= 0);
00546       std::vector<double>::iterator itElemDiag = itElem++;
00547       std::vector<int>::iterator itColDiag = itCols++;
00548       for (int j=0;j<nNonDiagColsPerRow;j++)
00549       {
00550         if (*itColDiag > *itCols)
00551         {
00552           std::swap(*itColDiag++,*itCols++);
00553           std::swap(*itElemDiag++,*itElem++);
00554         }
00555         else
00556         {
00557           itCols++;
00558           itElem++;
00559         }
00560       }
00561     }
00562   }
00563 
00564 }

void SolverGPU_AGM::gpu_agm	(	std::vector< int > &	cnt,
		std::vector< int > &	col,
		std::vector< double > &	ele,
		std::vector< double > &	_b,
		std::vector< double > &	_x,
		double	tol,
		unsigned	nIt
	)			`[private]`

This method call the solver developed by Manfred. This method should call a C routine from the Manfred`s code, but for while it just print the vectors defining the sparse matrix

Parameters:

	vCount	Array of ints containing the column number for each data in the vElem vector.
	vCols	Array of ints containing the number of nonzero elements for each row.
	vElem	Array of doubles containing the values of the nonzero entries.

Returns:

Definition at line 75 of file solvergpu_agm.cpp.

00076 {
00077   /*
00078   assert(col.size() == ele.size());
00079 
00080   std::vector<double>::iterator  elemIt = ele.begin();
00081   std::vector<int>::iterator colIt = col.begin(); 
00082   
00083   for(unsigned i = 0; i< cnt.size();i++)
00084   {
00085     for(int j=0;j<cnt[i];j++)
00086     {
00087       printf("(%d,%d,) %g\n",i,*colIt++,*elemIt++);
00088     }
00089   }
00090   return;
00091   */
00092 
00093   
00094   
00095   //network::init(argc, argv);
00096   double gta, gtb, gtc, ta, tb;
00097   network::time(gta);
00098   int size, rank;
00099   network::size(size);
00100   network::rank(rank);
00101 
00102   std::vector<int> nod(cnt.size());
00103   for(int i = 0; i < cnt.size(); i++) nod[i] = i;
00104   
00106   network::barrier();
00107   network::time(ta);
00108   communicator<int, double> com(nod);
00109   network::time(tb);
00110   network::barrier();
00111   //cout << "Rank: " << rank << " COM IO Time: " << tb - ta << " sec" << endl;
00112 
00113   linear_operator<int, double> lin(cnt, col, ele);
00114   vector_product<int, double> sca(com);
00115   //vector<double> b(gnumnode);
00116   //vector<double> x(gnumnode);
00117   network::barrier();
00118   network::time(ta);
00119   double epsilon = 0.0, omega = 1;
00120   int max_level = 16;
00121   int min_nodes = 1;
00122   bool gauss_seidel = false;
00123   static amg_solver<int, double> amg(nod, cnt, col, ele, max_level, min_nodes, epsilon, omega, gauss_seidel);
00124   network::time(tb);
00125   network::barrier();
00126   //if(rank == 0) cout << "Rank: " << rank << " AMG Setup Time: " << tb - ta << " sec" << endl;
00127 
00128 
00129   int n = 0;
00130   {
00131     double s = 0.0;
00132     vector<double> t(_b);
00133     lin(_x, t);
00134     sub_scale(t, _b, 1.0);
00135     vector<double> tt(t);
00136     com.accumulate(tt);
00137     scalar_product(tt, t, s);
00138     com.collect(s);
00139     //if(rank == 0) cout << "Rank: " << rank << " CHECK IN: " << sqrt(s) << endl;
00140   }
00141   network::barrier();
00142   network::time(ta);
00143   conjugate_gradient<int, double> cg(lin, amg, sca, tol, nIt, false);
00144   cg(_b, _x);
00145   n = cg.iterations();
00146   network::time(tb);
00147   network::barrier();
00148   cout << "Rank: " << rank << " ITERATIONS: " << n << " SOLVE Time: " << tb - ta << " (" << (tb - ta) / n << ") sec" << endl;
00149   //cout << "Rank: " << rank << " MFLOPS: " << 2.0 * ele.size() * n / ((tb - ta) * 1000000.0) << endl;
00150 
00151   {
00152     double s = 0.0;
00153     vector<double> t(_b);
00154     lin(_x, t);
00155     sub_scale(t, _b, 1.0);
00156     vector<double> tt(t);
00157     com.accumulate(tt);
00158     scalar_product(tt, t, s);
00159     com.collect(s);
00160     //if(rank == 0) cout << "Rank: " << rank << " CHECK OUT: " << sqrt(s) << endl;
00161   }
00162 
00163   network::time(gtc);
00164   if(rank == 0) cout << "Rank: " << rank << " GLOBAL Time: " << gtc - gta << " (" << gtc - gtb << ") sec" << endl;
00165 
00166   //network::finalize();
00167   if (n >= nIt)
00168   {
00169     
00170     throw new Exception("AGM did not converged %d %d",n,nIt); 
00171   }
00172 }

void SolverGPU_AGM::gpu_agm_compressible	(	std::vector< int > &	cnt,
		std::vector< int > &	col,
		std::vector< double > &	ele,
		std::vector< double > &	b,
		std::vector< double > &	x,
		double	tol,
		unsigned	nIt
	)			`[private]`

void SolverGPU_AGM::gpu_agmII	(	std::vector< int > &	cnt,
		std::vector< int > &	col,
		std::vector< double > &	ele_agm,
		std::vector< double > &	ele_cg,
		std::vector< double > &	b,
		std::vector< double > &	x,
		double	tol,
		unsigned	nIt
	)			`[private]`

Definition at line 397 of file solvergpu_agm.cpp.

00398 {
00399 
00400   
00401   
00402   //network::init(argc, argv);
00403   double gta, gtb, gtc, ta, tb;
00404   network::time(gta);
00405   int size, rank;
00406   network::size(size);
00407   network::rank(rank);
00408 
00409   std::vector<int> nod(cnt.size());
00410   for(int i = 0; i < cnt.size(); i++) nod[i] = i;
00411   
00413   network::barrier();
00414   network::time(ta);
00415   communicator<int, double> com(nod);
00416   network::time(tb);
00417   network::barrier();
00418   //cout << "Rank: " << rank << " COM IO Time: " << tb - ta << " sec" << endl;
00419 
00420   linear_operator<int, double> lin(cnt, col, ele_cg);
00421   vector_product<int, double> sca(com);
00422   //vector<double> b(gnumnode);
00423   //vector<double> x(gnumnode);
00424   network::barrier();
00425   network::time(ta);
00426   double epsilon = 0.0, omega = 1;
00427   int max_level = 16;
00428   int min_nodes = 1;
00429   bool gauss_seidel = false;
00430   static amg_solver<int, double> amg(nod, cnt, col, ele_amg, max_level, min_nodes, epsilon, omega, gauss_seidel);
00431   network::time(tb);
00432   network::barrier();
00433   //if(rank == 0) cout << "Rank: " << rank << " AMG Setup Time: " << tb - ta << " sec" << endl;
00434 
00435 
00436   int n = 0;
00437   {
00438     double s = 0.0;
00439     vector<double> t(_b);
00440     lin(_x, t);
00441     sub_scale(t, _b, 1.0);
00442     vector<double> tt(t);
00443     com.accumulate(tt);
00444     scalar_product(tt, t, s);
00445     com.collect(s);
00446     //if(rank == 0) cout << "Rank: " << rank << " CHECK IN: " << sqrt(s) << endl;
00447   }
00448   network::barrier();
00449   network::time(ta);
00450   conjugate_gradient<int, double> cg(lin, amg, sca, tol, nIt, false);
00451   cg(_b, _x);
00452   n = cg.iterations();
00453   network::time(tb);
00454   network::barrier();
00455   cout << "Rank: " << rank << " ITERATIONS: " << n << " SOLVE Time: " << tb - ta << " (" << (tb - ta) / n << ") sec" << endl;
00456   //cout << "Rank: " << rank << " MFLOPS: " << 2.0 * ele.size() * n / ((tb - ta) * 1000000.0) << endl;
00457 
00458   {
00459     double s = 0.0;
00460     vector<double> t(_b);
00461     lin(_x, t);
00462     sub_scale(t, _b, 1.0);
00463     vector<double> tt(t);
00464     com.accumulate(tt);
00465     scalar_product(tt, t, s);
00466     com.collect(s);
00467     //if(rank == 0) cout << "Rank: " << rank << " CHECK OUT: " << sqrt(s) << endl;
00468   }
00469 
00470   network::time(gtc);
00471   if(rank == 0) cout << "Rank: " << rank << " GLOBAL Time: " << gtc - gta << " (" << gtc - gtb << ") sec" << endl;
00472 
00473   //network::finalize();
00474   if (n >= nIt)
00475   {
00476     
00477     throw new Exception("AGM did not converged %d %d",n,nIt); 
00478   }
00479 
00480 }

void SolverGPU_AGM::gpu_agmSchur	(	std::vector< int > &	Acnt,
		std::vector< int > &	Acol,
		std::vector< double > &	Aele,
		std::vector< int > &	Bcnt,
		std::vector< int > &	Bcol,
		std::vector< double > &	Bele,
		std::vector< int > &	tcnt,
		std::vector< int > &	tcol,
		std::vector< double > &	tele,
		std::vector< double > &	_f,
		std::vector< double > &	_g,
		std::vector< double > &	_u,
		std::vector< double > &	_p
	)

Definition at line 175 of file solvergpu_agm.cpp.

00179 {
00180     
00181   //network::init(argc, argv);
00182   double gta, gtb, gtc, ta, tb;
00183   network::time(gta);
00184   int size, rank;
00185   network::size(size);
00186   network::rank(rank);
00187 
00188   std::vector<int> anod(acnt.size());
00189   for(int i = 0; i < acnt.size(); i++) anod[i] = i;
00190   std::vector<int> snod(tcnt.size());
00191   for(int i = 0; i < tcnt.size(); i++) snod[i] = i;
00192   cout << "AN: " << acnt.size() << " BN: " << tcnt.size() << endl;
00193   //  for(int i = 0; i < _f.size(); i++) cout << _f[i] << endl;
00194   //for(int i = 0; i < _g.size(); i++) cout << _g[i] << endl;
00195 
00197   network::barrier();
00198   network::time(ta);
00199   communicator<int, double> acom(anod);
00200   communicator<int, double> scom(snod);
00201   network::time(tb);
00202   network::barrier();
00203   //cout << "Rank: " << rank << " COM IO Time: " << tb - ta << " sec" << endl;
00204 
00205   linear_operator<int, double> lin(acnt, acol, aele);
00206   vector_product<int, double> sca(acom);
00207 
00208   network::barrier();
00209   network::time(ta);
00210   double epsilon = 0.0, omega = 1.0;
00211   int max_level = 16;
00212   int min_nodes = 1;
00213   bool gauss_seidel = false;
00214   amg_solver<int, double> amg(anod, acnt, acol, aele, max_level, min_nodes, epsilon, omega, gauss_seidel);
00215   network::time(tb);
00216   network::barrier();
00217   //if(rank == 0) cout << "Rank: " << rank << " AMG Setup Time: " << tb - ta << " sec" << endl;
00218 
00219   linear_operator<int, double> linp(bcnt, bcol, bele);
00220   linear_operator<int, double> linr(tcnt, tcol, tele);
00221 
00222   conjugate_gradient<int, double> cg(lin, amg, sca, 1.0e-8, 64, false);
00223 
00224   vector<double> _s(acnt.size());
00225   vector<double> _t(acnt.size());
00226   schur_complement<int, double> schur(cg, linp, linr, _s, _t);
00227 
00228   vector<double> _h(tcnt.size());
00229   cg(_f, _u);
00230   linr(_u, _h);
00231   sub_scale(_h, _g, 1.0);
00232 
00233 #if 0
00234 //  vector<double> mdia(tcnt.size(), 1.0);
00235 //  diagonal_solver<int, double> pcg(mdia, scom);
00236 
00237   simple_triple_product<int, double> triple_product;
00238   vector<int> pnod(tcnt.size(), 0);
00239   for(int i = 0; i < tcnt.size(); i++) pnod[i] = i;
00240   vector<int> pcnt;
00241   vector<int> pcol;
00242   vector<double> pele;
00243   triple_product(acnt, acol, aele, bcnt, bcol, bele, pcnt, pcol, pele);
00244   linear_operator<int, double> plin(pcnt, pcol, pele);
00245 
00246   network::barrier();
00247   network::time(ta);
00248   epsilon = 0.0, omega = 1.0;
00249   max_level = 16;
00250   min_nodes = 1;
00251   gauss_seidel = true;
00252   amg_solver<int, double> pcg(pnod, pcnt, pcol, pele, max_level, min_nodes, epsilon, omega, gauss_seidel);
00253 //  vector_product<int, double> psca(scom);
00254 //  conjugate_gradient<int, double> pcg(plin, pamg, psca, 1.0e-8, 64, false);
00255 /*
00256   vector<double> pdia(pcnt.size(), 0.0);
00257   for(int k = 0, i = 0; i < pcnt.size(); i++)
00258   {
00259     for(int j = 0; j < pcnt[i]; j++, k++)
00260     {
00261       if(i == pcol[k]) pdia[i] = pele[k];
00262     }
00263   }
00264   vector<double> ptmp(pcnt.size(), 0.0);
00265   jacobi<int, double> pcg(pcnt, pcol, pele, pdia, ptmp, 0.66, scom); 
00266 */
00267   network::time(tb);
00268   network::barrier();
00269   if(rank == 0) cout << "Rank: " << rank << " PAMG Setup Time: " << tb - ta << " sec" << endl;
00270 
00271 #else
00272   vector<double> mdia(acnt.size(), 0.0);
00273   for(int k = 0, i = 0; i < acnt.size(); i++)
00274   {
00275     for(int j = 0; j < acnt[i]; j++, k++)
00276     {
00277       if(i == acol[k]) mdia[i] = aele[k];
00278     }
00279   }
00280   acom.accumulate(mdia);
00281   for(int i = 0; i < acnt.size(); i++) mdia[i] = 1.0 / mdia[i];
00282   diagonal_solver<int, double> mass(mdia, acom);
00283   schur_complement<int, double> prec(mass, linp, linr, _s, _t);
00284   vector<double> pdia(tcnt.size(), 0.0);
00285   for(int k = 0, i = 0; i < tcnt.size(); i++)
00286   {
00287     double sum = 0.0;
00288     for(int j = 0; j < tcnt[i]; j++, k++)
00289     {
00290       sum += tele[k] * tele[k] * mdia[tcol[k]];
00291     }
00292     pdia[i] = 1.0 / sum;
00293     //pdia[i] = sum;
00294   }
00295   diagonal_solver<int, double> pinv(pdia, acom);
00296   conjugate_gradient<int, double> pcg(prec, pinv, sca, 1.0e-6, 32, false);
00297 #endif
00298   
00299   int n = 0;
00300   double s0, s1;
00301   {
00302     double s = 0.0;
00303     vector<double> t(_h);
00304     schur(_p, t);
00305     sub_scale(t, _h, 1.0);
00306     vector<double> tt(t);
00307     scom.accumulate(tt);
00308     scalar_product(tt, t, s);
00309     scom.collect(s);
00310     //cout << "Rank: " << rank << " CHECK IN: " << sqrt(s) << endl;
00311     s0 = s;
00312   }
00313   network::time(ta);
00314   vector_product<int, double> scal(scom);
00315   conjugate_gradient<int, double> scg(schur, pcg, scal, 1.0e-6, 256, true);
00316   scg(_h, _p);
00317   n = scg.iterations();
00318   network::time(tb);
00319   cout << "Rank: " << rank << " ITERATIONS: " << n << " SOLVE Time: " << tb - ta << " (" << (tb - ta) / n << ") sec" << endl;
00320   cout << "Rank: " << rank << " MFLOPS: " << 2.0 * (aele.size() + 2 * bele.size()) * n / ((tb - ta) * 1000000.0) << endl;
00321   {
00322     double s = 0.0;
00323     vector<double> t(_h);
00324     schur(_p, t);
00325     sub_scale(t, _h, 1.0);
00326     vector<double> tt(t);
00327     scom.accumulate(tt);
00328     scalar_product(tt, t, s);
00329     scom.collect(s);
00330     //cout << "Rank: " << rank << " CHECK OUT: " << sqrt(s) << endl;
00331     s1 = s;
00332   }
00333   //cout << "Rank: " << rank << " CHECK REL: " << sqrt(s1/s0) << endl;
00334 
00335   linp(_p, _u);
00336   _t.assign(_f.begin(), _f.end());
00337   sub_scale(_t, _u, 1.0);
00338   cg(_t, _u);
00339 
00340   network::time(gtc);
00341   //cout << "Rank: " << rank << " GLOBAL Time: " << gtc - gta << " (" << gtc - gtb << ") sec" << endl;
00342   //network::finalize();
00343 }

void SolverGPU_AGM::solve	(	const SparseMatrix< double > &	A,
		VecDouble &	sol,
		const VecDouble &	vRHS,
		double	tol,
		unsigned	nIt
	)

Make all the necessary conversions beetwen the data of the simulator and the data for the AGM solver

Parameters:

	A	The sparse matrix to solve
	vRHS	The right hand side vector

Definition at line 42 of file solvergpu_agm.cpp.

00043 {
00044   
00045   std::vector<double> vElem;
00046   std::vector<int> vCols;
00047   std::vector<int> vCnt;
00048   
00049   convertSparseMatrix(A,vCnt,vCols,vElem); 
00050 
00051 
00052   std::vector<double> b(A.m());
00053   std::vector<double> x(A.m());
00054 
00055   for (unsigned i=0;i<b.size();i++)
00056     b[i]=vRHS(i);
00057   gpu_agm(vCnt,vCols,vElem,b,x,tol,nIt);
00058 
00059   for (unsigned i=0;i<x.size();i++)
00060     sol(i) = x[i];
00061 
00062   
00063 
00064 }

void SolverGPU_AGM::solve	(	const SparseMatrix< double > &	M,
		VecDouble &	sol,
		const VecDouble &	rhs
	)			`[virtual]`

Implements LinearSolver.

Definition at line 357 of file solvergpu_agm.cpp.

00358 {
00359   try {
00360   solve(M,sol,rhs,m_tol,m_nIt);
00361   }
00362   catch (Exception *e)
00363   {
00364     std::cout << "Printing Matrix\n";
00365     std::cout << "IsSymetric: " << NumericMethods::IsSymetric(M,1e-10) << std::endl;
00366     //NumericMethods::printSparseMatrixOctave(cout,M);
00367 
00368     throw e;
00369   }
00370 
00371 }

void SolverGPU_AGM::solveAgain	(	const SparseMatrix< double > &	M,
		VecDouble &	sol,
		const VecDouble &	rhs
	)			`[virtual]`

Implements LinearSolver.

Definition at line 348 of file solvergpu_agm.cpp.

00349 {
00350   solve(M,sol,rhs);
00351 }

Member Data Documentation

unsigned SolverGPU_AGM::m_nIt [protected]

Definition at line 26 of file solvergpu_agm.h.

double SolverGPU_AGM::m_tol [protected]

Definition at line 25 of file solvergpu_agm.h.

The documentation for this class was generated from the following files:

SolverGPU_AGM Class Reference

Public Member Functions

Protected Attributes

Private Member Functions

Detailed Description

Constructor & Destructor Documentation

Member Function Documentation

Member Data Documentation