CO2INJECTION: solvergpu_agm.cpp Source File

00001 #include "solvergpu_agm.h"
00002 #include <vector>
00003 #define NOMPI
00004 #include <string.h>
00005 #include <toolbox.h>
00006 #include "exception.h"
00007 #include "numericmethods.h"
00008 
00009 template<class T, class S> class schur_complement : public generic_operator<T, S>
00010 {
00011 private:
00012         inverse_operator<T, S> &_A;
00013         const generic_operator<T, S> &_P;
00014         const generic_operator<T, S> &_R;
00015         vector<S> &_s;
00016         vector<S> &_t;
00017 public:
00018         schur_complement(inverse_operator<T, S> &_A, const generic_operator<T, S> &_P, const generic_operator<T, S> &_R, vector<S> &_s, vector<S> &_t)
00019                 : _A(_A), _P(_P), _R(_R), _s(_s), _t(_t)
00020         {
00021         }
00022         void operator()(const vector<S> &_u, vector<S> &_v) const
00023         {
00024                 _P(_u, _s);
00025                 _A(_s, _t);
00026                 _R(_t, _v);
00027         }
00028 };
00029 
00030 
00031  SolverGPU_AGM::SolverGPU_AGM(double tol,unsigned nIt) 
00032 :m_tol(tol),m_nIt(nIt)
00033 {
00034 
00035 }
00036 
00042 void SolverGPU_AGM::solve(const SparseMatrix<double> &A,VecDouble &sol,const VecDouble &vRHS,double tol,unsigned nIt) 
00043 {
00044   
00045   std::vector<double> vElem;
00046   std::vector<int> vCols;
00047   std::vector<int> vCnt;
00048   
00049   convertSparseMatrix(A,vCnt,vCols,vElem); 
00050 
00051 
00052   std::vector<double> b(A.m());
00053   std::vector<double> x(A.m());
00054 
00055   for (unsigned i=0;i<b.size();i++)
00056     b[i]=vRHS(i);
00057   gpu_agm(vCnt,vCols,vElem,b,x,tol,nIt);
00058 
00059   for (unsigned i=0;i<x.size();i++)
00060     sol(i) = x[i];
00061 
00062   
00063 
00064 }
00065 
00066 
00067 
00075 void SolverGPU_AGM::gpu_agm(std::vector<int> &cnt,std::vector<int> &col, std::vector<double> &ele, std::vector<double> &_b, std::vector<double> &_x,double tol,unsigned nIt) 
00076 {
00077   /*
00078   assert(col.size() == ele.size());
00079 
00080   std::vector<double>::iterator  elemIt = ele.begin();
00081   std::vector<int>::iterator colIt = col.begin(); 
00082   
00083   for(unsigned i = 0; i< cnt.size();i++)
00084   {
00085     for(int j=0;j<cnt[i];j++)
00086     {
00087       printf("(%d,%d,) %g\n",i,*colIt++,*elemIt++);
00088     }
00089   }
00090   return;
00091   */
00092 
00093   
00094   
00095   //network::init(argc, argv);
00096   double gta, gtb, gtc, ta, tb;
00097   network::time(gta);
00098   int size, rank;
00099   network::size(size);
00100   network::rank(rank);
00101 
00102   std::vector<int> nod(cnt.size());
00103   for(int i = 0; i < cnt.size(); i++) nod[i] = i;
00104   
00106   network::barrier();
00107   network::time(ta);
00108   communicator<int, double> com(nod);
00109   network::time(tb);
00110   network::barrier();
00111   //cout << "Rank: " << rank << " COM IO Time: " << tb - ta << " sec" << endl;
00112 
00113   linear_operator<int, double> lin(cnt, col, ele);
00114   vector_product<int, double> sca(com);
00115   //vector<double> b(gnumnode);
00116   //vector<double> x(gnumnode);
00117   network::barrier();
00118   network::time(ta);
00119   double epsilon = 0.0, omega = 1;
00120   int max_level = 16;
00121   int min_nodes = 1;
00122   bool gauss_seidel = false;
00123   static amg_solver<int, double> amg(nod, cnt, col, ele, max_level, min_nodes, epsilon, omega, gauss_seidel);
00124   network::time(tb);
00125   network::barrier();
00126   //if(rank == 0) cout << "Rank: " << rank << " AMG Setup Time: " << tb - ta << " sec" << endl;
00127 
00128 
00129   int n = 0;
00130   {
00131     double s = 0.0;
00132     vector<double> t(_b);
00133     lin(_x, t);
00134     sub_scale(t, _b, 1.0);
00135     vector<double> tt(t);
00136     com.accumulate(tt);
00137     scalar_product(tt, t, s);
00138     com.collect(s);
00139     //if(rank == 0) cout << "Rank: " << rank << " CHECK IN: " << sqrt(s) << endl;
00140   }
00141   network::barrier();
00142   network::time(ta);
00143   conjugate_gradient<int, double> cg(lin, amg, sca, tol, nIt, false);
00144   cg(_b, _x);
00145   n = cg.iterations();
00146   network::time(tb);
00147   network::barrier();
00148   cout << "Rank: " << rank << " ITERATIONS: " << n << " SOLVE Time: " << tb - ta << " (" << (tb - ta) / n << ") sec" << endl;
00149   //cout << "Rank: " << rank << " MFLOPS: " << 2.0 * ele.size() * n / ((tb - ta) * 1000000.0) << endl;
00150 
00151   {
00152     double s = 0.0;
00153     vector<double> t(_b);
00154     lin(_x, t);
00155     sub_scale(t, _b, 1.0);
00156     vector<double> tt(t);
00157     com.accumulate(tt);
00158     scalar_product(tt, t, s);
00159     com.collect(s);
00160     //if(rank == 0) cout << "Rank: " << rank << " CHECK OUT: " << sqrt(s) << endl;
00161   }
00162 
00163   network::time(gtc);
00164   if(rank == 0) cout << "Rank: " << rank << " GLOBAL Time: " << gtc - gta << " (" << gtc - gtb << ") sec" << endl;
00165 
00166   //network::finalize();
00167   if (n >= nIt)
00168   {
00169     
00170     throw new Exception("AGM did not converged %d %d",n,nIt); 
00171   }
00172 }
00173 
00174 
00175 void SolverGPU_AGM::gpu_agmSchur(std::vector<int> &acnt,std::vector<int> &acol, std::vector<double> &aele,
00176                                  std::vector<int> &bcnt,std::vector<int> &bcol, std::vector<double> &bele,
00177                                  std::vector<int> &tcnt,std::vector<int> &tcol, std::vector<double> &tele,
00178                                  std::vector<double> &_f, std::vector<double> &_g,std::vector<double> &_u, std::vector<double> &_p) 
00179 {
00180     
00181   //network::init(argc, argv);
00182   double gta, gtb, gtc, ta, tb;
00183   network::time(gta);
00184   int size, rank;
00185   network::size(size);
00186   network::rank(rank);
00187 
00188   std::vector<int> anod(acnt.size());
00189   for(int i = 0; i < acnt.size(); i++) anod[i] = i;
00190   std::vector<int> snod(tcnt.size());
00191   for(int i = 0; i < tcnt.size(); i++) snod[i] = i;
00192   cout << "AN: " << acnt.size() << " BN: " << tcnt.size() << endl;
00193   //  for(int i = 0; i < _f.size(); i++) cout << _f[i] << endl;
00194   //for(int i = 0; i < _g.size(); i++) cout << _g[i] << endl;
00195 
00197   network::barrier();
00198   network::time(ta);
00199   communicator<int, double> acom(anod);
00200   communicator<int, double> scom(snod);
00201   network::time(tb);
00202   network::barrier();
00203   //cout << "Rank: " << rank << " COM IO Time: " << tb - ta << " sec" << endl;
00204 
00205   linear_operator<int, double> lin(acnt, acol, aele);
00206   vector_product<int, double> sca(acom);
00207 
00208   network::barrier();
00209   network::time(ta);
00210   double epsilon = 0.0, omega = 1.0;
00211   int max_level = 16;
00212   int min_nodes = 1;
00213   bool gauss_seidel = false;
00214   amg_solver<int, double> amg(anod, acnt, acol, aele, max_level, min_nodes, epsilon, omega, gauss_seidel);
00215   network::time(tb);
00216   network::barrier();
00217   //if(rank == 0) cout << "Rank: " << rank << " AMG Setup Time: " << tb - ta << " sec" << endl;
00218 
00219   linear_operator<int, double> linp(bcnt, bcol, bele);
00220   linear_operator<int, double> linr(tcnt, tcol, tele);
00221 
00222   conjugate_gradient<int, double> cg(lin, amg, sca, 1.0e-8, 64, false);
00223 
00224   vector<double> _s(acnt.size());
00225   vector<double> _t(acnt.size());
00226   schur_complement<int, double> schur(cg, linp, linr, _s, _t);
00227 
00228   vector<double> _h(tcnt.size());
00229   cg(_f, _u);
00230   linr(_u, _h);
00231   sub_scale(_h, _g, 1.0);
00232 
00233 #if 0
00234 //  vector<double> mdia(tcnt.size(), 1.0);
00235 //  diagonal_solver<int, double> pcg(mdia, scom);
00236 
00237   simple_triple_product<int, double> triple_product;
00238   vector<int> pnod(tcnt.size(), 0);
00239   for(int i = 0; i < tcnt.size(); i++) pnod[i] = i;
00240   vector<int> pcnt;
00241   vector<int> pcol;
00242   vector<double> pele;
00243   triple_product(acnt, acol, aele, bcnt, bcol, bele, pcnt, pcol, pele);
00244   linear_operator<int, double> plin(pcnt, pcol, pele);
00245 
00246   network::barrier();
00247   network::time(ta);
00248   epsilon = 0.0, omega = 1.0;
00249   max_level = 16;
00250   min_nodes = 1;
00251   gauss_seidel = true;
00252   amg_solver<int, double> pcg(pnod, pcnt, pcol, pele, max_level, min_nodes, epsilon, omega, gauss_seidel);
00253 //  vector_product<int, double> psca(scom);
00254 //  conjugate_gradient<int, double> pcg(plin, pamg, psca, 1.0e-8, 64, false);
00255 /*
00256   vector<double> pdia(pcnt.size(), 0.0);
00257   for(int k = 0, i = 0; i < pcnt.size(); i++)
00258   {
00259     for(int j = 0; j < pcnt[i]; j++, k++)
00260     {
00261       if(i == pcol[k]) pdia[i] = pele[k];
00262     }
00263   }
00264   vector<double> ptmp(pcnt.size(), 0.0);
00265   jacobi<int, double> pcg(pcnt, pcol, pele, pdia, ptmp, 0.66, scom); 
00266 */
00267   network::time(tb);
00268   network::barrier();
00269   if(rank == 0) cout << "Rank: " << rank << " PAMG Setup Time: " << tb - ta << " sec" << endl;
00270 
00271 #else
00272   vector<double> mdia(acnt.size(), 0.0);
00273   for(int k = 0, i = 0; i < acnt.size(); i++)
00274   {
00275     for(int j = 0; j < acnt[i]; j++, k++)
00276     {
00277       if(i == acol[k]) mdia[i] = aele[k];
00278     }
00279   }
00280   acom.accumulate(mdia);
00281   for(int i = 0; i < acnt.size(); i++) mdia[i] = 1.0 / mdia[i];
00282   diagonal_solver<int, double> mass(mdia, acom);
00283   schur_complement<int, double> prec(mass, linp, linr, _s, _t);
00284   vector<double> pdia(tcnt.size(), 0.0);
00285   for(int k = 0, i = 0; i < tcnt.size(); i++)
00286   {
00287     double sum = 0.0;
00288     for(int j = 0; j < tcnt[i]; j++, k++)
00289     {
00290       sum += tele[k] * tele[k] * mdia[tcol[k]];
00291     }
00292     pdia[i] = 1.0 / sum;
00293     //pdia[i] = sum;
00294   }
00295   diagonal_solver<int, double> pinv(pdia, acom);
00296   conjugate_gradient<int, double> pcg(prec, pinv, sca, 1.0e-6, 32, false);
00297 #endif
00298   
00299   int n = 0;
00300   double s0, s1;
00301   {
00302     double s = 0.0;
00303     vector<double> t(_h);
00304     schur(_p, t);
00305     sub_scale(t, _h, 1.0);
00306     vector<double> tt(t);
00307     scom.accumulate(tt);
00308     scalar_product(tt, t, s);
00309     scom.collect(s);
00310     //cout << "Rank: " << rank << " CHECK IN: " << sqrt(s) << endl;
00311     s0 = s;
00312   }
00313   network::time(ta);
00314   vector_product<int, double> scal(scom);
00315   conjugate_gradient<int, double> scg(schur, pcg, scal, 1.0e-6, 256, true);
00316   scg(_h, _p);
00317   n = scg.iterations();
00318   network::time(tb);
00319   cout << "Rank: " << rank << " ITERATIONS: " << n << " SOLVE Time: " << tb - ta << " (" << (tb - ta) / n << ") sec" << endl;
00320   cout << "Rank: " << rank << " MFLOPS: " << 2.0 * (aele.size() + 2 * bele.size()) * n / ((tb - ta) * 1000000.0) << endl;
00321   {
00322     double s = 0.0;
00323     vector<double> t(_h);
00324     schur(_p, t);
00325     sub_scale(t, _h, 1.0);
00326     vector<double> tt(t);
00327     scom.accumulate(tt);
00328     scalar_product(tt, t, s);
00329     scom.collect(s);
00330     //cout << "Rank: " << rank << " CHECK OUT: " << sqrt(s) << endl;
00331     s1 = s;
00332   }
00333   //cout << "Rank: " << rank << " CHECK REL: " << sqrt(s1/s0) << endl;
00334 
00335   linp(_p, _u);
00336   _t.assign(_f.begin(), _f.end());
00337   sub_scale(_t, _u, 1.0);
00338   cg(_t, _u);
00339 
00340   network::time(gtc);
00341   //cout << "Rank: " << rank << " GLOBAL Time: " << gtc - gta << " (" << gtc - gtb << ") sec" << endl;
00342   //network::finalize();
00343 }
00344 
00345 
00346 
00347 
00348 void SolverGPU_AGM::solveAgain(const SparseMatrix<double> &M,VecDouble &sol,const VecDouble &rhs) 
00349 {
00350   solve(M,sol,rhs);
00351 }
00352 
00353 
00354 
00355 
00356 
00357  void SolverGPU_AGM::solve(const SparseMatrix<double> &M,VecDouble &sol,const VecDouble &rhs) 
00358 {
00359   try {
00360   solve(M,sol,rhs,m_tol,m_nIt);
00361   }
00362   catch (Exception *e)
00363   {
00364     std::cout << "Printing Matrix\n";
00365     std::cout << "IsSymetric: " << NumericMethods::IsSymetric(M,1e-10) << std::endl;
00366     //NumericMethods::printSparseMatrixOctave(cout,M);
00367 
00368     throw e;
00369   }
00370 
00371 }
00372 
00373 
00374 void SolverGPU_AGM::add_diagonal(std::vector<int> &cnt,std::vector<int> &col, std::vector<double> &ele,const VecDouble &addElem) 
00375 {
00376   assert(cnt.size() == addElem.size());
00377   unsigned indexFirst=0;
00378   unsigned indexSent;
00379   unsigned j;
00380   for (unsigned i=0;i<cnt.size();i++)
00381   {
00382     indexSent=indexFirst+cnt[i];
00383     for (j=indexFirst;j<indexSent;j++)
00384     {
00385       if (col[j] == i)
00386       {
00387         ele[j]+=addElem(j);
00388         break;
00389       }
00390     }
00391     assert(j < indexSent);
00392     indexFirst=indexSent;
00393   }
00394 }
00395 
00396 
00397 void SolverGPU_AGM::gpu_agmII(std::vector<int> &cnt,std::vector<int> &col, std::vector<double> &ele_amg,std::vector<double> &ele_cg, std::vector<double> &_b, std::vector<double> &_x,double tol,unsigned nIt) 
00398 {
00399 
00400   
00401   
00402   //network::init(argc, argv);
00403   double gta, gtb, gtc, ta, tb;
00404   network::time(gta);
00405   int size, rank;
00406   network::size(size);
00407   network::rank(rank);
00408 
00409   std::vector<int> nod(cnt.size());
00410   for(int i = 0; i < cnt.size(); i++) nod[i] = i;
00411   
00413   network::barrier();
00414   network::time(ta);
00415   communicator<int, double> com(nod);
00416   network::time(tb);
00417   network::barrier();
00418   //cout << "Rank: " << rank << " COM IO Time: " << tb - ta << " sec" << endl;
00419 
00420   linear_operator<int, double> lin(cnt, col, ele_cg);
00421   vector_product<int, double> sca(com);
00422   //vector<double> b(gnumnode);
00423   //vector<double> x(gnumnode);
00424   network::barrier();
00425   network::time(ta);
00426   double epsilon = 0.0, omega = 1;
00427   int max_level = 16;
00428   int min_nodes = 1;
00429   bool gauss_seidel = false;
00430   static amg_solver<int, double> amg(nod, cnt, col, ele_amg, max_level, min_nodes, epsilon, omega, gauss_seidel);
00431   network::time(tb);
00432   network::barrier();
00433   //if(rank == 0) cout << "Rank: " << rank << " AMG Setup Time: " << tb - ta << " sec" << endl;
00434 
00435 
00436   int n = 0;
00437   {
00438     double s = 0.0;
00439     vector<double> t(_b);
00440     lin(_x, t);
00441     sub_scale(t, _b, 1.0);
00442     vector<double> tt(t);
00443     com.accumulate(tt);
00444     scalar_product(tt, t, s);
00445     com.collect(s);
00446     //if(rank == 0) cout << "Rank: " << rank << " CHECK IN: " << sqrt(s) << endl;
00447   }
00448   network::barrier();
00449   network::time(ta);
00450   conjugate_gradient<int, double> cg(lin, amg, sca, tol, nIt, false);
00451   cg(_b, _x);
00452   n = cg.iterations();
00453   network::time(tb);
00454   network::barrier();
00455   cout << "Rank: " << rank << " ITERATIONS: " << n << " SOLVE Time: " << tb - ta << " (" << (tb - ta) / n << ") sec" << endl;
00456   //cout << "Rank: " << rank << " MFLOPS: " << 2.0 * ele.size() * n / ((tb - ta) * 1000000.0) << endl;
00457 
00458   {
00459     double s = 0.0;
00460     vector<double> t(_b);
00461     lin(_x, t);
00462     sub_scale(t, _b, 1.0);
00463     vector<double> tt(t);
00464     com.accumulate(tt);
00465     scalar_product(tt, t, s);
00466     com.collect(s);
00467     //if(rank == 0) cout << "Rank: " << rank << " CHECK OUT: " << sqrt(s) << endl;
00468   }
00469 
00470   network::time(gtc);
00471   if(rank == 0) cout << "Rank: " << rank << " GLOBAL Time: " << gtc - gta << " (" << gtc - gtb << ") sec" << endl;
00472 
00473   //network::finalize();
00474   if (n >= nIt)
00475   {
00476     
00477     throw new Exception("AGM did not converged %d %d",n,nIt); 
00478   }
00479 
00480 }
00481 
00482 
00483 
00484 /*
00485 void SolverGPU_AGM::solveII(const SparseMatrix<double> &A,VecDouble &sol,const VecDouble &vRHS,const VecDouble &addDiag) 
00486 {
00487   std::vector<double> vElem;
00488   std::vector<int> vCols;
00489   std::vector<int> vCnt;
00490   
00491   convertSparseMatrix(A,vCnt,vCols,vElem); 
00492 
00493 
00494   std::vector<double> vElemAGM=vElem;
00495 
00496   add_diagonal(vCnt,vCols, vElemAGM,addDiag);
00497 
00498   
00499   std::vector<double> b(A.m());
00500   std::vector<double> x(A.m());
00501 
00502   for (unsigned i=0;i<b.size();i++)
00503     b[i]=vRHS(i);
00504   gpu_agmII(vCnt,vCols,vElemAGM,vElem,b,x,m_tol,m_nIt);
00505 
00506   for (unsigned i=0;i<x.size();i++)
00507     sol(i) = x[i];
00508 
00509 
00510 
00511 }
00512 */
00513 void SolverGPU_AGM::convertSparseMatrix(const SparseMatrix<double> &A,std::vector<int> &vCnt,std::vector<int> &vCols, std::vector<double> &vElem) 
00514 {
00515   vElem.resize(A.n_nonzero_elements());
00516   vCols.resize(A.n_nonzero_elements());
00517   vCnt.resize(A.m());
00518   
00519 
00520   const SparsityPattern &pattern = A.get_sparsity_pattern();
00521   for (unsigned i=0;i<vCnt.size();i++)
00522   {
00523     vCnt[i] = pattern.row_length(i); 
00524   }
00525   
00526   std::vector<double>::iterator itElem = vElem.begin();
00527   std::vector<int>::iterator itCols = vCols.begin();
00528   for (SparseMatrix<double>::const_iterator it  = A.begin();it!=A.end();it++,itElem++,itCols++)
00529   {
00530     *itElem = it->value();
00531     *itCols = it->column();
00532   }
00533 
00534   
00535   if (pattern.optimize_diagonal())
00536   {
00537     itElem = vElem.begin();
00538     itCols = vCols.begin();
00539     unsigned nRows = A.m();
00540     //for each row do
00541     for (unsigned i=0;i<nRows;i++)
00542     {
00543       //number of non zero entries in row i excluding the diagonal
00544       int nNonDiagColsPerRow = pattern.row_length(i) - 1; 
00545       assert(nNonDiagColsPerRow >= 0);
00546       std::vector<double>::iterator itElemDiag = itElem++;
00547       std::vector<int>::iterator itColDiag = itCols++;
00548       for (int j=0;j<nNonDiagColsPerRow;j++)
00549       {
00550         if (*itColDiag > *itCols)
00551         {
00552           std::swap(*itColDiag++,*itCols++);
00553           std::swap(*itElemDiag++,*itElem++);
00554         }
00555         else
00556         {
00557           itCols++;
00558           itElem++;
00559         }
00560       }
00561     }
00562   }
00563 
00564 }