#include <solvergpu_agm.h>
Public Member Functions | |
SolverGPU_AGM (double tol, unsigned nIt) | |
virtual | ~SolverGPU_AGM () |
virtual void | solve (const SparseMatrix< double > &M, VecDouble &sol, const VecDouble &rhs) |
virtual void | solveAgain (const SparseMatrix< double > &M, VecDouble &sol, const VecDouble &rhs) |
void | solve (const SparseMatrix< double > &A, VecDouble &sol, const VecDouble &vRHS, double tol, unsigned nIt) |
void | gpu_agmSchur (std::vector< int > &Acnt, std::vector< int > &Acol, std::vector< double > &Aele, std::vector< int > &Bcnt, std::vector< int > &Bcol, std::vector< double > &Bele, std::vector< int > &tcnt, std::vector< int > &tcol, std::vector< double > &tele, std::vector< double > &_f, std::vector< double > &_g, std::vector< double > &_u, std::vector< double > &_p) |
Protected Attributes | |
double | m_tol |
unsigned | m_nIt |
Private Member Functions | |
void | convertSparseMatrix (const SparseMatrix< double > &M, std::vector< int > &cnt, std::vector< int > &col, std::vector< double > &ele) |
void | gpu_agm (std::vector< int > &cnt, std::vector< int > &col, std::vector< double > &ele, std::vector< double > &b, std::vector< double > &x, double tol, unsigned nIt) |
void | add_diagonal (std::vector< int > &cnt, std::vector< int > &col, std::vector< double > &ele, const VecDouble &addElem) |
void | gpu_agmII (std::vector< int > &cnt, std::vector< int > &col, std::vector< double > &ele_agm, std::vector< double > &ele_cg, std::vector< double > &b, std::vector< double > &x, double tol, unsigned nIt) |
void | gpu_agm_compressible (std::vector< int > &cnt, std::vector< int > &col, std::vector< double > &ele, std::vector< double > &b, std::vector< double > &x, double tol, unsigned nIt) |
Definition at line 10 of file solvergpu_agm.h.
SolverGPU_AGM::SolverGPU_AGM | ( | double | tol, | |
unsigned | nIt | |||
) |
Definition at line 31 of file solvergpu_agm.cpp.
virtual SolverGPU_AGM::~SolverGPU_AGM | ( | ) | [inline, virtual] |
Definition at line 29 of file solvergpu_agm.h.
void SolverGPU_AGM::add_diagonal | ( | std::vector< int > & | cnt, | |
std::vector< int > & | col, | |||
std::vector< double > & | ele, | |||
const VecDouble & | addElem | |||
) | [private] |
Definition at line 374 of file solvergpu_agm.cpp.
00375 { 00376 assert(cnt.size() == addElem.size()); 00377 unsigned indexFirst=0; 00378 unsigned indexSent; 00379 unsigned j; 00380 for (unsigned i=0;i<cnt.size();i++) 00381 { 00382 indexSent=indexFirst+cnt[i]; 00383 for (j=indexFirst;j<indexSent;j++) 00384 { 00385 if (col[j] == i) 00386 { 00387 ele[j]+=addElem(j); 00388 break; 00389 } 00390 } 00391 assert(j < indexSent); 00392 indexFirst=indexSent; 00393 } 00394 }
void SolverGPU_AGM::convertSparseMatrix | ( | const SparseMatrix< double > & | M, | |
std::vector< int > & | cnt, | |||
std::vector< int > & | col, | |||
std::vector< double > & | ele | |||
) | [private] |
Definition at line 513 of file solvergpu_agm.cpp.
00514 { 00515 vElem.resize(A.n_nonzero_elements()); 00516 vCols.resize(A.n_nonzero_elements()); 00517 vCnt.resize(A.m()); 00518 00519 00520 const SparsityPattern &pattern = A.get_sparsity_pattern(); 00521 for (unsigned i=0;i<vCnt.size();i++) 00522 { 00523 vCnt[i] = pattern.row_length(i); 00524 } 00525 00526 std::vector<double>::iterator itElem = vElem.begin(); 00527 std::vector<int>::iterator itCols = vCols.begin(); 00528 for (SparseMatrix<double>::const_iterator it = A.begin();it!=A.end();it++,itElem++,itCols++) 00529 { 00530 *itElem = it->value(); 00531 *itCols = it->column(); 00532 } 00533 00534 00535 if (pattern.optimize_diagonal()) 00536 { 00537 itElem = vElem.begin(); 00538 itCols = vCols.begin(); 00539 unsigned nRows = A.m(); 00540 //for each row do 00541 for (unsigned i=0;i<nRows;i++) 00542 { 00543 //number of non zero entries in row i excluding the diagonal 00544 int nNonDiagColsPerRow = pattern.row_length(i) - 1; 00545 assert(nNonDiagColsPerRow >= 0); 00546 std::vector<double>::iterator itElemDiag = itElem++; 00547 std::vector<int>::iterator itColDiag = itCols++; 00548 for (int j=0;j<nNonDiagColsPerRow;j++) 00549 { 00550 if (*itColDiag > *itCols) 00551 { 00552 std::swap(*itColDiag++,*itCols++); 00553 std::swap(*itElemDiag++,*itElem++); 00554 } 00555 else 00556 { 00557 itCols++; 00558 itElem++; 00559 } 00560 } 00561 } 00562 } 00563 00564 }
void SolverGPU_AGM::gpu_agm | ( | std::vector< int > & | cnt, | |
std::vector< int > & | col, | |||
std::vector< double > & | ele, | |||
std::vector< double > & | _b, | |||
std::vector< double > & | _x, | |||
double | tol, | |||
unsigned | nIt | |||
) | [private] |
This method call the solver developed by Manfred. This method should call a C routine from the Manfred`s code, but for while it just print the vectors defining the sparse matrix
vCount | Array of ints containing the column number for each data in the vElem vector. | |
vCols | Array of ints containing the number of nonzero elements for each row. | |
vElem | Array of doubles containing the values of the nonzero entries. |
Definition at line 75 of file solvergpu_agm.cpp.
00076 { 00077 /* 00078 assert(col.size() == ele.size()); 00079 00080 std::vector<double>::iterator elemIt = ele.begin(); 00081 std::vector<int>::iterator colIt = col.begin(); 00082 00083 for(unsigned i = 0; i< cnt.size();i++) 00084 { 00085 for(int j=0;j<cnt[i];j++) 00086 { 00087 printf("(%d,%d,) %g\n",i,*colIt++,*elemIt++); 00088 } 00089 } 00090 return; 00091 */ 00092 00093 00094 00095 //network::init(argc, argv); 00096 double gta, gtb, gtc, ta, tb; 00097 network::time(gta); 00098 int size, rank; 00099 network::size(size); 00100 network::rank(rank); 00101 00102 std::vector<int> nod(cnt.size()); 00103 for(int i = 0; i < cnt.size(); i++) nod[i] = i; 00104 00106 network::barrier(); 00107 network::time(ta); 00108 communicator<int, double> com(nod); 00109 network::time(tb); 00110 network::barrier(); 00111 //cout << "Rank: " << rank << " COM IO Time: " << tb - ta << " sec" << endl; 00112 00113 linear_operator<int, double> lin(cnt, col, ele); 00114 vector_product<int, double> sca(com); 00115 //vector<double> b(gnumnode); 00116 //vector<double> x(gnumnode); 00117 network::barrier(); 00118 network::time(ta); 00119 double epsilon = 0.0, omega = 1; 00120 int max_level = 16; 00121 int min_nodes = 1; 00122 bool gauss_seidel = false; 00123 static amg_solver<int, double> amg(nod, cnt, col, ele, max_level, min_nodes, epsilon, omega, gauss_seidel); 00124 network::time(tb); 00125 network::barrier(); 00126 //if(rank == 0) cout << "Rank: " << rank << " AMG Setup Time: " << tb - ta << " sec" << endl; 00127 00128 00129 int n = 0; 00130 { 00131 double s = 0.0; 00132 vector<double> t(_b); 00133 lin(_x, t); 00134 sub_scale(t, _b, 1.0); 00135 vector<double> tt(t); 00136 com.accumulate(tt); 00137 scalar_product(tt, t, s); 00138 com.collect(s); 00139 //if(rank == 0) cout << "Rank: " << rank << " CHECK IN: " << sqrt(s) << endl; 00140 } 00141 network::barrier(); 00142 network::time(ta); 00143 conjugate_gradient<int, double> cg(lin, amg, sca, tol, nIt, false); 00144 cg(_b, _x); 00145 n = cg.iterations(); 00146 network::time(tb); 00147 network::barrier(); 00148 cout << "Rank: " << rank << " ITERATIONS: " << n << " SOLVE Time: " << tb - ta << " (" << (tb - ta) / n << ") sec" << endl; 00149 //cout << "Rank: " << rank << " MFLOPS: " << 2.0 * ele.size() * n / ((tb - ta) * 1000000.0) << endl; 00150 00151 { 00152 double s = 0.0; 00153 vector<double> t(_b); 00154 lin(_x, t); 00155 sub_scale(t, _b, 1.0); 00156 vector<double> tt(t); 00157 com.accumulate(tt); 00158 scalar_product(tt, t, s); 00159 com.collect(s); 00160 //if(rank == 0) cout << "Rank: " << rank << " CHECK OUT: " << sqrt(s) << endl; 00161 } 00162 00163 network::time(gtc); 00164 if(rank == 0) cout << "Rank: " << rank << " GLOBAL Time: " << gtc - gta << " (" << gtc - gtb << ") sec" << endl; 00165 00166 //network::finalize(); 00167 if (n >= nIt) 00168 { 00169 00170 throw new Exception("AGM did not converged %d %d",n,nIt); 00171 } 00172 }
void SolverGPU_AGM::gpu_agm_compressible | ( | std::vector< int > & | cnt, | |
std::vector< int > & | col, | |||
std::vector< double > & | ele, | |||
std::vector< double > & | b, | |||
std::vector< double > & | x, | |||
double | tol, | |||
unsigned | nIt | |||
) | [private] |
void SolverGPU_AGM::gpu_agmII | ( | std::vector< int > & | cnt, | |
std::vector< int > & | col, | |||
std::vector< double > & | ele_agm, | |||
std::vector< double > & | ele_cg, | |||
std::vector< double > & | b, | |||
std::vector< double > & | x, | |||
double | tol, | |||
unsigned | nIt | |||
) | [private] |
Definition at line 397 of file solvergpu_agm.cpp.
00398 { 00399 00400 00401 00402 //network::init(argc, argv); 00403 double gta, gtb, gtc, ta, tb; 00404 network::time(gta); 00405 int size, rank; 00406 network::size(size); 00407 network::rank(rank); 00408 00409 std::vector<int> nod(cnt.size()); 00410 for(int i = 0; i < cnt.size(); i++) nod[i] = i; 00411 00413 network::barrier(); 00414 network::time(ta); 00415 communicator<int, double> com(nod); 00416 network::time(tb); 00417 network::barrier(); 00418 //cout << "Rank: " << rank << " COM IO Time: " << tb - ta << " sec" << endl; 00419 00420 linear_operator<int, double> lin(cnt, col, ele_cg); 00421 vector_product<int, double> sca(com); 00422 //vector<double> b(gnumnode); 00423 //vector<double> x(gnumnode); 00424 network::barrier(); 00425 network::time(ta); 00426 double epsilon = 0.0, omega = 1; 00427 int max_level = 16; 00428 int min_nodes = 1; 00429 bool gauss_seidel = false; 00430 static amg_solver<int, double> amg(nod, cnt, col, ele_amg, max_level, min_nodes, epsilon, omega, gauss_seidel); 00431 network::time(tb); 00432 network::barrier(); 00433 //if(rank == 0) cout << "Rank: " << rank << " AMG Setup Time: " << tb - ta << " sec" << endl; 00434 00435 00436 int n = 0; 00437 { 00438 double s = 0.0; 00439 vector<double> t(_b); 00440 lin(_x, t); 00441 sub_scale(t, _b, 1.0); 00442 vector<double> tt(t); 00443 com.accumulate(tt); 00444 scalar_product(tt, t, s); 00445 com.collect(s); 00446 //if(rank == 0) cout << "Rank: " << rank << " CHECK IN: " << sqrt(s) << endl; 00447 } 00448 network::barrier(); 00449 network::time(ta); 00450 conjugate_gradient<int, double> cg(lin, amg, sca, tol, nIt, false); 00451 cg(_b, _x); 00452 n = cg.iterations(); 00453 network::time(tb); 00454 network::barrier(); 00455 cout << "Rank: " << rank << " ITERATIONS: " << n << " SOLVE Time: " << tb - ta << " (" << (tb - ta) / n << ") sec" << endl; 00456 //cout << "Rank: " << rank << " MFLOPS: " << 2.0 * ele.size() * n / ((tb - ta) * 1000000.0) << endl; 00457 00458 { 00459 double s = 0.0; 00460 vector<double> t(_b); 00461 lin(_x, t); 00462 sub_scale(t, _b, 1.0); 00463 vector<double> tt(t); 00464 com.accumulate(tt); 00465 scalar_product(tt, t, s); 00466 com.collect(s); 00467 //if(rank == 0) cout << "Rank: " << rank << " CHECK OUT: " << sqrt(s) << endl; 00468 } 00469 00470 network::time(gtc); 00471 if(rank == 0) cout << "Rank: " << rank << " GLOBAL Time: " << gtc - gta << " (" << gtc - gtb << ") sec" << endl; 00472 00473 //network::finalize(); 00474 if (n >= nIt) 00475 { 00476 00477 throw new Exception("AGM did not converged %d %d",n,nIt); 00478 } 00479 00480 }
void SolverGPU_AGM::gpu_agmSchur | ( | std::vector< int > & | Acnt, | |
std::vector< int > & | Acol, | |||
std::vector< double > & | Aele, | |||
std::vector< int > & | Bcnt, | |||
std::vector< int > & | Bcol, | |||
std::vector< double > & | Bele, | |||
std::vector< int > & | tcnt, | |||
std::vector< int > & | tcol, | |||
std::vector< double > & | tele, | |||
std::vector< double > & | _f, | |||
std::vector< double > & | _g, | |||
std::vector< double > & | _u, | |||
std::vector< double > & | _p | |||
) |
Definition at line 175 of file solvergpu_agm.cpp.
00179 { 00180 00181 //network::init(argc, argv); 00182 double gta, gtb, gtc, ta, tb; 00183 network::time(gta); 00184 int size, rank; 00185 network::size(size); 00186 network::rank(rank); 00187 00188 std::vector<int> anod(acnt.size()); 00189 for(int i = 0; i < acnt.size(); i++) anod[i] = i; 00190 std::vector<int> snod(tcnt.size()); 00191 for(int i = 0; i < tcnt.size(); i++) snod[i] = i; 00192 cout << "AN: " << acnt.size() << " BN: " << tcnt.size() << endl; 00193 // for(int i = 0; i < _f.size(); i++) cout << _f[i] << endl; 00194 //for(int i = 0; i < _g.size(); i++) cout << _g[i] << endl; 00195 00197 network::barrier(); 00198 network::time(ta); 00199 communicator<int, double> acom(anod); 00200 communicator<int, double> scom(snod); 00201 network::time(tb); 00202 network::barrier(); 00203 //cout << "Rank: " << rank << " COM IO Time: " << tb - ta << " sec" << endl; 00204 00205 linear_operator<int, double> lin(acnt, acol, aele); 00206 vector_product<int, double> sca(acom); 00207 00208 network::barrier(); 00209 network::time(ta); 00210 double epsilon = 0.0, omega = 1.0; 00211 int max_level = 16; 00212 int min_nodes = 1; 00213 bool gauss_seidel = false; 00214 amg_solver<int, double> amg(anod, acnt, acol, aele, max_level, min_nodes, epsilon, omega, gauss_seidel); 00215 network::time(tb); 00216 network::barrier(); 00217 //if(rank == 0) cout << "Rank: " << rank << " AMG Setup Time: " << tb - ta << " sec" << endl; 00218 00219 linear_operator<int, double> linp(bcnt, bcol, bele); 00220 linear_operator<int, double> linr(tcnt, tcol, tele); 00221 00222 conjugate_gradient<int, double> cg(lin, amg, sca, 1.0e-8, 64, false); 00223 00224 vector<double> _s(acnt.size()); 00225 vector<double> _t(acnt.size()); 00226 schur_complement<int, double> schur(cg, linp, linr, _s, _t); 00227 00228 vector<double> _h(tcnt.size()); 00229 cg(_f, _u); 00230 linr(_u, _h); 00231 sub_scale(_h, _g, 1.0); 00232 00233 #if 0 00234 // vector<double> mdia(tcnt.size(), 1.0); 00235 // diagonal_solver<int, double> pcg(mdia, scom); 00236 00237 simple_triple_product<int, double> triple_product; 00238 vector<int> pnod(tcnt.size(), 0); 00239 for(int i = 0; i < tcnt.size(); i++) pnod[i] = i; 00240 vector<int> pcnt; 00241 vector<int> pcol; 00242 vector<double> pele; 00243 triple_product(acnt, acol, aele, bcnt, bcol, bele, pcnt, pcol, pele); 00244 linear_operator<int, double> plin(pcnt, pcol, pele); 00245 00246 network::barrier(); 00247 network::time(ta); 00248 epsilon = 0.0, omega = 1.0; 00249 max_level = 16; 00250 min_nodes = 1; 00251 gauss_seidel = true; 00252 amg_solver<int, double> pcg(pnod, pcnt, pcol, pele, max_level, min_nodes, epsilon, omega, gauss_seidel); 00253 // vector_product<int, double> psca(scom); 00254 // conjugate_gradient<int, double> pcg(plin, pamg, psca, 1.0e-8, 64, false); 00255 /* 00256 vector<double> pdia(pcnt.size(), 0.0); 00257 for(int k = 0, i = 0; i < pcnt.size(); i++) 00258 { 00259 for(int j = 0; j < pcnt[i]; j++, k++) 00260 { 00261 if(i == pcol[k]) pdia[i] = pele[k]; 00262 } 00263 } 00264 vector<double> ptmp(pcnt.size(), 0.0); 00265 jacobi<int, double> pcg(pcnt, pcol, pele, pdia, ptmp, 0.66, scom); 00266 */ 00267 network::time(tb); 00268 network::barrier(); 00269 if(rank == 0) cout << "Rank: " << rank << " PAMG Setup Time: " << tb - ta << " sec" << endl; 00270 00271 #else 00272 vector<double> mdia(acnt.size(), 0.0); 00273 for(int k = 0, i = 0; i < acnt.size(); i++) 00274 { 00275 for(int j = 0; j < acnt[i]; j++, k++) 00276 { 00277 if(i == acol[k]) mdia[i] = aele[k]; 00278 } 00279 } 00280 acom.accumulate(mdia); 00281 for(int i = 0; i < acnt.size(); i++) mdia[i] = 1.0 / mdia[i]; 00282 diagonal_solver<int, double> mass(mdia, acom); 00283 schur_complement<int, double> prec(mass, linp, linr, _s, _t); 00284 vector<double> pdia(tcnt.size(), 0.0); 00285 for(int k = 0, i = 0; i < tcnt.size(); i++) 00286 { 00287 double sum = 0.0; 00288 for(int j = 0; j < tcnt[i]; j++, k++) 00289 { 00290 sum += tele[k] * tele[k] * mdia[tcol[k]]; 00291 } 00292 pdia[i] = 1.0 / sum; 00293 //pdia[i] = sum; 00294 } 00295 diagonal_solver<int, double> pinv(pdia, acom); 00296 conjugate_gradient<int, double> pcg(prec, pinv, sca, 1.0e-6, 32, false); 00297 #endif 00298 00299 int n = 0; 00300 double s0, s1; 00301 { 00302 double s = 0.0; 00303 vector<double> t(_h); 00304 schur(_p, t); 00305 sub_scale(t, _h, 1.0); 00306 vector<double> tt(t); 00307 scom.accumulate(tt); 00308 scalar_product(tt, t, s); 00309 scom.collect(s); 00310 //cout << "Rank: " << rank << " CHECK IN: " << sqrt(s) << endl; 00311 s0 = s; 00312 } 00313 network::time(ta); 00314 vector_product<int, double> scal(scom); 00315 conjugate_gradient<int, double> scg(schur, pcg, scal, 1.0e-6, 256, true); 00316 scg(_h, _p); 00317 n = scg.iterations(); 00318 network::time(tb); 00319 cout << "Rank: " << rank << " ITERATIONS: " << n << " SOLVE Time: " << tb - ta << " (" << (tb - ta) / n << ") sec" << endl; 00320 cout << "Rank: " << rank << " MFLOPS: " << 2.0 * (aele.size() + 2 * bele.size()) * n / ((tb - ta) * 1000000.0) << endl; 00321 { 00322 double s = 0.0; 00323 vector<double> t(_h); 00324 schur(_p, t); 00325 sub_scale(t, _h, 1.0); 00326 vector<double> tt(t); 00327 scom.accumulate(tt); 00328 scalar_product(tt, t, s); 00329 scom.collect(s); 00330 //cout << "Rank: " << rank << " CHECK OUT: " << sqrt(s) << endl; 00331 s1 = s; 00332 } 00333 //cout << "Rank: " << rank << " CHECK REL: " << sqrt(s1/s0) << endl; 00334 00335 linp(_p, _u); 00336 _t.assign(_f.begin(), _f.end()); 00337 sub_scale(_t, _u, 1.0); 00338 cg(_t, _u); 00339 00340 network::time(gtc); 00341 //cout << "Rank: " << rank << " GLOBAL Time: " << gtc - gta << " (" << gtc - gtb << ") sec" << endl; 00342 //network::finalize(); 00343 }
void SolverGPU_AGM::solve | ( | const SparseMatrix< double > & | A, | |
VecDouble & | sol, | |||
const VecDouble & | vRHS, | |||
double | tol, | |||
unsigned | nIt | |||
) |
Make all the necessary conversions beetwen the data of the simulator and the data for the AGM solver
A | The sparse matrix to solve | |
vRHS | The right hand side vector |
Definition at line 42 of file solvergpu_agm.cpp.
00043 { 00044 00045 std::vector<double> vElem; 00046 std::vector<int> vCols; 00047 std::vector<int> vCnt; 00048 00049 convertSparseMatrix(A,vCnt,vCols,vElem); 00050 00051 00052 std::vector<double> b(A.m()); 00053 std::vector<double> x(A.m()); 00054 00055 for (unsigned i=0;i<b.size();i++) 00056 b[i]=vRHS(i); 00057 gpu_agm(vCnt,vCols,vElem,b,x,tol,nIt); 00058 00059 for (unsigned i=0;i<x.size();i++) 00060 sol(i) = x[i]; 00061 00062 00063 00064 }
void SolverGPU_AGM::solve | ( | const SparseMatrix< double > & | M, | |
VecDouble & | sol, | |||
const VecDouble & | rhs | |||
) | [virtual] |
Implements LinearSolver.
Definition at line 357 of file solvergpu_agm.cpp.
00358 { 00359 try { 00360 solve(M,sol,rhs,m_tol,m_nIt); 00361 } 00362 catch (Exception *e) 00363 { 00364 std::cout << "Printing Matrix\n"; 00365 std::cout << "IsSymetric: " << NumericMethods::IsSymetric(M,1e-10) << std::endl; 00366 //NumericMethods::printSparseMatrixOctave(cout,M); 00367 00368 throw e; 00369 } 00370 00371 }
void SolverGPU_AGM::solveAgain | ( | const SparseMatrix< double > & | M, | |
VecDouble & | sol, | |||
const VecDouble & | rhs | |||
) | [virtual] |
Implements LinearSolver.
Definition at line 348 of file solvergpu_agm.cpp.
00349 { 00350 solve(M,sol,rhs); 00351 }
unsigned SolverGPU_AGM::m_nIt [protected] |
Definition at line 26 of file solvergpu_agm.h.
double SolverGPU_AGM::m_tol [protected] |
Definition at line 25 of file solvergpu_agm.h.