00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include <EpetraExt_ConfigDefs.h>
00030 #include <EpetraExt_MatrixMatrix.h>
00031
00032 #include <EpetraExt_Transpose_RowMatrix.h>
00033
00034 #include <Epetra_Export.h>
00035 #include <Epetra_Import.h>
00036 #include <Epetra_Util.h>
00037 #include <Epetra_Map.h>
00038 #include <Epetra_Comm.h>
00039 #include <Epetra_CrsMatrix.h>
00040 #include <Epetra_Directory.h>
00041 #include <Epetra_HashTable.h>
00042 #include <Epetra_Distributor.h>
00043
00044 #ifdef HAVE_VECTOR
00045 #include <vector>
00046 #endif
00047
00048 namespace EpetraExt {
00049
00050
00051
00052
00053
00054
00055 double sparsedot(double* u, int* u_ind, int u_len,
00056 double* v, int* v_ind, int v_len)
00057 {
00058 double result = 0.0;
00059
00060 int v_idx = 0;
00061 int u_idx = 0;
00062
00063 while(v_idx < v_len && u_idx < u_len) {
00064 int ui = u_ind[u_idx];
00065 int vi = v_ind[v_idx];
00066
00067 if (ui < vi) {
00068 ++u_idx;
00069 }
00070 else if (ui > vi) {
00071 ++v_idx;
00072 }
00073 else {
00074 result += u[u_idx++]*v[v_idx++];
00075 }
00076 }
00077
00078 return(result);
00079 }
00080
00081
00082
00083
00084 class CrsMatrixStruct {
00085 public:
00086 CrsMatrixStruct()
00087 : numRows(0), numEntriesPerRow(NULL), indices(NULL), values(NULL),
00088 remote(NULL), numRemote(0), rowMap(NULL), colMap(NULL),
00089 domainMap(NULL), importColMap(NULL), importMatrix(NULL)
00090 {}
00091
00092 virtual ~CrsMatrixStruct()
00093 {
00094 deleteContents();
00095 }
00096
00097 void deleteContents()
00098 {
00099 numRows = 0;
00100 delete [] numEntriesPerRow; numEntriesPerRow = NULL;
00101 delete [] indices; indices = NULL;
00102 delete [] values; values = NULL;
00103 delete [] remote; remote = NULL;
00104 numRemote = 0;
00105 delete importMatrix;
00106 }
00107
00108 int numRows;
00109 int* numEntriesPerRow;
00110 int** indices;
00111 double** values;
00112 bool* remote;
00113 int numRemote;
00114 const Epetra_Map* origRowMap;
00115 const Epetra_Map* rowMap;
00116 const Epetra_Map* colMap;
00117 const Epetra_Map* domainMap;
00118 const Epetra_Map* importColMap;
00119 Epetra_CrsMatrix* importMatrix;
00120 };
00121
00122 int dumpCrsMatrixStruct(const CrsMatrixStruct& M)
00123 {
00124 cout << "proc " << M.rowMap->Comm().MyPID()<<endl;
00125 cout << "numRows: " << M.numRows<<endl;
00126 for(int i=0; i<M.numRows; ++i) {
00127 for(int j=0; j<M.numEntriesPerRow[i]; ++j) {
00128 if (M.remote[i]) {
00129 cout << " *"<<M.rowMap->GID(i)<<" "
00130 <<M.importColMap->GID(M.indices[i][j])<<" "<<M.values[i][j]<<endl;
00131 }
00132 else {
00133 cout << " "<<M.rowMap->GID(i)<<" "
00134 <<M.colMap->GID(M.indices[i][j])<<" "<<M.values[i][j]<<endl;
00135 }
00136 }
00137 }
00138 return(0);
00139 }
00140
00141
00142 int mult_A_B(CrsMatrixStruct& Aview,
00143 CrsMatrixStruct& Bview,
00144 Epetra_CrsMatrix& C)
00145 {
00146 int C_firstCol = Bview.colMap->MinLID();
00147 int C_lastCol = Bview.colMap->MaxLID();
00148
00149 int C_firstCol_import = 0;
00150 int C_lastCol_import = -1;
00151
00152 int* bcols = Bview.colMap->MyGlobalElements();
00153 int* bcols_import = NULL;
00154 if (Bview.importColMap != NULL) {
00155 C_firstCol_import = Bview.importColMap->MinLID();
00156 C_lastCol_import = Bview.importColMap->MaxLID();
00157
00158 bcols_import = Bview.importColMap->MyGlobalElements();
00159 }
00160
00161 int C_numCols = C_lastCol - C_firstCol + 1;
00162 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00163
00164 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00165 double* dwork = new double[C_numCols];
00166 int* iwork = new int[C_numCols];
00167
00168 double* C_row_i = dwork;
00169 int* C_cols = iwork;
00170
00171 int C_row_i_length, i, j, k;
00172
00173
00174
00175
00176
00177
00178
00179
00180 bool C_filled = C.Filled();
00181
00182
00183 for(i=0; i<Aview.numRows; ++i) {
00184
00185
00186
00187
00188 if (Aview.remote[i]) {
00189 continue;
00190 }
00191
00192 int* Aindices_i = Aview.indices[i];
00193 double* Aval_i = Aview.values[i];
00194
00195 int global_row = Aview.rowMap->GID(i);
00196
00197
00198
00199
00200
00201
00202
00203 for(k=0; k<Aview.numEntriesPerRow[i]; ++k) {
00204 int Ak = Bview.rowMap->LID(Aview.colMap->GID(Aindices_i[k]));
00205 double Aval = Aval_i[k];
00206
00207 int* Bcol_inds = Bview.indices[Ak];
00208 double* Bvals_k = Bview.values[Ak];
00209
00210 C_row_i_length = 0;
00211
00212 if (Bview.remote[Ak]) {
00213 for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
00214 C_row_i[C_row_i_length] = Aval*Bvals_k[j];
00215 C_cols[C_row_i_length++] = bcols_import[Bcol_inds[j]];
00216 }
00217 }
00218 else {
00219 for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
00220 C_row_i[C_row_i_length] = Aval*Bvals_k[j];
00221 C_cols[C_row_i_length++] = bcols[Bcol_inds[j]];
00222 }
00223 }
00224
00225
00226
00227
00228
00229 int err = C_filled ?
00230 C.SumIntoGlobalValues(global_row, C_row_i_length, C_row_i, C_cols)
00231 :
00232 C.InsertGlobalValues(global_row, C_row_i_length, C_row_i, C_cols);
00233
00234 if (err < 0) {
00235 return(err);
00236 }
00237 if (err > 0) {
00238 if (C_filled) {
00239
00240
00241 return(err);
00242 }
00243 }
00244 }
00245 }
00246
00247 delete [] dwork;
00248 delete [] iwork;
00249
00250 return(0);
00251 }
00252
00253
00254 int mult_A_Btrans(CrsMatrixStruct& Aview,
00255 CrsMatrixStruct& Bview,
00256 Epetra_CrsMatrix& C)
00257 {
00258 int i, j, k;
00259 int returnValue = 0;
00260
00261 int maxlen = 0;
00262 for(i=0; i<Aview.numRows; ++i) {
00263 if (Aview.numEntriesPerRow[i] > maxlen) maxlen = Aview.numEntriesPerRow[i];
00264 }
00265 for(i=0; i<Bview.numRows; ++i) {
00266 if (Bview.numEntriesPerRow[i] > maxlen) maxlen = Bview.numEntriesPerRow[i];
00267 }
00268
00269
00270
00271
00272
00273
00274
00275 int numBcols = Bview.colMap->NumMyElements();
00276 int numBrows = Bview.numRows;
00277
00278 int iworklen = maxlen*2 + numBcols;
00279 int* iwork = new int[iworklen];
00280
00281 int* bcols = iwork+maxlen*2;
00282 int* bgids = Bview.colMap->MyGlobalElements();
00283 double* bvals = new double[maxlen*2];
00284 double* avals = bvals+maxlen;
00285
00286 int max_all_b = Bview.colMap->MaxAllGID();
00287 int min_all_b = Bview.colMap->MinAllGID();
00288
00289
00290
00291 for(i=0; i<numBcols; ++i) {
00292 int blid = Bview.colMap->LID(bgids[i]);
00293 bcols[blid] = bgids[i];
00294 }
00295
00296
00297
00298
00299
00300 int* b_firstcol = new int[2*numBrows];
00301 int* b_lastcol = b_firstcol+numBrows;
00302 int temp;
00303 for(i=0; i<numBrows; ++i) {
00304 b_firstcol[i] = max_all_b;
00305 b_lastcol[i] = min_all_b;
00306
00307 int Blen_i = Bview.numEntriesPerRow[i];
00308 if (Blen_i < 1) continue;
00309 int* Bindices_i = Bview.indices[i];
00310
00311 if (Bview.remote[i]) {
00312 for(k=0; k<Blen_i; ++k) {
00313 temp = Bview.importColMap->GID(Bindices_i[k]);
00314 if (temp < b_firstcol[i]) b_firstcol[i] = temp;
00315 if (temp > b_lastcol[i]) b_lastcol[i] = temp;
00316 }
00317 }
00318 else {
00319 for(k=0; k<Blen_i; ++k) {
00320 temp = bcols[Bindices_i[k]];
00321 if (temp < b_firstcol[i]) b_firstcol[i] = temp;
00322 if (temp > b_lastcol[i]) b_lastcol[i] = temp;
00323 }
00324 }
00325 }
00326
00327 Epetra_Util util;
00328
00329 int* Aind = iwork;
00330 int* Bind = iwork+maxlen;
00331
00332 bool C_filled = C.Filled();
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343 for(i=0; i<Aview.numRows; ++i) {
00344 if (Aview.remote[i]) {
00345 continue;
00346 }
00347
00348 int* Aindices_i = Aview.indices[i];
00349 double* Aval_i = Aview.values[i];
00350 int A_len_i = Aview.numEntriesPerRow[i];
00351 if (A_len_i < 1) {
00352 continue;
00353 }
00354
00355 for(k=0; k<A_len_i; ++k) {
00356 Aind[k] = Aview.colMap->GID(Aindices_i[k]);
00357 avals[k] = Aval_i[k];
00358 }
00359
00360 util.Sort(true, A_len_i, Aind, 1, &avals, 0, NULL);
00361
00362 int mina = Aind[0];
00363 int maxa = Aind[A_len_i-1];
00364
00365 if (mina > max_all_b || maxa < min_all_b) {
00366 continue;
00367 }
00368
00369 int global_row = Aview.rowMap->GID(i);
00370
00371
00372 for(j=0; j<Bview.numRows; ++j) {
00373 if (b_firstcol[j] > maxa || b_lastcol[j] < mina) {
00374 continue;
00375 }
00376
00377 int* Bindices_j = Bview.indices[j];
00378 int B_len_j = Bview.numEntriesPerRow[j];
00379 if (B_len_j < 1) {
00380 continue;
00381 }
00382
00383 int tmp, Blen = 0;
00384
00385 if (Bview.remote[j]) {
00386 for(k=0; k<B_len_j; ++k) {
00387 tmp = Bview.importColMap->GID(Bindices_j[k]);
00388 if (tmp < mina || tmp > maxa) {
00389 continue;
00390 }
00391
00392 bvals[Blen] = Bview.values[j][k];
00393 Bind[Blen++] = tmp;
00394 }
00395 }
00396 else {
00397 for(k=0; k<B_len_j; ++k) {
00398 tmp = bcols[Bindices_j[k]];
00399 if (tmp < mina || tmp > maxa) {
00400 continue;
00401 }
00402
00403 bvals[Blen] = Bview.values[j][k];
00404 Bind[Blen++] = tmp;
00405 }
00406 }
00407
00408 if (Blen < 1) {
00409 continue;
00410 }
00411
00412 util.Sort(true, Blen, Bind, 1, &bvals, 0, NULL);
00413
00414 double C_ij = sparsedot(avals, Aind, A_len_i,
00415 bvals, Bind, Blen);
00416
00417 if (C_ij == 0.0) {
00418 continue;
00419 }
00420 int global_col = Bview.rowMap->GID(j);
00421
00422 int err = C_filled ?
00423 C.SumIntoGlobalValues(global_row, 1, &C_ij, &global_col)
00424 :
00425 C.InsertGlobalValues(global_row, 1, &C_ij, &global_col);
00426
00427 if (err < 0) {
00428 return(err);
00429 }
00430 if (err > 0) {
00431 if (C_filled) {
00432
00433
00434
00435 std::cerr << "EpetraExt::MatrixMatrix::Multiply Warning: failed "
00436 << "to insert value in result matrix at position "<<global_row
00437 <<"," <<global_col<<", possibly because result matrix has a "
00438 << "column-map that doesn't include column "<<global_col
00439 <<" on this proc." <<std::endl;
00440 return(err);
00441 }
00442 }
00443 }
00444 }
00445
00446 delete [] iwork;
00447 delete [] bvals;
00448 delete [] b_firstcol;
00449
00450 return(returnValue);
00451 }
00452
00453
00454 int mult_Atrans_B(CrsMatrixStruct& Aview,
00455 CrsMatrixStruct& Bview,
00456 Epetra_CrsMatrix& C)
00457 {
00458 int C_firstCol = Bview.colMap->MinLID();
00459 int C_lastCol = Bview.colMap->MaxLID();
00460
00461 int C_firstCol_import = 0;
00462 int C_lastCol_import = -1;
00463
00464 if (Bview.importColMap != NULL) {
00465 C_firstCol_import = Bview.importColMap->MinLID();
00466 C_lastCol_import = Bview.importColMap->MaxLID();
00467 }
00468
00469 int C_numCols = C_lastCol - C_firstCol + 1;
00470 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00471
00472 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00473
00474 double* C_row_i = new double[C_numCols];
00475 int* C_colInds = new int[C_numCols];
00476
00477 int i, j, k;
00478
00479 for(j=0; j<C_numCols; ++j) {
00480 C_row_i[j] = 0.0;
00481 C_colInds[j] = 0;
00482 }
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495 int localProc = Bview.colMap->Comm().MyPID();
00496
00497 int* Arows = Aview.rowMap->MyGlobalElements();
00498
00499 bool C_filled = C.Filled();
00500
00501
00502 for(i=0; i<Aview.numRows; ++i) {
00503
00504 int* Aindices_i = Aview.indices[i];
00505 double* Aval_i = Aview.values[i];
00506
00507
00508
00509 int Bi = Bview.rowMap->LID(Arows[i]);
00510 if (Bi<0) {
00511 cout << "mult_Atrans_B ERROR, proc "<<localProc<<" needs row "
00512 <<Arows[i]<<" of matrix B, but doesn't have it."<<endl;
00513 return(-1);
00514 }
00515
00516 int* Bcol_inds = Bview.indices[Bi];
00517 double* Bvals_i = Bview.values[Bi];
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527 int Blen = Bview.numEntriesPerRow[Bi];
00528 if (Bview.remote[Bi]) {
00529 for(j=0; j<Blen; ++j) {
00530 C_colInds[j] = Bview.importColMap->GID(Bcol_inds[j]);
00531 }
00532 }
00533 else {
00534 for(j=0; j<Blen; ++j) {
00535 C_colInds[j] = Bview.colMap->GID(Bcol_inds[j]);
00536 }
00537 }
00538
00539
00540 for(j=0; j<Aview.numEntriesPerRow[i]; ++j) {
00541
00542 int Aj = Aindices_i[j];
00543 double Aval = Aval_i[j];
00544
00545 int global_row;
00546 if (Aview.remote[i]) {
00547 global_row = Aview.importColMap->GID(Aj);
00548 }
00549 else {
00550 global_row = Aview.colMap->GID(Aj);
00551 }
00552
00553 if (!C.RowMap().MyGID(global_row)) {
00554 continue;
00555 }
00556
00557 for(k=0; k<Blen; ++k) {
00558 C_row_i[k] = Aval*Bvals_i[k];
00559 }
00560
00561
00562
00563
00564
00565 int err = C_filled ?
00566 C.SumIntoGlobalValues(global_row, Blen, C_row_i, C_colInds)
00567 :
00568 C.InsertGlobalValues(global_row, Blen, C_row_i, C_colInds);
00569
00570 if (err < 0) {
00571 return(err);
00572 }
00573 if (err > 0) {
00574 if (C_filled) {
00575
00576 return(err);
00577 }
00578 }
00579 }
00580 }
00581
00582 delete [] C_row_i;
00583 delete [] C_colInds;
00584
00585 return(0);
00586 }
00587
00588
00589 int mult_Atrans_Btrans(CrsMatrixStruct& Aview,
00590 CrsMatrixStruct& Bview,
00591 Epetra_CrsMatrix& C)
00592 {
00593 int C_firstCol = Aview.rowMap->MinLID();
00594 int C_lastCol = Aview.rowMap->MaxLID();
00595
00596 int C_firstCol_import = 0;
00597 int C_lastCol_import = -1;
00598
00599 if (Aview.importColMap != NULL) {
00600 C_firstCol_import = Aview.importColMap->MinLID();
00601 C_lastCol_import = Aview.importColMap->MaxLID();
00602 }
00603
00604 int C_numCols = C_lastCol - C_firstCol + 1;
00605 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00606
00607 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00608
00609 double* dwork = new double[C_numCols];
00610 int* iwork = new int[C_numCols];
00611
00612 double* C_col_j = dwork;
00613 int* C_inds = iwork;
00614
00615
00616
00617
00618
00619
00620
00621
00622 int i, j, k;
00623
00624 for(j=0; j<C_numCols; ++j) {
00625 C_col_j[j] = 0.0;
00626 C_inds[j] = -1;
00627 }
00628
00629 int* A_col_inds = Aview.colMap->MyGlobalElements();
00630 int* A_col_inds_import = Aview.importColMap ?
00631 Aview.importColMap->MyGlobalElements() : 0;
00632
00633 const Epetra_Map* Crowmap = &(C.RowMap());
00634
00635
00636
00637
00638
00639
00640
00641
00642
00643 int* Brows = Bview.rowMap->MyGlobalElements();
00644
00645
00646 for(j=0; j<Bview.numRows; ++j) {
00647 int* Bindices_j = Bview.indices[j];
00648 double* Bvals_j = Bview.values[j];
00649
00650 int global_col = Brows[j];
00651
00652
00653
00654
00655
00656
00657
00658 for(k=0; k<Bview.numEntriesPerRow[j]; ++k) {
00659
00660 int bk = Bindices_j[k];
00661 double Bval = Bvals_j[k];
00662
00663 int global_k;
00664 if (Bview.remote[j]) {
00665 global_k = Bview.importColMap->GID(bk);
00666 }
00667 else {
00668 global_k = Bview.colMap->GID(bk);
00669 }
00670
00671
00672 int ak = Aview.rowMap->LID(global_k);
00673 if (ak<0) {
00674 continue;
00675 }
00676
00677 int* Aindices_k = Aview.indices[ak];
00678 double* Avals_k = Aview.values[ak];
00679
00680 int C_len = 0;
00681
00682 if (Aview.remote[ak]) {
00683 for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
00684 C_col_j[C_len] = Avals_k[i]*Bval;
00685 C_inds[C_len++] = A_col_inds_import[Aindices_k[i]];
00686 }
00687 }
00688 else {
00689 for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
00690 C_col_j[C_len] = Avals_k[i]*Bval;
00691 C_inds[C_len++] = A_col_inds[Aindices_k[i]];
00692 }
00693 }
00694
00695
00696
00697 for(i=0; i < C_len ; ++i) {
00698 if (C_col_j[i] == 0.0) continue;
00699
00700 int global_row = C_inds[i];
00701 if (!Crowmap->MyGID(global_row)) {
00702 continue;
00703 }
00704
00705 int err = C.SumIntoGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
00706
00707 if (err < 0) {
00708 return(err);
00709 }
00710 else {
00711 if (err > 0) {
00712 err = C.InsertGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
00713 if (err < 0) {
00714 return(err);
00715 }
00716 }
00717 }
00718 }
00719
00720 }
00721 }
00722
00723 delete [] dwork;
00724 delete [] iwork;
00725
00726 return(0);
00727 }
00728
00729 int import_and_extract_views(const Epetra_CrsMatrix& M,
00730 const Epetra_Map& targetMap,
00731 CrsMatrixStruct& Mview)
00732 {
00733
00734
00735
00736
00737
00738
00739
00740 Mview.deleteContents();
00741
00742 const Epetra_Map& Mrowmap = M.RowMap();
00743
00744 int numProcs = Mrowmap.Comm().NumProc();
00745
00746 Mview.numRows = targetMap.NumMyElements();
00747
00748 int* Mrows = targetMap.MyGlobalElements();
00749
00750 if (Mview.numRows > 0) {
00751 Mview.numEntriesPerRow = new int[Mview.numRows];
00752 Mview.indices = new int*[Mview.numRows];
00753 Mview.values = new double*[Mview.numRows];
00754 Mview.remote = new bool[Mview.numRows];
00755 }
00756
00757 Mview.numRemote = 0;
00758
00759 int i;
00760 for(i=0; i<Mview.numRows; ++i) {
00761 int mlid = Mrowmap.LID(Mrows[i]);
00762 if (mlid < 0) {
00763 Mview.remote[i] = true;
00764 ++Mview.numRemote;
00765 }
00766 else {
00767 EPETRA_CHK_ERR( M.ExtractMyRowView(mlid, Mview.numEntriesPerRow[i],
00768 Mview.values[i], Mview.indices[i]) );
00769 Mview.remote[i] = false;
00770 }
00771 }
00772
00773 Mview.origRowMap = &(M.RowMap());
00774 Mview.rowMap = &targetMap;
00775 Mview.colMap = &(M.ColMap());
00776 Mview.domainMap = &(M.DomainMap());
00777 Mview.importColMap = NULL;
00778
00779 if (numProcs < 2) {
00780 if (Mview.numRemote > 0) {
00781 cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but "
00782 << "attempting to import remote matrix rows."<<endl;
00783 return(-1);
00784 }
00785
00786
00787 return(0);
00788 }
00789
00790
00791
00792
00793
00794
00795 int globalMaxNumRemote = 0;
00796 Mrowmap.Comm().MaxAll(&Mview.numRemote, &globalMaxNumRemote, 1);
00797
00798 if (globalMaxNumRemote > 0) {
00799
00800
00801 int* MremoteRows = Mview.numRemote>0 ? new int[Mview.numRemote] : NULL;
00802 int offset = 0;
00803 for(i=0; i<Mview.numRows; ++i) {
00804 if (Mview.remote[i]) {
00805 MremoteRows[offset++] = Mrows[i];
00806 }
00807 }
00808
00809 Epetra_Map MremoteRowMap(-1, Mview.numRemote, MremoteRows,
00810 Mrowmap.IndexBase(), Mrowmap.Comm());
00811
00812
00813
00814 Epetra_Import importer(MremoteRowMap, Mrowmap);
00815
00816
00817
00818 Mview.importMatrix = new Epetra_CrsMatrix(Copy, MremoteRowMap, 1);
00819
00820 EPETRA_CHK_ERR( Mview.importMatrix->Import(M, importer, Insert) );
00821
00822 EPETRA_CHK_ERR( Mview.importMatrix->FillComplete(M.DomainMap(), M.RangeMap()) );
00823
00824
00825
00826 for(i=0; i<Mview.numRows; ++i) {
00827 if (Mview.remote[i]) {
00828 int importLID = MremoteRowMap.LID(Mrows[i]);
00829 EPETRA_CHK_ERR( Mview.importMatrix->ExtractMyRowView(importLID,
00830 Mview.numEntriesPerRow[i],
00831 Mview.values[i],
00832 Mview.indices[i]) );
00833 }
00834 }
00835
00836 Mview.importColMap = &(Mview.importMatrix->ColMap());
00837
00838 delete [] MremoteRows;
00839 }
00840
00841 return(0);
00842 }
00843
00844 int distribute_list(const Epetra_Comm& Comm,
00845 int lenSendList,
00846 const int* sendList,
00847 int& maxSendLen,
00848 int*& recvList)
00849 {
00850 maxSendLen = 0;
00851 Comm.MaxAll(&lenSendList, &maxSendLen, 1);
00852 int numProcs = Comm.NumProc();
00853 recvList = new int[numProcs*maxSendLen];
00854 int* send = new int[maxSendLen];
00855 for(int i=0; i<lenSendList; ++i) {
00856 send[i] = sendList[i];
00857 }
00858
00859 Comm.GatherAll(send, recvList, maxSendLen);
00860 delete [] send;
00861
00862 return(0);
00863 }
00864
00865 Epetra_Map* create_map_from_imported_rows(const Epetra_Map* map,
00866 int totalNumSend,
00867 int* sendRows,
00868 int numProcs,
00869 int* numSendPerProc)
00870 {
00871
00872
00873
00874
00875
00876
00877 Epetra_Distributor* distributor = map->Comm().CreateDistributor();
00878
00879 int* sendPIDs = totalNumSend>0 ? new int[totalNumSend] : NULL;
00880 int offset = 0;
00881 for(int i=0; i<numProcs; ++i) {
00882 for(int j=0; j<numSendPerProc[i]; ++j) {
00883 sendPIDs[offset++] = i;
00884 }
00885 }
00886
00887 int numRecv = 0;
00888 int err = distributor->CreateFromSends(totalNumSend, sendPIDs,
00889 true, numRecv);
00890 assert( err == 0 );
00891
00892 char* c_recv_objs = numRecv>0 ? new char[numRecv*sizeof(int)] : NULL;
00893 int num_c_recv = numRecv*(int)sizeof(int);
00894
00895 err = distributor->Do(reinterpret_cast<char*>(sendRows),
00896 (int)sizeof(int), num_c_recv, c_recv_objs);
00897 assert( err == 0 );
00898
00899 int* recvRows = reinterpret_cast<int*>(c_recv_objs);
00900
00901
00902 Epetra_Map* import_rows = new Epetra_Map(-1, numRecv, recvRows,
00903 map->IndexBase(), map->Comm());
00904
00905 delete [] c_recv_objs;
00906 delete [] sendPIDs;
00907
00908 delete distributor;
00909
00910 return( import_rows );
00911 }
00912
00913 int form_map_union(const Epetra_Map* map1,
00914 const Epetra_Map* map2,
00915 const Epetra_Map*& mapunion)
00916 {
00917
00918
00919 if (map1 == NULL) {
00920 mapunion = new Epetra_Map(*map2);
00921 return(0);
00922 }
00923
00924 if (map2 == NULL) {
00925 mapunion = new Epetra_Map(*map1);
00926 return(0);
00927 }
00928
00929 int map1_len = map1->NumMyElements();
00930 int* map1_elements = map1->MyGlobalElements();
00931 int map2_len = map2->NumMyElements();
00932 int* map2_elements = map2->MyGlobalElements();
00933
00934 int* union_elements = new int[map1_len+map2_len];
00935
00936 int map1_offset = 0, map2_offset = 0, union_offset = 0;
00937
00938 while(map1_offset < map1_len && map2_offset < map2_len) {
00939 int map1_elem = map1_elements[map1_offset];
00940 int map2_elem = map2_elements[map2_offset];
00941
00942 if (map1_elem < map2_elem) {
00943 union_elements[union_offset++] = map1_elem;
00944 ++map1_offset;
00945 }
00946 else if (map1_elem > map2_elem) {
00947 union_elements[union_offset++] = map2_elem;
00948 ++map2_offset;
00949 }
00950 else {
00951 union_elements[union_offset++] = map1_elem;
00952 ++map1_offset;
00953 ++map2_offset;
00954 }
00955 }
00956
00957 int i;
00958 for(i=map1_offset; i<map1_len; ++i) {
00959 union_elements[union_offset++] = map1_elements[i];
00960 }
00961
00962 for(i=map2_offset; i<map2_len; ++i) {
00963 union_elements[union_offset++] = map2_elements[i];
00964 }
00965
00966 mapunion = new Epetra_Map(-1, union_offset, union_elements,
00967 map1->IndexBase(), map1->Comm());
00968
00969 delete [] union_elements;
00970
00971 return(0);
00972 }
00973
00974 Epetra_Map* find_rows_containing_cols(const Epetra_CrsMatrix& M,
00975 const Epetra_Map* colmap)
00976 {
00977
00978
00979
00980
00981 int numProcs = colmap->Comm().NumProc();
00982 int localProc = colmap->Comm().MyPID();
00983
00984 if (numProcs < 2) {
00985 Epetra_Map* result_map = NULL;
00986
00987 int err = form_map_union(&(M.RowMap()), NULL,
00988 (const Epetra_Map*&)result_map);
00989 if (err != 0) {
00990 return(NULL);
00991 }
00992 return(result_map);
00993 }
00994
00995 int MnumRows = M.NumMyRows();
00996 int numCols = colmap->NumMyElements();
00997
00998 int* iwork = new int[numCols+2*numProcs+numProcs*MnumRows];
00999 int iworkOffset = 0;
01000
01001 int* cols = &(iwork[iworkOffset]); iworkOffset += numCols;
01002
01003 cols[0] = numCols;
01004 colmap->MyGlobalElements( &(cols[1]) );
01005
01006
01007
01008 Epetra_Util util;
01009 util.Sort(true, numCols, &(cols[1]), 0, NULL, 0, NULL);
01010
01011 int* all_proc_cols = NULL;
01012
01013 int max_num_cols;
01014 distribute_list(colmap->Comm(), numCols+1, cols, max_num_cols, all_proc_cols);
01015
01016 const Epetra_CrsGraph& Mgraph = M.Graph();
01017 const Epetra_Map& Mrowmap = M.RowMap();
01018 const Epetra_Map& Mcolmap = M.ColMap();
01019 int MminMyLID = Mrowmap.MinLID();
01020
01021 int* procNumCols = &(iwork[iworkOffset]); iworkOffset += numProcs;
01022 int* procNumRows = &(iwork[iworkOffset]); iworkOffset += numProcs;
01023 int* procRows_1D = &(iwork[iworkOffset]);
01024 int** procCols = new int*[numProcs];
01025 int** procRows = new int*[numProcs];
01026 int i, err;
01027 int offset = 0;
01028 for(i=0; i<numProcs; ++i) {
01029 procNumCols[i] = all_proc_cols[offset];
01030 procCols[i] = &(all_proc_cols[offset+1]);
01031 offset += max_num_cols;
01032
01033 procNumRows[i] = 0;
01034 procRows[i] = &(procRows_1D[i*MnumRows]);
01035 }
01036
01037 int* Mindices;
01038
01039 for(int row=0; row<MnumRows; ++row) {
01040 int localRow = MminMyLID+row;
01041 int globalRow = Mrowmap.GID(localRow);
01042 int MnumCols;
01043 err = Mgraph.ExtractMyRowView(localRow, MnumCols, Mindices);
01044 if (err != 0) {
01045 cerr << "proc "<<localProc<<", error in Mgraph.ExtractMyRowView, row "
01046 <<localRow<<endl;
01047 return(NULL);
01048 }
01049
01050 for(int j=0; j<MnumCols; ++j) {
01051 int colGID = Mcolmap.GID(Mindices[j]);
01052
01053 for(int p=0; p<numProcs; ++p) {
01054 if (p==localProc) continue;
01055
01056 int insertPoint;
01057 int foundOffset = Epetra_Util_binary_search(colGID, procCols[p],
01058 procNumCols[p], insertPoint);
01059 if (foundOffset > -1) {
01060 int numRowsP = procNumRows[p];
01061 int* prows = procRows[p];
01062 if (numRowsP < 1 || prows[numRowsP-1] < globalRow) {
01063 prows[numRowsP] = globalRow;
01064 procNumRows[p]++;
01065 }
01066 }
01067 }
01068 }
01069 }
01070
01071
01072
01073 offset = procNumRows[0];
01074 for(i=1; i<numProcs; ++i) {
01075 for(int j=0; j<procNumRows[i]; ++j) {
01076 procRows_1D[offset++] = procRows[i][j];
01077 }
01078 }
01079
01080 int totalNumSend = offset;
01081
01082
01083
01084 Epetra_Map* recvd_rows =
01085 create_map_from_imported_rows(&Mrowmap, totalNumSend,
01086 procRows_1D, numProcs, procNumRows);
01087
01088 Epetra_Map* result_map = NULL;
01089
01090 err = form_map_union(&(M.RowMap()), recvd_rows, (const Epetra_Map*&)result_map);
01091 if (err != 0) {
01092 return(NULL);
01093 }
01094
01095 delete [] iwork;
01096 delete [] procCols;
01097 delete [] procRows;
01098 delete [] all_proc_cols;
01099 delete recvd_rows;
01100
01101 return(result_map);
01102 }
01103
01104 int MatrixMatrix::Multiply(const Epetra_CrsMatrix& A,
01105 bool transposeA,
01106 const Epetra_CrsMatrix& B,
01107 bool transposeB,
01108 Epetra_CrsMatrix& C,
01109 bool call_FillComplete_on_result)
01110 {
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121 if (!A.Filled() || !B.Filled()) {
01122 EPETRA_CHK_ERR(-1);
01123 }
01124
01125
01126
01127
01128 int scenario = 1;
01129 if (transposeB && !transposeA) scenario = 2;
01130 if (transposeA && !transposeB) scenario = 3;
01131 if (transposeA && transposeB) scenario = 4;
01132
01133
01134 int Aouter = transposeA ? A.NumGlobalCols() : A.NumGlobalRows();
01135 int Bouter = transposeB ? B.NumGlobalRows() : B.NumGlobalCols();
01136 int Ainner = transposeA ? A.NumGlobalRows() : A.NumGlobalCols();
01137 int Binner = transposeB ? B.NumGlobalCols() : B.NumGlobalRows();
01138 if (Ainner != Binner) {
01139 cerr << "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) "
01140 << "must match for matrix-matrix product. op(A) is "
01141 <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<endl;
01142 return(-1);
01143 }
01144
01145
01146
01147
01148
01149 if (Aouter > C.NumGlobalRows()) {
01150 cerr << "MatrixMatrix::Multiply: ERROR, dimensions of result C must "
01151 << "match dimensions of op(A) * op(B). C has "<<C.NumGlobalRows()
01152 << " rows, should have at least "<<Aouter << endl;
01153 return(-1);
01154 }
01155
01156
01157
01158
01159
01160
01161
01162
01163 int numProcs = A.Comm().NumProc();
01164
01165
01166
01167
01168 const Epetra_Map* domainMap_A = &(A.DomainMap());
01169 const Epetra_Map* domainMap_B = &(B.DomainMap());
01170
01171 const Epetra_Map* rowmap_A = &(A.RowMap());
01172 const Epetra_Map* rowmap_B = &(B.RowMap());
01173
01174
01175
01176 const Epetra_Map* workmap1 = NULL;
01177 const Epetra_Map* workmap2 = NULL;
01178 const Epetra_Map* mapunion1 = NULL;
01179
01180
01181
01182 CrsMatrixStruct Aview;
01183 CrsMatrixStruct Bview;
01184
01185 const Epetra_Map* targetMap_A = rowmap_A;
01186 const Epetra_Map* targetMap_B = rowmap_B;
01187
01188 if (numProcs > 1) {
01189
01190
01191
01192 if (transposeA) {
01193 workmap1 = find_rows_containing_cols(A, domainMap_A);
01194 targetMap_A = workmap1;
01195 }
01196 }
01197
01198
01199 EPETRA_CHK_ERR( import_and_extract_views(A, *targetMap_A, Aview) );
01200
01201
01202
01203 if (numProcs > 1) {
01204 const Epetra_Map* colmap_op_A = NULL;
01205 if (transposeA) {
01206 colmap_op_A = targetMap_A;
01207 }
01208 else {
01209 colmap_op_A = &(A.ColMap());
01210 }
01211
01212 targetMap_B = colmap_op_A;
01213
01214
01215
01216
01217
01218 if (transposeB) {
01219 EPETRA_CHK_ERR( form_map_union(colmap_op_A, domainMap_B, mapunion1) );
01220 workmap2 = find_rows_containing_cols(B, mapunion1);
01221 targetMap_B = workmap2;
01222 }
01223 }
01224
01225
01226 EPETRA_CHK_ERR( import_and_extract_views(B, *targetMap_B, Bview) );
01227
01228
01229 EPETRA_CHK_ERR( C.PutScalar(0.0) );
01230
01231
01232
01233
01234 switch(scenario) {
01235 case 1: EPETRA_CHK_ERR( mult_A_B(Aview, Bview, C) );
01236 break;
01237 case 2: EPETRA_CHK_ERR( mult_A_Btrans(Aview, Bview, C) );
01238 break;
01239 case 3: EPETRA_CHK_ERR( mult_Atrans_B(Aview, Bview, C) );
01240 break;
01241 case 4: EPETRA_CHK_ERR( mult_Atrans_Btrans(Aview, Bview, C) );
01242 break;
01243 }
01244
01245 if (call_FillComplete_on_result) {
01246
01247
01248
01249
01250
01251
01252
01253 const Epetra_Map* domainmap =
01254 transposeB ? &(B.RangeMap()) : &(B.DomainMap());
01255
01256 const Epetra_Map* rangemap =
01257 transposeA ? &(A.DomainMap()) : &(A.RangeMap());
01258
01259 if (!C.Filled()) {
01260 EPETRA_CHK_ERR( C.FillComplete(*domainmap, *rangemap) );
01261 }
01262 }
01263
01264
01265
01266
01267 delete mapunion1; mapunion1 = NULL;
01268 delete workmap1; workmap1 = NULL;
01269 delete workmap2; workmap2 = NULL;
01270
01271 return(0);
01272 }
01273
01274 int MatrixMatrix::Add(const Epetra_CrsMatrix& A,
01275 bool transposeA,
01276 double scalarA,
01277 Epetra_CrsMatrix& B,
01278 double scalarB )
01279 {
01280
01281
01282
01283
01284
01285
01286
01287
01288 if (!A.Filled() ) {
01289 std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() is false, it is required to be true. (Result matrix B is not required to be Filled())."<<std::endl;
01290 EPETRA_CHK_ERR(-1);
01291 }
01292
01293
01294 Epetra_CrsMatrix * Aprime = 0;
01295 EpetraExt::RowMatrix_Transpose * Atrans = 0;
01296 if( transposeA )
01297 {
01298 Atrans = new EpetraExt::RowMatrix_Transpose();
01299 Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A)))));
01300 }
01301 else
01302 Aprime = const_cast<Epetra_CrsMatrix*>(&A);
01303
01304 int MaxNumEntries = EPETRA_MAX( A.MaxNumEntries(), B.MaxNumEntries() );
01305 int A_NumEntries, B_NumEntries;
01306 int * A_Indices = new int[MaxNumEntries];
01307 double * A_Values = new double[MaxNumEntries];
01308 int* B_Indices;
01309 double* B_Values;
01310
01311 int NumMyRows = B.NumMyRows();
01312 int Row, err;
01313
01314 if( scalarA )
01315 {
01316
01317 for( int i = 0; i < NumMyRows; ++i )
01318 {
01319 Row = B.GRID(i);
01320 EPETRA_CHK_ERR( Aprime->ExtractGlobalRowCopy( Row, MaxNumEntries, A_NumEntries, A_Values, A_Indices ) );
01321
01322 if (scalarB != 1.0) {
01323 if (!B.Filled()) {
01324 EPETRA_CHK_ERR( B.ExtractGlobalRowView( Row, B_NumEntries,
01325 B_Values, B_Indices));
01326 }
01327 else {
01328 EPETRA_CHK_ERR( B.ExtractMyRowView( i, B_NumEntries,
01329 B_Values, B_Indices));
01330 }
01331
01332 for(int jj=0; jj<B_NumEntries; ++jj) {
01333 B_Values[jj] = scalarB*B_Values[jj];
01334 }
01335 }
01336
01337 if( scalarA != 1.0 ) {
01338 for( int j = 0; j < A_NumEntries; ++j ) A_Values[j] *= scalarA;
01339 }
01340
01341 if( B.Filled() ) {
01342 err = B.SumIntoGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
01343 assert( err == 0 );
01344 }
01345 else {
01346 err = B.InsertGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
01347 assert( err == 0 || err == 1 || err == 3 );
01348 }
01349 }
01350 }
01351 else {
01352 EPETRA_CHK_ERR( B.Scale(scalarB) );
01353 }
01354
01355 delete [] A_Indices;
01356 delete [] A_Values;
01357
01358 if( Atrans ) delete Atrans;
01359
01360 return(0);
01361 }
01362
01363 }
01364