Amesos Package Browser (Single Doxygen Collection) Development
Superludist2_OO.cpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //                Amesos: Direct Sparse Solver Package
00005 //                 Copyright (2004) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #define KLOPTER
00030 #include "Superludist2_OO.h"
00031 #ifdef EPETRA_MPI
00032 #include "Epetra_MpiComm.h"
00033 #else
00034     This code cannot be compiled without mpi.h.
00035 #include "Epetra_Comm.h"
00036 #endif
00037 #include "Epetra_Map.h"
00038 #include "Epetra_LocalMap.h"
00039 #include "Epetra_Vector.h"
00040 #include "Epetra_RowMatrix.h"
00041 #include "Epetra_Operator.h"
00042 #include "Epetra_Import.h"
00043 #include "Epetra_Export.h"
00044 #include "Epetra_CrsMatrix.h"
00045 #include "CrsMatrixTranspose.h"
00046 #include <vector>
00047 
00048 #ifdef DEBUG
00049 #include "Comm_assert_equal.h"
00050     // #include "CrsMatricesAreIdentical.h"
00051 #endif
00052 #ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
00053 #include "ExtractCrsFromRowMatrix.h"
00054 #endif
00055 /*
00056   Returns the largest number of rows that allows NumProcs to be used within a 
00057   rectangular grid with no more rows than columns.
00058 
00059   i.e. max i such that there exists j > i such that i*j = NumProcs
00060 */
00061 int SLU_NumProcRows( int NumProcs ) {
00062 #ifdef TFLOP
00063   //  Else, parameter 6 of DTRSV CTNLU is incorrect 
00064   return 1;
00065 #else
00066   int i;
00067   int NumProcRows ;
00068   for ( i = 1; i*i <= NumProcs; i++ ) 
00069     ;
00070   bool done = false ;
00071   for ( NumProcRows = i-1 ; done == false ; ) {
00072     int NumCols = NumProcs / NumProcRows ; 
00073     if ( NumProcRows * NumCols == NumProcs ) 
00074       done = true; 
00075     else 
00076       NumProcRows-- ; 
00077   }
00078   return NumProcRows;
00079 #endif
00080 }
00081 
00082 //=============================================================================
00083 Superludist2_OO::Superludist2_OO(const Epetra_LinearProblem &prob ) {
00084   //  AllocAzArrays();
00085 
00086   Problem_ = &prob ; 
00087   Transpose_ = false; 
00088   A_and_LU_built = false ; 
00089   Factored_ = false ; 
00090   FirstCallToSolve_ = true ; 
00091   //
00092   //  The following are initialized just on general principle
00093   //
00094   numprocs = -13 ; 
00095   nprow = -13 ; 
00096   npcol = -13 ; 
00097   numrows = -13 ; 
00098 
00099 }
00100 
00101 //=============================================================================
00102 Superludist2_OO::~Superludist2_OO(void) {
00103   //  DeleteMemory();
00104   //  DeleteAzArrays();
00105 
00106   if ( A_and_LU_built ) { 
00107     //    Destroy_CompRowLoc_Matrix_dist(&A);
00108     SUPERLU_FREE( A.Store );
00109     ScalePermstructFree(&ScalePermstruct);
00110     Destroy_LU(numrows, &grid, &LUstruct);
00111     LUstructFree(&LUstruct);
00112     if ( options.SolveInitialized ) {
00113       dSolveFinalize(&options, &SOLVEstruct ) ; 
00114     }
00115     superlu_gridexit(&grid);
00116   }
00117 
00118 }
00119 
00120 //=============================================================================
00121 
00122 //
00123 //  Solve() uses several intermediate matrices to convert the input matrix
00124 //  to one that we can pass to the Sparse Direct Solver
00125 //
00126 //  Epetra_RowMatrix *RowMatrixA - The input matrix
00127 //  Epetra_CrsMatrix *CastCrsMatrixA - The input matrix casted to a crs matrix
00128 //  Epetra_CrsMatrix ExtractCrsMatrixA - Converted to a Crs matrix 
00129 //                                 (Unused if RowMatrix is an Epetra_CrsMatrix)
00130 //  Epetra_CrsMatrix *Phase2Mat - Guaranteed to be a CrsMatrix
00131 //  Epetra_CrsMatrix SerialCrsMatrixA - Phase2Mat coalesced to one process
00132 //  Epetra_CrsMatrix *Phase3Mat - A pseudo-distributed CrsMatrix
00133 //    (i.e. stored exclusively on one process)
00134 //  Epetra_CrsMatrix Phase3MatTrans - The transpose of Phase3Mat
00135 //  Epetra_CrsMatrix *Phase4Mat - A pseudo-serial CrsMatrix with the 
00136 //    proper transposition
00137 //  Epetra_CrsMatrix Phase5Mat - A replicated CrsMatrix with the 
00138 //    proper transposition 
00139 //
00140 //  This is what the code does:
00141 //  Step 1)  Convert the matrix to an Epetra_CrsMatrix
00142 //  Step 2)  Coalesce the matrix onto process 0
00143 //  Step 3)  Transpose the matrix 
00144 //  Step 4)  Replicate the matrix
00145 //  Step 5)  Convert vector b to a replicated vector
00146 //  Step 6)  Convert the matrix to Ap, Ai, Aval
00147 //  Step 7)  Call SuperLUdist
00148 //  Step 8)  Convert vector x back to a distributed vector
00149 //
00150 //  Remaining tasks:
00151 //  1)  I still need to make it possible for Superludist2_OO to accept a 
00152 //  replicated matrix without using any additional memory.
00153 //  I believe that all that I need is to move the definition
00154 //  of ExtractCrsMatrixA,  SerialCrsMatrixA and Phase3MatTrans up 
00155 //  to the top of the code and add a conditional which tests to 
00156 //  see if RowMatrixA is actually in the exact format that we need,
00157 //  and if so, skip all the matrix transformations.  Also, Phase5Mat 
00158 //  needs to be changed to Phase4Replicated with an added pointer, named
00159 //  *Phase5Mat.
00160 //  2)  Can we handle a transposed matrix any cleaner?  We build Ap, 
00161 //  Ai and Avalues - can we build that as a transpose more efficiently
00162 //  than doing a full CrsMatrix to CrsMatrix transpose?
00163 //  
00164 //  Memory usage:
00165 //    ExtractCrsMatrixA - 1 if RowMAtrixA is not a CrsMatrix
00166 //    SerialCrsMatrixA - 1 if RowMatrixA is not a serial matrix
00167 //    Phase3MatTrans - 1 if RowMatrixA unless a transpose solve is requested
00168 //    Phase5Mat - 1 
00169 //  If we need SerialCrsMAttrixA, then ExtractCrsMatrixA will not be a
00170 //  serial matrix, hence three extra serial copies is the maximum.
00171 //
00172 
00173 #undef EPETRA_CHK_ERR
00174 #define EPETRA_CHK_ERR(xxx) assert( (xxx) == 0 ) 
00175 
00176 int Superludist2_OO::Solve(bool factor) { 
00177   //
00178   //  I am going to put these here until I determine that I need them in 
00179   //  Superludist2_OO.h 
00180   //
00181 
00182 
00183   bool CheckExtraction = false;    //  Set to true to force extraction for unit test
00184 
00185   assert( GetTrans() == false ) ; 
00186 
00187   Epetra_RowMatrix *RowMatrixA = 
00188     dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
00189   
00190   EPETRA_CHK_ERR( RowMatrixA == 0 ) ; 
00191 
00192   Epetra_CrsMatrix *CastCrsMatrixA = dynamic_cast<Epetra_CrsMatrix*>(RowMatrixA) ; 
00193 #ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
00194   Epetra_CrsMatrix *ExtractCrsMatrixA = 0;
00195 #endif
00196   Epetra_CrsMatrix *Phase2Mat = 0 ;
00197   const Epetra_Comm &Comm = RowMatrixA->Comm();
00198   
00199   int iam = Comm.MyPID() ;
00200 #if 0
00201   //
00202   //  The following lines allow us time to attach the debugger
00203   //
00204   int hatever;
00205   if ( iam == 0 )  cin >> hatever ; 
00206   Comm.Barrier();
00207 #endif
00208 
00209 
00210   //
00211   //  Step 1)  Convert the matrix to an Epetra_CrsMatrix
00212   //
00213   //  If RowMatrixA is not a CrsMatrix, i.e. the dynamic cast fails, 
00214   //  extract a CrsMatrix from the RowMatrix.
00215   //
00216   if ( CastCrsMatrixA != 0 && ! CheckExtraction ) { 
00217     Phase2Mat = CastCrsMatrixA ; 
00218   } else {
00219 #ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
00220     ExtractCrsMatrixA = new Epetra_CrsMatrix( *RowMatrixA ) ; 
00221 
00222     Phase2Mat = ExtractCrsMatrixA ; 
00223 #ifdef DEBUG
00224     if ( CheckExtraction ) 
00225       assert( CrsMatricesAreIdentical( CastCrsMatrixA, ExtractCrsMatrixA ) ) ; 
00226 #endif
00227 #else
00228     assert( false ) ;
00229 #endif
00230   }
00231   assert( Phase2Mat != NULL ) ; 
00232   const Epetra_Map &Phase2Matmap = Phase2Mat->RowMap() ; 
00233 
00234   //
00235   //  Old Glossary:
00236   //    numrows, numcols = m,n, i.e. the size of the full, global, matrix
00237   //    m_loc            = the number of rows owned by this process
00238   //    nnz_loc          = the number of non zeros in the rows owned by this process
00239   //    Ap, Aval, Ai,    = rowptr, nzval, colind, = sparse row storage 
00240   //    MyRowPtr         = a redundant computation of Ap  (rowptr) 
00241 
00242   //
00243   //  Compute nnz_loc, m_loc, and MyRowPtr from the map
00244   //  
00245 
00246 #if 0
00247   cout << " A  Comm.NumProc() = " <<  Comm.NumProc() << endl ; 
00248 
00249   cout << " true = " << true << " LInMap = " <<  Phase2Matmap.LinearMap() << endl  ; // Map must be contiguously divided
00250   cout << " Superludist2_OO.cpp::   traceback mode = " << Epetra_Object::GetTracebackMode() << endl ; 
00251   cerr << " Send this to cerr cerr cerr   traceback mode = " << Epetra_Object::GetTracebackMode() << endl ; 
00252 #endif
00253 
00254   numrows = Phase2Matmap.NumGlobalElements() ; 
00255   assert( numrows == Phase2Mat->NumGlobalRows() ) ; 
00256   int numcols = Phase2Mat->NumGlobalCols() ; 
00257   assert( numrows == numcols ) ; 
00258 
00259   //
00260   //  Here I compute what a uniform distribution should look like
00261   //  so that I can compare what I think MyFirstElement should be 
00262   //  against what it really is.
00263   //
00264   int m_per_p = numrows / Comm.NumProc() ;
00265   int remainder = numrows - ( m_per_p * Comm.NumProc() );
00266   int MyFirstElement = iam * m_per_p + EPETRA_MIN( iam, remainder );
00267   int MyFirstNonElement = (iam+1) * m_per_p + EPETRA_MIN( iam+1, remainder );
00268   int NumExpectedElements = MyFirstNonElement - MyFirstElement ; 
00269 
00270 
00271   int IsLocal = ( Phase2Matmap.NumMyElements() == 
00272       Phase2Matmap.NumGlobalElements() )?1:0;
00273   Comm.Broadcast( &IsLocal, 1, 0 ) ; 
00274   //
00275   //  Step ?)  Convert to a distributed matrix, if appropriate
00276   //
00277   Epetra_CrsMatrix *Phase3Mat = 0 ;
00278   Epetra_Map DistMap(  Phase2Matmap.NumGlobalElements(), NumExpectedElements, 0, Comm );
00279   Epetra_CrsMatrix DistCrsMatrixA(Copy, DistMap, 0);
00280 
00281   bool redistribute = true ;
00282   if ( redistribute ) {
00283 
00284     Epetra_Export export_to_dist( Phase2Matmap, DistMap);
00285 
00286     DistCrsMatrixA.Export( *Phase2Mat, export_to_dist, Add ); 
00287     
00288     DistCrsMatrixA.FillComplete() ; 
00289     Phase3Mat = &DistCrsMatrixA ;
00290 
00291     
00292   } else {
00293     {  
00294       EPETRA_CHK_ERR( ! ( Phase2Matmap.LinearMap())  ) ; // Map must be contiguously divided
00295       //
00296       //  This is another way to check that the distribution is as pdgssvx expects it
00297       //  (i.e. a linear map)
00298       //
00299       int Phase2NumElements = Phase2Matmap.NumMyElements() ; 
00300       vector <int> MyRows( Phase2NumElements ) ; 
00301       Phase2Matmap.MyGlobalElements( &MyRows[0] ) ; 
00302       for (int row = 0 ; row < Phase2NumElements ; row++ ) {
00303   EPETRA_CHK_ERR( MyFirstElement+row != MyRows[row] ) ;
00304       }
00305     }
00306     Phase3Mat = Phase2Mat ;
00307   }
00308 
00309 #if 0
00310   assert( MyFirstElement ==  Phase3Mat->RowMap().MinMyGID() ) ; 
00311   assert( NumExpectedElements == Phase3Mat->RowMap().NumMyElements() ) ; 
00312   assert( MyFirstElement+NumExpectedElements-1 ==  Phase3Mat->RowMap().MaxMyGID() ) ; 
00313 #endif
00314   //  Comm.Barrier(); 
00315   int MyActualFirstElement = Phase3Mat->RowMap().MinMyGID() ; 
00316   int NumMyElements = Phase3Mat->NumMyRows() ; 
00317   vector <int> MyRowPtr( NumMyElements+1 ) ;  
00318   //
00319   //  This is actually redundant with the Ap below
00320   //
00321   MyRowPtr[0] = 0 ; 
00322   int CurrentRowPtr = 0 ;
00323   for ( int i = 0; i < NumMyElements ; i++ ) { 
00324     CurrentRowPtr += Phase3Mat->NumMyEntries( i ) ; 
00325     MyRowPtr[i+1] = CurrentRowPtr ; 
00326   }
00327 
00328   //
00329   //  Extract nzval(Aval) and colind(Ai) from the CrsMatrix (Phase3Mat) 
00330   //
00331 
00332   int nnz_loc = Phase3Mat->NumMyNonzeros() ;
00333   if ( factor ) { 
00334   //
00335   //  Step 6) Convert the matrix to Ap, Ai, Aval
00336   //
00337     Ap.resize( NumMyElements+1 );
00338     Ai.resize( EPETRA_MAX( NumMyElements, nnz_loc) ) ; 
00339     Aval.resize( EPETRA_MAX( NumMyElements, nnz_loc) ) ; 
00340     
00341     int NzThisRow ;
00342     double *RowValues;
00343     int *ColIndices;
00344     int Ai_index = 0 ; 
00345     int MyRow;
00346     int num_my_cols = Phase3Mat->NumMyCols() ; 
00347     vector <int>Global_Columns( num_my_cols ) ; 
00348     for ( int i = 0 ; i < num_my_cols ; i ++ ) { 
00349       Global_Columns[i] = Phase3Mat->GCID( i ) ; 
00350     }
00351 
00352     for ( MyRow = 0; MyRow < NumMyElements ; MyRow++ ) {
00353       int status = Phase3Mat->ExtractMyRowView( MyRow, NzThisRow, RowValues, ColIndices ) ;
00354       assert( status == 0 ) ; 
00355       Ap[MyRow] = Ai_index ; 
00356       assert( Ap[MyRow] == MyRowPtr[MyRow] ) ; 
00357       for ( int j = 0; j < NzThisRow; j++ ) { 
00358   Ai[Ai_index] = Global_Columns[ColIndices[j]] ; 
00359   Aval[Ai_index] = RowValues[j] ; 
00360   Ai_index++;
00361       }
00362     }
00363     assert( NumMyElements == MyRow );
00364     Ap[ NumMyElements ] = Ai_index ; 
00365   }
00366 
00367   //
00368   //  Pull B out of the Epetra_vector 
00369   //
00370 
00371   Epetra_MultiVector   *vecX = Problem_->GetLHS() ; 
00372   Epetra_MultiVector   *vecB = Problem_->GetRHS() ; 
00373 
00374   int nrhs; 
00375   if ( vecX == 0 ) { 
00376     nrhs = 0 ;
00377     EPETRA_CHK_ERR( vecB != 0 ) ; 
00378   } else { 
00379     nrhs = vecX->NumVectors() ; 
00380     EPETRA_CHK_ERR( vecB->NumVectors() != nrhs ) ; 
00381   }
00382 
00383   Epetra_MultiVector vecXdistributed( DistMap, nrhs ) ; 
00384   Epetra_MultiVector vecBdistributed( DistMap, nrhs ) ; 
00385 
00386 
00387   double *bValues ;
00388   double *xValues ;
00389   int ldb, ldx ; 
00390 
00391   Epetra_MultiVector* vecXptr; 
00392   Epetra_MultiVector* vecBptr; 
00393 
00394   if ( redistribute ) {
00395     Epetra_Import ImportToDistributed( DistMap, Phase2Matmap);
00396 
00397     vecXdistributed.Import( *vecX, ImportToDistributed, Insert ) ;
00398     vecBdistributed.Import( *vecB, ImportToDistributed, Insert ) ;
00399 
00400     vecXptr = &vecXdistributed ; 
00401     vecBptr = &vecBdistributed ; 
00402   } else {
00403     vecXptr = vecX ; 
00404     vecBptr = vecB ; 
00405   }
00406 
00407 
00408   EPETRA_CHK_ERR( vecBptr->ExtractView( &bValues, &ldb ) )  ; 
00409   EPETRA_CHK_ERR( vecXptr->ExtractView( &xValues, &ldx ) ) ; 
00410   EPETRA_CHK_ERR( ! ( ldx == ldb ) ) ; 
00411   EPETRA_CHK_ERR( ! ( ldx == NumMyElements ) ) ; 
00412 
00413   //
00414   //  Step 7)  Call SuperLUdist
00415   //  
00416 
00417   //
00418   //  This really belongs somewhere else - perhaps in the constructor
00419   //
00420   const Epetra_MpiComm & comm1 = dynamic_cast<const Epetra_MpiComm &> (Comm);
00421   MPI_Comm MPIC = comm1.Comm() ;
00422 
00423   if ( factor ) { 
00424     numprocs = Comm.NumProc() ;                 
00425     nprow = SLU_NumProcRows( numprocs ) ; 
00426     npcol = numprocs / nprow ;
00427     assert ( nprow * npcol == numprocs ) ; 
00428     superlu_gridinit( MPIC, nprow, npcol, &grid);
00429   
00430 
00431   } else {
00432     assert( numprocs == Comm.NumProc() ) ; 
00433   }
00434 
00435   /* Bail out if I do not belong in the grid. */
00436   if ( iam < nprow * npcol ) {
00437   
00438     if ( factor ) { 
00439       set_default_options(&options);
00440       
00441       dCreate_CompRowLoc_Matrix_dist( &A, numrows, numcols, 
00442               nnz_loc, NumMyElements, MyActualFirstElement,
00443               &Aval[0], &Ai[0], &Ap[0], 
00444               SLU_NR_loc, SLU_D, SLU_GE );
00445 
00446       A_and_LU_built = true; 
00447       
00448       /* Initialize ScalePermstruct and LUstruct. */
00449       ScalePermstructInit(numrows, numcols, &ScalePermstruct);
00450       LUstructInit(numrows, numcols, &LUstruct);
00451       
00452       assert( options.Fact == DOFACT );  
00453       options.Fact = DOFACT ;       
00454 
00455       Factored_ = true; 
00456     } else {
00457       assert( Factored_ == true ) ; 
00458       EPETRA_CHK_ERR( Factored_ == false ) ; 
00459       options.Fact = FACTORED ; 
00460     }
00461     //
00462     //  pdgssvx returns x in b, so we copy b into x.  
00463     //
00464     for ( int j = 0 ; j < nrhs; j++ )
00465       for ( int i = 0 ; i < NumMyElements; i++ ) xValues[i+j*ldx] = bValues[i+j*ldb]; 
00466 
00467     PStatInit(&stat);    /* Initialize the statistics variables. */
00468 
00469     int info ;
00470     vector<double>berr(nrhs);
00471     pdgssvx(&options, &A, &ScalePermstruct, &xValues[0], ldx, nrhs, &grid,
00472       &LUstruct, &SOLVEstruct, &berr[0], &stat, &info);
00473     EPETRA_CHK_ERR( info ) ; 
00474 
00475     PStatFree(&stat);
00476 
00477   }
00478 
00479   if ( redistribute ) { 
00480     Epetra_Import ImportBackToOriginal( Phase2Matmap,DistMap);
00481     
00482     vecX->Import( *vecXptr, ImportBackToOriginal, Insert ) ;
00483   }
00484 
00485 
00486   return(0) ; 
00487 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines