Amesos Package Browser (Single Doxygen Collection) Development
SuperludistOO.cpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //                Amesos: Direct Sparse Solver Package
00005 //                 Copyright (2004) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #include "SuperludistOO.h"
00030 #ifdef EPETRA_MPI
00031 #include "Epetra_MpiComm.h"
00032 #else
00033     This code cannot be compiled without mpi.h.
00034 #include "Epetra_Comm.h"
00035 #endif
00036 #include "Epetra_Map.h"
00037 #include "Epetra_LocalMap.h"
00038 #include "Epetra_Vector.h"
00039 #include "Epetra_RowMatrix.h"
00040 #include "Epetra_Operator.h"
00041 #include "Epetra_Import.h"
00042 #include "Epetra_Export.h"
00043 #include "Epetra_CrsMatrix.h"
00044 #include "supermatrix.h"      
00045 #include "superlu_ddefs.h"
00046 #include "CrsMatrixTranspose.h"
00047 #include <vector>
00048 
00049 #ifdef DEBUG
00050 #include "Comm_assert_equal.h"
00051     // #include "CrsMatricesAreIdentical.h"
00052 #endif
00053 #ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
00054 #include "ExtractCrsFromRowMatrix.h"
00055 #endif
00056 /*
00057   Returns the largest number of rows that allows NumProcs to be used within a 
00058   rectangular grid with no more rows than columns.
00059 
00060   i.e. max i such that there exists j > i such that i*j = NumProcs
00061 */
00062 int SLU_NumRows( int NumProcs ) {
00063 #ifdef TFLOP
00064   //  Else, parameter 6 of DTRSV CTNLU is incorrect 
00065   return 1;
00066 #else
00067   int i;
00068   int numrows ;
00069   for ( i = 1; i*i <= NumProcs; i++ ) 
00070     ;
00071   bool done = false ;
00072   for ( numrows = i-1 ; done == false ; ) {
00073     int NumCols = NumProcs / numrows ; 
00074     if ( numrows * NumCols == NumProcs ) 
00075       done = true; 
00076     else 
00077       numrows-- ; 
00078   }
00079   return numrows;
00080 #endif
00081 }
00082 
00083 //=============================================================================
00084 SuperludistOO::SuperludistOO(const Epetra_LinearProblem &prob ) {
00085   //  AllocAzArrays();
00086 
00087   Problem_ = &prob ; 
00088   Transpose_ = false; 
00089   A_and_LU_built = false ; 
00090   Factored_ = false ; 
00091   FirstCallToSolve_ = true ; 
00092   //
00093   //  The following are initialized just on general principle
00094   //
00095   numprocs = -13 ; 
00096   nprow = -13 ; 
00097   npcol = -13 ; 
00098   numrows = -13 ; 
00099 
00100 }
00101 
00102 //=============================================================================
00103 SuperludistOO::~SuperludistOO(void) {
00104   //  DeleteMemory();
00105   //  DeleteAzArrays();
00106 
00107   if ( A_and_LU_built ) { 
00108     // Destroy_CompCol_Matrix_dist(&A);
00109     SUPERLU_FREE(A.Store);
00110     Destroy_LU(numrows, &grid, &LUstruct);
00111     ScalePermstructFree(&ScalePermstruct);
00112     LUstructFree(&LUstruct);
00113     SUPERLU_FREE(berr);
00114   }
00115 
00116 }
00117 
00118 //=============================================================================
00119 
00120 //
00121 //  Solve() uses several intermediate matrices to convert the input matrix
00122 //  to one that we can pass to the Sparse Direct Solver
00123 //
00124 //  Epetra_RowMatrix *RowMatrixA - The input matrix
00125 //  Epetra_CrsMatrix *CastCrsMatrixA - The input matrix casted to a crs matrix
00126 //  Epetra_CrsMatrix ExtractCrsMatrixA - Converted to a Crs matrix 
00127 //                                 (Unused if RowMatrix is an Epetra_CrsMatrix)
00128 //  Epetra_CrsMatrix *Phase2Mat - Guaranteed to be a CrsMatrix
00129 //  Epetra_CrsMatrix SerialCrsMatrixA - Phase2Mat coalesced to one process
00130 //  Epetra_CrsMatrix *Phase3Mat - A pseudo-distributed CrsMatrix
00131 //    (i.e. stored exclusively on one process)
00132 //  Epetra_CrsMatrix Phase3MatTrans - The transpose of Phase3Mat
00133 //  Epetra_CrsMatrix *Phase4Mat - A pseudo-serial CrsMatrix with the 
00134 //    proper transposition
00135 //  Epetra_CrsMatrix Phase5Mat - A replicated CrsMatrix with the 
00136 //    proper transposition 
00137 //
00138 //  This is what the code does:
00139 //  Step 1)  Convert the matrix to an Epetra_CrsMatrix
00140 //  Step 2)  Coalesce the matrix onto process 0
00141 //  Step 3)  Transpose the matrix 
00142 //  Step 4)  Replicate the matrix
00143 //  Step 5)  Convert vector b to a replicated vector
00144 //  Step 6)  Convert the matrix to Ap, Ai, Aval
00145 //  Step 7)  Call SuperLUdist
00146 //  Step 8)  Convert vector x back to a distributed vector
00147 //
00148 //  Remaining tasks:
00149 //  1)  I still need to make it possible for SuperludistOO to accept a 
00150 //  replicated matrix without using any additional memory.
00151 //  I believe that all that I need is to move the definition
00152 //  of ExtractCrsMatrixA,  SerialCrsMatrixA and Phase3MatTrans up 
00153 //  to the top of the code and add a conditional which tests to 
00154 //  see if RowMatrixA is actually in the exact format that we need,
00155 //  and if so, skip all the matrix transformations.  Also, Phase5Mat 
00156 //  needs to be changed to Phase4Replicated with an added pointer, named
00157 //  *Phase5Mat.
00158 //  2)  Can we handle a transposed matrix any cleaner?  We build Ap, 
00159 //  Ai and Avalues - can we build that as a transpose more efficiently
00160 //  than doing a full CrsMatrix to CrsMatrix transpose?
00161 //  
00162 //  Memory usage:
00163 //    ExtractCrsMatrixA - 1 if RowMAtrixA is not a CrsMatrix
00164 //    SerialCrsMatrixA - 1 if RowMatrixA is not a serial matrix
00165 //    Phase3MatTrans - 1 if RowMatrixA unless a transpose solve is requested
00166 //    Phase5Mat - 1 
00167 //  If we need SerialCrsMAttrixA, then ExtractCrsMatrixA will not be a
00168 //  serial matrix, hence three extra serial copies is the maximum.
00169 //
00170 int SuperludistOO::Solve(bool factor) { 
00171   //
00172   //  I am going to put these here until I determine that I need them in 
00173   //  SuperludistOO.h 
00174   //
00175 
00176   bool CheckExtraction = false;    //  Set to true to force extraction for unit test
00177 
00178   Epetra_RowMatrix *RowMatrixA = 
00179     dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
00180   
00181   EPETRA_CHK_ERR( RowMatrixA == 0 ) ; 
00182 
00183   Epetra_CrsMatrix *CastCrsMatrixA = dynamic_cast<Epetra_CrsMatrix*>(RowMatrixA) ; 
00184 #ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
00185   Epetra_CrsMatrix *ExtractCrsMatrixA = 0;
00186 #endif
00187   Epetra_CrsMatrix *Phase2Mat = 0 ;
00188   const Epetra_Comm &Comm = RowMatrixA->Comm();
00189   
00190   int iam = Comm.MyPID() ;
00191 #if 0
00192   //
00193   //  The following lines allow us time to attach the debugger
00194   //
00195   int hatever;
00196   if ( iam == 0 )  cin >> hatever ; 
00197   Comm.Barrier();
00198 #endif
00199 
00200   //
00201   //  Step 1)  Convert the matrix to an Epetra_CrsMatrix
00202   //
00203   //  If RowMatrixA is not a CrsMatrix, i.e. the dynamic cast fails, 
00204   //  extract a CrsMatrix from the RowMatrix.
00205   //
00206   if ( CastCrsMatrixA != 0 && ! CheckExtraction ) { 
00207     Phase2Mat = CastCrsMatrixA ; 
00208   } else {
00209 #ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
00210     ExtractCrsMatrixA = new Epetra_CrsMatrix( *RowMatrixA ) ; 
00211 
00212     Phase2Mat = ExtractCrsMatrixA ; 
00213 #ifdef DEBUG
00214     if ( CheckExtraction ) 
00215       assert( CrsMatricesAreIdentical( CastCrsMatrixA, ExtractCrsMatrixA ) ) ; 
00216 #endif
00217 #else
00218     assert( false ) ;
00219 #endif
00220   }
00221 
00222   assert( Phase2Mat != NULL ) ; 
00223   const Epetra_Map &Phase2Matmap = Phase2Mat->RowMap() ; 
00224 
00225   //
00226   //  Step 2)  Coalesce the matrix onto process 0
00227   //
00228   const Epetra_MpiComm & comm1 = dynamic_cast<const Epetra_MpiComm &> (Comm);
00229   MPI_Comm MPIC = comm1.Comm() ;
00230 
00231   int IsLocal = ( Phase2Matmap.NumMyElements() == 
00232       Phase2Matmap.NumGlobalElements() )?1:0;
00233   Comm.Broadcast( &IsLocal, 1, 0 ) ; 
00234 #ifdef DEBUG
00235   assert( Comm_assert_equal( &Comm, IsLocal ) );
00236 #endif
00237 
00238   Epetra_CrsMatrix *Phase3Mat = 0 ;
00239 
00240   int NumGlobalElements_ = Phase2Matmap.NumGlobalElements() ;
00241   //  Create a serial map in case we end up needing it 
00242   //  If it is created inside the else block below it would have to
00243   //  be with a call to new().
00244   int NumMyElements_ = 0 ;
00245   if (iam==0) NumMyElements_ = NumGlobalElements_;
00246   Epetra_Map SerialMap( NumGlobalElements_, NumMyElements_, 0, Comm );
00247   Epetra_CrsMatrix SerialCrsMatrixA(Copy, SerialMap, 0);
00248 
00249 
00250   if ( IsLocal==1 ) {
00251      Phase3Mat = Phase2Mat ;
00252   } else {
00253 
00254     Epetra_Export export_to_serial( Phase2Matmap, SerialMap);
00255 
00256     SerialCrsMatrixA.Export( *Phase2Mat, export_to_serial, Add ); 
00257     
00258     SerialCrsMatrixA.FillComplete() ; 
00259     Phase3Mat = &SerialCrsMatrixA ;
00260 
00261   }
00262   Comm.Barrier() ; 
00263 
00264 
00265 
00266   //
00267   //  Step 3)  Transpose the matrix 
00268   //
00269   const Epetra_Map &Phase3Matmap = Phase3Mat->RowMap() ; 
00270 
00271   numrows = Phase3Mat->NumGlobalRows();
00272   int numcols = Phase3Mat->NumGlobalCols();
00273   int numentries = Phase3Mat->NumGlobalNonzeros();
00274 
00275   Epetra_CrsMatrix Phase3MatTrans(Copy, Phase3Matmap, 0);
00276   Epetra_CrsMatrix *Phase4Mat;
00277 
00278   if ( GetTrans() ) { 
00279     Phase4Mat = Phase3Mat ; 
00280   } else {
00281     assert( CrsMatrixTranspose( Phase3Mat, &Phase3MatTrans ) == 0 ) ; 
00282     Phase4Mat = &Phase3MatTrans ;
00283   }
00284 
00285 
00286   //
00287   //  Step 4)  Replicate the matrix
00288   //
00289   int * AllIDs = new int[numrows];
00290   for ( int i = 0; i < numrows ; i++ ) AllIDs[i] = i ; 
00291 
00292   // Create a replicated map and matrix
00293   Epetra_Map ReplicatedMap( -1, numrows, AllIDs, 0, Comm);
00294   //  Epetra_LocalMap ReplicatedMap( numrows, 0, Comm);   // Compiles,runs, fails to replicate
00295 
00296   delete [] AllIDs;
00297 
00298   Epetra_Import importer( ReplicatedMap, Phase3Matmap );
00299 
00300   Epetra_CrsMatrix Phase5Mat(Copy, ReplicatedMap, 0);
00301   EPETRA_CHK_ERR( Phase5Mat.Import( *Phase4Mat, importer, Insert) );
00302   EPETRA_CHK_ERR( Phase5Mat.FillComplete() ) ; 
00303 
00304   assert( Phase5Mat.NumMyRows() == Phase4Mat->NumGlobalRows() ) ;
00305 
00306   //
00307   //  Step 5)  Convert vector b to a replicated vector
00308   //
00309   Epetra_MultiVector   *vecX = Problem_->GetLHS() ; 
00310   Epetra_MultiVector   *vecB = Problem_->GetRHS() ; 
00311 
00312   int nrhs; 
00313   if ( vecX == 0 ) { 
00314     nrhs = 0 ;
00315     EPETRA_CHK_ERR( vecB != 0 ) ; 
00316   } else { 
00317     nrhs = vecX->NumVectors() ; 
00318     EPETRA_CHK_ERR( vecB->NumVectors() != nrhs ) ; 
00319   }
00320 
00321   int nArows = Phase3Mat->NumGlobalRows() ; 
00322   int nAcols = Phase3Mat->NumGlobalCols() ; 
00323 
00324 #ifdef ONE_VECTOR_ONLY
00325   assert( vecX->NumVectors() == 1 ) ; 
00326   assert( vecB->NumVectors() == 1 ) ; 
00327 
00328   Epetra_Vector *vecXvector = dynamic_cast<Epetra_Vector*>(vecX) ; 
00329   Epetra_Vector *vecBvector = dynamic_cast<Epetra_Vector*>(vecB) ; 
00330 
00331   assert( vecXvector != 0 ) ; 
00332   assert( vecBvector != 0 ) ; 
00333 
00334   Epetra_Vector vecXreplicated( ReplicatedMap ) ; 
00335   Epetra_Vector vecBreplicated( ReplicatedMap ) ; 
00336 #else
00337   Epetra_MultiVector *vecXvector = (vecX) ; 
00338   Epetra_MultiVector *vecBvector = (vecB) ; 
00339 
00340   Epetra_MultiVector vecXreplicated( ReplicatedMap, nrhs ) ; 
00341   Epetra_MultiVector vecBreplicated( ReplicatedMap, nrhs ) ; 
00342 #endif
00343 
00344   Epetra_Import ImportToReplicated( ReplicatedMap, Phase2Matmap);
00345 
00346   vecXreplicated.Import( *vecXvector, ImportToReplicated, Insert ) ;
00347   vecBreplicated.Import( *vecBvector, ImportToReplicated, Insert ) ;
00348 
00349   assert( nArows == vecXreplicated.MyLength() ) ; 
00350   assert( nAcols == vecBreplicated.MyLength() ) ;
00351 
00352   double *bValues ;
00353   double *xValues ;
00354   int bLda, xLda ; 
00355 
00356   assert( vecBreplicated.ExtractView( &bValues, &bLda ) == 0 )  ; 
00357   assert( vecXreplicated.ExtractView( &xValues, &xLda ) == 0 ) ; 
00358 
00359   if ( factor ) { 
00360   //
00361   //  Step 6) Convert the matrix to Ap, Ai, Aval
00362   //
00363   Ap.resize( numrows+1 );
00364   Ai.resize( EPETRA_MAX( numrows, numentries) ) ; 
00365   Aval.resize( EPETRA_MAX( numrows, numentries) ) ; 
00366 
00367   int NumEntries ;
00368   double *RowValues;
00369   int *ColIndices;
00370   int Ai_index = 0 ; 
00371   int MyRow;
00372   for ( MyRow = 0; MyRow <numrows; MyRow++ ) {
00373     int status = Phase5Mat.ExtractMyRowView( MyRow, NumEntries, RowValues, ColIndices ) ;
00374     assert( status == 0 ) ; 
00375     Ap[MyRow] = Ai_index ; 
00376     for ( int j = 0; j < NumEntries; j++ ) { 
00377       Ai[Ai_index] = ColIndices[j] ; 
00378       Aval[Ai_index] = RowValues[j] ; 
00379       Ai_index++;
00380     }
00381   }
00382   assert( numrows == MyRow );
00383   Ap[ numrows ] = Ai_index ; 
00384   }
00385 
00386   //
00387   //  Step 7)  Call SuperLUdist
00388   //  
00389 
00390   //
00391   //  This really belongs somewhere else - perhaps in the constructor
00392   //
00393   if ( factor ) { 
00394     numprocs = Comm.NumProc() ;                 
00395     nprow = SLU_NumRows( numprocs ) ; 
00396     npcol = numprocs / nprow ;
00397     assert ( nprow * npcol == numprocs ) ; 
00398     superlu_gridinit( MPIC, nprow, npcol, &grid);
00399   
00400 #ifdef DEBUG
00401     assert( Comm_assert_equal( &Comm, numentries ) );
00402     assert( Comm_assert_equal( &Comm, numrows ) );
00403     assert( Comm_assert_equal( &Comm, numcols ) );
00404 #endif
00405     
00406   } else {
00407     assert( numprocs == Comm.NumProc() ) ; 
00408   }
00409   int ldb = numrows ; 
00410   /* Bail out if I do not belong in the grid. */
00411   if ( iam < nprow * npcol ) {
00412     //
00413     //  All processes need to have identical values of:
00414     //    numrows(m), numcols(n), nnz(NumEntries), 
00415     //    Aval(a), Ap(xa), Ai(asub)
00416     //    double(numentries), int(n+1), int(numentries) 
00417 
00418 #ifdef DEBUG
00419     for ( int ii = 0; ii < min( numentries, 10 ) ; ii++ ) { 
00420       assert( Comm_assert_equal( &Comm, Aval[ii] ) ) ; 
00421     }
00422     for ( int ii = 0; ii < min( numcols+1, 10 ) ; ii++ ) { 
00423       assert( Comm_assert_equal( &Comm, Ai[ii] ) ); 
00424       assert( Comm_assert_equal( &Comm, Ap[ii] ) ); 
00425     }
00426     for ( int ii = 0; ii < min( numrows, 10 ) ; ii++ ) { 
00427       assert( Comm_assert_equal( &Comm, bValues[ii] ) ); 
00428     }
00429 #endif
00430   
00431     if ( factor ) { 
00432       set_default_options(&options);
00433       
00434       if ( !(berr = doubleMalloc_dist(nrhs)) )
00435   EPETRA_CHK_ERR( -1 ) ; 
00436       
00437       /* Create compressed column matrix for A. */
00438       dCreate_CompCol_Matrix_dist(&A, numrows, numcols, 
00439           numentries, &Aval[0], &Ai[0], 
00440           &Ap[0], SLU_NC, SLU_D, SLU_GE);
00441       A_and_LU_built = true; 
00442       
00443 #if 0
00444       cout << " Here is A " << endl ; 
00445       dPrint_CompCol_Matrix_dist( &A ); 
00446       cout << " That was A " << "numrows = " << numrows <<  endl ; 
00447       cout << "numcols = " << numcols <<  endl ; 
00448 #endif
00449       
00450       /* Initialize ScalePermstruct and LUstruct. */
00451       ScalePermstructInit(numrows, numcols, &ScalePermstruct);
00452       LUstructInit(numrows, numcols, &LUstruct);
00453       
00454       assert( options.Fact == DOFACT );  
00455       options.Fact = DOFACT ;       
00456 
00457       Factored_ = true; 
00458     } else {
00459       assert( Factored_ == true ) ; 
00460       EPETRA_CHK_ERR( Factored_ == false ) ; 
00461       options.Fact = FACTORED ; 
00462     }
00463     //
00464     //  pdgssvx_ABglobal returns x in b, so we copy b into x and pass x as b.
00465     //
00466     for ( int j = 0 ; j < nrhs; j++ )
00467       for ( int i = 0 ; i < numrows; i++ ) xValues[i+j*xLda] = bValues[i+j*xLda]; 
00468     
00469     /* Initialize the statistics variables. */
00470     PStatInit(&stat);
00471 
00472     /* Call the linear equation solver. */
00473     int info ;
00474     pdgssvx_ABglobal(&options, &A, &ScalePermstruct, &xValues[0], 
00475          ldb, nrhs, &grid, &LUstruct, berr, &stat, &info);
00476     EPETRA_CHK_ERR( info ) ; 
00477 
00478     PStatFree(&stat);
00479 
00480   }
00481 
00482 
00483   //
00484   //  Step 8)  Convert vector x back to a distributed vector
00485   //
00486   //  This is an ugly hack - it should be cleaned up someday
00487   //
00488   for (int i = 0 ; i < numrows; i++ ) { 
00489     int lid[1] ; 
00490     lid[0] = Phase2Matmap.LID( i ) ; 
00491     if ( lid[0] >= 0 ) { 
00492       for (int j = 0 ; j < nrhs; j++ ) { 
00493   vecXvector->ReplaceMyValue(  lid[0], j, xValues[i + ldb * j]  ) ; 
00494       }
00495     }
00496   }
00497 
00498   return(0) ; 
00499 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines