FEI Version of the Day
fei_Reducer.cpp
00001 
00002 /*--------------------------------------------------------------------*/
00003 /*    Copyright 2006 Sandia Corporation.                              */
00004 /*    Under the terms of Contract DE-AC04-94AL85000, there is a       */
00005 /*    non-exclusive license for use of this work by or on behalf      */
00006 /*    of the U.S. Government.  Export of this program may require     */
00007 /*    a license from the United States Government.                    */
00008 /*--------------------------------------------------------------------*/
00009 
00010 #include <fei_Reducer.hpp>
00011 #include <fei_MatrixGraph.hpp>
00012 #include <fei_Matrix.hpp>
00013 #include <fei_Matrix_core.hpp>
00014 #include <fei_Vector.hpp>
00015 #include <fei_Graph_Impl.hpp>
00016 #include <fei_ArrayUtils.hpp>
00017 #include <fei_TemplateUtils.hpp>
00018 #include <fei_SparseRowGraph.hpp>
00019 #include <fei_Vector.hpp>
00020 #include <fei_impl_utils.hpp>
00021 
00022 namespace fei {
00023 
00024 Reducer::Reducer(fei::SharedPtr<FillableMat> globalSlaveDependencyMatrix,
00025                  fei::SharedPtr<CSVec> g_vector,
00026                  MPI_Comm comm)
00027  : csrD_(),
00028    slavesPtr_(NULL),
00029    Kii_(),
00030    Kid_(),
00031    Kdi_(),
00032    Kdd_(),
00033    csrKii(),
00034    csrKid(),
00035    csrKdi(),
00036    csrKdd(),
00037    fi_(),
00038    fd_(),
00039    csfi(),
00040    csvec(),
00041    csvec_i(),
00042    tmpMat1_(),
00043    tmpMat2_(),
00044    tmpVec1_(),
00045    tmpVec2_(),
00046    csg_(),
00047    g_nonzero_(false),
00048    localUnreducedEqns_(),
00049    localReducedEqns_(),
00050    nonslaves_(),
00051    reverse_(),
00052    isSlaveEqn_(NULL),
00053    numGlobalSlaves_(0),
00054    numLocalSlaves_(0),
00055    firstLocalReducedEqn_(0),
00056    lastLocalReducedEqn_(0),
00057    lowestGlobalSlaveEqn_(0),
00058    highestGlobalSlaveEqn_(0),
00059    localProc_(0),
00060    numProcs_(1),
00061    comm_(comm),
00062    dbgprefix_("Reducer: "),
00063    mat_counter_(0),
00064    rhs_vec_counter_(0),
00065    bool_array_(0),
00066    int_array_(0),
00067    double_array_(0),
00068    array_len_(0),
00069    work_1D_(),
00070    work_2D_()
00071 {
00072   csrD_ = *globalSlaveDependencyMatrix;
00073   if (g_vector.get() != NULL) {
00074     csg_ = *g_vector;
00075   }
00076 
00077   initialize();
00078 }
00079 
00080 void
00081 Reducer::initialize()
00082 {
00083   numGlobalSlaves_ = csrD_.getNumRows();
00084   slavesPtr_ = &((csrD_.getGraph().rowNumbers)[0]);
00085   lowestGlobalSlaveEqn_ = slavesPtr_[0];
00086   highestGlobalSlaveEqn_ = slavesPtr_[numGlobalSlaves_-1];
00087 
00088   if (csg_.size() > 0) {
00089     double* gptr = &(csg_.coefs()[0]);
00090     for(size_t i=0; i<csg_.size(); ++i) {
00091       if (gptr[i] != 0.0) {
00092         g_nonzero_ = true;
00093         break;
00094       }
00095     }
00096   }
00097 
00098   //The following code sets up the mappings (nonslaves_ and reverse_)
00099   //necessary to implement the method 'translateFromReducedEqn'.
00100   //This code is ugly and inelegant, and it took me quite a while to get
00101   //it right. I won't even bother trying to comment it, but note that its
00102   //correctness is tested in test_utils/test_Reducer.cpp.
00103 
00104   nonslaves_.resize(numGlobalSlaves_*2);
00105 
00106   int nonslave_counter = 0;
00107   int slaveOffset = 0;
00108   int num_consecutive = 0;
00109   while(slaveOffset < numGlobalSlaves_-1) {
00110     int gap = slavesPtr_[slaveOffset+1]-slavesPtr_[slaveOffset]-1;
00111     if (gap > 0) {
00112       nonslaves_[nonslave_counter] = slavesPtr_[slaveOffset]+1;
00113       nonslaves_[numGlobalSlaves_+nonslave_counter++] = num_consecutive;
00114       num_consecutive = 0;
00115     }
00116     else {
00117       ++num_consecutive;
00118     }
00119     ++slaveOffset;
00120   }
00121 
00122   nonslaves_[nonslave_counter] = highestGlobalSlaveEqn_+1;
00123   nonslaves_[numGlobalSlaves_+nonslave_counter++] = num_consecutive;
00124 
00125   reverse_.resize(nonslave_counter);
00126   int first = lowestGlobalSlaveEqn_;
00127   reverse_[0] = first;
00128   for(int i=1; i<nonslave_counter; ++i) {
00129     reverse_[i] = reverse_[i-1] +
00130          (nonslaves_[i]-nonslaves_[i-1] - nonslaves_[numGlobalSlaves_+i] - 1);
00131   }
00132 
00133 #ifndef FEI_SER
00134     MPI_Comm_rank(comm_, &localProc_);
00135     MPI_Comm_size(comm_, &numProcs_);
00136 #endif
00137 
00138   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00139     FEI_OSTREAM& os = *output_stream_;
00140     os << dbgprefix_<< "ctor, numGlobalSlaves="<<numGlobalSlaves_
00141       << FEI_ENDL;
00142   }
00143 
00144   if (numGlobalSlaves_ < 1) {
00145     throw std::runtime_error("ERROR: don't use fei::Reducer when numGlobalSlaves==0. Report to Alan Williams.");
00146   }
00147 }
00148 
00149 Reducer::Reducer(fei::SharedPtr<fei::MatrixGraph> matrixGraph)
00150  : csrD_(),
00151    slavesPtr_(NULL),
00152    Kii_(),
00153    Kid_(),
00154    Kdi_(),
00155    Kdd_(),
00156    fi_(),
00157    fd_(),
00158    tmpMat1_(),
00159    tmpMat2_(),
00160    tmpVec1_(),
00161    tmpVec2_(),
00162    csg_(),
00163    g_nonzero_(false),
00164    localUnreducedEqns_(),
00165    localReducedEqns_(),
00166    nonslaves_(),
00167    reverse_(),
00168    isSlaveEqn_(NULL),
00169    numGlobalSlaves_(0),
00170    numLocalSlaves_(0),
00171    firstLocalReducedEqn_(0),
00172    lastLocalReducedEqn_(0),
00173    lowestGlobalSlaveEqn_(0),
00174    highestGlobalSlaveEqn_(0),
00175    localProc_(0),
00176    numProcs_(1),
00177    comm_(),
00178    dbgprefix_("Reducer: "),
00179    mat_counter_(0),
00180    rhs_vec_counter_(0),
00181    bool_array_(0),
00182    int_array_(0),
00183    double_array_(0),
00184    array_len_(0)
00185 {
00186   fei::SharedPtr<fei::VectorSpace> vecSpace = matrixGraph->getRowSpace();
00187   comm_ = vecSpace->getCommunicator();
00188   initialize();
00189 
00190   std::vector<int> indices;
00191   vecSpace->getIndices_Owned(indices);
00192   setLocalUnreducedEqns(indices);
00193 }
00194 
00195 Reducer::~Reducer()
00196 {
00197   delete [] isSlaveEqn_;   isSlaveEqn_ = 0;
00198   delete [] bool_array_;   bool_array_ = 0;
00199   delete [] int_array_;    int_array_ = 0;
00200   delete [] double_array_; double_array_ = 0;
00201   array_len_ = 0;
00202 }
00203 
00204 void
00205 Reducer::setLocalUnreducedEqns(const std::vector<int>& localUnreducedEqns)
00206 {
00207   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00208     FEI_OSTREAM& os = *output_stream_;
00209     os << dbgprefix_<< "setLocalUnreducedEqns, numLocalEqns="
00210       <<localUnreducedEqns.size() << FEI_ENDL;
00211   }
00212 
00213   if (localUnreducedEqns_ == localUnreducedEqns) {
00214     return;
00215   }
00216 
00217   localUnreducedEqns_ = localUnreducedEqns;
00218 
00219   int num = localUnreducedEqns_.size();
00220 
00221   if (isSlaveEqn_ != NULL) delete [] isSlaveEqn_;
00222 
00223   isSlaveEqn_ = num > 0 ? new bool[localUnreducedEqns_.size()] : NULL;
00224 
00225   numLocalSlaves_ = 0;
00226 
00227   for(size_t i=0; i<localUnreducedEqns_.size(); ++i) {
00228     int idx = fei::binarySearch(localUnreducedEqns_[i],
00229                                     slavesPtr_, numGlobalSlaves_);
00230     if (idx < 0) {
00231       isSlaveEqn_[i] = false;
00232     }
00233     else {
00234       isSlaveEqn_[i] = true;
00235       ++numLocalSlaves_;
00236 
00237       if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00238         FEI_OSTREAM& os = *output_stream_;
00239         os << dbgprefix_<<" slave " << localUnreducedEqns_[i] << " depends on ";
00240         int offset = csrD_.getGraph().rowOffsets[idx];
00241         int rowlen = csrD_.getGraph().rowOffsets[idx+1]-offset;
00242         int* indices = &(csrD_.getGraph().packedColumnIndices[offset]);
00243         for(int j=0; j<rowlen; ++j) {
00244           os << indices[j] << " ";
00245         }
00246         os << FEI_ENDL;
00247       }
00248     }
00249 
00250   }
00251 
00252   int num_slaves_on_lower_procs = 0;
00253 
00254 
00255 #ifndef FEI_SER
00256 
00257   if (numProcs_ > 1) {
00258     std::vector<int> procNumLocalSlaves(numProcs_);
00259 
00260     MPI_Allgather(&numLocalSlaves_, 1, MPI_INT, &procNumLocalSlaves[0],
00261                   1, MPI_INT, comm_);
00262 
00263     for(int p=0; p<localProc_; ++p) {
00264       num_slaves_on_lower_procs += procNumLocalSlaves[p];
00265     }
00266   }
00267 #endif
00268 
00269   unsigned first_non_slave_offset = 0;
00270   while(first_non_slave_offset < localUnreducedEqns_.size() &&
00271         isSlaveEqn_[first_non_slave_offset] == true) {
00272     ++first_non_slave_offset;
00273   }
00274 
00275   firstLocalReducedEqn_ = localUnreducedEqns_[first_non_slave_offset]
00276       - num_slaves_on_lower_procs - first_non_slave_offset;
00277 
00278   int num_local_eqns = localUnreducedEqns_.size() - numLocalSlaves_;
00279 
00280   lastLocalReducedEqn_ = firstLocalReducedEqn_ + num_local_eqns - 1;
00281 
00282   localReducedEqns_.resize(num_local_eqns);
00283 
00284   unsigned offset = 0;
00285   int eqn = firstLocalReducedEqn_;
00286   for(unsigned i=0; i<localUnreducedEqns_.size(); ++i) {
00287     if (isSlaveEqn_[i] == false) {
00288       localReducedEqns_[offset++] = eqn++;
00289     }
00290   }
00291 
00292   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00293     FEI_OSTREAM& os = *output_stream_;
00294     if (localUnreducedEqns_.size() > 0) {
00295       os << dbgprefix_<< "first local eqn="
00296          <<localUnreducedEqns_[0]<<", last local eqn="
00297       <<localUnreducedEqns_[localUnreducedEqns_.size()-1] << FEI_ENDL;
00298     }
00299     os << dbgprefix_<<"numLocalSlaves_="<<numLocalSlaves_
00300        <<", firstLocalReducedEqn_="<<firstLocalReducedEqn_
00301        <<", lastLocalReducedEqn_="<<lastLocalReducedEqn_<<FEI_ENDL;
00302   }
00303 }
00304 
00305 void
00306 Reducer::addGraphEntries(fei::SharedPtr<fei::SparseRowGraph> matrixGraph)
00307 {
00308   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00309     FEI_OSTREAM& os = *output_stream_;
00310     os << dbgprefix_<<"addGraphEntries"<<FEI_ENDL;
00311   }
00312 
00313   //iterate through the incoming matrixGraph, putting its contents into
00314   //Kdd, Kdi, Kid and Kii as appropriate.
00315 
00316   std::vector<int>& rowNumbers = matrixGraph->rowNumbers;
00317   std::vector<int>& rowOffsets = matrixGraph->rowOffsets;
00318   std::vector<int>& packedCols = matrixGraph->packedColumnIndices;
00319 
00320   for(unsigned i=0; i<rowNumbers.size(); ++i) {
00321     int row = rowNumbers[i];
00322 
00323     bool slave_row = isSlaveEqn(row);
00324 
00325     int rowLength = rowOffsets[i+1]-rowOffsets[i];
00326     int* cols = &packedCols[rowOffsets[i]];
00327 
00328     if (slave_row) {
00329       fei::FillableVec* Kdd_row = Kdd_.create_or_getRow(row);
00330       fei::FillableVec* Kdi_row = Kdi_.create_or_getRow(row);
00331 
00332       for(int j=0; j<rowLength; ++j) {
00333         int col = cols[j];
00334         bool slave_col = isSlaveEqn(col);
00335 
00336         if (slave_col) {
00337           Kdd_row->addEntry(col, 0.0);
00338         }
00339         else {
00340           Kdi_row->addEntry(col, 0.0);
00341         }
00342       }
00343     }
00344     else {
00345       //not a slave row, so add slave columns to Kid, and non-slave
00346       //columns to graph.
00347       fei::FillableVec* Kid_row = Kid_.create_or_getRow(row);
00348       fei::FillableVec* Kii_row = Kii_.create_or_getRow(row);
00349 
00350       for(int j=0; j<rowLength; ++j) {
00351         int col = cols[j];
00352         bool slave_col = isSlaveEqn(col);
00353 
00354         if (slave_col) {
00355           Kid_row->addEntry(col, 0.0);
00356         }
00357         else {
00358           Kii_row->addEntry(col, 0.0);
00359         }
00360       }
00361     }
00362   }
00363 }
00364 
00365 void
00366 Reducer::expand_work_arrays(int size)
00367 {
00368   if (size <= array_len_) return;
00369 
00370   array_len_ = size;
00371   delete [] bool_array_;
00372   delete [] int_array_;
00373   delete [] double_array_;
00374   bool_array_ = new bool[array_len_];
00375   int_array_ = new int[array_len_];
00376   double_array_ = new double[array_len_];
00377 }
00378 
00379 void
00380 Reducer::addGraphIndices(int numRows, const int* rows,
00381                          int numCols, const int* cols,
00382                          fei::Graph& graph)
00383 {
00384   expand_work_arrays(numCols);
00385 
00386   bool no_slave_cols = true;
00387   for(int i=0; i<numCols; ++i) {
00388     bool_array_[i] = isSlaveEqn(cols[i]);
00389     if (bool_array_[i]) no_slave_cols = false;
00390   }
00391 
00392   for(int i=0; i<numRows; ++i) {
00393     bool slave_row = isSlaveEqn(rows[i]);
00394 
00395     if (slave_row) {
00396       fei::FillableVec* Kdd_row = Kdd_.create_or_getRow(rows[i]);
00397       fei::FillableVec* Kdi_row = Kdi_.create_or_getRow(rows[i]);
00398 
00399       for(int j=0; j<numCols; ++j) {
00400         if (bool_array_[j]) {
00401           Kdd_row->addEntry(cols[j], 0.0);
00402         }
00403         else {
00404           Kdi_row->addEntry(cols[j], 0.0);
00405         }
00406       }
00407       ++mat_counter_;
00408     }
00409     else {
00410       //not a slave row, so add slave columns to Kid, and non-slave
00411       //columns to graph.
00412       fei::FillableVec* Kid_row = no_slave_cols ?
00413         NULL : Kid_.create_or_getRow(rows[i]);
00414   
00415       unsigned num_non_slave_cols = 0;
00416 
00417       for(int j=0; j<numCols; ++j) {
00418         if (bool_array_[j]) {
00419           Kid_row->addEntry(cols[j], 0.0);
00420           ++mat_counter_;
00421         }
00422         else {
00423           int_array_[num_non_slave_cols++] = translateToReducedEqn(cols[j]);
00424         }
00425       }
00426 
00427       if (num_non_slave_cols > 0) {
00428         int reduced_row = translateToReducedEqn(rows[i]);
00429         graph.addIndices(reduced_row, num_non_slave_cols, int_array_);
00430       }
00431     }
00432   }
00433 
00434   if (mat_counter_ > 600) {
00435     assembleReducedGraph(&graph, false);
00436   }
00437 }
00438 
00439 void
00440 Reducer::addSymmetricGraphIndices(int numIndices, const int* indices,
00441                                   bool diagonal,
00442                                   fei::Graph& graph)
00443 {
00444   if (diagonal) {
00445     for(int i=0; i<numIndices; ++i) {
00446       addGraphIndices(1, &indices[i], 1, &indices[i], graph);
00447     }
00448   }
00449   else {
00450     addGraphIndices(numIndices, indices, numIndices, indices, graph);
00451   }
00452 }
00453 
00454 void
00455 Reducer::assembleReducedGraph(fei::Graph* graph,
00456                               bool global_gather)
00457 {
00458   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00459     FEI_OSTREAM& os = *output_stream_;
00460     os << dbgprefix_<<"assembleReducedGraph(fei::Graph)"<<FEI_ENDL;
00461     if (output_level_ >= fei::FULL_LOGS) {
00462       os << dbgprefix_<<"Kdi:"<<FEI_ENDL<<Kdi_;
00463     }
00464   }
00465 
00466   //This function performs the appropriate manipulations (matrix-matrix
00467   //products, etc., on the Kid,Kdi,Kdd matrices and then assembles the
00468   //results into the input graph structure.
00469   //
00470 
00471   //form tmpMat1_ = Kid*D
00472   csrKid = Kid_;
00473   fei::multiply_CSRMat_CSRMat(csrKid, csrD_, tmpMat1_, true);
00474 
00475   csrKdi = Kdi_;
00476   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdi, tmpMat2_, true);
00477 
00478   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat1_);
00479   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00480 
00481   fei::impl_utils::add_to_graph(tmpMat1_, *graph);
00482   fei::impl_utils::add_to_graph(tmpMat2_, *graph);
00483 
00484   //form tmpMat1_ = D^T*Kdd
00485   csrKdd = Kdd_;
00486   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdd, tmpMat1_, true);
00487 
00488   //form tmpMat2_ = tmpMat1_*D = D^T*Kdd*D
00489   fei::multiply_CSRMat_CSRMat(tmpMat1_, csrD_, tmpMat2_, true);
00490 
00491   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00492 
00493   fei::impl_utils::add_to_graph(tmpMat2_, *graph);
00494 
00495   //lastly, translate Kii and add it to the graph.
00496   csrKii = Kii_;
00497   fei::impl_utils::translate_to_reduced_eqns(*this, csrKii);
00498   fei::impl_utils::add_to_graph(csrKii, *graph);
00499 
00500   Kii_.clear();
00501   Kdi_.clear();
00502   Kid_.clear();
00503   Kdd_.clear();
00504 
00505   mat_counter_ = 0;
00506 
00507   if (global_gather) {
00508     graph->gatherFromOverlap();
00509   }
00510 }
00511 
00512 void
00513 Reducer::assembleReducedGraph(fei::SparseRowGraph* srgraph)
00514 {
00515   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00516     FEI_OSTREAM& os = *output_stream_;
00517     os << dbgprefix_<<"assembleReducedGraph(fei::SparseRowGraph)"<<FEI_ENDL;
00518   }
00519 
00520   fei::Graph_Impl graph(comm_, firstLocalReducedEqn_, lastLocalReducedEqn_);
00521   assembleReducedGraph(&graph);
00522   fei::copyToSparseRowGraph(*(graph.getLocalGraph()), *srgraph);
00523 }
00524 
00525 void
00526 Reducer::assembleReducedMatrix(fei::Matrix& matrix)
00527 {
00528   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00529     FEI_OSTREAM& os = *output_stream_;
00530     os << dbgprefix_<<"assembleReducedMatrix(fei::Matrix)"<<FEI_ENDL;
00531     if (output_level_ >= fei::FULL_LOGS) {
00532       os << dbgprefix_<<"Kid:"<<FEI_ENDL<<Kid_;
00533       os << dbgprefix_<<"Kdi:"<<FEI_ENDL<<Kdi_;
00534     }
00535   }
00536 
00537   //form tmpMat1_ = Kid_*D
00538   csrKid = Kid_;
00539 
00540   fei::multiply_CSRMat_CSRMat(csrKid, csrD_, tmpMat1_);
00541 
00542   //form tmpMat2_ = D^T*Kdi_
00543   csrKdi = Kdi_;
00544   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdi, tmpMat2_);
00545 
00546   //accumulate the above two results into the global system matrix.
00547   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat1_);
00548 
00549   fei::impl_utils::add_to_matrix(tmpMat1_, true, matrix);
00550 
00551   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00552   fei::impl_utils::add_to_matrix(tmpMat2_, true, matrix);
00553 
00554   //form tmpMat1_ = D^T*Kdd_
00555   csrKdd = Kdd_;
00556   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdd, tmpMat1_);
00557 
00558   //form tmpMat2_ = tmpMat1_*D = D^T*Kdd_*D
00559   fei::multiply_CSRMat_CSRMat(tmpMat1_, csrD_, tmpMat2_);
00560 
00561   if (g_nonzero_) {
00562     //form tmpVec1_ = Kid_*g_
00563     fei::multiply_CSRMat_CSVec(csrKid, csg_, tmpVec1_);
00564 
00565     //add tmpVec1_ to fi_
00566     csfi = fi_;
00567     fei::add_CSVec_CSVec(tmpVec1_, csfi);
00568 
00569     //we already have tmpMat1_ = D^T*Kdd which was computed above, and we need
00570     //to form tmpVec1_ = D^T*Kdd*g_.
00571     //So we can simply form tmpVec1_ = tmpMat1_*g_.
00572     fei::multiply_CSRMat_CSVec(tmpMat1_, csg_, tmpVec1_);
00573 
00574     //now add tmpVec1_ to the right-hand-side fi_
00575     fei::add_CSVec_CSVec(tmpVec1_, csfi);
00576   }
00577 
00578   //accumulate tmpMat2_ = D^T*Kdd_*D into the global system matrix.
00579   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00580   fei::impl_utils::add_to_matrix(tmpMat2_, true, matrix);
00581 
00582   //lastly, translate Kii and add it to the graph.
00583   csrKii = Kii_;
00584   fei::impl_utils::translate_to_reduced_eqns(*this, csrKii);
00585   fei::impl_utils::add_to_matrix(csrKii, true, matrix);
00586 
00587   Kii_.clear();
00588   Kdi_.clear();
00589   Kid_.clear();
00590   Kdd_.clear();
00591 
00592   mat_counter_ = 0;
00593 }
00594 
00595 bool
00596 Reducer::isSlaveEqn(int unreducedEqn) const
00597 {
00598   int num = localUnreducedEqns_.size();
00599 
00600   int offset = num>0 ? unreducedEqn - localUnreducedEqns_[0] : -1;
00601   if (offset < 0 || offset >= (int)localUnreducedEqns_.size()) {
00602     return(isSlaveCol(unreducedEqn));
00603   }
00604 
00605   return(isSlaveEqn_[offset]);
00606 }
00607 
00608 void
00609 Reducer::getSlaveMasterEqns(int slaveEqn, std::vector<int>& masterEqns)
00610 {
00611   masterEqns.clear();
00612 
00613   std::vector<int>& rows = csrD_.getGraph().rowNumbers;
00614 
00615   std::vector<int>::iterator iter =
00616     std::lower_bound(rows.begin(), rows.end(), slaveEqn);
00617 
00618   if (iter == rows.end() || *iter != slaveEqn) {
00619     return;
00620   }
00621 
00622   size_t offset = iter - rows.begin();
00623 
00624   int rowBegin = csrD_.getGraph().rowOffsets[offset];
00625   int rowEnd = csrD_.getGraph().rowOffsets[offset+1];
00626   std::vector<int>& cols = csrD_.getGraph().packedColumnIndices;
00627 
00628   for(int j=rowBegin; j<rowEnd; ++j) {
00629     masterEqns.push_back(cols[j]);
00630   }
00631 }
00632 
00633 bool
00634 Reducer::isSlaveCol(int unreducedEqn) const
00635 {
00636   int idx = fei::binarySearch(unreducedEqn,
00637                                   slavesPtr_, numGlobalSlaves_);
00638   
00639   return(idx>=0);
00640 }
00641 
00642 int
00643 Reducer::translateToReducedEqn(int eqn) const
00644 {
00645   if (eqn < lowestGlobalSlaveEqn_) {
00646     return(eqn);
00647   }
00648 
00649   if (eqn > highestGlobalSlaveEqn_) {
00650     return(eqn - numGlobalSlaves_);
00651   }
00652 
00653   int index = 0;
00654   int foundOffset = fei::binarySearch(eqn, slavesPtr_, numGlobalSlaves_,
00655                                           index);
00656 
00657   if (foundOffset >= 0) {
00658     throw std::runtime_error("Reducer::translateToReducedEqn ERROR, input is slave eqn.");
00659   }
00660 
00661   return(eqn - index);
00662 }
00663 
00664 int
00665 Reducer::translateFromReducedEqn(int reduced_eqn) const
00666 {
00667   int index = -1;
00668   int offset = fei::binarySearch(reduced_eqn, &reverse_[0],
00669                                      reverse_.size(), index);
00670   if (offset >= 0) {
00671     return(nonslaves_[offset]);
00672   }
00673 
00674   if (index == 0) {
00675     return(reduced_eqn);
00676   }
00677 
00678   int adjustment = reduced_eqn - reverse_[index-1];
00679 
00680   return(nonslaves_[index-1] + adjustment);
00681 }
00682 
00683 int
00684 Reducer::addMatrixValues(int numRows, const int* rows,
00685                          int numCols, const int* cols,
00686                          const double* const* values,
00687                          bool sum_into,
00688                          fei::Matrix& feimat,
00689                          int format)
00690 {
00691   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00692     FEI_OSTREAM& os = *output_stream_;
00693     os << dbgprefix_<<"addMatrixValues(fei::Matrix)"<<FEI_ENDL;
00694   }
00695 
00696   expand_work_arrays(numCols+numRows);
00697 
00698   const double** myvalues = const_cast<const double**>(values);
00699   if (format != FEI_DENSE_ROW) {
00700     if (format != FEI_DENSE_COL) {
00701       throw std::runtime_error("fei::Reducer::addMatrixValues ERROR, submatrix format must be either FEI_DENSE_ROW or FEI_DENSE_COL. Other formats not supported with slave constraints.");
00702     }
00703 
00704     fei::Matrix_core::copyTransposeToWorkArrays(numRows, numCols, values,
00705                                                 work_1D_, work_2D_);
00706     myvalues = &work_2D_[0];
00707   }
00708 
00709   bool no_slave_cols = true;
00710   unsigned num_non_slave_cols = 0;
00711   for(int j=0; j<numCols; ++j) {
00712     bool_array_[j] = isSlaveEqn(cols[j]);
00713     if (bool_array_[j]) no_slave_cols = false;
00714     else int_array_[num_non_slave_cols++] = translateToReducedEqn(cols[j]);
00715   }
00716 
00717   bool no_slave_rows = true;
00718   for(int i=0; i<numRows; ++i) {
00719     bool_array_[numCols+i] = isSlaveEqn(rows[i]);
00720     if (bool_array_[numCols+i]) no_slave_rows = false;
00721     else int_array_[numCols+i] = translateToReducedEqn(rows[i]);
00722   }
00723 
00724   if (no_slave_rows && no_slave_cols) {
00725     if (sum_into) {
00726       feimat.sumIn(numRows, int_array_+numCols,
00727                    numCols, int_array_, myvalues, FEI_DENSE_ROW);
00728     }
00729     else {
00730       feimat.copyIn(numRows, int_array_+numCols,
00731                     numCols, int_array_, myvalues, FEI_DENSE_ROW);
00732     }
00733 
00734     return(0);
00735   }
00736 
00737   for(int i=0; i<numRows; ++i) {
00738     if (bool_array_[numCols+i]) {
00739       //slave row: slave columns go into Kdd, non-slave columns go
00740       //into Kdi.
00741       fei::FillableVec* Kdd_row = Kdd_.create_or_getRow(rows[i]);
00742       fei::FillableVec* Kdi_row = Kdi_.create_or_getRow(rows[i]);
00743 
00744       for(int j=0; j<numCols; ++j) {
00745         if (bool_array_[j]) {
00746           Kdd_row->addEntry(cols[j], myvalues[i][j]);
00747         }
00748         else {
00749           Kdi_row->addEntry(cols[j], myvalues[i][j]);
00750         }
00751       }
00752       ++mat_counter_;
00753     }
00754     else {//not slave row
00755       if (no_slave_cols) {
00756         int reduced_row = int_array_[numCols+i];
00757         const double* rowvals = myvalues[i];
00758         if (sum_into) {
00759           feimat.sumIn(1, &reduced_row, numCols, int_array_,
00760                        &rowvals, format);
00761         }
00762         else {
00763           feimat.copyIn(1, &reduced_row, num_non_slave_cols, int_array_,
00764                         &rowvals, format);
00765         }
00766         continue;
00767       }
00768 
00769       //put non-slave columns into Kii,
00770       //and slave columns into Kid.
00771       fei::FillableVec* Kid_row = Kid_.create_or_getRow(rows[i]);
00772 
00773       unsigned offset = 0;
00774       for(int j=0; j<numCols; ++j) {
00775         if (bool_array_[j]) {
00776           Kid_row->addEntry(cols[j], myvalues[i][j]);
00777           ++mat_counter_;
00778         }
00779         else {
00780           double_array_[offset++] = myvalues[i][j];
00781         }
00782       }
00783 
00784       if (num_non_slave_cols > 0) {
00785         int reduced_row = int_array_[numCols+i];
00786         if (sum_into) {
00787           feimat.sumIn(1, &reduced_row, num_non_slave_cols, int_array_,
00788                        &double_array_, format);
00789         }
00790         else {
00791           feimat.copyIn(1, &reduced_row, num_non_slave_cols, int_array_,
00792                         &double_array_, format);
00793         }
00794       }
00795     }
00796   }
00797 
00798   if (mat_counter_ > 600) {
00799     assembleReducedMatrix(feimat);
00800   }
00801 
00802   return(0);
00803 }
00804 
00805 int
00806 Reducer::addVectorValues(int numValues,
00807                          const int* globalIndices,
00808                          const double* values,
00809                          bool sum_into,
00810                          bool soln_vector,
00811                          int vectorIndex,
00812                          fei::Vector& feivec)
00813 {
00814   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00815     FEI_OSTREAM& os = *output_stream_;
00816     os << dbgprefix_<<"addVectorValues(fei::Vector)"<<FEI_ENDL;
00817   }
00818 
00819   for(int i=0; i<numValues; ++i) {
00820     if (isSlaveEqn(globalIndices[i])) {
00821       if (sum_into) {
00822         if (!soln_vector) fd_.addEntry(globalIndices[i], values[i]);
00823       }
00824       else {
00825         if (!soln_vector) fd_.putEntry(globalIndices[i], values[i]);
00826       }
00827       if (!soln_vector) ++rhs_vec_counter_;
00828     }
00829     else {
00830       int reduced_index = translateToReducedEqn(globalIndices[i]);
00831 
00832       if (sum_into) {
00833         feivec.sumIn(1, &reduced_index, &values[i], vectorIndex);
00834       }
00835       else {
00836         feivec.copyIn(1, &reduced_index, &values[i], vectorIndex);
00837       }
00838     }
00839   }
00840 
00841   if (rhs_vec_counter_ > 600) {
00842     assembleReducedVector(soln_vector, feivec);
00843   }
00844 
00845   return(0);
00846 }
00847 
00848 void
00849 Reducer::assembleReducedVector(bool soln_vector,
00850                                fei::Vector& feivec)
00851 {
00852   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00853     FEI_OSTREAM& os = *output_stream_;
00854     os << dbgprefix_<<"assembleReducedVector(fei::Vector)"<<FEI_ENDL;
00855   }
00856 
00857   if (soln_vector) {
00858     return;
00859   }
00860 
00861   fei::FillableVec& vec = fd_;
00862 
00863   if (vec.size() > 0) {
00864     //form tmpVec1 = D^T*vec.
00865     csvec = vec;
00866     fei::multiply_trans_CSRMat_CSVec(csrD_, csvec, tmpVec1_);
00867 
00868     vec.clear();
00869 
00870     fei::impl_utils::translate_to_reduced_eqns(*this, tmpVec1_);
00871 
00872     int which_vector = 0;
00873     feivec.sumIn(tmpVec1_.size(), &(tmpVec1_.indices()[0]),
00874                  &(tmpVec1_.coefs()[0]), which_vector);
00875   }
00876 
00877   fei::FillableVec& vec_i = fi_;
00878 
00879   if (vec_i.size() > 0) {
00880     csvec_i = vec_i;
00881     fei::impl_utils::translate_to_reduced_eqns(*this, csvec_i);
00882 
00883     int which_vector = 0;
00884     feivec.sumIn(csvec_i.size(), &(csvec_i.indices()[0]),
00885                  &(csvec_i.coefs()[0]), which_vector);
00886 
00887     vec_i.clear();
00888   }
00889 
00890   rhs_vec_counter_ = 0;
00891 }
00892 
00893 int
00894 Reducer::copyOutVectorValues(int numValues,
00895                              const int* globalIndices,
00896                              double* values,
00897                              bool soln_vector,
00898                              int vectorIndex,
00899                              fei::Vector& feivec)
00900 {
00901   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00902     FEI_OSTREAM& os = *output_stream_;
00903     os << dbgprefix_<<"copyOutVectorValues"<<FEI_ENDL;
00904   }
00905 
00906   tmpVec1_.clear();
00907   tmpVec2_.clear();
00908   std::vector<int> reduced_indices;
00909   std::vector<int> offsets;
00910 
00911   for(int i=0; i<numValues; ++i) {
00912     if (isSlaveEqn(globalIndices[i])) {
00913       fei::put_entry(tmpVec1_, globalIndices[i], 0.0);
00914       offsets.push_back(i);
00915     }
00916     else {
00917       int reduced_idx = translateToReducedEqn(globalIndices[i]);
00918       feivec.copyOut(1, &reduced_idx, &values[i], vectorIndex);
00919     }
00920   }
00921 
00922   if (tmpVec1_.size() > 0) {
00923     fei::multiply_trans_CSRMat_CSVec(csrD_, tmpVec1_, tmpVec2_);
00924     int* tmpVec2Indices = &(tmpVec2_.indices()[0]);
00925     for(size_t i=0; i<tmpVec2_.size(); ++i) {
00926       reduced_indices.push_back(translateToReducedEqn(tmpVec2Indices[i]));
00927     }
00928 
00929     feivec.copyOut(tmpVec2_.size(), &reduced_indices[0],
00930                    &(tmpVec2_.coefs()[0]), vectorIndex);
00931 
00932     fei::multiply_CSRMat_CSVec(csrD_, tmpVec2_, tmpVec1_);
00933 
00934     if (g_nonzero_) {
00935       int* ginds = &(csg_.indices()[0]);
00936       double* gcoefs = &(csg_.coefs()[0]);
00937       for(size_t ii=0; ii<csg_.size(); ++ii) {
00938         fei::add_entry(tmpVec1_, ginds[ii], -gcoefs[ii]);
00939       }
00940     }
00941 
00942     int len = tmpVec1_.size();
00943     int* indices = &(tmpVec1_.indices()[0]);
00944     double* coefs = &(tmpVec1_.coefs()[0]);
00945 
00946     for(unsigned ii=0; ii<offsets.size(); ++ii) {
00947       int index = globalIndices[offsets[ii]];
00948       int idx = fei::binarySearch(index, indices, len);
00949       if (idx < 0) {
00950         continue;
00951       }
00952 
00953       values[offsets[ii]] = coefs[idx];
00954     }
00955   }
00956 
00957   return(0);
00958 }
00959 
00960 std::vector<int>&
00961 Reducer::getLocalReducedEqns()
00962 {
00963   return(localReducedEqns_);
00964 }
00965 
00966 }//namespace fei
00967 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends