FEI Version of the Day
fei_Reducer.cpp
00001 
00002 /*--------------------------------------------------------------------*/
00003 /*    Copyright 2006 Sandia Corporation.                              */
00004 /*    Under the terms of Contract DE-AC04-94AL85000, there is a       */
00005 /*    non-exclusive license for use of this work by or on behalf      */
00006 /*    of the U.S. Government.  Export of this program may require     */
00007 /*    a license from the United States Government.                    */
00008 /*--------------------------------------------------------------------*/
00009 
00010 #include <fei_Reducer.hpp>
00011 #include <fei_MatrixGraph.hpp>
00012 #include <fei_Matrix.hpp>
00013 #include <fei_Matrix_core.hpp>
00014 #include <fei_Vector.hpp>
00015 #include <fei_Graph_Impl.hpp>
00016 #include <fei_ArrayUtils.hpp>
00017 #include <fei_TemplateUtils.hpp>
00018 #include <fei_SparseRowGraph.hpp>
00019 #include <fei_Vector.hpp>
00020 #include <fei_impl_utils.hpp>
00021 
00022 namespace fei {
00023 
00024 Reducer::Reducer(fei::SharedPtr<FillableMat> globalSlaveDependencyMatrix,
00025                  fei::SharedPtr<CSVec> g_vector,
00026                  MPI_Comm comm)
00027  : csrD_(),
00028    slavesPtr_(NULL),
00029    Kii_(),
00030    Kid_(),
00031    Kdi_(),
00032    Kdd_(),
00033    csrKii(),
00034    csrKid(),
00035    csrKdi(),
00036    csrKdd(),
00037    fi_(),
00038    fd_(),
00039    csfi(),
00040    csvec(),
00041    csvec_i(),
00042    tmpMat1_(),
00043    tmpMat2_(),
00044    tmpVec1_(),
00045    tmpVec2_(),
00046    csg_(),
00047    g_nonzero_(false),
00048    localUnreducedEqns_(),
00049    localReducedEqns_(),
00050    nonslaves_(),
00051    reverse_(),
00052    isSlaveEqn_(NULL),
00053    numGlobalSlaves_(0),
00054    numLocalSlaves_(0),
00055    firstLocalReducedEqn_(0),
00056    lastLocalReducedEqn_(0),
00057    lowestGlobalSlaveEqn_(0),
00058    highestGlobalSlaveEqn_(0),
00059    localProc_(0),
00060    numProcs_(1),
00061    comm_(comm),
00062    dbgprefix_("Reducer: "),
00063    mat_counter_(0),
00064    rhs_vec_counter_(0),
00065    bool_array_(0),
00066    int_array_(0),
00067    double_array_(0),
00068    array_len_(0),
00069    work_1D_(),
00070    work_2D_()
00071 {
00072   csrD_ = *globalSlaveDependencyMatrix;
00073   if (g_vector.get() != NULL) {
00074     csg_ = *g_vector;
00075   }
00076 
00077   initialize();
00078 }
00079 
00080 void
00081 Reducer::initialize()
00082 {
00083   numGlobalSlaves_ = csrD_.getNumRows();
00084   slavesPtr_ = &((csrD_.getGraph().rowNumbers)[0]);
00085   lowestGlobalSlaveEqn_ = slavesPtr_[0];
00086   highestGlobalSlaveEqn_ = slavesPtr_[numGlobalSlaves_-1];
00087 
00088   if (csg_.size() > 0) {
00089     double* gptr = &(csg_.coefs()[0]);
00090     for(size_t i=0; i<csg_.size(); ++i) {
00091       if (gptr[i] != 0.0) {
00092         g_nonzero_ = true;
00093         break;
00094       }
00095     }
00096   }
00097 
00098   //The following code sets up the mappings (nonslaves_ and reverse_)
00099   //necessary to implement the method 'translateFromReducedEqn'.
00100   //This code is ugly and inelegant, and it took me quite a while to get
00101   //it right. I won't even bother trying to comment it, but note that its
00102   //correctness is tested in test_utils/test_Reducer.cpp.
00103 
00104   nonslaves_.resize(numGlobalSlaves_*2);
00105 
00106   int nonslave_counter = 0;
00107   int slaveOffset = 0;
00108   int num_consecutive = 0;
00109   while(slaveOffset < numGlobalSlaves_-1) {
00110     int gap = slavesPtr_[slaveOffset+1]-slavesPtr_[slaveOffset]-1;
00111     if (gap > 0) {
00112       nonslaves_[nonslave_counter] = slavesPtr_[slaveOffset]+1;
00113       nonslaves_[numGlobalSlaves_+nonslave_counter++] = num_consecutive;
00114       num_consecutive = 0;
00115     }
00116     else {
00117       ++num_consecutive;
00118     }
00119     ++slaveOffset;
00120   }
00121 
00122   nonslaves_[nonslave_counter] = highestGlobalSlaveEqn_+1;
00123   nonslaves_[numGlobalSlaves_+nonslave_counter++] = num_consecutive;
00124 
00125   reverse_.resize(nonslave_counter);
00126   int first = lowestGlobalSlaveEqn_;
00127   reverse_[0] = first;
00128   for(int i=1; i<nonslave_counter; ++i) {
00129     reverse_[i] = reverse_[i-1] +
00130          (nonslaves_[i]-nonslaves_[i-1] - nonslaves_[numGlobalSlaves_+i] - 1);
00131   }
00132 
00133 #ifndef FEI_SER
00134     MPI_Comm_rank(comm_, &localProc_);
00135     MPI_Comm_size(comm_, &numProcs_);
00136 #endif
00137 
00138   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00139     FEI_OSTREAM& os = *output_stream_;
00140     os << dbgprefix_<< "ctor, numGlobalSlaves="<<numGlobalSlaves_
00141       << FEI_ENDL;
00142   }
00143 
00144   if (numGlobalSlaves_ < 1) {
00145     throw std::runtime_error("ERROR: don't use fei::Reducer when numGlobalSlaves==0. Report to Alan Williams.");
00146   }
00147 }
00148 
00149 Reducer::Reducer(fei::SharedPtr<fei::MatrixGraph> matrixGraph)
00150  : csrD_(),
00151    slavesPtr_(NULL),
00152    Kii_(),
00153    Kid_(),
00154    Kdi_(),
00155    Kdd_(),
00156    fi_(),
00157    fd_(),
00158    tmpMat1_(),
00159    tmpMat2_(),
00160    tmpVec1_(),
00161    tmpVec2_(),
00162    csg_(),
00163    g_nonzero_(false),
00164    localUnreducedEqns_(),
00165    localReducedEqns_(),
00166    nonslaves_(),
00167    reverse_(),
00168    isSlaveEqn_(NULL),
00169    numGlobalSlaves_(0),
00170    numLocalSlaves_(0),
00171    firstLocalReducedEqn_(0),
00172    lastLocalReducedEqn_(0),
00173    lowestGlobalSlaveEqn_(0),
00174    highestGlobalSlaveEqn_(0),
00175    localProc_(0),
00176    numProcs_(1),
00177    comm_(),
00178    dbgprefix_("Reducer: "),
00179    mat_counter_(0),
00180    rhs_vec_counter_(0),
00181    bool_array_(0),
00182    int_array_(0),
00183    double_array_(0),
00184    array_len_(0)
00185 {
00186   fei::SharedPtr<fei::VectorSpace> vecSpace = matrixGraph->getRowSpace();
00187   comm_ = vecSpace->getCommunicator();
00188   initialize();
00189 
00190   std::vector<int> indices;
00191   vecSpace->getIndices_Owned(indices);
00192   setLocalUnreducedEqns(indices);
00193 }
00194 
00195 Reducer::~Reducer()
00196 {
00197   delete [] isSlaveEqn_;   isSlaveEqn_ = 0;
00198   delete [] bool_array_;   bool_array_ = 0;
00199   delete [] int_array_;    int_array_ = 0;
00200   delete [] double_array_; double_array_ = 0;
00201   array_len_ = 0;
00202 }
00203 
00204 void
00205 Reducer::setLocalUnreducedEqns(const std::vector<int>& localUnreducedEqns)
00206 {
00207   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00208     FEI_OSTREAM& os = *output_stream_;
00209     os << dbgprefix_<< "setLocalUnreducedEqns, numLocalEqns="
00210       <<localUnreducedEqns.size() << FEI_ENDL;
00211   }
00212 
00213   if (localUnreducedEqns_ == localUnreducedEqns) {
00214     return;
00215   }
00216 
00217   localUnreducedEqns_ = localUnreducedEqns;
00218 
00219   int num = localUnreducedEqns_.size();
00220 
00221   if (isSlaveEqn_ != NULL) delete [] isSlaveEqn_;
00222 
00223   isSlaveEqn_ = num > 0 ? new bool[localUnreducedEqns_.size()] : NULL;
00224 
00225   numLocalSlaves_ = 0;
00226 
00227   for(size_t i=0; i<localUnreducedEqns_.size(); ++i) {
00228     int idx = fei::binarySearch(localUnreducedEqns_[i],
00229                                     slavesPtr_, numGlobalSlaves_);
00230     if (idx < 0) {
00231       isSlaveEqn_[i] = false;
00232     }
00233     else {
00234       isSlaveEqn_[i] = true;
00235       ++numLocalSlaves_;
00236 
00237       if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00238         FEI_OSTREAM& os = *output_stream_;
00239         os << dbgprefix_<<" slave " << localUnreducedEqns_[i] << " depends on ";
00240         int offset = csrD_.getGraph().rowOffsets[idx];
00241         int rowlen = csrD_.getGraph().rowOffsets[idx+1]-offset;
00242         int* indices = &(csrD_.getGraph().packedColumnIndices[offset]);
00243         for(int j=0; j<rowlen; ++j) {
00244           os << indices[j] << " ";
00245         }
00246         os << FEI_ENDL;
00247       }
00248     }
00249 
00250   }
00251 
00252   int num_slaves_on_lower_procs = 0;
00253 
00254 
00255 #ifndef FEI_SER
00256 
00257   if (numProcs_ > 1) {
00258     std::vector<int> procNumLocalSlaves(numProcs_);
00259 
00260     MPI_Allgather(&numLocalSlaves_, 1, MPI_INT, &procNumLocalSlaves[0],
00261                   1, MPI_INT, comm_);
00262 
00263     for(int p=0; p<localProc_; ++p) {
00264       num_slaves_on_lower_procs += procNumLocalSlaves[p];
00265     }
00266   }
00267 #endif
00268 
00269   unsigned first_non_slave_offset = 0;
00270   while(first_non_slave_offset < localUnreducedEqns_.size() &&
00271         isSlaveEqn_[first_non_slave_offset] == true) {
00272     ++first_non_slave_offset;
00273   }
00274 
00275   firstLocalReducedEqn_ = localUnreducedEqns_[first_non_slave_offset]
00276       - num_slaves_on_lower_procs - first_non_slave_offset;
00277 
00278   int num_local_eqns = localUnreducedEqns_.size() - numLocalSlaves_;
00279 
00280   lastLocalReducedEqn_ = firstLocalReducedEqn_ + num_local_eqns - 1;
00281 
00282   localReducedEqns_.resize(num_local_eqns);
00283 
00284   unsigned offset = 0;
00285   int eqn = firstLocalReducedEqn_;
00286   for(unsigned i=0; i<localUnreducedEqns_.size(); ++i) {
00287     if (isSlaveEqn_[i] == false) {
00288       localReducedEqns_[offset++] = eqn++;
00289     }
00290   }
00291 
00292   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00293     FEI_OSTREAM& os = *output_stream_;
00294     if (localUnreducedEqns_.size() > 0) {
00295       os << dbgprefix_<< "first local eqn="
00296          <<localUnreducedEqns_[0]<<", last local eqn="
00297       <<localUnreducedEqns_[localUnreducedEqns_.size()-1] << FEI_ENDL;
00298     }
00299     os << dbgprefix_<<"numLocalSlaves_="<<numLocalSlaves_
00300        <<", firstLocalReducedEqn_="<<firstLocalReducedEqn_
00301        <<", lastLocalReducedEqn_="<<lastLocalReducedEqn_<<FEI_ENDL;
00302   }
00303 }
00304 
00305 void
00306 Reducer::addGraphEntries(fei::SharedPtr<fei::SparseRowGraph> matrixGraph)
00307 {
00308   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00309     FEI_OSTREAM& os = *output_stream_;
00310     os << dbgprefix_<<"addGraphEntries"<<FEI_ENDL;
00311   }
00312 
00313   //iterate through the incoming matrixGraph, putting its contents into
00314   //Kdd, Kdi, Kid and Kii as appropriate.
00315 
00316   bool create_if_necessary = true;
00317 
00318   std::vector<int>& rowNumbers = matrixGraph->rowNumbers;
00319   std::vector<int>& rowOffsets = matrixGraph->rowOffsets;
00320   std::vector<int>& packedCols = matrixGraph->packedColumnIndices;
00321 
00322   for(unsigned i=0; i<rowNumbers.size(); ++i) {
00323     int row = rowNumbers[i];
00324 
00325     bool slave_row = isSlaveEqn(row);
00326 
00327     int rowLength = rowOffsets[i+1]-rowOffsets[i];
00328     int* cols = &packedCols[rowOffsets[i]];
00329 
00330     if (slave_row) {
00331       fei::FillableVec* Kdd_row = Kdd_.getRow(row, create_if_necessary);
00332       fei::FillableVec* Kdi_row = Kdi_.getRow(row, create_if_necessary);
00333 
00334       for(int j=0; j<rowLength; ++j) {
00335         int col = cols[j];
00336         bool slave_col = isSlaveEqn(col);
00337 
00338         if (slave_col) {
00339           Kdd_row->addEntry(col, 0.0);
00340         }
00341         else {
00342           Kdi_row->addEntry(col, 0.0);
00343         }
00344       }
00345     }
00346     else {
00347       //not a slave row, so add slave columns to Kid, and non-slave
00348       //columns to graph.
00349       fei::FillableVec* Kid_row = Kid_.getRow(row, create_if_necessary);
00350       fei::FillableVec* Kii_row = Kii_.getRow(row, create_if_necessary);
00351 
00352       for(int j=0; j<rowLength; ++j) {
00353         int col = cols[j];
00354         bool slave_col = isSlaveEqn(col);
00355 
00356         if (slave_col) {
00357           Kid_row->addEntry(col, 0.0);
00358         }
00359         else {
00360           Kii_row->addEntry(col, 0.0);
00361         }
00362       }
00363     }
00364   }
00365 }
00366 
00367 void
00368 Reducer::expand_work_arrays(int size)
00369 {
00370   if (size <= array_len_) return;
00371 
00372   array_len_ = size;
00373   delete [] bool_array_;
00374   delete [] int_array_;
00375   delete [] double_array_;
00376   bool_array_ = new bool[array_len_];
00377   int_array_ = new int[array_len_];
00378   double_array_ = new double[array_len_];
00379 }
00380 
00381 void
00382 Reducer::addGraphIndices(int numRows, const int* rows,
00383                          int numCols, const int* cols,
00384                          fei::Graph& graph)
00385 {
00386   bool create_if_necessary = true;
00387 
00388   expand_work_arrays(numCols);
00389 
00390   bool no_slave_cols = true;
00391   for(int i=0; i<numCols; ++i) {
00392     bool_array_[i] = isSlaveEqn(cols[i]);
00393     if (bool_array_[i]) no_slave_cols = false;
00394   }
00395 
00396   for(int i=0; i<numRows; ++i) {
00397     bool slave_row = isSlaveEqn(rows[i]);
00398 
00399     if (slave_row) {
00400       fei::FillableVec* Kdd_row = Kdd_.getRow(rows[i], create_if_necessary);
00401       fei::FillableVec* Kdi_row = Kdi_.getRow(rows[i], create_if_necessary);
00402 
00403       for(int j=0; j<numCols; ++j) {
00404         if (bool_array_[j]) {
00405           Kdd_row->addEntry(cols[j], 0.0);
00406         }
00407         else {
00408           Kdi_row->addEntry(cols[j], 0.0);
00409         }
00410       }
00411       ++mat_counter_;
00412     }
00413     else {
00414       //not a slave row, so add slave columns to Kid, and non-slave
00415       //columns to graph.
00416       fei::FillableVec* Kid_row = no_slave_cols ?
00417         NULL : Kid_.getRow(rows[i], create_if_necessary);
00418   
00419       unsigned num_non_slave_cols = 0;
00420 
00421       for(int j=0; j<numCols; ++j) {
00422         if (bool_array_[j]) {
00423           Kid_row->addEntry(cols[j], 0.0);
00424           ++mat_counter_;
00425         }
00426         else {
00427           int_array_[num_non_slave_cols++] = translateToReducedEqn(cols[j]);
00428         }
00429       }
00430 
00431       if (num_non_slave_cols > 0) {
00432         int reduced_row = translateToReducedEqn(rows[i]);
00433         graph.addIndices(reduced_row, num_non_slave_cols, int_array_);
00434       }
00435     }
00436   }
00437 
00438   if (mat_counter_ > 600) {
00439     assembleReducedGraph(&graph, false);
00440   }
00441 }
00442 
00443 void
00444 Reducer::addSymmetricGraphIndices(int numIndices, const int* indices,
00445                                   bool diagonal,
00446                                   fei::Graph& graph)
00447 {
00448   if (diagonal) {
00449     for(int i=0; i<numIndices; ++i) {
00450       addGraphIndices(1, &indices[i], 1, &indices[i], graph);
00451     }
00452   }
00453   else {
00454     addGraphIndices(numIndices, indices, numIndices, indices, graph);
00455   }
00456 }
00457 
00458 void
00459 Reducer::assembleReducedGraph(fei::Graph* graph,
00460                               bool global_gather)
00461 {
00462   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00463     FEI_OSTREAM& os = *output_stream_;
00464     os << dbgprefix_<<"assembleReducedGraph(fei::Graph)"<<FEI_ENDL;
00465     if (output_level_ >= fei::FULL_LOGS) {
00466       os << dbgprefix_<<"Kdi:"<<FEI_ENDL<<Kdi_;
00467     }
00468   }
00469 
00470   //This function performs the appropriate manipulations (matrix-matrix
00471   //products, etc., on the Kid,Kdi,Kdd matrices and then assembles the
00472   //results into the input graph structure.
00473   //
00474 
00475   //form tmpMat1_ = Kid*D
00476   csrKid = Kid_;
00477   fei::multiply_CSRMat_CSRMat(csrKid, csrD_, tmpMat1_, true);
00478 
00479   csrKdi = Kdi_;
00480   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdi, tmpMat2_, true);
00481 
00482   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat1_);
00483   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00484 
00485   fei::impl_utils::add_to_graph(tmpMat1_, *graph);
00486   fei::impl_utils::add_to_graph(tmpMat2_, *graph);
00487 
00488   //form tmpMat1_ = D^T*Kdd
00489   csrKdd = Kdd_;
00490   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdd, tmpMat1_, true);
00491 
00492   //form tmpMat2_ = tmpMat1_*D = D^T*Kdd*D
00493   fei::multiply_CSRMat_CSRMat(tmpMat1_, csrD_, tmpMat2_, true);
00494 
00495   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00496 
00497   fei::impl_utils::add_to_graph(tmpMat2_, *graph);
00498 
00499   //lastly, translate Kii and add it to the graph.
00500   csrKii = Kii_;
00501   fei::impl_utils::translate_to_reduced_eqns(*this, csrKii);
00502   fei::impl_utils::add_to_graph(csrKii, *graph);
00503 
00504   Kii_.clear();
00505   Kdi_.clear();
00506   Kid_.clear();
00507   Kdd_.clear();
00508 
00509   mat_counter_ = 0;
00510 
00511   if (global_gather) {
00512     graph->gatherFromOverlap();
00513   }
00514 }
00515 
00516 void
00517 Reducer::assembleReducedGraph(fei::SparseRowGraph* srgraph)
00518 {
00519   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00520     FEI_OSTREAM& os = *output_stream_;
00521     os << dbgprefix_<<"assembleReducedGraph(fei::SparseRowGraph)"<<FEI_ENDL;
00522   }
00523 
00524   fei::Graph_Impl graph(comm_, firstLocalReducedEqn_, lastLocalReducedEqn_);
00525   assembleReducedGraph(&graph);
00526   fei::copyToSparseRowGraph(*(graph.getLocalGraph()), *srgraph);
00527 }
00528 
00529 void
00530 Reducer::assembleReducedMatrix(fei::Matrix& matrix)
00531 {
00532   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00533     FEI_OSTREAM& os = *output_stream_;
00534     os << dbgprefix_<<"assembleReducedMatrix(fei::Matrix)"<<FEI_ENDL;
00535     if (output_level_ >= fei::FULL_LOGS) {
00536       os << dbgprefix_<<"Kid:"<<FEI_ENDL<<Kid_;
00537       os << dbgprefix_<<"Kdi:"<<FEI_ENDL<<Kdi_;
00538     }
00539   }
00540 
00541   //form tmpMat1_ = Kid_*D
00542   csrKid = Kid_;
00543 
00544   fei::multiply_CSRMat_CSRMat(csrKid, csrD_, tmpMat1_);
00545 
00546   //form tmpMat2_ = D^T*Kdi_
00547   csrKdi = Kdi_;
00548   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdi, tmpMat2_);
00549 
00550   //accumulate the above two results into the global system matrix.
00551   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat1_);
00552 
00553   fei::impl_utils::add_to_matrix(tmpMat1_, true, matrix);
00554 
00555   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00556   fei::impl_utils::add_to_matrix(tmpMat2_, true, matrix);
00557 
00558   //form tmpMat1_ = D^T*Kdd_
00559   csrKdd = Kdd_;
00560   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdd, tmpMat1_);
00561 
00562   //form tmpMat2_ = tmpMat1_*D = D^T*Kdd_*D
00563   fei::multiply_CSRMat_CSRMat(tmpMat1_, csrD_, tmpMat2_);
00564 
00565   if (g_nonzero_) {
00566     //form tmpVec1_ = Kid_*g_
00567     fei::multiply_CSRMat_CSVec(csrKid, csg_, tmpVec1_);
00568 
00569     //add tmpVec1_ to fi_
00570     csfi = fi_;
00571     fei::add_CSVec_CSVec(tmpVec1_, csfi);
00572 
00573     //we already have tmpMat1_ = D^T*Kdd which was computed above, and we need
00574     //to form tmpVec1_ = D^T*Kdd*g_.
00575     //So we can simply form tmpVec1_ = tmpMat1_*g_.
00576     fei::multiply_CSRMat_CSVec(tmpMat1_, csg_, tmpVec1_);
00577 
00578     //now add tmpVec1_ to the right-hand-side fi_
00579     fei::add_CSVec_CSVec(tmpVec1_, csfi);
00580   }
00581 
00582   //accumulate tmpMat2_ = D^T*Kdd_*D into the global system matrix.
00583   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00584   fei::impl_utils::add_to_matrix(tmpMat2_, true, matrix);
00585 
00586   //lastly, translate Kii and add it to the graph.
00587   csrKii = Kii_;
00588   fei::impl_utils::translate_to_reduced_eqns(*this, csrKii);
00589   fei::impl_utils::add_to_matrix(csrKii, true, matrix);
00590 
00591   Kii_.clear();
00592   Kdi_.clear();
00593   Kid_.clear();
00594   Kdd_.clear();
00595 
00596   mat_counter_ = 0;
00597 }
00598 
00599 bool
00600 Reducer::isSlaveEqn(int unreducedEqn) const
00601 {
00602   int num = localUnreducedEqns_.size();
00603 
00604   int offset = num>0 ? unreducedEqn - localUnreducedEqns_[0] : -1;
00605   if (offset < 0 || offset >= (int)localUnreducedEqns_.size()) {
00606     return(isSlaveCol(unreducedEqn));
00607   }
00608 
00609   return(isSlaveEqn_[offset]);
00610 }
00611 
00612 void
00613 Reducer::getSlaveMasterEqns(int slaveEqn, std::vector<int>& masterEqns)
00614 {
00615   masterEqns.clear();
00616 
00617   std::vector<int>& rows = csrD_.getGraph().rowNumbers;
00618 
00619   std::vector<int>::iterator iter =
00620     std::lower_bound(rows.begin(), rows.end(), slaveEqn);
00621 
00622   if (iter == rows.end() || *iter != slaveEqn) {
00623     return;
00624   }
00625 
00626   size_t offset = iter - rows.begin();
00627 
00628   int rowBegin = csrD_.getGraph().rowOffsets[offset];
00629   int rowEnd = csrD_.getGraph().rowOffsets[offset+1];
00630   std::vector<int>& cols = csrD_.getGraph().packedColumnIndices;
00631 
00632   for(int j=rowBegin; j<rowEnd; ++j) {
00633     masterEqns.push_back(cols[j]);
00634   }
00635 }
00636 
00637 bool
00638 Reducer::isSlaveCol(int unreducedEqn) const
00639 {
00640   int idx = fei::binarySearch(unreducedEqn,
00641                                   slavesPtr_, numGlobalSlaves_);
00642   
00643   return(idx>=0);
00644 }
00645 
00646 int
00647 Reducer::translateToReducedEqn(int eqn) const
00648 {
00649   if (eqn < lowestGlobalSlaveEqn_) {
00650     return(eqn);
00651   }
00652 
00653   if (eqn > highestGlobalSlaveEqn_) {
00654     return(eqn - numGlobalSlaves_);
00655   }
00656 
00657   int index = 0;
00658   int foundOffset = fei::binarySearch(eqn, slavesPtr_, numGlobalSlaves_,
00659                                           index);
00660 
00661   if (foundOffset >= 0) {
00662     throw std::runtime_error("Reducer::translateToReducedEqn ERROR, input is slave eqn.");
00663   }
00664 
00665   return(eqn - index);
00666 }
00667 
00668 int
00669 Reducer::translateFromReducedEqn(int reduced_eqn) const
00670 {
00671   int index = -1;
00672   int offset = fei::binarySearch(reduced_eqn, &reverse_[0],
00673                                      reverse_.size(), index);
00674   if (offset >= 0) {
00675     return(nonslaves_[offset]);
00676   }
00677 
00678   if (index == 0) {
00679     return(reduced_eqn);
00680   }
00681 
00682   int adjustment = reduced_eqn - reverse_[index-1];
00683 
00684   return(nonslaves_[index-1] + adjustment);
00685 }
00686 
00687 int
00688 Reducer::addMatrixValues(int numRows, const int* rows,
00689                          int numCols, const int* cols,
00690                          const double* const* values,
00691                          bool sum_into,
00692                          fei::Matrix& feimat,
00693                          int format)
00694 {
00695   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00696     FEI_OSTREAM& os = *output_stream_;
00697     os << dbgprefix_<<"addMatrixValues(fei::Matrix)"<<FEI_ENDL;
00698   }
00699 
00700   expand_work_arrays(numCols+numRows);
00701 
00702   const double** myvalues = const_cast<const double**>(values);
00703   if (format != FEI_DENSE_ROW) {
00704     if (format != FEI_DENSE_COL) {
00705       throw std::runtime_error("fei::Reducer::addMatrixValues ERROR, submatrix format must be either FEI_DENSE_ROW or FEI_DENSE_COL. Other formats not supported with slave constraints.");
00706     }
00707 
00708     fei::Matrix_core::copyTransposeToWorkArrays(numRows, numCols, values,
00709                                                 work_1D_, work_2D_);
00710     myvalues = &work_2D_[0];
00711   }
00712 
00713   bool no_slave_cols = true;
00714   unsigned num_non_slave_cols = 0;
00715   for(int j=0; j<numCols; ++j) {
00716     bool_array_[j] = isSlaveEqn(cols[j]);
00717     if (bool_array_[j]) no_slave_cols = false;
00718     else int_array_[num_non_slave_cols++] = translateToReducedEqn(cols[j]);
00719   }
00720 
00721   bool no_slave_rows = true;
00722   for(int i=0; i<numRows; ++i) {
00723     bool_array_[numCols+i] = isSlaveEqn(rows[i]);
00724     if (bool_array_[numCols+i]) no_slave_rows = false;
00725     else int_array_[numCols+i] = translateToReducedEqn(rows[i]);
00726   }
00727 
00728   if (no_slave_rows && no_slave_cols) {
00729     if (sum_into) {
00730       feimat.sumIn(numRows, int_array_+numCols,
00731                    numCols, int_array_, myvalues, FEI_DENSE_ROW);
00732     }
00733     else {
00734       feimat.copyIn(numRows, int_array_+numCols,
00735                     numCols, int_array_, myvalues, FEI_DENSE_ROW);
00736     }
00737 
00738     return(0);
00739   }
00740 
00741   for(int i=0; i<numRows; ++i) {
00742     if (bool_array_[numCols+i]) {
00743       //slave row: slave columns go into Kdd, non-slave columns go
00744       //into Kdi.
00745       fei::FillableVec* Kdd_row = Kdd_.getRow(rows[i], true);
00746       fei::FillableVec* Kdi_row = Kdi_.getRow(rows[i], true);
00747 
00748       for(int j=0; j<numCols; ++j) {
00749         if (bool_array_[j]) {
00750           Kdd_row->addEntry(cols[j], myvalues[i][j]);
00751         }
00752         else {
00753           Kdi_row->addEntry(cols[j], myvalues[i][j]);
00754         }
00755       }
00756       ++mat_counter_;
00757     }
00758     else {//not slave row
00759       if (no_slave_cols) {
00760         int reduced_row = int_array_[numCols+i];
00761         const double* rowvals = myvalues[i];
00762         if (sum_into) {
00763           feimat.sumIn(1, &reduced_row, numCols, int_array_,
00764                        &rowvals, format);
00765         }
00766         else {
00767           feimat.copyIn(1, &reduced_row, num_non_slave_cols, int_array_,
00768                         &rowvals, format);
00769         }
00770         continue;
00771       }
00772 
00773       //put non-slave columns into Kii,
00774       //and slave columns into Kid.
00775       fei::FillableVec* Kid_row = Kid_.getRow(rows[i], true);
00776 
00777       unsigned offset = 0;
00778       for(int j=0; j<numCols; ++j) {
00779         if (bool_array_[j]) {
00780           Kid_row->addEntry(cols[j], myvalues[i][j]);
00781           ++mat_counter_;
00782         }
00783         else {
00784           double_array_[offset++] = myvalues[i][j];
00785         }
00786       }
00787 
00788       if (num_non_slave_cols > 0) {
00789         int reduced_row = int_array_[numCols+i];
00790         if (sum_into) {
00791           feimat.sumIn(1, &reduced_row, num_non_slave_cols, int_array_,
00792                        &double_array_, format);
00793         }
00794         else {
00795           feimat.copyIn(1, &reduced_row, num_non_slave_cols, int_array_,
00796                         &double_array_, format);
00797         }
00798       }
00799     }
00800   }
00801 
00802   if (mat_counter_ > 600) {
00803     assembleReducedMatrix(feimat);
00804   }
00805 
00806   return(0);
00807 }
00808 
00809 int
00810 Reducer::addVectorValues(int numValues,
00811                          const int* globalIndices,
00812                          const double* values,
00813                          bool sum_into,
00814                          bool soln_vector,
00815                          int vectorIndex,
00816                          fei::Vector& feivec)
00817 {
00818   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00819     FEI_OSTREAM& os = *output_stream_;
00820     os << dbgprefix_<<"addVectorValues(fei::Vector)"<<FEI_ENDL;
00821   }
00822 
00823   for(int i=0; i<numValues; ++i) {
00824     if (isSlaveEqn(globalIndices[i])) {
00825       if (sum_into) {
00826         if (!soln_vector) fd_.addEntry(globalIndices[i], values[i]);
00827       }
00828       else {
00829         if (!soln_vector) fd_.putEntry(globalIndices[i], values[i]);
00830       }
00831       if (!soln_vector) ++rhs_vec_counter_;
00832     }
00833     else {
00834       int reduced_index = translateToReducedEqn(globalIndices[i]);
00835 
00836       if (sum_into) {
00837         feivec.sumIn(1, &reduced_index, &values[i], vectorIndex);
00838       }
00839       else {
00840         feivec.copyIn(1, &reduced_index, &values[i], vectorIndex);
00841       }
00842     }
00843   }
00844 
00845   if (rhs_vec_counter_ > 600) {
00846     assembleReducedVector(soln_vector, feivec, sum_into);
00847   }
00848 
00849   return(0);
00850 }
00851 
00852 void
00853 Reducer::assembleReducedVector(bool soln_vector,
00854                                fei::Vector& feivec,
00855                                bool sum_into)
00856 {
00857   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00858     FEI_OSTREAM& os = *output_stream_;
00859     os << dbgprefix_<<"assembleReducedVector(fei::Vector)"<<FEI_ENDL;
00860   }
00861 
00862   if (soln_vector) {
00863     return;
00864   }
00865 
00866   fei::FillableVec& vec = fd_;
00867 
00868   if (vec.size() > 0) {
00869     //form tmpVec1 = D^T*vec.
00870     csvec = vec;
00871     fei::multiply_trans_CSRMat_CSVec(csrD_, csvec, tmpVec1_);
00872 
00873     vec.clear();
00874 
00875     fei::impl_utils::translate_to_reduced_eqns(*this, tmpVec1_);
00876 
00877     int which_vector = 0;
00878     if (sum_into) {
00879       feivec.sumIn(tmpVec1_.size(), &(tmpVec1_.indices()[0]),
00880                    &(tmpVec1_.coefs()[0]), which_vector);
00881     }
00882     else {
00883       feivec.copyIn(tmpVec1_.size(), &(tmpVec1_.indices()[0]),
00884                     &(tmpVec1_.coefs()[0]), which_vector);
00885     }
00886   }
00887 
00888   fei::FillableVec& vec_i = fi_;
00889 
00890   if (vec_i.size() > 0) {
00891     csvec_i = vec_i;
00892     fei::impl_utils::translate_to_reduced_eqns(*this, csvec_i);
00893 
00894     int which_vector = 0;
00895     if (sum_into) {
00896       feivec.sumIn(csvec_i.size(), &(csvec_i.indices()[0]),
00897                    &(csvec_i.coefs()[0]), which_vector);
00898     }
00899     else {
00900       feivec.copyIn(csvec_i.size(), &(csvec_i.indices()[0]),
00901                     &(csvec_i.coefs()[0]), which_vector);
00902     }
00903 
00904     vec_i.clear();
00905   }
00906 
00907   rhs_vec_counter_ = 0;
00908 }
00909 
00910 int
00911 Reducer::copyOutVectorValues(int numValues,
00912                              const int* globalIndices,
00913                              double* values,
00914                              bool soln_vector,
00915                              int vectorIndex,
00916                              fei::Vector& feivec)
00917 {
00918   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00919     FEI_OSTREAM& os = *output_stream_;
00920     os << dbgprefix_<<"copyOutVectorValues"<<FEI_ENDL;
00921   }
00922 
00923   tmpVec1_.clear();
00924   tmpVec2_.clear();
00925   std::vector<int> reduced_indices;
00926   std::vector<int> offsets;
00927 
00928   for(int i=0; i<numValues; ++i) {
00929     if (isSlaveEqn(globalIndices[i])) {
00930       fei::put_entry(tmpVec1_, globalIndices[i], 0.0);
00931       offsets.push_back(i);
00932     }
00933     else {
00934       int reduced_idx = translateToReducedEqn(globalIndices[i]);
00935       feivec.copyOut(1, &reduced_idx, &values[i], vectorIndex);
00936     }
00937   }
00938 
00939   if (tmpVec1_.size() > 0) {
00940     fei::multiply_trans_CSRMat_CSVec(csrD_, tmpVec1_, tmpVec2_);
00941     int* tmpVec2Indices = &(tmpVec2_.indices()[0]);
00942     for(size_t i=0; i<tmpVec2_.size(); ++i) {
00943       reduced_indices.push_back(translateToReducedEqn(tmpVec2Indices[i]));
00944     }
00945 
00946     feivec.copyOut(tmpVec2_.size(), &reduced_indices[0],
00947                    &(tmpVec2_.coefs()[0]), vectorIndex);
00948 
00949     fei::multiply_CSRMat_CSVec(csrD_, tmpVec2_, tmpVec1_);
00950 
00951     if (g_nonzero_) {
00952       int* ginds = &(csg_.indices()[0]);
00953       double* gcoefs = &(csg_.coefs()[0]);
00954       for(size_t ii=0; ii<csg_.size(); ++ii) {
00955         fei::add_entry(tmpVec1_, ginds[ii], -gcoefs[ii]);
00956       }
00957     }
00958 
00959     int len = tmpVec1_.size();
00960     int* indices = &(tmpVec1_.indices()[0]);
00961     double* coefs = &(tmpVec1_.coefs()[0]);
00962 
00963     for(unsigned ii=0; ii<offsets.size(); ++ii) {
00964       int index = globalIndices[offsets[ii]];
00965       int idx = fei::binarySearch(index, indices, len);
00966       if (idx < 0) {
00967         continue;
00968       }
00969 
00970       values[offsets[ii]] = coefs[idx];
00971     }
00972   }
00973 
00974   return(0);
00975 }
00976 
00977 std::vector<int>&
00978 Reducer::getLocalReducedEqns()
00979 {
00980   return(localReducedEqns_);
00981 }
00982 
00983 }//namespace fei
00984 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends