fei_Reducer.cpp

00001 
00002 /*--------------------------------------------------------------------*/
00003 /*    Copyright 2006 Sandia Corporation.                              */
00004 /*    Under the terms of Contract DE-AC04-94AL85000, there is a       */
00005 /*    non-exclusive license for use of this work by or on behalf      */
00006 /*    of the U.S. Government.  Export of this program may require     */
00007 /*    a license from the United States Government.                    */
00008 /*--------------------------------------------------------------------*/
00009 
00010 #include <fei_Reducer.hpp>
00011 #include <fei_MatrixGraph.hpp>
00012 #include <fei_Matrix.hpp>
00013 #include <fei_Matrix_core.hpp>
00014 #include <fei_Vector.hpp>
00015 #include <fei_Graph_Impl.hpp>
00016 #include <snl_fei_ArrayUtils.hpp>
00017 #include <fei_TemplateUtils.hpp>
00018 #include <fei_SparseRowGraph.hpp>
00019 #include <fei_Vector.hpp>
00020 #include <fei_impl_utils.hpp>
00021 
00022 namespace fei {
00023 
00024 Reducer::Reducer(fei::SharedPtr<FillableMat> globalSlaveDependencyMatrix,
00025                  fei::SharedPtr<CSVec> g_vector,
00026                  MPI_Comm comm)
00027  : csrD_(),
00028    slavesPtr_(NULL),
00029    Kii_(),
00030    Kid_(),
00031    Kdi_(),
00032    Kdd_(),
00033    csrKii(),
00034    csrKid(),
00035    csrKdi(),
00036    csrKdd(),
00037    fi_(),
00038    fd_(),
00039    csfi(),
00040    csvec(),
00041    csvec_i(),
00042    tmpMat1_(),
00043    tmpMat2_(),
00044    tmpVec1_(),
00045    tmpVec2_(),
00046    csg_(),
00047    g_nonzero_(false),
00048    localUnreducedEqns_(),
00049    localReducedEqns_(),
00050    nonslaves_(),
00051    reverse_(),
00052    isSlaveEqn_(NULL),
00053    numGlobalSlaves_(0),
00054    numLocalSlaves_(0),
00055    firstLocalReducedEqn_(0),
00056    lastLocalReducedEqn_(0),
00057    lowestGlobalSlaveEqn_(0),
00058    highestGlobalSlaveEqn_(0),
00059    localProc_(0),
00060    numProcs_(1),
00061    comm_(comm),
00062    dbgprefix_("Reducer: "),
00063    mat_counter_(0),
00064    rhs_vec_counter_(0),
00065    bool_array_(0),
00066    int_array_(0),
00067    double_array_(0),
00068    array_len_(0),
00069    work_1D_(),
00070    work_2D_()
00071 {
00072   csrD_ = *globalSlaveDependencyMatrix;
00073   if (g_vector.get() != NULL) {
00074     csg_ = *g_vector;
00075   }
00076 
00077   initialize();
00078 }
00079 
00080 void
00081 Reducer::initialize()
00082 {
00083   numGlobalSlaves_ = csrD_.getNumRows();
00084   slavesPtr_ = &((csrD_.getGraph().rowNumbers)[0]);
00085   lowestGlobalSlaveEqn_ = slavesPtr_[0];
00086   highestGlobalSlaveEqn_ = slavesPtr_[numGlobalSlaves_-1];
00087 
00088   if (csg_.size() > 0) {
00089     double* gptr = &(csg_.coefs()[0]);
00090     for(size_t i=0; i<csg_.size(); ++i) {
00091       if (gptr[i] != 0.0) {
00092         g_nonzero_ = true;
00093         break;
00094       }
00095     }
00096   }
00097 
00098   //The following code sets up the mappings (nonslaves_ and reverse_)
00099   //necessary to implement the method 'translateFromReducedEqn'.
00100   //This code is ugly and inelegant, and it took me quite a while to get
00101   //it right. I won't even bother trying to comment it, but note that its
00102   //correctness is tested in test_utils/test_Reducer.cpp.
00103 
00104   nonslaves_.resize(numGlobalSlaves_*2);
00105 
00106   int nonslave_counter = 0;
00107   int slaveOffset = 0;
00108   int num_consecutive = 0;
00109   while(slaveOffset < numGlobalSlaves_-1) {
00110     int gap = slavesPtr_[slaveOffset+1]-slavesPtr_[slaveOffset]-1;
00111     if (gap > 0) {
00112       nonslaves_[nonslave_counter] = slavesPtr_[slaveOffset]+1;
00113       nonslaves_[numGlobalSlaves_+nonslave_counter++] = num_consecutive;
00114       num_consecutive = 0;
00115     }
00116     else {
00117       ++num_consecutive;
00118     }
00119     ++slaveOffset;
00120   }
00121 
00122   nonslaves_[nonslave_counter] = highestGlobalSlaveEqn_+1;
00123   nonslaves_[numGlobalSlaves_+nonslave_counter++] = num_consecutive;
00124 
00125   reverse_.resize(nonslave_counter);
00126   int first = lowestGlobalSlaveEqn_;
00127   reverse_[0] = first;
00128   for(int i=1; i<nonslave_counter; ++i) {
00129     reverse_[i] = reverse_[i-1] +
00130          (nonslaves_[i]-nonslaves_[i-1] - nonslaves_[numGlobalSlaves_+i] - 1);
00131   }
00132 
00133 #ifndef FEI_SER
00134     MPI_Comm_rank(comm_, &localProc_);
00135     MPI_Comm_size(comm_, &numProcs_);
00136 #endif
00137 
00138   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00139     FEI_OSTREAM& os = *output_stream_;
00140     os << dbgprefix_<< "ctor, numGlobalSlaves="<<numGlobalSlaves_
00141       << FEI_ENDL;
00142   }
00143 
00144   if (numGlobalSlaves_ < 1) {
00145     throw std::runtime_error("ERROR: don't use fei::Reducer when numGlobalSlaves==0. Report to Alan Williams.");
00146   }
00147 }
00148 
00149 Reducer::Reducer(fei::SharedPtr<fei::MatrixGraph> matrixGraph)
00150  : csrD_(),
00151    slavesPtr_(NULL),
00152    Kii_(),
00153    Kid_(),
00154    Kdi_(),
00155    Kdd_(),
00156    fi_(),
00157    fd_(),
00158    tmpMat1_(),
00159    tmpMat2_(),
00160    tmpVec1_(),
00161    tmpVec2_(),
00162    csg_(),
00163    g_nonzero_(false),
00164    localUnreducedEqns_(),
00165    localReducedEqns_(),
00166    nonslaves_(),
00167    reverse_(),
00168    isSlaveEqn_(NULL),
00169    numGlobalSlaves_(0),
00170    numLocalSlaves_(0),
00171    firstLocalReducedEqn_(0),
00172    lastLocalReducedEqn_(0),
00173    lowestGlobalSlaveEqn_(0),
00174    highestGlobalSlaveEqn_(0),
00175    localProc_(0),
00176    numProcs_(1),
00177    comm_(),
00178    dbgprefix_("Reducer: "),
00179    mat_counter_(0),
00180    rhs_vec_counter_(0),
00181    bool_array_(0),
00182    int_array_(0),
00183    double_array_(0),
00184    array_len_(0)
00185 {
00186   fei::SharedPtr<fei::VectorSpace> vecSpace = matrixGraph->getRowSpace();
00187   comm_ = vecSpace->getCommunicator();
00188   initialize();
00189 
00190   int num = vecSpace->getNumIndices_Owned();
00191   std::vector<int> indices(num);
00192   vecSpace->getIndices_Owned(num, &indices[0], num);
00193   setLocalUnreducedEqns(indices);
00194 }
00195 
00196 Reducer::~Reducer()
00197 {
00198   delete [] isSlaveEqn_;   isSlaveEqn_ = 0;
00199   delete [] bool_array_;   bool_array_ = 0;
00200   delete [] int_array_;    int_array_ = 0;
00201   delete [] double_array_; double_array_ = 0;
00202   array_len_ = 0;
00203 }
00204 
00205 void
00206 Reducer::setLocalUnreducedEqns(const std::vector<int>& localUnreducedEqns)
00207 {
00208   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00209     FEI_OSTREAM& os = *output_stream_;
00210     os << dbgprefix_<< "setLocalUnreducedEqns, numLocalEqns="
00211       <<localUnreducedEqns.size() << FEI_ENDL;
00212   }
00213 
00214   if (localUnreducedEqns_ == localUnreducedEqns) {
00215     return;
00216   }
00217 
00218   localUnreducedEqns_ = localUnreducedEqns;
00219 
00220   int num = localUnreducedEqns_.size();
00221 
00222   if (isSlaveEqn_ != NULL) delete [] isSlaveEqn_;
00223 
00224   isSlaveEqn_ = num > 0 ? new bool[localUnreducedEqns_.size()] : NULL;
00225 
00226   numLocalSlaves_ = 0;
00227 
00228   for(size_t i=0; i<localUnreducedEqns_.size(); ++i) {
00229     int idx = snl_fei::binarySearch(localUnreducedEqns_[i],
00230                                     slavesPtr_, numGlobalSlaves_);
00231     if (idx < 0) {
00232       isSlaveEqn_[i] = false;
00233     }
00234     else {
00235       isSlaveEqn_[i] = true;
00236       ++numLocalSlaves_;
00237 
00238       if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00239         FEI_OSTREAM& os = *output_stream_;
00240         os << dbgprefix_<<" slave " << localUnreducedEqns_[i] << " depends on ";
00241         int offset = csrD_.getGraph().rowOffsets[idx];
00242         int rowlen = csrD_.getGraph().rowOffsets[idx+1]-offset;
00243         int* indices = &(csrD_.getGraph().packedColumnIndices[offset]);
00244         for(int j=0; j<rowlen; ++j) {
00245           os << indices[j] << " ";
00246         }
00247         os << FEI_ENDL;
00248       }
00249     }
00250 
00251   }
00252 
00253   int num_slaves_on_lower_procs = 0;
00254 
00255 
00256 #ifndef FEI_SER
00257 
00258   if (numProcs_ > 1) {
00259     std::vector<int> procNumLocalSlaves(numProcs_);
00260 
00261     MPI_Allgather(&numLocalSlaves_, 1, MPI_INT, &procNumLocalSlaves[0],
00262                   1, MPI_INT, comm_);
00263 
00264     for(int p=0; p<localProc_; ++p) {
00265       num_slaves_on_lower_procs += procNumLocalSlaves[p];
00266     }
00267   }
00268 #endif
00269 
00270   unsigned first_non_slave_offset = 0;
00271   while(first_non_slave_offset < localUnreducedEqns_.size() &&
00272         isSlaveEqn_[first_non_slave_offset] == true) {
00273     ++first_non_slave_offset;
00274   }
00275 
00276   firstLocalReducedEqn_ = localUnreducedEqns_[first_non_slave_offset]
00277       - num_slaves_on_lower_procs - first_non_slave_offset;
00278 
00279   int num_local_eqns = localUnreducedEqns_.size() - numLocalSlaves_;
00280 
00281   lastLocalReducedEqn_ = firstLocalReducedEqn_ + num_local_eqns - 1;
00282 
00283   localReducedEqns_.resize(num_local_eqns);
00284 
00285   unsigned offset = 0;
00286   int eqn = firstLocalReducedEqn_;
00287   for(unsigned i=0; i<localUnreducedEqns_.size(); ++i) {
00288     if (isSlaveEqn_[i] == false) {
00289       localReducedEqns_[offset++] = eqn++;
00290     }
00291   }
00292 
00293   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00294     FEI_OSTREAM& os = *output_stream_;
00295     if (localUnreducedEqns_.size() > 0) {
00296       os << dbgprefix_<< "first local eqn="
00297          <<localUnreducedEqns_[0]<<", last local eqn="
00298       <<localUnreducedEqns_[localUnreducedEqns_.size()-1] << FEI_ENDL;
00299     }
00300     os << dbgprefix_<<"numLocalSlaves_="<<numLocalSlaves_
00301        <<", firstLocalReducedEqn_="<<firstLocalReducedEqn_
00302        <<", lastLocalReducedEqn_="<<lastLocalReducedEqn_<<FEI_ENDL;
00303   }
00304 }
00305 
00306 void
00307 Reducer::addGraphEntries(fei::SharedPtr<fei::SparseRowGraph> matrixGraph)
00308 {
00309   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00310     FEI_OSTREAM& os = *output_stream_;
00311     os << dbgprefix_<<"addGraphEntries"<<FEI_ENDL;
00312   }
00313 
00314   //iterate through the incoming matrixGraph, putting its contents into
00315   //Kdd, Kdi, Kid and Kii as appropriate.
00316 
00317   bool create_if_necessary = true;
00318 
00319   std::vector<int>& rowNumbers = matrixGraph->rowNumbers;
00320   std::vector<int>& rowOffsets = matrixGraph->rowOffsets;
00321   std::vector<int>& packedCols = matrixGraph->packedColumnIndices;
00322 
00323   for(unsigned i=0; i<rowNumbers.size(); ++i) {
00324     int row = rowNumbers[i];
00325 
00326     bool slave_row = isSlaveEqn(row);
00327 
00328     int rowLength = rowOffsets[i+1]-rowOffsets[i];
00329     int* cols = &packedCols[rowOffsets[i]];
00330 
00331     if (slave_row) {
00332       fei::FillableVec* Kdd_row = Kdd_.getRow(row, create_if_necessary);
00333       fei::FillableVec* Kdi_row = Kdi_.getRow(row, create_if_necessary);
00334 
00335       for(int j=0; j<rowLength; ++j) {
00336         int col = cols[j];
00337         bool slave_col = isSlaveEqn(col);
00338 
00339         if (slave_col) {
00340           Kdd_row->addEntry(col, 0.0);
00341         }
00342         else {
00343           Kdi_row->addEntry(col, 0.0);
00344         }
00345       }
00346     }
00347     else {
00348       //not a slave row, so add slave columns to Kid, and non-slave
00349       //columns to graph.
00350       fei::FillableVec* Kid_row = Kid_.getRow(row, create_if_necessary);
00351       fei::FillableVec* Kii_row = Kii_.getRow(row, create_if_necessary);
00352 
00353       for(int j=0; j<rowLength; ++j) {
00354         int col = cols[j];
00355         bool slave_col = isSlaveEqn(col);
00356 
00357         if (slave_col) {
00358           Kid_row->addEntry(col, 0.0);
00359         }
00360         else {
00361           Kii_row->addEntry(col, 0.0);
00362         }
00363       }
00364     }
00365   }
00366 }
00367 
00368 void
00369 Reducer::expand_work_arrays(int size)
00370 {
00371   if (size <= array_len_) return;
00372 
00373   array_len_ = size;
00374   delete [] bool_array_;
00375   delete [] int_array_;
00376   delete [] double_array_;
00377   bool_array_ = new bool[array_len_];
00378   int_array_ = new int[array_len_];
00379   double_array_ = new double[array_len_];
00380 }
00381 
00382 void
00383 Reducer::addGraphIndices(int numRows, const int* rows,
00384                          int numCols, const int* cols,
00385                          fei::Graph& graph)
00386 {
00387   bool create_if_necessary = true;
00388 
00389   expand_work_arrays(numCols);
00390 
00391   bool no_slave_cols = true;
00392   for(int i=0; i<numCols; ++i) {
00393     bool_array_[i] = isSlaveEqn(cols[i]);
00394     if (bool_array_[i]) no_slave_cols = false;
00395   }
00396 
00397   for(int i=0; i<numRows; ++i) {
00398     bool slave_row = isSlaveEqn(rows[i]);
00399 
00400     if (slave_row) {
00401       fei::FillableVec* Kdd_row = Kdd_.getRow(rows[i], create_if_necessary);
00402       fei::FillableVec* Kdi_row = Kdi_.getRow(rows[i], create_if_necessary);
00403 
00404       for(int j=0; j<numCols; ++j) {
00405         if (bool_array_[j]) {
00406           Kdd_row->addEntry(cols[j], 0.0);
00407         }
00408         else {
00409           Kdi_row->addEntry(cols[j], 0.0);
00410         }
00411       }
00412       ++mat_counter_;
00413     }
00414     else {
00415       //not a slave row, so add slave columns to Kid, and non-slave
00416       //columns to graph.
00417       fei::FillableVec* Kid_row = no_slave_cols ?
00418         NULL : Kid_.getRow(rows[i], create_if_necessary);
00419   
00420       unsigned num_non_slave_cols = 0;
00421 
00422       for(int j=0; j<numCols; ++j) {
00423         if (bool_array_[j]) {
00424           Kid_row->addEntry(cols[j], 0.0);
00425           ++mat_counter_;
00426         }
00427         else {
00428           int_array_[num_non_slave_cols++] = translateToReducedEqn(cols[j]);
00429         }
00430       }
00431 
00432       if (num_non_slave_cols > 0) {
00433         int reduced_row = translateToReducedEqn(rows[i]);
00434         graph.addIndices(reduced_row, num_non_slave_cols, int_array_);
00435       }
00436     }
00437   }
00438 
00439   if (mat_counter_ > 600) {
00440     assembleReducedGraph(&graph, false);
00441   }
00442 }
00443 
00444 void
00445 Reducer::addSymmetricGraphIndices(int numIndices, const int* indices,
00446                                   bool diagonal,
00447                                   fei::Graph& graph)
00448 {
00449   if (diagonal) {
00450     for(int i=0; i<numIndices; ++i) {
00451       addGraphIndices(1, &indices[i], 1, &indices[i], graph);
00452     }
00453   }
00454   else {
00455     addGraphIndices(numIndices, indices, numIndices, indices, graph);
00456   }
00457 }
00458 
00459 void
00460 Reducer::assembleReducedGraph(fei::Graph* graph,
00461                               bool global_gather)
00462 {
00463   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00464     FEI_OSTREAM& os = *output_stream_;
00465     os << dbgprefix_<<"assembleReducedGraph(fei::Graph)"<<FEI_ENDL;
00466     if (output_level_ >= fei::FULL_LOGS) {
00467       os << dbgprefix_<<"Kdi:"<<FEI_ENDL<<Kdi_;
00468     }
00469   }
00470 
00471   //This function performs the appropriate manipulations (matrix-matrix
00472   //products, etc., on the Kid,Kdi,Kdd matrices and then assembles the
00473   //results into the input graph structure.
00474   //
00475 
00476   //form tmpMat1_ = Kid*D
00477   csrKid = Kid_;
00478   fei::multiply_CSRMat_CSRMat(csrKid, csrD_, tmpMat1_, true);
00479 
00480   csrKdi = Kdi_;
00481   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdi, tmpMat2_, true);
00482 
00483   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat1_);
00484   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00485 
00486   fei::impl_utils::add_to_graph(tmpMat1_, *graph);
00487   fei::impl_utils::add_to_graph(tmpMat2_, *graph);
00488 
00489   //form tmpMat1_ = D^T*Kdd
00490   csrKdd = Kdd_;
00491   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdd, tmpMat1_, true);
00492 
00493   //form tmpMat2_ = tmpMat1_*D = D^T*Kdd*D
00494   fei::multiply_CSRMat_CSRMat(tmpMat1_, csrD_, tmpMat2_, true);
00495 
00496   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00497 
00498   fei::impl_utils::add_to_graph(tmpMat2_, *graph);
00499 
00500   //lastly, translate Kii and add it to the graph.
00501   csrKii = Kii_;
00502   fei::impl_utils::translate_to_reduced_eqns(*this, csrKii);
00503   fei::impl_utils::add_to_graph(csrKii, *graph);
00504 
00505   Kii_.clear();
00506   Kdi_.clear();
00507   Kid_.clear();
00508   Kdd_.clear();
00509 
00510   mat_counter_ = 0;
00511 
00512   if (global_gather) {
00513     graph->gatherFromOverlap();
00514   }
00515 }
00516 
00517 void
00518 Reducer::assembleReducedGraph(fei::SparseRowGraph* srgraph)
00519 {
00520   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00521     FEI_OSTREAM& os = *output_stream_;
00522     os << dbgprefix_<<"assembleReducedGraph(fei::SparseRowGraph)"<<FEI_ENDL;
00523   }
00524 
00525   fei::Graph_Impl graph(comm_, firstLocalReducedEqn_, lastLocalReducedEqn_);
00526   assembleReducedGraph(&graph);
00527   fei::copyToSparseRowGraph(*(graph.getLocalGraph()), *srgraph);
00528 }
00529 
00530 void
00531 Reducer::assembleReducedMatrix(fei::Matrix& matrix)
00532 {
00533   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00534     FEI_OSTREAM& os = *output_stream_;
00535     os << dbgprefix_<<"assembleReducedMatrix(fei::Matrix)"<<FEI_ENDL;
00536     if (output_level_ >= fei::FULL_LOGS) {
00537       os << dbgprefix_<<"Kid:"<<FEI_ENDL<<Kid_;
00538       os << dbgprefix_<<"Kdi:"<<FEI_ENDL<<Kdi_;
00539     }
00540   }
00541 
00542   //form tmpMat1_ = Kid_*D
00543   csrKid = Kid_;
00544 
00545   fei::multiply_CSRMat_CSRMat(csrKid, csrD_, tmpMat1_);
00546 
00547   //form tmpMat2_ = D^T*Kdi_
00548   csrKdi = Kdi_;
00549   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdi, tmpMat2_);
00550 
00551   //accumulate the above two results into the global system matrix.
00552   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat1_);
00553 
00554   fei::impl_utils::add_to_matrix(tmpMat1_, true, matrix);
00555 
00556   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00557   fei::impl_utils::add_to_matrix(tmpMat2_, true, matrix);
00558 
00559   //form tmpMat1_ = D^T*Kdd_
00560   csrKdd = Kdd_;
00561   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdd, tmpMat1_);
00562 
00563   //form tmpMat2_ = tmpMat1_*D = D^T*Kdd_*D
00564   fei::multiply_CSRMat_CSRMat(tmpMat1_, csrD_, tmpMat2_);
00565 
00566   if (g_nonzero_) {
00567     //form tmpVec1_ = Kid_*g_
00568     fei::multiply_CSRMat_CSVec(csrKid, csg_, tmpVec1_);
00569 
00570     //add tmpVec1_ to fi_
00571     csfi = fi_;
00572     fei::add_CSVec_CSVec(tmpVec1_, csfi);
00573 
00574     //we already have tmpMat1_ = D^T*Kdd which was computed above, and we need
00575     //to form tmpVec1_ = D^T*Kdd*g_.
00576     //So we can simply form tmpVec1_ = tmpMat1_*g_.
00577     fei::multiply_CSRMat_CSVec(tmpMat1_, csg_, tmpVec1_);
00578 
00579     //now add tmpVec1_ to the right-hand-side fi_
00580     fei::add_CSVec_CSVec(tmpVec1_, csfi);
00581   }
00582 
00583   //accumulate tmpMat2_ = D^T*Kdd_*D into the global system matrix.
00584   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00585   fei::impl_utils::add_to_matrix(tmpMat2_, true, matrix);
00586 
00587   //lastly, translate Kii and add it to the graph.
00588   csrKii = Kii_;
00589   fei::impl_utils::translate_to_reduced_eqns(*this, csrKii);
00590   fei::impl_utils::add_to_matrix(csrKii, true, matrix);
00591 
00592   Kii_.clear();
00593   Kdi_.clear();
00594   Kid_.clear();
00595   Kdd_.clear();
00596 
00597   mat_counter_ = 0;
00598 }
00599 
00600 bool
00601 Reducer::isSlaveEqn(int unreducedEqn) const
00602 {
00603   int num = localUnreducedEqns_.size();
00604 
00605   int offset = num>0 ? unreducedEqn - localUnreducedEqns_[0] : -1;
00606   if (offset < 0 || offset >= (int)localUnreducedEqns_.size()) {
00607     return(isSlaveCol(unreducedEqn));
00608   }
00609 
00610   return(isSlaveEqn_[offset]);
00611 }
00612 
00613 void
00614 Reducer::getSlaveMasterEqns(int slaveEqn, std::vector<int>& masterEqns)
00615 {
00616   masterEqns.clear();
00617 
00618   std::vector<int>& rows = csrD_.getGraph().rowNumbers;
00619 
00620   std::vector<int>::iterator iter =
00621     std::lower_bound(rows.begin(), rows.end(), slaveEqn);
00622 
00623   if (iter == rows.end() || *iter != slaveEqn) {
00624     return;
00625   }
00626 
00627   size_t offset = iter - rows.begin();
00628 
00629   int rowBegin = csrD_.getGraph().rowOffsets[offset];
00630   int rowEnd = csrD_.getGraph().rowOffsets[offset+1];
00631   std::vector<int>& cols = csrD_.getGraph().packedColumnIndices;
00632 
00633   for(int j=rowBegin; j<rowEnd; ++j) {
00634     masterEqns.push_back(cols[j]);
00635   }
00636 }
00637 
00638 bool
00639 Reducer::isSlaveCol(int unreducedEqn) const
00640 {
00641   int idx = snl_fei::binarySearch(unreducedEqn,
00642                                   slavesPtr_, numGlobalSlaves_);
00643   
00644   return(idx>=0);
00645 }
00646 
00647 int
00648 Reducer::translateToReducedEqn(int eqn) const
00649 {
00650   if (eqn < lowestGlobalSlaveEqn_) {
00651     return(eqn);
00652   }
00653 
00654   if (eqn > highestGlobalSlaveEqn_) {
00655     return(eqn - numGlobalSlaves_);
00656   }
00657 
00658   int index = 0;
00659   int foundOffset = snl_fei::binarySearch(eqn, slavesPtr_, numGlobalSlaves_,
00660                                           index);
00661 
00662   if (foundOffset >= 0) {
00663     throw std::runtime_error("Reducer::translateToReducedEqn ERROR, input is slave eqn.");
00664   }
00665 
00666   return(eqn - index);
00667 }
00668 
00669 int
00670 Reducer::translateFromReducedEqn(int reduced_eqn) const
00671 {
00672   int index = -1;
00673   int offset = snl_fei::binarySearch(reduced_eqn, &reverse_[0],
00674                                      reverse_.size(), index);
00675   if (offset >= 0) {
00676     return(nonslaves_[offset]);
00677   }
00678 
00679   if (index == 0) {
00680     return(reduced_eqn);
00681   }
00682 
00683   int adjustment = reduced_eqn - reverse_[index-1];
00684 
00685   return(nonslaves_[index-1] + adjustment);
00686 }
00687 
00688 int
00689 Reducer::addMatrixValues(int numRows, const int* rows,
00690                          int numCols, const int* cols,
00691                          const double* const* values,
00692                          bool sum_into,
00693                          fei::Matrix& feimat,
00694                          int format)
00695 {
00696   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00697     FEI_OSTREAM& os = *output_stream_;
00698     os << dbgprefix_<<"addMatrixValues(fei::Matrix)"<<FEI_ENDL;
00699   }
00700 
00701   expand_work_arrays(numCols+numRows);
00702 
00703   const double** myvalues = const_cast<const double**>(values);
00704   if (format != FEI_DENSE_ROW) {
00705     if (format != FEI_DENSE_COL) {
00706       throw std::runtime_error("fei::Reducer::addMatrixValues ERROR, submatrix format must be either FEI_DENSE_ROW or FEI_DENSE_COL. Other formats not supported with slave constraints.");
00707     }
00708 
00709     fei::Matrix_core::copyTransposeToWorkArrays(numRows, numCols, values,
00710                                                 work_1D_, work_2D_);
00711     myvalues = &work_2D_[0];
00712   }
00713 
00714   bool no_slave_cols = true;
00715   unsigned num_non_slave_cols = 0;
00716   for(int j=0; j<numCols; ++j) {
00717     bool_array_[j] = isSlaveEqn(cols[j]);
00718     if (bool_array_[j]) no_slave_cols = false;
00719     else int_array_[num_non_slave_cols++] = translateToReducedEqn(cols[j]);
00720   }
00721 
00722   bool no_slave_rows = true;
00723   for(int i=0; i<numRows; ++i) {
00724     bool_array_[numCols+i] = isSlaveEqn(rows[i]);
00725     if (bool_array_[numCols+i]) no_slave_rows = false;
00726     else int_array_[numCols+i] = translateToReducedEqn(rows[i]);
00727   }
00728 
00729   if (no_slave_rows && no_slave_cols) {
00730     if (sum_into) {
00731       feimat.sumIn(numRows, int_array_+numCols,
00732                    numCols, int_array_, myvalues, FEI_DENSE_ROW);
00733     }
00734     else {
00735       feimat.copyIn(numRows, int_array_+numCols,
00736                     numCols, int_array_, myvalues, FEI_DENSE_ROW);
00737     }
00738 
00739     return(0);
00740   }
00741 
00742   for(int i=0; i<numRows; ++i) {
00743     if (bool_array_[numCols+i]) {
00744       //slave row: slave columns go into Kdd, non-slave columns go
00745       //into Kdi.
00746       fei::FillableVec* Kdd_row = Kdd_.getRow(rows[i], true);
00747       fei::FillableVec* Kdi_row = Kdi_.getRow(rows[i], true);
00748 
00749       for(int j=0; j<numCols; ++j) {
00750         if (bool_array_[j]) {
00751           Kdd_row->addEntry(cols[j], myvalues[i][j]);
00752         }
00753         else {
00754           Kdi_row->addEntry(cols[j], myvalues[i][j]);
00755         }
00756       }
00757       ++mat_counter_;
00758     }
00759     else {//not slave row
00760       if (no_slave_cols) {
00761         int reduced_row = int_array_[numCols+i];
00762         const double* rowvals = myvalues[i];
00763         if (sum_into) {
00764           feimat.sumIn(1, &reduced_row, numCols, int_array_,
00765                        &rowvals, format);
00766         }
00767         else {
00768           feimat.copyIn(1, &reduced_row, num_non_slave_cols, int_array_,
00769                         &rowvals, format);
00770         }
00771         continue;
00772       }
00773 
00774       //put non-slave columns into Kii,
00775       //and slave columns into Kid.
00776       fei::FillableVec* Kid_row = Kid_.getRow(rows[i], true);
00777 
00778       unsigned offset = 0;
00779       for(int j=0; j<numCols; ++j) {
00780         if (bool_array_[j]) {
00781           Kid_row->addEntry(cols[j], myvalues[i][j]);
00782           ++mat_counter_;
00783         }
00784         else {
00785           double_array_[offset++] = myvalues[i][j];
00786         }
00787       }
00788 
00789       if (num_non_slave_cols > 0) {
00790         int reduced_row = int_array_[numCols+i];
00791         if (sum_into) {
00792           feimat.sumIn(1, &reduced_row, num_non_slave_cols, int_array_,
00793                        &double_array_, format);
00794         }
00795         else {
00796           feimat.copyIn(1, &reduced_row, num_non_slave_cols, int_array_,
00797                         &double_array_, format);
00798         }
00799       }
00800     }
00801   }
00802 
00803   if (mat_counter_ > 600) {
00804     assembleReducedMatrix(feimat);
00805   }
00806 
00807   return(0);
00808 }
00809 
00810 int
00811 Reducer::addVectorValues(int numValues,
00812                          const int* globalIndices,
00813                          const double* values,
00814                          bool sum_into,
00815                          bool soln_vector,
00816                          int vectorIndex,
00817                          fei::Vector& feivec)
00818 {
00819   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00820     FEI_OSTREAM& os = *output_stream_;
00821     os << dbgprefix_<<"addVectorValues(fei::Vector)"<<FEI_ENDL;
00822   }
00823 
00824   for(int i=0; i<numValues; ++i) {
00825     if (isSlaveEqn(globalIndices[i])) {
00826       if (sum_into) {
00827         if (!soln_vector) fd_.addEntry(globalIndices[i], values[i]);
00828       }
00829       else {
00830         if (!soln_vector) fd_.putEntry(globalIndices[i], values[i]);
00831       }
00832       if (!soln_vector) ++rhs_vec_counter_;
00833     }
00834     else {
00835       int reduced_index = translateToReducedEqn(globalIndices[i]);
00836 
00837       if (sum_into) {
00838         feivec.sumIn(1, &reduced_index, &values[i], vectorIndex);
00839       }
00840       else {
00841         feivec.copyIn(1, &reduced_index, &values[i], vectorIndex);
00842       }
00843     }
00844   }
00845 
00846   if (rhs_vec_counter_ > 600) {
00847     assembleReducedVector(soln_vector, feivec, sum_into);
00848   }
00849 
00850   return(0);
00851 }
00852 
00853 void
00854 Reducer::assembleReducedVector(bool soln_vector,
00855                                fei::Vector& feivec,
00856                                bool sum_into)
00857 {
00858   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00859     FEI_OSTREAM& os = *output_stream_;
00860     os << dbgprefix_<<"assembleReducedVector(fei::Vector)"<<FEI_ENDL;
00861   }
00862 
00863   if (soln_vector) {
00864     return;
00865   }
00866 
00867   fei::FillableVec& vec = fd_;
00868 
00869   if (vec.size() > 0) {
00870     //form tmpVec1 = D^T*vec.
00871     csvec = vec;
00872     fei::multiply_trans_CSRMat_CSVec(csrD_, csvec, tmpVec1_);
00873 
00874     vec.clear();
00875 
00876     fei::impl_utils::translate_to_reduced_eqns(*this, tmpVec1_);
00877 
00878     int which_vector = 0;
00879     if (sum_into) {
00880       feivec.sumIn(tmpVec1_.size(), &(tmpVec1_.indices()[0]),
00881                    &(tmpVec1_.coefs()[0]), which_vector);
00882     }
00883     else {
00884       feivec.copyIn(tmpVec1_.size(), &(tmpVec1_.indices()[0]),
00885                     &(tmpVec1_.coefs()[0]), which_vector);
00886     }
00887   }
00888 
00889   fei::FillableVec& vec_i = fi_;
00890 
00891   if (vec_i.size() > 0) {
00892     csvec_i = vec_i;
00893     fei::impl_utils::translate_to_reduced_eqns(*this, csvec_i);
00894 
00895     int which_vector = 0;
00896     if (sum_into) {
00897       feivec.sumIn(csvec_i.size(), &(csvec_i.indices()[0]),
00898                    &(csvec_i.coefs()[0]), which_vector);
00899     }
00900     else {
00901       feivec.copyIn(csvec_i.size(), &(csvec_i.indices()[0]),
00902                     &(csvec_i.coefs()[0]), which_vector);
00903     }
00904 
00905     vec_i.clear();
00906   }
00907 
00908   rhs_vec_counter_ = 0;
00909 }
00910 
00911 int
00912 Reducer::copyOutVectorValues(int numValues,
00913                              const int* globalIndices,
00914                              double* values,
00915                              bool soln_vector,
00916                              int vectorIndex,
00917                              fei::Vector& feivec)
00918 {
00919   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00920     FEI_OSTREAM& os = *output_stream_;
00921     os << dbgprefix_<<"copyOutVectorValues"<<FEI_ENDL;
00922   }
00923 
00924   tmpVec1_.clear();
00925   tmpVec2_.clear();
00926   std::vector<int> reduced_indices;
00927   std::vector<int> offsets;
00928 
00929   for(int i=0; i<numValues; ++i) {
00930     if (isSlaveEqn(globalIndices[i])) {
00931       fei::put_entry(tmpVec1_, globalIndices[i], 0.0);
00932       offsets.push_back(i);
00933     }
00934     else {
00935       int reduced_idx = translateToReducedEqn(globalIndices[i]);
00936       feivec.copyOut(1, &reduced_idx, &values[i], vectorIndex);
00937     }
00938   }
00939 
00940   if (tmpVec1_.size() > 0) {
00941     fei::multiply_trans_CSRMat_CSVec(csrD_, tmpVec1_, tmpVec2_);
00942     int* tmpVec2Indices = &(tmpVec2_.indices()[0]);
00943     for(size_t i=0; i<tmpVec2_.size(); ++i) {
00944       reduced_indices.push_back(translateToReducedEqn(tmpVec2Indices[i]));
00945     }
00946 
00947     feivec.copyOut(tmpVec2_.size(), &reduced_indices[0],
00948                    &(tmpVec2_.coefs()[0]), vectorIndex);
00949 
00950     fei::multiply_CSRMat_CSVec(csrD_, tmpVec2_, tmpVec1_);
00951 
00952     if (g_nonzero_) {
00953       int* ginds = &(csg_.indices()[0]);
00954       double* gcoefs = &(csg_.coefs()[0]);
00955       for(size_t ii=0; ii<csg_.size(); ++ii) {
00956         fei::add_entry(tmpVec1_, ginds[ii], -gcoefs[ii]);
00957       }
00958     }
00959 
00960     int len = tmpVec1_.size();
00961     int* indices = &(tmpVec1_.indices()[0]);
00962     double* coefs = &(tmpVec1_.coefs()[0]);
00963 
00964     for(unsigned ii=0; ii<offsets.size(); ++ii) {
00965       int index = globalIndices[offsets[ii]];
00966       int idx = snl_fei::binarySearch(index, indices, len);
00967       if (idx < 0) {
00968         continue;
00969       }
00970 
00971       values[offsets[ii]] = coefs[idx];
00972     }
00973   }
00974 
00975   return(0);
00976 }
00977 
00978 std::vector<int>&
00979 Reducer::getLocalReducedEqns()
00980 {
00981   return(localReducedEqns_);
00982 }
00983 
00984 }//namespace fei
00985 

Generated on Wed May 12 21:30:41 2010 for FEI by  doxygen 1.4.7