FEI Version of the Day
fei_Reducer.cpp
00001 /*
00002 // @HEADER
00003 // ************************************************************************
00004 //             FEI: Finite Element Interface to Linear Solvers
00005 //                  Copyright (2005) Sandia Corporation.
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the
00008 // U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Alan Williams (william@sandia.gov) 
00038 //
00039 // ************************************************************************
00040 // @HEADER
00041 */
00042 
00043 
00044 
00045 #include <fei_Reducer.hpp>
00046 #include <fei_MatrixGraph.hpp>
00047 #include <fei_Matrix.hpp>
00048 #include <fei_Matrix_core.hpp>
00049 #include <fei_Vector.hpp>
00050 #include <fei_Graph_Impl.hpp>
00051 #include <fei_ArrayUtils.hpp>
00052 #include <fei_TemplateUtils.hpp>
00053 #include <fei_SparseRowGraph.hpp>
00054 #include <fei_Vector.hpp>
00055 #include <fei_impl_utils.hpp>
00056 
00057 namespace fei {
00058 
00059 Reducer::Reducer(fei::SharedPtr<FillableMat> globalSlaveDependencyMatrix,
00060                  fei::SharedPtr<CSVec> g_vector,
00061                  MPI_Comm comm)
00062  : csrD_(),
00063    slavesPtr_(NULL),
00064    Kii_(),
00065    Kid_(),
00066    Kdi_(),
00067    Kdd_(),
00068    csrKii(),
00069    csrKid(),
00070    csrKdi(),
00071    csrKdd(),
00072    fi_(),
00073    fd_(),
00074    csfi(),
00075    csvec(),
00076    csvec_i(),
00077    tmpMat1_(),
00078    tmpMat2_(),
00079    tmpVec1_(),
00080    tmpVec2_(),
00081    csg_(),
00082    g_nonzero_(false),
00083    localUnreducedEqns_(),
00084    localReducedEqns_(),
00085    nonslaves_(),
00086    reverse_(),
00087    isSlaveEqn_(NULL),
00088    numGlobalSlaves_(0),
00089    numLocalSlaves_(0),
00090    firstLocalReducedEqn_(0),
00091    lastLocalReducedEqn_(0),
00092    lowestGlobalSlaveEqn_(0),
00093    highestGlobalSlaveEqn_(0),
00094    localProc_(0),
00095    numProcs_(1),
00096    comm_(comm),
00097    dbgprefix_("Reducer: "),
00098    mat_counter_(0),
00099    rhs_vec_counter_(0),
00100    bool_array_(0),
00101    int_array_(0),
00102    double_array_(0),
00103    array_len_(0),
00104    work_1D_(),
00105    work_2D_()
00106 {
00107   csrD_ = *globalSlaveDependencyMatrix;
00108   if (g_vector.get() != NULL) {
00109     csg_ = *g_vector;
00110   }
00111 
00112   initialize();
00113 }
00114 
00115 void
00116 Reducer::initialize()
00117 {
00118   numGlobalSlaves_ = csrD_.getNumRows();
00119   slavesPtr_ = &((csrD_.getGraph().rowNumbers)[0]);
00120   lowestGlobalSlaveEqn_ = slavesPtr_[0];
00121   highestGlobalSlaveEqn_ = slavesPtr_[numGlobalSlaves_-1];
00122 
00123   if (csg_.size() > 0) {
00124     double* gptr = &(csg_.coefs()[0]);
00125     for(size_t i=0; i<csg_.size(); ++i) {
00126       if (gptr[i] != 0.0) {
00127         g_nonzero_ = true;
00128         break;
00129       }
00130     }
00131   }
00132 
00133   //The following code sets up the mappings (nonslaves_ and reverse_)
00134   //necessary to implement the method 'translateFromReducedEqn'.
00135   //This code is ugly and inelegant, and it took me quite a while to get
00136   //it right. I won't even bother trying to comment it, but note that its
00137   //correctness is tested in test_utils/test_Reducer.cpp.
00138 
00139   nonslaves_.resize(numGlobalSlaves_*2);
00140 
00141   int nonslave_counter = 0;
00142   int slaveOffset = 0;
00143   int num_consecutive = 0;
00144   while(slaveOffset < numGlobalSlaves_-1) {
00145     int gap = slavesPtr_[slaveOffset+1]-slavesPtr_[slaveOffset]-1;
00146     if (gap > 0) {
00147       nonslaves_[nonslave_counter] = slavesPtr_[slaveOffset]+1;
00148       nonslaves_[numGlobalSlaves_+nonslave_counter++] = num_consecutive;
00149       num_consecutive = 0;
00150     }
00151     else {
00152       ++num_consecutive;
00153     }
00154     ++slaveOffset;
00155   }
00156 
00157   nonslaves_[nonslave_counter] = highestGlobalSlaveEqn_+1;
00158   nonslaves_[numGlobalSlaves_+nonslave_counter++] = num_consecutive;
00159 
00160   reverse_.resize(nonslave_counter);
00161   int first = lowestGlobalSlaveEqn_;
00162   reverse_[0] = first;
00163   for(int i=1; i<nonslave_counter; ++i) {
00164     reverse_[i] = reverse_[i-1] +
00165          (nonslaves_[i]-nonslaves_[i-1] - nonslaves_[numGlobalSlaves_+i] - 1);
00166   }
00167 
00168 #ifndef FEI_SER
00169     MPI_Comm_rank(comm_, &localProc_);
00170     MPI_Comm_size(comm_, &numProcs_);
00171 #endif
00172 
00173   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00174     FEI_OSTREAM& os = *output_stream_;
00175     os << dbgprefix_<< "ctor, numGlobalSlaves="<<numGlobalSlaves_
00176       << FEI_ENDL;
00177   }
00178 
00179   if (numGlobalSlaves_ < 1) {
00180     throw std::runtime_error("ERROR: don't use fei::Reducer when numGlobalSlaves==0. Report to Alan Williams.");
00181   }
00182 }
00183 
00184 Reducer::Reducer(fei::SharedPtr<fei::MatrixGraph> matrixGraph)
00185  : csrD_(),
00186    slavesPtr_(NULL),
00187    Kii_(),
00188    Kid_(),
00189    Kdi_(),
00190    Kdd_(),
00191    fi_(),
00192    fd_(),
00193    tmpMat1_(),
00194    tmpMat2_(),
00195    tmpVec1_(),
00196    tmpVec2_(),
00197    csg_(),
00198    g_nonzero_(false),
00199    localUnreducedEqns_(),
00200    localReducedEqns_(),
00201    nonslaves_(),
00202    reverse_(),
00203    isSlaveEqn_(NULL),
00204    numGlobalSlaves_(0),
00205    numLocalSlaves_(0),
00206    firstLocalReducedEqn_(0),
00207    lastLocalReducedEqn_(0),
00208    lowestGlobalSlaveEqn_(0),
00209    highestGlobalSlaveEqn_(0),
00210    localProc_(0),
00211    numProcs_(1),
00212    comm_(),
00213    dbgprefix_("Reducer: "),
00214    mat_counter_(0),
00215    rhs_vec_counter_(0),
00216    bool_array_(0),
00217    int_array_(0),
00218    double_array_(0),
00219    array_len_(0)
00220 {
00221   fei::SharedPtr<fei::VectorSpace> vecSpace = matrixGraph->getRowSpace();
00222   comm_ = vecSpace->getCommunicator();
00223   initialize();
00224 
00225   std::vector<int> indices;
00226   vecSpace->getIndices_Owned(indices);
00227   setLocalUnreducedEqns(indices);
00228 }
00229 
00230 Reducer::~Reducer()
00231 {
00232   delete [] isSlaveEqn_;   isSlaveEqn_ = 0;
00233   delete [] bool_array_;   bool_array_ = 0;
00234   delete [] int_array_;    int_array_ = 0;
00235   delete [] double_array_; double_array_ = 0;
00236   array_len_ = 0;
00237 }
00238 
00239 void
00240 Reducer::setLocalUnreducedEqns(const std::vector<int>& localUnreducedEqns)
00241 {
00242   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00243     FEI_OSTREAM& os = *output_stream_;
00244     os << dbgprefix_<< "setLocalUnreducedEqns, numLocalEqns="
00245       <<localUnreducedEqns.size() << FEI_ENDL;
00246   }
00247 
00248   if (localUnreducedEqns_ == localUnreducedEqns) {
00249     return;
00250   }
00251 
00252   localUnreducedEqns_ = localUnreducedEqns;
00253 
00254   int num = localUnreducedEqns_.size();
00255 
00256   if (isSlaveEqn_ != NULL) delete [] isSlaveEqn_;
00257 
00258   isSlaveEqn_ = num > 0 ? new bool[localUnreducedEqns_.size()] : NULL;
00259 
00260   numLocalSlaves_ = 0;
00261 
00262   for(size_t i=0; i<localUnreducedEqns_.size(); ++i) {
00263     int idx = fei::binarySearch(localUnreducedEqns_[i],
00264                                     slavesPtr_, numGlobalSlaves_);
00265     if (idx < 0) {
00266       isSlaveEqn_[i] = false;
00267     }
00268     else {
00269       isSlaveEqn_[i] = true;
00270       ++numLocalSlaves_;
00271 
00272       if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00273         FEI_OSTREAM& os = *output_stream_;
00274         os << dbgprefix_<<" slave " << localUnreducedEqns_[i] << " depends on ";
00275         int offset = csrD_.getGraph().rowOffsets[idx];
00276         int rowlen = csrD_.getGraph().rowOffsets[idx+1]-offset;
00277         int* indices = &(csrD_.getGraph().packedColumnIndices[offset]);
00278         for(int j=0; j<rowlen; ++j) {
00279           os << indices[j] << " ";
00280         }
00281         os << FEI_ENDL;
00282       }
00283     }
00284 
00285   }
00286 
00287   int num_slaves_on_lower_procs = 0;
00288 
00289 
00290 #ifndef FEI_SER
00291 
00292   if (numProcs_ > 1) {
00293     std::vector<int> procNumLocalSlaves(numProcs_);
00294 
00295     MPI_Allgather(&numLocalSlaves_, 1, MPI_INT, &procNumLocalSlaves[0],
00296                   1, MPI_INT, comm_);
00297 
00298     for(int p=0; p<localProc_; ++p) {
00299       num_slaves_on_lower_procs += procNumLocalSlaves[p];
00300     }
00301   }
00302 #endif
00303 
00304   unsigned first_non_slave_offset = 0;
00305   while(first_non_slave_offset < localUnreducedEqns_.size() &&
00306         isSlaveEqn_[first_non_slave_offset] == true) {
00307     ++first_non_slave_offset;
00308   }
00309 
00310   firstLocalReducedEqn_ = localUnreducedEqns_[first_non_slave_offset]
00311       - num_slaves_on_lower_procs - first_non_slave_offset;
00312 
00313   int num_local_eqns = localUnreducedEqns_.size() - numLocalSlaves_;
00314 
00315   lastLocalReducedEqn_ = firstLocalReducedEqn_ + num_local_eqns - 1;
00316 
00317   localReducedEqns_.resize(num_local_eqns);
00318 
00319   unsigned offset = 0;
00320   int eqn = firstLocalReducedEqn_;
00321   for(unsigned i=0; i<localUnreducedEqns_.size(); ++i) {
00322     if (isSlaveEqn_[i] == false) {
00323       localReducedEqns_[offset++] = eqn++;
00324     }
00325   }
00326 
00327   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00328     FEI_OSTREAM& os = *output_stream_;
00329     if (localUnreducedEqns_.size() > 0) {
00330       os << dbgprefix_<< "first local eqn="
00331          <<localUnreducedEqns_[0]<<", last local eqn="
00332       <<localUnreducedEqns_[localUnreducedEqns_.size()-1] << FEI_ENDL;
00333     }
00334     os << dbgprefix_<<"numLocalSlaves_="<<numLocalSlaves_
00335        <<", firstLocalReducedEqn_="<<firstLocalReducedEqn_
00336        <<", lastLocalReducedEqn_="<<lastLocalReducedEqn_<<FEI_ENDL;
00337   }
00338 }
00339 
00340 void
00341 Reducer::addGraphEntries(fei::SharedPtr<fei::SparseRowGraph> matrixGraph)
00342 {
00343   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00344     FEI_OSTREAM& os = *output_stream_;
00345     os << dbgprefix_<<"addGraphEntries"<<FEI_ENDL;
00346   }
00347 
00348   //iterate through the incoming matrixGraph, putting its contents into
00349   //Kdd, Kdi, Kid and Kii as appropriate.
00350 
00351   std::vector<int>& rowNumbers = matrixGraph->rowNumbers;
00352   std::vector<int>& rowOffsets = matrixGraph->rowOffsets;
00353   std::vector<int>& packedCols = matrixGraph->packedColumnIndices;
00354 
00355   for(unsigned i=0; i<rowNumbers.size(); ++i) {
00356     int row = rowNumbers[i];
00357 
00358     bool slave_row = isSlaveEqn(row);
00359 
00360     int rowLength = rowOffsets[i+1]-rowOffsets[i];
00361     int* cols = &packedCols[rowOffsets[i]];
00362 
00363     if (slave_row) {
00364       fei::FillableVec* Kdd_row = Kdd_.create_or_getRow(row);
00365       fei::FillableVec* Kdi_row = Kdi_.create_or_getRow(row);
00366 
00367       for(int j=0; j<rowLength; ++j) {
00368         int col = cols[j];
00369         bool slave_col = isSlaveEqn(col);
00370 
00371         if (slave_col) {
00372           Kdd_row->addEntry(col, 0.0);
00373         }
00374         else {
00375           Kdi_row->addEntry(col, 0.0);
00376         }
00377       }
00378     }
00379     else {
00380       //not a slave row, so add slave columns to Kid, and non-slave
00381       //columns to graph.
00382       fei::FillableVec* Kid_row = Kid_.create_or_getRow(row);
00383       fei::FillableVec* Kii_row = Kii_.create_or_getRow(row);
00384 
00385       for(int j=0; j<rowLength; ++j) {
00386         int col = cols[j];
00387         bool slave_col = isSlaveEqn(col);
00388 
00389         if (slave_col) {
00390           Kid_row->addEntry(col, 0.0);
00391         }
00392         else {
00393           Kii_row->addEntry(col, 0.0);
00394         }
00395       }
00396     }
00397   }
00398 }
00399 
00400 void
00401 Reducer::expand_work_arrays(int size)
00402 {
00403   if (size <= array_len_) return;
00404 
00405   array_len_ = size;
00406   delete [] bool_array_;
00407   delete [] int_array_;
00408   delete [] double_array_;
00409   bool_array_ = new bool[array_len_];
00410   int_array_ = new int[array_len_];
00411   double_array_ = new double[array_len_];
00412 }
00413 
00414 void
00415 Reducer::addGraphIndices(int numRows, const int* rows,
00416                          int numCols, const int* cols,
00417                          fei::Graph& graph)
00418 {
00419   expand_work_arrays(numCols);
00420 
00421   bool no_slave_cols = true;
00422   for(int i=0; i<numCols; ++i) {
00423     bool_array_[i] = isSlaveEqn(cols[i]);
00424     if (bool_array_[i]) no_slave_cols = false;
00425   }
00426 
00427   for(int i=0; i<numRows; ++i) {
00428     bool slave_row = isSlaveEqn(rows[i]);
00429 
00430     if (slave_row) {
00431       fei::FillableVec* Kdd_row = Kdd_.create_or_getRow(rows[i]);
00432       fei::FillableVec* Kdi_row = Kdi_.create_or_getRow(rows[i]);
00433 
00434       for(int j=0; j<numCols; ++j) {
00435         if (bool_array_[j]) {
00436           Kdd_row->addEntry(cols[j], 0.0);
00437         }
00438         else {
00439           Kdi_row->addEntry(cols[j], 0.0);
00440         }
00441       }
00442       ++mat_counter_;
00443     }
00444     else {
00445       //not a slave row, so add slave columns to Kid, and non-slave
00446       //columns to graph.
00447       fei::FillableVec* Kid_row = no_slave_cols ?
00448         NULL : Kid_.create_or_getRow(rows[i]);
00449   
00450       unsigned num_non_slave_cols = 0;
00451 
00452       for(int j=0; j<numCols; ++j) {
00453         if (bool_array_[j]) {
00454           Kid_row->addEntry(cols[j], 0.0);
00455           ++mat_counter_;
00456         }
00457         else {
00458           int_array_[num_non_slave_cols++] = translateToReducedEqn(cols[j]);
00459         }
00460       }
00461 
00462       if (num_non_slave_cols > 0) {
00463         int reduced_row = translateToReducedEqn(rows[i]);
00464         graph.addIndices(reduced_row, num_non_slave_cols, int_array_);
00465       }
00466     }
00467   }
00468 
00469   if (mat_counter_ > 600) {
00470     assembleReducedGraph(&graph, false);
00471   }
00472 }
00473 
00474 void
00475 Reducer::addSymmetricGraphIndices(int numIndices, const int* indices,
00476                                   bool diagonal,
00477                                   fei::Graph& graph)
00478 {
00479   if (diagonal) {
00480     for(int i=0; i<numIndices; ++i) {
00481       addGraphIndices(1, &indices[i], 1, &indices[i], graph);
00482     }
00483   }
00484   else {
00485     addGraphIndices(numIndices, indices, numIndices, indices, graph);
00486   }
00487 }
00488 
00489 void
00490 Reducer::assembleReducedGraph(fei::Graph* graph,
00491                               bool global_gather)
00492 {
00493   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00494     FEI_OSTREAM& os = *output_stream_;
00495     os << dbgprefix_<<"assembleReducedGraph(fei::Graph)"<<FEI_ENDL;
00496     if (output_level_ >= fei::FULL_LOGS) {
00497       os << dbgprefix_<<"Kdi:"<<FEI_ENDL<<Kdi_;
00498     }
00499   }
00500 
00501   //This function performs the appropriate manipulations (matrix-matrix
00502   //products, etc., on the Kid,Kdi,Kdd matrices and then assembles the
00503   //results into the input graph structure.
00504   //
00505 
00506   //form tmpMat1_ = Kid*D
00507   csrKid = Kid_;
00508   fei::multiply_CSRMat_CSRMat(csrKid, csrD_, tmpMat1_, true);
00509 
00510   csrKdi = Kdi_;
00511   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdi, tmpMat2_, true);
00512 
00513   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat1_);
00514   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00515 
00516   fei::impl_utils::add_to_graph(tmpMat1_, *graph);
00517   fei::impl_utils::add_to_graph(tmpMat2_, *graph);
00518 
00519   //form tmpMat1_ = D^T*Kdd
00520   csrKdd = Kdd_;
00521   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdd, tmpMat1_, true);
00522 
00523   //form tmpMat2_ = tmpMat1_*D = D^T*Kdd*D
00524   fei::multiply_CSRMat_CSRMat(tmpMat1_, csrD_, tmpMat2_, true);
00525 
00526   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00527 
00528   fei::impl_utils::add_to_graph(tmpMat2_, *graph);
00529 
00530   //lastly, translate Kii and add it to the graph.
00531   csrKii = Kii_;
00532   fei::impl_utils::translate_to_reduced_eqns(*this, csrKii);
00533   fei::impl_utils::add_to_graph(csrKii, *graph);
00534 
00535   Kii_.clear();
00536   Kdi_.clear();
00537   Kid_.clear();
00538   Kdd_.clear();
00539 
00540   mat_counter_ = 0;
00541 
00542   if (global_gather) {
00543     graph->gatherFromOverlap();
00544   }
00545 }
00546 
00547 void
00548 Reducer::assembleReducedGraph(fei::SparseRowGraph* srgraph)
00549 {
00550   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00551     FEI_OSTREAM& os = *output_stream_;
00552     os << dbgprefix_<<"assembleReducedGraph(fei::SparseRowGraph)"<<FEI_ENDL;
00553   }
00554 
00555   fei::Graph_Impl graph(comm_, firstLocalReducedEqn_, lastLocalReducedEqn_);
00556   assembleReducedGraph(&graph);
00557   fei::copyToSparseRowGraph(*(graph.getLocalGraph()), *srgraph);
00558 }
00559 
00560 void
00561 Reducer::assembleReducedMatrix(fei::Matrix& matrix)
00562 {
00563   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00564     FEI_OSTREAM& os = *output_stream_;
00565     os << dbgprefix_<<"assembleReducedMatrix(fei::Matrix)"<<FEI_ENDL;
00566     if (output_level_ >= fei::FULL_LOGS) {
00567       os << dbgprefix_<<"Kid:"<<FEI_ENDL<<Kid_;
00568       os << dbgprefix_<<"Kdi:"<<FEI_ENDL<<Kdi_;
00569     }
00570   }
00571 
00572   //form tmpMat1_ = Kid_*D
00573   csrKid = Kid_;
00574 
00575   fei::multiply_CSRMat_CSRMat(csrKid, csrD_, tmpMat1_);
00576 
00577   //form tmpMat2_ = D^T*Kdi_
00578   csrKdi = Kdi_;
00579   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdi, tmpMat2_);
00580 
00581   //accumulate the above two results into the global system matrix.
00582   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat1_);
00583 
00584   fei::impl_utils::add_to_matrix(tmpMat1_, true, matrix);
00585 
00586   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00587   fei::impl_utils::add_to_matrix(tmpMat2_, true, matrix);
00588 
00589   //form tmpMat1_ = D^T*Kdd_
00590   csrKdd = Kdd_;
00591   fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdd, tmpMat1_);
00592 
00593   //form tmpMat2_ = tmpMat1_*D = D^T*Kdd_*D
00594   fei::multiply_CSRMat_CSRMat(tmpMat1_, csrD_, tmpMat2_);
00595 
00596   if (g_nonzero_) {
00597     //form tmpVec1_ = Kid_*g_
00598     fei::multiply_CSRMat_CSVec(csrKid, csg_, tmpVec1_);
00599 
00600     //add tmpVec1_ to fi_
00601     csfi = fi_;
00602     fei::add_CSVec_CSVec(tmpVec1_, csfi);
00603 
00604     //we already have tmpMat1_ = D^T*Kdd which was computed above, and we need
00605     //to form tmpVec1_ = D^T*Kdd*g_.
00606     //So we can simply form tmpVec1_ = tmpMat1_*g_.
00607     fei::multiply_CSRMat_CSVec(tmpMat1_, csg_, tmpVec1_);
00608 
00609     //now add tmpVec1_ to the right-hand-side fi_
00610     fei::add_CSVec_CSVec(tmpVec1_, csfi);
00611   }
00612 
00613   //accumulate tmpMat2_ = D^T*Kdd_*D into the global system matrix.
00614   fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
00615   fei::impl_utils::add_to_matrix(tmpMat2_, true, matrix);
00616 
00617   //lastly, translate Kii and add it to the graph.
00618   csrKii = Kii_;
00619   fei::impl_utils::translate_to_reduced_eqns(*this, csrKii);
00620   fei::impl_utils::add_to_matrix(csrKii, true, matrix);
00621 
00622   Kii_.clear();
00623   Kdi_.clear();
00624   Kid_.clear();
00625   Kdd_.clear();
00626 
00627   mat_counter_ = 0;
00628 }
00629 
00630 bool
00631 Reducer::isSlaveEqn(int unreducedEqn) const
00632 {
00633   int num = localUnreducedEqns_.size();
00634 
00635   int offset = num>0 ? unreducedEqn - localUnreducedEqns_[0] : -1;
00636   if (offset < 0 || offset >= (int)localUnreducedEqns_.size()) {
00637     return(isSlaveCol(unreducedEqn));
00638   }
00639 
00640   return(isSlaveEqn_[offset]);
00641 }
00642 
00643 void
00644 Reducer::getSlaveMasterEqns(int slaveEqn, std::vector<int>& masterEqns)
00645 {
00646   masterEqns.clear();
00647 
00648   std::vector<int>& rows = csrD_.getGraph().rowNumbers;
00649 
00650   std::vector<int>::iterator iter =
00651     std::lower_bound(rows.begin(), rows.end(), slaveEqn);
00652 
00653   if (iter == rows.end() || *iter != slaveEqn) {
00654     return;
00655   }
00656 
00657   size_t offset = iter - rows.begin();
00658 
00659   int rowBegin = csrD_.getGraph().rowOffsets[offset];
00660   int rowEnd = csrD_.getGraph().rowOffsets[offset+1];
00661   std::vector<int>& cols = csrD_.getGraph().packedColumnIndices;
00662 
00663   for(int j=rowBegin; j<rowEnd; ++j) {
00664     masterEqns.push_back(cols[j]);
00665   }
00666 }
00667 
00668 bool
00669 Reducer::isSlaveCol(int unreducedEqn) const
00670 {
00671   int idx = fei::binarySearch(unreducedEqn,
00672                                   slavesPtr_, numGlobalSlaves_);
00673   
00674   return(idx>=0);
00675 }
00676 
00677 int
00678 Reducer::translateToReducedEqn(int eqn) const
00679 {
00680   if (eqn < lowestGlobalSlaveEqn_) {
00681     return(eqn);
00682   }
00683 
00684   if (eqn > highestGlobalSlaveEqn_) {
00685     return(eqn - numGlobalSlaves_);
00686   }
00687 
00688   int index = 0;
00689   int foundOffset = fei::binarySearch(eqn, slavesPtr_, numGlobalSlaves_,
00690                                           index);
00691 
00692   if (foundOffset >= 0) {
00693     throw std::runtime_error("Reducer::translateToReducedEqn ERROR, input is slave eqn.");
00694   }
00695 
00696   return(eqn - index);
00697 }
00698 
00699 int
00700 Reducer::translateFromReducedEqn(int reduced_eqn) const
00701 {
00702   int index = -1;
00703   int offset = fei::binarySearch(reduced_eqn, &reverse_[0],
00704                                      reverse_.size(), index);
00705   if (offset >= 0) {
00706     return(nonslaves_[offset]);
00707   }
00708 
00709   if (index == 0) {
00710     return(reduced_eqn);
00711   }
00712 
00713   int adjustment = reduced_eqn - reverse_[index-1];
00714 
00715   return(nonslaves_[index-1] + adjustment);
00716 }
00717 
00718 int
00719 Reducer::addMatrixValues(int numRows, const int* rows,
00720                          int numCols, const int* cols,
00721                          const double* const* values,
00722                          bool sum_into,
00723                          fei::Matrix& feimat,
00724                          int format)
00725 {
00726   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00727     FEI_OSTREAM& os = *output_stream_;
00728     os << dbgprefix_<<"addMatrixValues(fei::Matrix)"<<FEI_ENDL;
00729   }
00730 
00731   expand_work_arrays(numCols+numRows);
00732 
00733   const double** myvalues = const_cast<const double**>(values);
00734   if (format != FEI_DENSE_ROW) {
00735     if (format != FEI_DENSE_COL) {
00736       throw std::runtime_error("fei::Reducer::addMatrixValues ERROR, submatrix format must be either FEI_DENSE_ROW or FEI_DENSE_COL. Other formats not supported with slave constraints.");
00737     }
00738 
00739     fei::Matrix_core::copyTransposeToWorkArrays(numRows, numCols, values,
00740                                                 work_1D_, work_2D_);
00741     myvalues = &work_2D_[0];
00742   }
00743 
00744   bool no_slave_cols = true;
00745   unsigned num_non_slave_cols = 0;
00746   for(int j=0; j<numCols; ++j) {
00747     bool_array_[j] = isSlaveEqn(cols[j]);
00748     if (bool_array_[j]) no_slave_cols = false;
00749     else int_array_[num_non_slave_cols++] = translateToReducedEqn(cols[j]);
00750   }
00751 
00752   bool no_slave_rows = true;
00753   for(int i=0; i<numRows; ++i) {
00754     bool_array_[numCols+i] = isSlaveEqn(rows[i]);
00755     if (bool_array_[numCols+i]) no_slave_rows = false;
00756     else int_array_[numCols+i] = translateToReducedEqn(rows[i]);
00757   }
00758 
00759   if (no_slave_rows && no_slave_cols) {
00760     if (sum_into) {
00761       feimat.sumIn(numRows, int_array_+numCols,
00762                    numCols, int_array_, myvalues, FEI_DENSE_ROW);
00763     }
00764     else {
00765       feimat.copyIn(numRows, int_array_+numCols,
00766                     numCols, int_array_, myvalues, FEI_DENSE_ROW);
00767     }
00768 
00769     return(0);
00770   }
00771 
00772   for(int i=0; i<numRows; ++i) {
00773     if (bool_array_[numCols+i]) {
00774       //slave row: slave columns go into Kdd, non-slave columns go
00775       //into Kdi.
00776       fei::FillableVec* Kdd_row = Kdd_.create_or_getRow(rows[i]);
00777       fei::FillableVec* Kdi_row = Kdi_.create_or_getRow(rows[i]);
00778 
00779       for(int j=0; j<numCols; ++j) {
00780         if (bool_array_[j]) {
00781           Kdd_row->addEntry(cols[j], myvalues[i][j]);
00782         }
00783         else {
00784           Kdi_row->addEntry(cols[j], myvalues[i][j]);
00785         }
00786       }
00787       ++mat_counter_;
00788     }
00789     else {//not slave row
00790       if (no_slave_cols) {
00791         int reduced_row = int_array_[numCols+i];
00792         const double* rowvals = myvalues[i];
00793         if (sum_into) {
00794           feimat.sumIn(1, &reduced_row, numCols, int_array_,
00795                        &rowvals, format);
00796         }
00797         else {
00798           feimat.copyIn(1, &reduced_row, num_non_slave_cols, int_array_,
00799                         &rowvals, format);
00800         }
00801         continue;
00802       }
00803 
00804       //put non-slave columns into Kii,
00805       //and slave columns into Kid.
00806       fei::FillableVec* Kid_row = Kid_.create_or_getRow(rows[i]);
00807 
00808       unsigned offset = 0;
00809       for(int j=0; j<numCols; ++j) {
00810         if (bool_array_[j]) {
00811           Kid_row->addEntry(cols[j], myvalues[i][j]);
00812           ++mat_counter_;
00813         }
00814         else {
00815           double_array_[offset++] = myvalues[i][j];
00816         }
00817       }
00818 
00819       if (num_non_slave_cols > 0) {
00820         int reduced_row = int_array_[numCols+i];
00821         if (sum_into) {
00822           feimat.sumIn(1, &reduced_row, num_non_slave_cols, int_array_,
00823                        &double_array_, format);
00824         }
00825         else {
00826           feimat.copyIn(1, &reduced_row, num_non_slave_cols, int_array_,
00827                         &double_array_, format);
00828         }
00829       }
00830     }
00831   }
00832 
00833   if (mat_counter_ > 600) {
00834     assembleReducedMatrix(feimat);
00835   }
00836 
00837   return(0);
00838 }
00839 
00840 int
00841 Reducer::addVectorValues(int numValues,
00842                          const int* globalIndices,
00843                          const double* values,
00844                          bool sum_into,
00845                          bool soln_vector,
00846                          int vectorIndex,
00847                          fei::Vector& feivec)
00848 {
00849   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00850     FEI_OSTREAM& os = *output_stream_;
00851     os << dbgprefix_<<"addVectorValues(fei::Vector)"<<FEI_ENDL;
00852   }
00853 
00854   for(int i=0; i<numValues; ++i) {
00855     if (isSlaveEqn(globalIndices[i])) {
00856       if (sum_into) {
00857         if (!soln_vector) fd_.addEntry(globalIndices[i], values[i]);
00858       }
00859       else {
00860         if (!soln_vector) fd_.putEntry(globalIndices[i], values[i]);
00861       }
00862       if (!soln_vector) ++rhs_vec_counter_;
00863     }
00864     else {
00865       int reduced_index = translateToReducedEqn(globalIndices[i]);
00866 
00867       if (sum_into) {
00868         feivec.sumIn(1, &reduced_index, &values[i], vectorIndex);
00869       }
00870       else {
00871         feivec.copyIn(1, &reduced_index, &values[i], vectorIndex);
00872       }
00873     }
00874   }
00875 
00876   if (rhs_vec_counter_ > 600) {
00877     assembleReducedVector(soln_vector, feivec);
00878   }
00879 
00880   return(0);
00881 }
00882 
00883 void
00884 Reducer::assembleReducedVector(bool soln_vector,
00885                                fei::Vector& feivec)
00886 {
00887   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00888     FEI_OSTREAM& os = *output_stream_;
00889     os << dbgprefix_<<"assembleReducedVector(fei::Vector)"<<FEI_ENDL;
00890   }
00891 
00892   if (soln_vector) {
00893     return;
00894   }
00895 
00896   fei::FillableVec& vec = fd_;
00897 
00898   if (vec.size() > 0) {
00899     //form tmpVec1 = D^T*vec.
00900     csvec = vec;
00901     fei::multiply_trans_CSRMat_CSVec(csrD_, csvec, tmpVec1_);
00902 
00903     vec.clear();
00904 
00905     fei::impl_utils::translate_to_reduced_eqns(*this, tmpVec1_);
00906 
00907     int which_vector = 0;
00908     feivec.sumIn(tmpVec1_.size(), &(tmpVec1_.indices()[0]),
00909                  &(tmpVec1_.coefs()[0]), which_vector);
00910   }
00911 
00912   fei::FillableVec& vec_i = fi_;
00913 
00914   if (vec_i.size() > 0) {
00915     csvec_i = vec_i;
00916     fei::impl_utils::translate_to_reduced_eqns(*this, csvec_i);
00917 
00918     int which_vector = 0;
00919     feivec.sumIn(csvec_i.size(), &(csvec_i.indices()[0]),
00920                  &(csvec_i.coefs()[0]), which_vector);
00921 
00922     vec_i.clear();
00923   }
00924 
00925   rhs_vec_counter_ = 0;
00926 }
00927 
00928 int
00929 Reducer::copyOutVectorValues(int numValues,
00930                              const int* globalIndices,
00931                              double* values,
00932                              bool soln_vector,
00933                              int vectorIndex,
00934                              fei::Vector& feivec)
00935 {
00936   if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
00937     FEI_OSTREAM& os = *output_stream_;
00938     os << dbgprefix_<<"copyOutVectorValues"<<FEI_ENDL;
00939   }
00940 
00941   tmpVec1_.clear();
00942   tmpVec2_.clear();
00943   std::vector<int> reduced_indices;
00944   std::vector<int> offsets;
00945 
00946   for(int i=0; i<numValues; ++i) {
00947     if (isSlaveEqn(globalIndices[i])) {
00948       fei::put_entry(tmpVec1_, globalIndices[i], 0.0);
00949       offsets.push_back(i);
00950     }
00951     else {
00952       int reduced_idx = translateToReducedEqn(globalIndices[i]);
00953       feivec.copyOut(1, &reduced_idx, &values[i], vectorIndex);
00954     }
00955   }
00956 
00957   if (tmpVec1_.size() > 0) {
00958     fei::multiply_trans_CSRMat_CSVec(csrD_, tmpVec1_, tmpVec2_);
00959     int* tmpVec2Indices = &(tmpVec2_.indices()[0]);
00960     for(size_t i=0; i<tmpVec2_.size(); ++i) {
00961       reduced_indices.push_back(translateToReducedEqn(tmpVec2Indices[i]));
00962     }
00963 
00964     feivec.copyOut(tmpVec2_.size(), &reduced_indices[0],
00965                    &(tmpVec2_.coefs()[0]), vectorIndex);
00966 
00967     fei::multiply_CSRMat_CSVec(csrD_, tmpVec2_, tmpVec1_);
00968 
00969     if (g_nonzero_) {
00970       int* ginds = &(csg_.indices()[0]);
00971       double* gcoefs = &(csg_.coefs()[0]);
00972       for(size_t ii=0; ii<csg_.size(); ++ii) {
00973         fei::add_entry(tmpVec1_, ginds[ii], -gcoefs[ii]);
00974       }
00975     }
00976 
00977     int len = tmpVec1_.size();
00978     int* indices = &(tmpVec1_.indices()[0]);
00979     double* coefs = &(tmpVec1_.coefs()[0]);
00980 
00981     for(unsigned ii=0; ii<offsets.size(); ++ii) {
00982       int index = globalIndices[offsets[ii]];
00983       int idx = fei::binarySearch(index, indices, len);
00984       if (idx < 0) {
00985         continue;
00986       }
00987 
00988       values[offsets[ii]] = coefs[idx];
00989     }
00990   }
00991 
00992   return(0);
00993 }
00994 
00995 std::vector<int>&
00996 Reducer::getLocalReducedEqns()
00997 {
00998   return(localReducedEqns_);
00999 }
01000 
01001 }//namespace fei
01002 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends