Anasazi Version of the Day
Tsqr_Random_GlobalMatrix.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2010) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #ifndef __Tsqr_Random_GlobalMatrix_hpp
00030 #define __Tsqr_Random_GlobalMatrix_hpp
00031 
00032 #include "Tsqr_Blas.hpp"
00033 #include "Tsqr_Matrix.hpp"
00034 #include "Tsqr_Random_MatrixGenerator.hpp"
00035 #include "Tsqr_ScalarTraits.hpp"
00036 #include "Tsqr_RMessenger.hpp"
00037 
00038 #include <algorithm>
00039 #include <functional>
00040 #include <iostream>
00041 #include <sstream>
00042 #include <stdexcept>
00043 #include <vector>
00044 
00047 
00048 namespace TSQR {
00049   namespace Random {
00050 
00051     template< class MatrixViewType >
00052     static void
00053     scaleMatrix (MatrixViewType& A, 
00054      const typename MatrixViewType::scalar_type& denom)
00055     {
00056       typedef typename MatrixViewType::ordinal_type ordinal_type;
00057       typedef typename MatrixViewType::scalar_type scalar_type;
00058 
00059       const ordinal_type nrows = A.nrows();
00060       const ordinal_type ncols = A.ncols();
00061       const ordinal_type lda = A.lda();
00062 
00063       if (nrows == lda)
00064   {
00065     // This works because storage in A is contiguous.
00066     const ordinal_type nelts = nrows * ncols;
00067     scalar_type* const A_ptr = A.get();
00068     std::transform (A_ptr, A_ptr + nelts, A_ptr, std::bind2nd(std::divides<double>(), denom));
00069   }
00070       else
00071   {
00072     for (ordinal_type j = 0; j < ncols; ++j)
00073       {
00074         scalar_type* const A_j = &A(0,j);
00075         std::transform (A_j, A_j + nrows, A_j, std::bind2nd(std::divides<double>(), denom));
00076       }
00077   }
00078     }
00079 
00080     template< class MatrixViewType, class Generator >
00081     void
00082     randomGlobalMatrix (Generator* const pGenerator, 
00083       MatrixViewType& A_local,
00084       const typename ScalarTraits< typename MatrixViewType::scalar_type >::magnitude_type singular_values[],
00085       MessengerBase< typename MatrixViewType::ordinal_type >* const ordinalMessenger,
00086       MessengerBase< typename MatrixViewType::scalar_type >* const scalarMessenger)
00087     {
00088       typedef typename MatrixViewType::ordinal_type ordinal_type;
00089       typedef typename MatrixViewType::scalar_type scalar_type;
00090       typedef typename ScalarTraits< scalar_type >::magnitude_type magnitude_type;
00091       using std::vector;
00092 
00093       const bool b_local_debug = false;
00094 
00095       const int rootProc = 0;
00096       const int nprocs = ordinalMessenger->size();
00097       const int myRank = ordinalMessenger->rank();
00098       BLAS< ordinal_type, scalar_type > blas;
00099 
00100       const ordinal_type nrowsLocal = A_local.nrows();
00101       const ordinal_type ncols = A_local.ncols();
00102 
00103       // Theory: Suppose there are P processors.  Proc q wants an m_q by n
00104       // component of the matrix A, which we write as A_q.  On Proc 0, we
00105       // generate random m_q by n orthogonal matrices Q_q (in explicit
00106       // form), and send Q_q to Proc q.  The m by n matrix [Q_0; Q_1; ...;
00107       // Q_{P-1}] is not itself orthogonal.  However, the m by n matrix
00108       // Q = [Q_0 / P; Q_1 / P; ...; Q_{P-1} / P] is orthogonal:
00109       // 
00110       // \sum_{q = 0}^{P-1} (Q_q^T * Q_q) / P = I.
00111 
00112       if (myRank == rootProc)
00113   {
00114     typedef Random::MatrixGenerator< ordinal_type, scalar_type, Generator > matgen_type;
00115     matgen_type matGen (*pGenerator);
00116 
00117     // Generate a random ncols by ncols upper triangular matrix
00118     // R with the given singular values.
00119     Matrix< ordinal_type, scalar_type > R (ncols, ncols, scalar_type(0));
00120     matGen.fill_random_R (ncols, R.get(), R.lda(), singular_values);
00121 
00122     // Broadcast R to all the processors.
00123     scalarMessenger->broadcast (R.get(), ncols*ncols, rootProc);
00124 
00125     // Generate (for myself) a random nrowsLocal x ncols
00126     // orthogonal matrix, stored in explicit form.
00127     Matrix< ordinal_type, scalar_type > Q_local (nrowsLocal, ncols);
00128     matGen.explicit_Q (nrowsLocal, ncols, Q_local.get(), Q_local.lda());
00129 
00130     // Scale the (local) orthogonal matrix by the number of
00131     // processors P, to make the columns of the global matrix Q
00132     // orthogonal.  (Otherwise the norm of each column will be P
00133     // instead of 1.)
00134     const scalar_type P = static_cast< scalar_type > (nprocs);
00135     // Do overflow check.  If casting P back to scalar_type
00136     // doesn't produce the same value as nprocs, the cast
00137     // overflowed.
00138     if (static_cast< int > (P) != nprocs)
00139       throw std::runtime_error ("Casting nprocs to Scalar failed");
00140 
00141     scaleMatrix (Q_local, P);
00142 
00143     // A_local := Q_local * R
00144     blas.GEMM ("N", "N", nrowsLocal, ncols, ncols,
00145          scalar_type(1), Q_local.get(), Q_local.lda(), 
00146          R.get(), R.lda(), 
00147          scalar_type(0), A_local.get(), A_local.lda());
00148 
00149     for (int recvProc = 1; recvProc < nprocs; ++recvProc)
00150       {
00151         // Ask the receiving processor how big (i.e., how many rows)
00152         // its local component of the matrix is.
00153         ordinal_type nrowsRemote = 0;
00154         ordinalMessenger->recv (&nrowsRemote, 1, recvProc, 0);
00155 
00156         if (b_local_debug)
00157     {
00158       std::ostringstream os;
00159       os << "For Proc " << recvProc << ": local block is " 
00160          << nrowsRemote << " by " << ncols << std::endl;
00161       std::cerr << os.str();
00162     }
00163 
00164         // Make sure Q_local is big enough to hold the data for
00165         // the current receiver proc.
00166         Q_local.reshape (nrowsRemote, ncols);
00167 
00168         // Compute a random nrowsRemote * ncols orthogonal
00169         // matrix Q_local, for the current receiving processor.
00170         matGen.explicit_Q (nrowsRemote, ncols, Q_local.get(), Q_local.lda());
00171 
00172         // Send Q_local to the current receiving processor.
00173         scalarMessenger->send (Q_local.get(), nrowsRemote*ncols, recvProc, 0);
00174       }
00175   }
00176       else
00177   {
00178     // Receive the R factor from Proc 0.  There's only 1 R
00179     // factor for all the processes.
00180     Matrix< ordinal_type, scalar_type > R (ncols, ncols, scalar_type (0));
00181     scalarMessenger->broadcast (R.get(), ncols*ncols, rootProc);
00182 
00183     // Q_local (nrows_local by ncols, random orthogonal matrix)
00184     // will be received from Proc 0, where it was generated.
00185     const ordinal_type recvSize = nrowsLocal * ncols;
00186     Matrix< ordinal_type, scalar_type > Q_local (nrowsLocal, ncols);
00187 
00188     // Tell Proc 0 how many rows there are in the random orthogonal
00189     // matrix I want to receive from Proc 0.
00190     ordinalMessenger->send (&nrowsLocal, 1, rootProc, 0);
00191 
00192     // Receive the orthogonal matrix from Proc 0.
00193     scalarMessenger->recv (Q_local.get(), recvSize, rootProc, 0);
00194 
00195     // Scale the (local) orthogonal matrix by the number of
00196     // processors, to make the global matrix Q orthogonal.
00197     const scalar_type P = static_cast< scalar_type > (nprocs);
00198     // Do overflow check.  If casting P back to scalar_type
00199     // doesn't produce the same value as nprocs, the cast
00200     // overflowed.
00201     if (static_cast< int > (P) != nprocs)
00202       throw std::runtime_error ("Casting nprocs to Scalar failed");
00203     scaleMatrix (Q_local, P);
00204 
00205     // A_local := Q_local * R
00206     blas.GEMM ("N", "N", nrowsLocal, ncols, ncols, 
00207          scalar_type(1), Q_local.get(), Q_local.lda(),
00208          R.get(), R.lda(),
00209          scalar_type(0), A_local.get(), A_local.lda());
00210   }
00211     }
00212   } // namespace Random
00213 } // namespace TSQR
00214 
00215 #endif // __Tsqr_Random_GlobalMatrix_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends