Tsqr_Random_GlobalMatrix.hpp
//@HEADER
// ************************************************************************
//
//          Kokkos: Node API and Parallel Node Kernels
//              Copyright (2009) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation; either version 2.1 of the
// License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
// USA
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ************************************************************************
//@HEADER

#ifndef __Tsqr_Random_GlobalMatrix_hpp
#define __Tsqr_Random_GlobalMatrix_hpp

#include "Tsqr_Blas.hpp"
#include "Tsqr_Matrix.hpp"
#include "Tsqr_Random_MatrixGenerator.hpp"
#include "Teuchos_ScalarTraits.hpp"
#include "Tsqr_RMessenger.hpp"

#include <algorithm>
#include <functional>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <vector>

namespace TSQR {
  namespace Random {

    template<class MatrixViewType>
    static void
    scaleMatrix (MatrixViewType& A,
                 const typename MatrixViewType::scalar_type& denom)
    {
      typedef typename MatrixViewType::ordinal_type ordinal_type;
      typedef typename MatrixViewType::scalar_type scalar_type;

      const ordinal_type nrows = A.nrows();
      const ordinal_type ncols = A.ncols();
      const ordinal_type lda = A.lda();

      if (nrows == lda)
        { // The whole A matrix is stored contiguously.
          const ordinal_type nelts = nrows * ncols;
          scalar_type* const A_ptr = A.get();
          std::transform (A_ptr, A_ptr + nelts, A_ptr,
                          std::bind2nd (std::divides<scalar_type>(), denom));
        }
      else
        { // Each column of A is stored contiguously.
          for (ordinal_type j = 0; j < ncols; ++j)
            {
              scalar_type* const A_j = &A(0,j);
              std::transform (A_j, A_j + nrows, A_j,
                              std::bind2nd (std::divides<scalar_type>(), denom));
            }
        }
    }
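
    // Note: std::bind2nd (used above, from <functional>) was deprecated in
    // C++11 and removed in C++17.  A lambda expresses the same element-wise
    // division; the following is a sketch, not part of the original code:
    //
    //   std::transform (A_ptr, A_ptr + nelts, A_ptr,
    //                   [&denom] (const scalar_type& x) { return x / denom; });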

    template<class MatrixViewType, class Generator>
    void
    randomGlobalMatrix (Generator* const pGenerator,
                        MatrixViewType& A_local,
                        const typename Teuchos::ScalarTraits<typename MatrixViewType::scalar_type>::magnitudeType singular_values[],
                        MessengerBase<typename MatrixViewType::ordinal_type>* const ordinalMessenger,
                        MessengerBase<typename MatrixViewType::scalar_type>* const scalarMessenger)
    {
      typedef typename MatrixViewType::ordinal_type ordinal_type;
      typedef typename MatrixViewType::scalar_type scalar_type;
      typedef typename Teuchos::ScalarTraits<scalar_type>::magnitudeType magnitude_type;
      using std::vector;

      const bool b_local_debug = false;

      const int rootProc = 0;
      const int nprocs = ordinalMessenger->size();
      const int myRank = ordinalMessenger->rank();
      BLAS<ordinal_type, scalar_type> blas;

      const ordinal_type nrowsLocal = A_local.nrows();
      const ordinal_type ncols = A_local.ncols();

      // Theory: Suppose there are P processors.  Proc q wants an m_q by n
      // component of the matrix A, which we write as A_q.  On Proc 0, we
      // generate random m_q by n orthogonal matrices Q_q (in explicit
      // form), and send Q_q to Proc q.  The m by n matrix [Q_0; Q_1; ...;
      // Q_{P-1}] is not itself orthogonal.  However, the m by n matrix
      // Q = [Q_0 / P; Q_1 / P; ...; Q_{P-1} / P] is orthogonal:
      //
      // \sum_{q = 0}^{P-1} (Q_q^T * Q_q) / P = I.
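      //
      // (To see this, note that each Q_q has orthonormal columns, i.e.,
      // Q_q^T * Q_q = I, so the stacked, unscaled matrix
      // [Q_0; Q_1; ...; Q_{P-1}] satisfies
      // \sum_{q = 0}^{P-1} Q_q^T * Q_q = P * I, which is why each local
      // block is rescaled by the processor count below.)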

      if (myRank == rootProc)
        {
          typedef Random::MatrixGenerator<ordinal_type, scalar_type, Generator> matgen_type;
          matgen_type matGen (*pGenerator);

          // Generate a random ncols by ncols upper triangular matrix
          // R with the given singular values.
          Matrix<ordinal_type, scalar_type> R (ncols, ncols, scalar_type(0));
          matGen.fill_random_R (ncols, R.get(), R.lda(), singular_values);

          // Broadcast R to all the processors.
          scalarMessenger->broadcast (R.get(), ncols*ncols, rootProc);

          // Generate (for myself) a random nrowsLocal x ncols
          // orthogonal matrix, stored in explicit form.
          Matrix<ordinal_type, scalar_type> Q_local (nrowsLocal, ncols);
          matGen.explicit_Q (nrowsLocal, ncols, Q_local.get(), Q_local.lda());

          // Scale the (local) orthogonal matrix by the number of
          // processors P, to make the columns of the global matrix Q
          // orthogonal.  (Otherwise the norm of each column will be P
          // instead of 1.)
          const scalar_type P = static_cast<scalar_type> (nprocs);
          // Do overflow check.  If casting P back to scalar_type
          // doesn't produce the same value as nprocs, the cast
          // overflowed.  We take the real part, because scalar_type
          // might be complex.
          if (nprocs != static_cast<int> (Teuchos::ScalarTraits<scalar_type>::real (P)))
            throw std::runtime_error ("Casting nprocs to Scalar failed");

          scaleMatrix (Q_local, P);

          // A_local := Q_local * R
          blas.GEMM ("N", "N", nrowsLocal, ncols, ncols,
                     scalar_type(1), Q_local.get(), Q_local.lda(),
                     R.get(), R.lda(),
                     scalar_type(0), A_local.get(), A_local.lda());

          for (int recvProc = 1; recvProc < nprocs; ++recvProc)
            {
              // Ask the receiving processor how big (i.e., how many rows)
              // its local component of the matrix is.
              ordinal_type nrowsRemote = 0;
              ordinalMessenger->recv (&nrowsRemote, 1, recvProc, 0);

              if (b_local_debug)
                {
                  std::ostringstream os;
                  os << "For Proc " << recvProc << ": local block is "
                     << nrowsRemote << " by " << ncols << std::endl;
                  std::cerr << os.str();
                }

              // Make sure Q_local is big enough to hold the data for
              // the current receiver proc.
              Q_local.reshape (nrowsRemote, ncols);

              // Compute a random nrowsRemote x ncols orthogonal
              // matrix Q_local, for the current receiving processor.
              matGen.explicit_Q (nrowsRemote, ncols, Q_local.get(), Q_local.lda());

              // Send Q_local to the current receiving processor.
              scalarMessenger->send (Q_local.get(), nrowsRemote*ncols, recvProc, 0);
            }
        }
      else
        {
          // Receive the R factor from Proc 0.  There's only 1 R
          // factor for all the processes.
          Matrix<ordinal_type, scalar_type> R (ncols, ncols, scalar_type(0));
          scalarMessenger->broadcast (R.get(), ncols*ncols, rootProc);

          // Q_local (nrowsLocal by ncols, random orthogonal matrix)
          // will be received from Proc 0, where it was generated.
          const ordinal_type recvSize = nrowsLocal * ncols;
          Matrix<ordinal_type, scalar_type> Q_local (nrowsLocal, ncols);

          // Tell Proc 0 how many rows there are in the random orthogonal
          // matrix I want to receive from Proc 0.
          ordinalMessenger->send (&nrowsLocal, 1, rootProc, 0);

          // Receive the orthogonal matrix from Proc 0.
          scalarMessenger->recv (Q_local.get(), recvSize, rootProc, 0);

          // Scale the (local) orthogonal matrix by the number of
          // processors, to make the global matrix Q orthogonal.
          const scalar_type P = static_cast<scalar_type> (nprocs);
          // Do overflow check.  If casting P back to scalar_type
          // doesn't produce the same value as nprocs, the cast
          // overflowed.  We take the real part, because scalar_type
          // might be complex.
          if (nprocs != static_cast<int> (Teuchos::ScalarTraits<scalar_type>::real (P)))
            throw std::runtime_error ("Casting nprocs to Scalar failed");
          scaleMatrix (Q_local, P);

          // A_local := Q_local * R
          blas.GEMM ("N", "N", nrowsLocal, ncols, ncols,
                     scalar_type(1), Q_local.get(), Q_local.lda(),
                     R.get(), R.lda(),
                     scalar_type(0), A_local.get(), A_local.lda());
        }
    }
  } // namespace Random
} // namespace TSQR

#endif // __Tsqr_Random_GlobalMatrix_hpp
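
For reference, here is a minimal single-process usage sketch. It is not part of the original header: it assumes the NormalGenerator pseudorandom source and the TrivialMessenger (a single-process MessengerBase implementation) from the surrounding TSQR test utilities, so the header names and constructor signatures below should be checked against those files before reuse.

#include "Tsqr_Random_GlobalMatrix.hpp"
#include "Tsqr_Random_NormalGenerator.hpp" // assumed header for NormalGenerator
#include "Tsqr_TrivialMessenger.hpp"       // assumed header for TrivialMessenger

#include <vector>

int main ()
{
  typedef TSQR::Random::NormalGenerator<int, double> generator_type;

  const int nrows = 100, ncols = 5;

  // Prescribed singular values for the generated test matrix.
  std::vector<double> sigma (ncols);
  for (int j = 0; j < ncols; ++j)
    sigma[j] = 1.0 / double (j + 1);

  // Single-process "messengers" standing in for interprocess communication.
  TSQR::TrivialMessenger<int> ordinalComm;
  TSQR::TrivialMessenger<double> scalarComm;

  generator_type gen;                   // pseudorandom normal(0,1) source
  TSQR::Matrix<int, double> A (nrows, ncols); // Matrix satisfies the MatrixViewType interface

  TSQR::Random::randomGlobalMatrix (&gen, A, &sigma[0],
                                    &ordinalComm, &scalarComm);
  return 0;
}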