//@HEADER
// ************************************************************************
//
//          Kokkos: Node API and Parallel Node Kernels
//              Copyright (2008) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ************************************************************************
//@HEADER

#ifndef __Tsqr_Random_GlobalMatrix_hpp
#define __Tsqr_Random_GlobalMatrix_hpp

#include "Tsqr_Blas.hpp"
#include "Tsqr_Matrix.hpp"
#include "Tsqr_Random_MatrixGenerator.hpp"
#include "Teuchos_ScalarTraits.hpp"
#include "Tsqr_RMessenger.hpp"

#include <algorithm>
#include <functional>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <vector>

namespace TSQR {
  namespace Random {

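    /// \brief Divide every entry of the matrix view A by denom, in place.
    ///
    /// This handles both storage cases: if A is stored contiguously
    /// (A.nrows() == A.lda()), all entries are scaled in a single pass;
    /// otherwise each column, which is always contiguous, is scaled
    /// separately.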
    template<class MatrixViewType>
    static void
    scaleMatrix (MatrixViewType& A,
                 const typename MatrixViewType::scalar_type& denom)
    {
      typedef typename MatrixViewType::ordinal_type ordinal_type;
      typedef typename MatrixViewType::scalar_type scalar_type;

      const ordinal_type nrows = A.nrows();
      const ordinal_type ncols = A.ncols();
      const ordinal_type lda = A.lda();

      // Divide an entry by denom.  (This lambda replaces the original
      // std::bind2nd(std::divides<scalar_type>(), denom), which was
      // deprecated in C++11 and removed in C++17.)
      auto scaleByDenom = [&denom] (const scalar_type& x) { return x / denom; };

      if (nrows == lda)
        { // The whole A matrix is stored contiguously.
          const ordinal_type nelts = nrows * ncols;
          scalar_type* const A_ptr = A.get();
          std::transform (A_ptr, A_ptr + nelts, A_ptr, scaleByDenom);
        }
      else
        { // Each column of A is stored contiguously.
          for (ordinal_type j = 0; j < ncols; ++j)
            {
              scalar_type* const A_j = &A(0,j);
              std::transform (A_j, A_j + nrows, A_j, scaleByDenom);
            }
        }
    }

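    /// \brief Fill A_local with this processor's rows of a random global
    ///   test matrix that has the given singular values.
    ///
    /// The global matrix is distributed by blocks of rows: Proc q owns
    /// an m_q by n block A_q, and the global matrix is
    /// A = [A_0; A_1; ...; A_{P-1}].  Proc 0 generates all the random
    /// data and sends each processor its block.
    ///
    /// \param pGenerator [in/out] Pseudorandom number generator; only
    ///   used on Proc 0.
    /// \param A_local [out] This processor's block of rows.
    /// \param singular_values [in] The desired singular values of the
    ///   global matrix; an array of A_local.ncols() entries.
    /// \param ordinalMessenger [in/out] Communicator wrapper for
    ///   ordinal_type data.
    /// \param scalarMessenger [in/out] Communicator wrapper for
    ///   scalar_type data.
    ///
    /// Below is a minimal calling sketch.  The generator and messenger
    /// types (my_generator_type, my_messenger_type) are placeholders
    /// for whatever concrete Generator and MessengerBase
    /// implementations your application provides; they are not defined
    /// in this header.
    /// \code
    /// my_generator_type gen;                // models the Generator concept
    /// my_messenger_type<int> ordinalComm;   // derives from MessengerBase<int>
    /// my_messenger_type<double> scalarComm; // derives from MessengerBase<double>
    ///
    /// const int nrowsLocal = 100, ncols = 5;
    /// TSQR::Matrix<int, double> A_local (nrowsLocal, ncols);
    /// std::vector<double> sigma (ncols);
    /// for (int j = 0; j < ncols; ++j)
    ///   sigma[j] = double (ncols - j);      // singular values 5, 4, ..., 1
    ///
    /// TSQR::Random::randomGlobalMatrix (&gen, A_local, &sigma[0],
    ///                                   &ordinalComm, &scalarComm);
    /// \endcode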
    template<class MatrixViewType, class Generator>
    void
    randomGlobalMatrix (Generator* const pGenerator,
                        MatrixViewType& A_local,
                        const typename Teuchos::ScalarTraits<typename MatrixViewType::scalar_type>::magnitudeType singular_values[],
                        MessengerBase<typename MatrixViewType::ordinal_type>* const ordinalMessenger,
                        MessengerBase<typename MatrixViewType::scalar_type>* const scalarMessenger)
    {
      typedef typename MatrixViewType::ordinal_type ordinal_type;
      typedef typename MatrixViewType::scalar_type scalar_type;
      typedef typename Teuchos::ScalarTraits<scalar_type>::magnitudeType magnitude_type;
      using std::vector;

      const bool b_local_debug = false;

      const int rootProc = 0;
      const int nprocs = ordinalMessenger->size();
      const int myRank = ordinalMessenger->rank();
      BLAS<ordinal_type, scalar_type> blas;

      const ordinal_type nrowsLocal = A_local.nrows();
      const ordinal_type ncols = A_local.ncols();

      // Theory: Suppose there are P processors.  Proc q wants an m_q by
      // n component of the matrix A, which we write as A_q.  On Proc 0,
      // we generate random m_q by n matrices Q_q with orthonormal
      // columns (in explicit form), and send Q_q to Proc q.  The m by n
      // matrix [Q_0; Q_1; ...; Q_{P-1}] does not itself have
      // orthonormal columns, since
      //
      //   \sum_{q = 0}^{P-1} Q_q^T * Q_q = P * I.
      //
      // However, the m by n matrix
      //
      //   Q = [Q_0 / \sqrt{P}; Q_1 / \sqrt{P}; ...; Q_{P-1} / \sqrt{P}]
      //
      // does have orthonormal columns, since
      //
      //   \sum_{q = 0}^{P-1} (Q_q^T * Q_q) / P = I.
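      //
      // For example, with P = 4 processors, stacking the unscaled
      // blocks gives Q^T * Q = 4 * I, so each column of
      // [Q_0; Q_1; Q_2; Q_3] has norm sqrt(4) = 2.  Dividing each block
      // by sqrt(4) = 2 makes the columns unit-norm again.
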
      if (myRank == rootProc)
        {
          typedef Random::MatrixGenerator<ordinal_type, scalar_type, Generator> matgen_type;
          matgen_type matGen (*pGenerator);

          // Generate a random ncols by ncols upper triangular matrix
          // R with the given singular values.
          Matrix<ordinal_type, scalar_type> R (ncols, ncols, scalar_type(0));
          matGen.fill_random_R (ncols, R.get(), R.lda(), singular_values);

          // Broadcast R to all the processors.
          scalarMessenger->broadcast (R.get(), ncols*ncols, rootProc);

          // Generate (for myself) a random nrowsLocal x ncols
          // orthogonal matrix, stored in explicit form.
          Matrix<ordinal_type, scalar_type> Q_local (nrowsLocal, ncols);
          matGen.explicit_Q (nrowsLocal, ncols, Q_local.get(), Q_local.lda());

          // Scale the (local) orthogonal matrix by 1/sqrt(P), where P
          // is the number of processors, to make the columns of the
          // global matrix Q orthonormal.  (Otherwise each column of Q
          // would have norm sqrt(P) instead of 1; see the derivation
          // above.)
          const scalar_type P = static_cast<scalar_type> (nprocs);
          // Overflow check: if casting P back to int doesn't produce
          // nprocs again, the cast from int to scalar_type overflowed.
          // We take the real part, because scalar_type might be
          // complex.
          if (nprocs != static_cast<int> (Teuchos::ScalarTraits<scalar_type>::real (P)))
            throw std::runtime_error ("Casting nprocs to Scalar failed");

          // scaleMatrix divides by its second argument, so pass
          // sqrt(P) to scale by 1/sqrt(P).
          scaleMatrix (Q_local, Teuchos::ScalarTraits<scalar_type>::squareroot (P));

          // A_local := Q_local * R
          blas.GEMM ("N", "N", nrowsLocal, ncols, ncols,
                     scalar_type(1), Q_local.get(), Q_local.lda(),
                     R.get(), R.lda(),
                     scalar_type(0), A_local.get(), A_local.lda());

          for (int recvProc = 1; recvProc < nprocs; ++recvProc)
            {
              // Ask the receiving processor how big (i.e., how many
              // rows) its local component of the matrix is.
              ordinal_type nrowsRemote = 0;
              ordinalMessenger->recv (&nrowsRemote, 1, recvProc, 0);

              if (b_local_debug)
                {
                  std::ostringstream os;
                  os << "For Proc " << recvProc << ": local block is "
                     << nrowsRemote << " by " << ncols << std::endl;
                  std::cerr << os.str();
                }

              // Make sure Q_local is big enough to hold the data for
              // the current receiver proc.
              Q_local.reshape (nrowsRemote, ncols);

              // Compute a random nrowsRemote by ncols orthogonal
              // matrix Q_local, for the current receiving processor.
              matGen.explicit_Q (nrowsRemote, ncols, Q_local.get(), Q_local.lda());

              // Send Q_local to the current receiving processor.
              scalarMessenger->send (Q_local.get(), nrowsRemote*ncols, recvProc, 0);
            }
        }
      else
        {
          // Receive the R factor from Proc 0.  There's only one R
          // factor for all the processes.
          Matrix<ordinal_type, scalar_type> R (ncols, ncols, scalar_type(0));
          scalarMessenger->broadcast (R.get(), ncols*ncols, rootProc);

          // Q_local (nrowsLocal by ncols, a random orthogonal matrix)
          // will be received from Proc 0, where it was generated.
          const ordinal_type recvSize = nrowsLocal * ncols;
          Matrix<ordinal_type, scalar_type> Q_local (nrowsLocal, ncols);

          // Tell Proc 0 how many rows my block of the random
          // orthogonal matrix has.
          ordinalMessenger->send (&nrowsLocal, 1, rootProc, 0);

          // Receive the orthogonal matrix from Proc 0.
          scalarMessenger->recv (Q_local.get(), recvSize, rootProc, 0);

          // Scale the (local) orthogonal matrix by 1/sqrt(P), where P
          // is the number of processors, to make the columns of the
          // global matrix Q orthonormal.
          const scalar_type P = static_cast<scalar_type> (nprocs);
          // Overflow check: if casting P back to int doesn't produce
          // nprocs again, the cast from int to scalar_type overflowed.
          // We take the real part, because scalar_type might be
          // complex.
          if (nprocs != static_cast<int> (Teuchos::ScalarTraits<scalar_type>::real (P)))
            throw std::runtime_error ("Casting nprocs to Scalar failed");
          // scaleMatrix divides by its second argument, so pass
          // sqrt(P) to scale by 1/sqrt(P).
          scaleMatrix (Q_local, Teuchos::ScalarTraits<scalar_type>::squareroot (P));

          // A_local := Q_local * R
          blas.GEMM ("N", "N", nrowsLocal, ncols, ncols,
                     scalar_type(1), Q_local.get(), Q_local.lda(),
                     R.get(), R.lda(),
                     scalar_type(0), A_local.get(), A_local.lda());
        }
    }
  } // namespace Random
} // namespace TSQR

#endif // __Tsqr_Random_GlobalMatrix_hpp