Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Tsqr_GlobalVerify.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2009) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef __TSQR_Tsqr_GlobalVerify_hpp
00030 #define __TSQR_Tsqr_GlobalVerify_hpp
00031 
00032 #include <Tsqr_LocalVerify.hpp>
00033 #include <Tsqr_MessengerBase.hpp>
00034 #include <Tsqr_Blas.hpp>
00035 #include <Tsqr_Util.hpp>
00036 
00037 #include <Teuchos_ScalarTraits.hpp>
00038 
00039 #include <utility> // std::pair
00040 #include <vector>
00041 
00042 
00043 namespace TSQR {
00044 
00057   template<class Scalar, bool isComplex = Teuchos::ScalarTraits< Scalar >::isComplex>
00058   class GlobalSummer {
00059   public:
00060     typedef Scalar scalar_type;
00061     typedef Teuchos::ScalarTraits< Scalar > STS;
00062     typedef typename STS::magnitudeType magnitude_type;
00063 
00064     static magnitude_type
00065     sum (const Scalar& localSum,
00066    MessengerBase<Scalar>* const messenger);
00067   };
00068 
00069   // Complex-arithmetic "forward declaration"
00070   template<class Scalar>
00071   class GlobalSummer<Scalar, true> {
00072   public:
00073     typedef Scalar scalar_type;
00074     typedef Teuchos::ScalarTraits<Scalar> STS;
00075     typedef typename STS::magnitudeType magnitude_type;
00076 
00077     static magnitude_type
00078     sum (const Scalar& localSum,
00079    MessengerBase<Scalar>* const messenger);
00080   };
00081 
00082   // Real-arithmetic "forward declaration"
00083   template<class Scalar>
00084   class GlobalSummer<Scalar, false> {
00085   public:
00086     typedef Scalar scalar_type;
00087     typedef Teuchos::ScalarTraits<Scalar> STS;
00088     typedef typename STS::magnitudeType magnitude_type;
00089 
00090     static magnitude_type
00091     sum (const Scalar& localSum,
00092    MessengerBase<Scalar>* const messenger);
00093   };
00094 
00095   // Complex-arithmetic case
00096   template<class Scalar>
00097   typename GlobalSummer<Scalar, true>::magnitude_type
00098   GlobalSummer<Scalar, true>::sum (const Scalar& localSum,
00099            MessengerBase<Scalar>* const messenger)
00100   {
00101     // In order to use a MessengerBase<Scalar> on magnitude_type
00102     // values, we have to convert local_result to a Scalar, and then
00103     // convert back the result.  We convert by setting the real
00104     // component of the Scalar to the magnitude_type.  This isn't
00105     // guaranteed to work if magnitude_type has a greater dynamic
00106     // range than Scalar.  That's possible, but that's not how we do
00107     // things with ScalarTraits< std::complex< T > >, and that's not
00108     // how LAPACK does it either, so it's fair to assume that
00109     // magnitude_type and the individual components of Scalar have the
00110     // same dynamic range.
00111     const magnitude_type localSumAbs = STS::magnitude (localSum);
00112     const Scalar localSumAsScalar (localSumAbs, magnitude_type(0));
00113     const Scalar globalSumAsScalar = messenger->globalSum (localSumAsScalar);
00114     const magnitude_type globalSum = STS::magnitude (globalSumAsScalar);
00115     return globalSum;
00116   }
00117 
00118   // Real-arithmetic case
00119   template<class Scalar>
00120   typename GlobalSummer<Scalar, false>::magnitude_type
00121   GlobalSummer<Scalar, false>::sum (const Scalar& localSum,
00122             MessengerBase<Scalar>* const messenger)
00123   {
00124     const Scalar localSumAsScalar (localSum);
00125     const Scalar globalSumAsScalar = messenger->globalSum (localSumAsScalar);
00126     const magnitude_type globalSum = STS::magnitude (globalSumAsScalar);
00127     return globalSum;
00128   }
00129 
00130   template<class LocalOrdinal, class Scalar>
00131   typename Teuchos::ScalarTraits<Scalar>::magnitudeType
00132   global_frobenius_norm (const LocalOrdinal nrows_local, 
00133        const LocalOrdinal ncols,
00134        const Scalar A_local[],
00135        const LocalOrdinal lda_local,
00136        MessengerBase<Scalar>* const messenger)
00137   {
00138     typedef Teuchos::ScalarTraits<Scalar> STS;
00139     typedef typename STS::magnitudeType magnitude_type;
00140 
00141     // FIXME (mfh 20 Apr 2010) This is currently implemented using an
00142     // all-reduction.  This may result in different processors getting
00143     // slightly different answers, due to floating-point arithmetic
00144     // roundoff.  We might not want this if we are using this function
00145     // to test a routine.
00146 
00147     magnitude_type localResult (0);
00148     for (LocalOrdinal j = 0; j < ncols; j++)
00149       {
00150   const Scalar* const cur_col = &A_local[j*lda_local];
00151   for (LocalOrdinal i = 0; i < nrows_local; ++i)
00152     {
00153       const magnitude_type abs_xi = STS::magnitude (cur_col[i]);
00154       localResult = localResult + abs_xi * abs_xi;
00155     }
00156       }
00157     // GlobalSummmer() is a hack to let us use a Scalar - type
00158     // MessengerBase with magnitude_type inputs and outputs.
00159     // Otherwise we would need to carry around a
00160     // MessengerBase<magnitude_type> object as well.
00161     const magnitude_type globalResult = 
00162       GlobalSummer<Scalar, STS::isComplex>::sum (localResult, messenger);
00163     return sqrt (globalResult);
00164   }
00165 
00166   template<class LocalOrdinal, class Scalar>
00167   std::vector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>
00168   global_verify (const LocalOrdinal nrows_local, 
00169      const LocalOrdinal ncols, 
00170      const Scalar A_local[],
00171      const LocalOrdinal lda_local,
00172      const Scalar Q_local[],
00173      const LocalOrdinal ldq_local,
00174      const Scalar R[],
00175      const LocalOrdinal ldr,
00176      MessengerBase<Scalar>* const messenger)
00177   {
00178     typedef Teuchos::ScalarTraits<Scalar> STS;
00179     typedef typename STS::magnitudeType magnitude_type;
00180     using std::make_pair;
00181     using std::pair;
00182     using std::vector;
00183 
00184     const magnitude_type ZERO (0);
00185     const magnitude_type ONE (1);
00186     BLAS<LocalOrdinal, Scalar> blas;
00187 
00188     //
00189     // Compute $\| I - Q^T * Q \|_F$
00190     //
00191 
00192     // Compute Q_local^T * Q_local (this node's component of Q^T*Q)
00193     vector<Scalar> Temp (ncols*ncols, STS::nan());
00194     const LocalOrdinal ld_temp = ncols;
00195 
00196     if (STS::isComplex)
00197       blas.GEMM ("C", "N", ncols, ncols, nrows_local, 
00198      ONE, Q_local, ldq_local, Q_local, ldq_local, 
00199      ZERO, &Temp[0], ld_temp);
00200     else
00201       blas.GEMM ("T", "N", ncols, ncols, nrows_local, 
00202      ONE, Q_local, ldq_local, Q_local, ldq_local, 
00203      ZERO, &Temp[0], ld_temp);
00204   
00205     // Reduce over all the processors to get the global Q^T*Q in Temp2.
00206     vector<Scalar> Temp2 (ncols*ncols, STS::nan());
00207     messenger->globalVectorSum (&Temp[0], &Temp2[0], ncols*ncols);
00208 
00209     // Compute I-(Q^T*Q) redundantly on all processors
00210     for (LocalOrdinal j = 0; j < ncols; j++)
00211       Temp2[j + j*ld_temp] = ONE - Temp2[j + j*ld_temp];
00212   
00213     // Compute the Frobenius norm of I - Q^T*Q, redundantly on all processors.
00214     const magnitude_type Orthog_F = 
00215       local_frobenius_norm (ncols, ncols, &Temp2[0], ld_temp);
00216 
00217     // Compute the Frobenius norm of A.  
00218     const magnitude_type A_F = 
00219       global_frobenius_norm (nrows_local, ncols, &A_local[0], lda_local, messenger);
00220 
00221     //
00222     // Compute $\| A - Q*R \|_F$
00223     //
00224 
00225     vector<Scalar> Resid (nrows_local * ncols, STS::nan());
00226     const LocalOrdinal ld_resid = nrows_local;
00227 
00228     // Resid := A (deep copy)
00229     copy_matrix (nrows_local, ncols, &Resid[0], ld_resid, A_local, lda_local);
00230 
00231     // Resid := Resid - Q*R
00232     blas.GEMM ("N", "N", nrows_local, ncols, ncols, 
00233          -ONE, Q_local, ldq_local, R, ldr, 
00234          ONE, &Resid[0], ld_resid);
00235 
00236     const magnitude_type Resid_F = 
00237       global_frobenius_norm (nrows_local, ncols, &Resid[0], ld_resid, messenger);
00238 
00239     vector<magnitude_type> results (3);
00240     results[0] = Resid_F;
00241     results[1] = Orthog_F;
00242     results[2] = A_F;
00243     return results;
00244   }
00245 
00246 } // namespace TSQR
00247 
00248 #endif // __TSQR_Tsqr_GlobalVerify_hpp
00249 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends