Anasazi Version of the Day
Tsqr_GlobalVerify.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2010) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #ifndef __TSQR_Tsqr_GlobalVerify_hpp
00030 #define __TSQR_Tsqr_GlobalVerify_hpp
00031 
00032 #include <Tsqr_LocalVerify.hpp>
00033 #include <Tsqr_MessengerBase.hpp>
00034 #include <Tsqr_ScalarTraits.hpp>
00035 #include <Tsqr_Blas.hpp>
00036 #include <Tsqr_Util.hpp>
00037 
00038 #include <utility> // std::pair
00039 #include <vector>
00040 
00043 
00044 namespace TSQR {
00045 
00046   // Unfortunately, you need c++0x support to have default template
00047   // arguments of template functions.  Otherwise we would make this a
00048   // template function and set the default value of isComplex to
00049   // ScalarTraits< Scalar >::is_complex.  Also, C++ doesn't like
00050   // partial specialization of template functions, for no good reason.
00051   // So we have to make this a class.
00052   template< class Scalar, bool isComplex = ScalarTraits< Scalar >::is_complex >
00053   class GlobalSummer {
00054   public:
00055     typedef Scalar scalar_type;
00056     typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type;
00057 
00058     static magnitude_type
00059     sum (const Scalar& localSum,
00060    MessengerBase< Scalar >* const messenger);
00061   };
00062 
00063   // Complex-arithmetic "forward declaration"
00064   template< class Scalar >
00065   class GlobalSummer< Scalar, true > {
00066   public:
00067     typedef Scalar scalar_type;
00068     typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type;
00069 
00070     static magnitude_type
00071     sum (const Scalar& localSum,
00072    MessengerBase< Scalar >* const messenger);
00073   };
00074 
00075   // Real-arithmetic "forward declaration"
00076   template< class Scalar >
00077   class GlobalSummer< Scalar, false > {
00078   public:
00079     typedef Scalar scalar_type;
00080     typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type;
00081 
00082     static magnitude_type
00083     sum (const Scalar& localSum,
00084    MessengerBase< Scalar >* const messenger);
00085   };
00086 
00087   // Complex-arithmetic case
00088   template< class Scalar >
00089   typename ScalarTraits< Scalar >::magnitude_type
00090   GlobalSummer< Scalar, true >::sum (const Scalar& localSum,
00091              MessengerBase< Scalar >* const messenger)
00092   {
00093     // In order to use a MessengerBase<Scalar> on magnitude_type
00094     // values, we have to convert local_result to a Scalar, and then
00095     // convert back the result.  We convert by setting the real
00096     // component of the Scalar to the magnitude_type.  This isn't
00097     // guaranteed to work if magnitude_type has a greater dynamic
00098     // range than Scalar.  That's possible, but that's not how we do
00099     // things with ScalarTraits< std::complex< T > >, and that's not
00100     // how LAPACK does it either, so it's fair to assume that
00101     // magnitude_type and the individual components of Scalar have the
00102     // same dynamic range.
00103     const magnitude_type localSumAbs = ScalarTraits< Scalar >::abs (localSum);
00104     const Scalar localSumAsScalar (localSumAbs, magnitude_type(0));
00105     const Scalar globalSumAsScalar = messenger->globalSum (localSumAsScalar);
00106     const magnitude_type globalSum = ScalarTraits< Scalar >::abs (globalSumAsScalar);
00107     return globalSum;
00108   }
00109 
00110   // Real-arithmetic case
00111   template< class Scalar >
00112   typename ScalarTraits< Scalar >::magnitude_type
00113   GlobalSummer< Scalar, false >::sum (const Scalar& localSum,
00114               MessengerBase< Scalar >* const messenger)
00115   {
00116     const Scalar localSumAsScalar (localSum);
00117     const Scalar globalSumAsScalar = messenger->globalSum (localSumAsScalar);
00118     const magnitude_type globalSum = ScalarTraits< Scalar >::abs (globalSumAsScalar);
00119     return globalSum;
00120   }
00121 
00122   template< class LocalOrdinal, class Scalar >
00123   typename ScalarTraits< Scalar >::magnitude_type
00124   global_frobenius_norm (const LocalOrdinal nrows_local, 
00125        const LocalOrdinal ncols,
00126        const Scalar A_local[],
00127        const LocalOrdinal lda_local,
00128        MessengerBase< Scalar >* const messenger)
00129   {
00130     typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type;
00131 
00132     // FIXME (mfh 20 Apr 2010) This is currently implemented using an
00133     // all-reduction.  This may result in different processors getting
00134     // slightly different answers, due to floating-point arithmetic
00135     // roundoff.  We might not want this if we are using this function
00136     // to test a routine.
00137 
00138     magnitude_type local_result (0);
00139     for (LocalOrdinal j = 0; j < ncols; j++)
00140       {
00141   const Scalar* const cur_col = &A_local[j*lda_local];
00142   for (LocalOrdinal i = 0; i < nrows_local; ++i)
00143     {
00144       const magnitude_type abs_xi = ScalarTraits< Scalar >::abs (cur_col[i]);
00145       local_result = local_result + abs_xi * abs_xi;
00146     }
00147       }
00148     // GlobalSummmer() is a hack to let us use a Scalar - type
00149     // MessengerBase with magnitude_type inputs and outputs.
00150     // Otherwise we would need to carry around a MessengerBase<
00151     // magnitude_type > object as well.
00152     const magnitude_type globalResult = 
00153       GlobalSummer< Scalar, ScalarTraits< Scalar >::is_complex >::sum (local_result, messenger);
00154     return sqrt (globalResult);
00155   }
00156 
00157   template< class LocalOrdinal, class Scalar >
00158   std::pair< typename ScalarTraits< Scalar >::magnitude_type, typename ScalarTraits< Scalar >::magnitude_type >
00159   global_verify (const LocalOrdinal nrows_local, 
00160      const LocalOrdinal ncols, 
00161      const Scalar A_local[],
00162      const LocalOrdinal lda_local,
00163      const Scalar Q_local[],
00164      const LocalOrdinal ldq_local,
00165      const Scalar R[],
00166      const LocalOrdinal ldr,
00167      MessengerBase< Scalar >* const messenger)
00168   {
00169     typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type;
00170     using std::make_pair;
00171     using std::pair;
00172     using std::vector;
00173 
00174     const magnitude_type ZERO (0);
00175     const magnitude_type ONE (1);
00176     BLAS< LocalOrdinal, Scalar > blas;
00177 
00178     //
00179     // Compute $\| I - Q^T * Q \|_F$
00180     //
00181 
00182     // Compute Q_local^T * Q_local (this node's component of Q^T*Q)
00183     vector< Scalar > Temp (ncols*ncols, std::numeric_limits< Scalar >::quiet_NaN());
00184     const LocalOrdinal ld_temp = ncols;
00185 
00186     if (ScalarTraits< Scalar >::is_complex)
00187       blas.GEMM ("C", "N", ncols, ncols, nrows_local, 
00188      ONE, Q_local, ldq_local, Q_local, ldq_local, 
00189      ZERO, &Temp[0], ld_temp);
00190     else
00191       blas.GEMM ("T", "N", ncols, ncols, nrows_local, 
00192      ONE, Q_local, ldq_local, Q_local, ldq_local, 
00193      ZERO, &Temp[0], ld_temp);
00194   
00195     // Reduce over all the processors to get the global Q^T*Q in Temp2.
00196     vector< Scalar > Temp2 (ncols*ncols, std::numeric_limits< Scalar >::quiet_NaN());
00197     messenger->globalVectorSum (&Temp[0], &Temp2[0], ncols*ncols);
00198 
00199     // Compute I-(Q^T*Q) redundantly on all processors
00200     for (LocalOrdinal j = 0; j < ncols; j++)
00201       Temp2[j + j*ld_temp] = ONE - Temp2[j + j*ld_temp];
00202   
00203     // Compute the Frobenius norm of I - Q^T*Q, redundantly on all processors.
00204     const magnitude_type Orthog_F = 
00205       local_frobenius_norm (ncols, ncols, &Temp2[0], ld_temp);
00206 
00207     // Compute the Frobenius norm of A.  
00208     const magnitude_type A_F = 
00209       global_frobenius_norm (nrows_local, ncols, &A_local[0], lda_local, messenger);
00210 
00211     //
00212     // Compute $\| A - Q*R \|_F$
00213     //
00214 
00215     vector< Scalar > Resid (nrows_local * ncols, std::numeric_limits< Scalar >::quiet_NaN());
00216     const LocalOrdinal ld_resid = nrows_local;
00217 
00218     // Resid := A (deep copy)
00219     copy_matrix (nrows_local, ncols, &Resid[0], ld_resid, A_local, lda_local);
00220 
00221     // Resid := Resid - Q*R
00222     blas.GEMM ("N", "N", nrows_local, ncols, ncols, 
00223          -ONE, Q_local, ldq_local, R, ldr, 
00224          ONE, &Resid[0], ld_resid);
00225 
00226     const magnitude_type Resid_F = 
00227       global_frobenius_norm (nrows_local, ncols, &Resid[0], ld_resid, messenger);
00228 
00229     return make_pair (Resid_F / A_F, Orthog_F / A_F);
00230   }
00231 
00232 } // namespace TSQR
00233 
00234 #endif // __TSQR_Tsqr_GlobalVerify_hpp
00235 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends