Teuchos - Trilinos Tools Package Version of the Day
Teuchos_MatrixMarket_Raw_Reader.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef __Teuchos_MatrixMarket_Raw_Reader_hpp
00043 #define __Teuchos_MatrixMarket_Raw_Reader_hpp
00044 
00045 #include "Teuchos_MatrixMarket_Raw_Adder.hpp"
00046 #include "Teuchos_MatrixMarket_SymmetrizingAdder.hpp"
00047 #include "Teuchos_MatrixMarket_CoordDataReader.hpp"
00048 
00049 
00050 namespace Teuchos {
00071   namespace MatrixMarket {
00087     namespace Raw {
00100       template<class Scalar, class Ordinal>
00101       class Reader {
00102       public:
00109         Reader (const bool tolerant, const bool debug) :
00110           tolerant_ (tolerant), debug_ (debug)
00111         {
00112           init ();
00113         }
00114 
00116         Reader () :
00117           tolerant_ (false), debug_ (false)
00118         {
00119           init ();
00120         }
00121 
00129         Reader (const RCP<ParameterList>& params) :
00130           tolerant_ (false), debug_ (false)
00131         {
00132           setParameters (params);
00133           init ();
00134         }
00135 
00139         void
00140         setParameters (const RCP<ParameterList>& params)
00141         {
00142           // Default parameter values.
00143           bool tolerant = false;
00144           bool debug = false;
00145 
00146           // Read parameters.
00147           tolerant = params->get ("Parse tolerantly", tolerant);
00148           debug = params->get ("Debug mode", debug);
00149 
00150           // No side effects on the class until ParameterList
00151           // processing is complete.
00152           tolerant_ = tolerant;
00153           debug_ = debug;
00154         }
00155 
00157         RCP<const ParameterList>
00158         getValidParameters () const
00159         {
00160           // Default parameter values.
00161           const bool tolerant = false;
00162           const bool debug = false;
00163 
00164           // Set default parameters with documentation.
00165           RCP<ParameterList> params = parameterList ("Matrix Market Reader");
00166           params->set ("Parse tolerantly", tolerant, "Whether to tolerate "
00167                        "syntax errors when parsing the Matrix Market file");
00168           params->set ("Debug mode", debug, "Whether to print debugging output "
00169                        "to stderr, on all participating MPI processes");
00170 
00171           return rcp_const_cast<const ParameterList> (params);
00172         }
00173 
00199         bool
00200         readFile (ArrayRCP<Ordinal>& rowptr,
00201                   ArrayRCP<Ordinal>& colind,
00202                   ArrayRCP<Scalar>& values,
00203                   Ordinal& numRows,
00204                   Ordinal& numCols,
00205                   const std::string& filename)
00206         {
00207           std::ifstream in (filename.c_str ());
00208           TEUCHOS_TEST_FOR_EXCEPTION(! in, std::runtime_error,
00209             "Failed to open file \"" << filename << "\" for reading.");
00210           return read (rowptr, colind, values, numRows, numCols, in);
00211         }
00212 
00239         bool
00240         read (ArrayRCP<Ordinal>& rowptr,
00241               ArrayRCP<Ordinal>& colind,
00242               ArrayRCP<Scalar>& values,
00243               Ordinal& numRows,
00244               Ordinal& numCols,
00245               std::istream& in)
00246         {
00247           using std::cerr;
00248           using std::cout;
00249           using std::endl;
00250           typedef ScalarTraits<Scalar> STS;
00251 
00252           // This "Adder" knows how to add sparse matrix entries,
00253           // given a line of data from the file.  It also stores the
00254           // entries and can sort them.
00255           typedef Adder<Scalar, Ordinal> raw_adder_type;
00256           // SymmetrizingAdder "advices" (yes, I'm using that as a verb)
00257           // the original Adder, so that additional entries are filled
00258           // in symmetrically, if the Matrix Market banner line
00259           // specified a symmetry type other than "general".
00260           typedef SymmetrizingAdder<raw_adder_type> adder_type;
00261 
00262           // Current line number of the input stream.
00263           size_t lineNumber = 1;
00264 
00265           // Construct the "Banner" (matrix metadata, including type
00266           // and symmetry information, but not dimensions).
00267           RCP<const Banner> banner;
00268           std::ostringstream err;
00269           try {
00270             banner = readBanner (in, lineNumber);
00271           }
00272           catch (std::exception& e) {
00273             err << "Failed to read Matrix Market input's Banner: " << e.what();
00274             if (tolerant_) {
00275               if (debug_) {
00276                 cerr << err.str() << endl;
00277               }
00278               return false;
00279             }
00280             else {
00281               TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, err.str());
00282             }
00283           }
00284 
00285           //
00286           // Validate the metadata in the Banner.
00287           //
00288           bool ok = true;
00289           if (banner->matrixType () != "coordinate") {
00290             err << "Matrix Market input file must contain a \"coordinate\"-"
00291               "format sparse matrix in order to create a sparse matrix object "
00292               "from it.";
00293             ok = false;
00294           }
00295           else if (! STS::isComplex && banner->dataType () == "complex") {
00296             err << "The Matrix Market sparse matrix file contains complex-"
00297               "valued data, but you are try to read the data into a sparse "
00298               "matrix containing real values (your matrix's Scalar type is "
00299               "real).";
00300             ok = false;
00301           }
00302           else if (banner->dataType () != "real" &&
00303                    banner->dataType () != "complex") {
00304             err << "Only real or complex data types (no pattern or integer "
00305               "matrices) are currently supported.";
00306             ok = false;
00307           }
00308           if (! ok) {
00309             if (tolerant_) {
00310               if (debug_) {
00311                 cerr << "Matrix Market banner is invalid: " << err.str () << endl;
00312                 return false;
00313               }
00314             }
00315             else {
00316               TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
00317                 "Matrix Market banner is invalid: " << err.str ());
00318             }
00319           }
00320           if (debug_) {
00321             cerr << "Matrix Market Banner line:" << endl << *banner << endl;
00322           }
00323 
00324           // The reader will invoke the adder (see below) once for
00325           // each matrix entry it reads from the input stream.
00326           typedef CoordDataReader<adder_type, Ordinal, Scalar, STS::isComplex> reader_type;
00327           // We will set the adder below, after calling readDimensions().
00328           reader_type reader;
00329 
00330           // Read in the dimensions of the sparse matrix: (# rows, #
00331           // columns, # matrix entries (counting duplicates as
00332           // separate entries)).  The second element of the pair tells
00333           // us whether the values were gotten successfully.
00334           std::pair<Tuple<Ordinal, 3>, bool> dims =
00335             reader.readDimensions (in, lineNumber, tolerant_);
00336           if (! dims.second) {
00337             err << "Error reading Matrix Market sparse matrix file: failed to "
00338               "read coordinate dimensions.";
00339             if (tolerant_) {
00340               if (debug_) {
00341                 cerr << err.str () << endl;
00342               }
00343               return false;
00344             }
00345             else {
00346               TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, err.str ());
00347             }
00348           }
00349 
00350           // These are "expected" values read from the input stream's
00351           // metadata.  The actual matrix entries read from the input
00352           // stream might not conform to their constraints.  We allow
00353           // such nonconformity only in "tolerant" mode; otherwise, we
00354           // throw an exception.
00355           numRows = dims.first[0];
00356           numCols = dims.first[1];
00357           const Ordinal numEntries = dims.first[2];
00358           if (debug_) {
00359             cerr << "Reported dimensions: " << numRows << " x " << numCols
00360                  << ", with " << numEntries << " entries (counting possible "
00361                  << "duplicates)." << endl;
00362           }
00363 
00364           // The "raw" adder knows about the expected matrix
00365           // dimensions, but doesn't know about symmetry.
00366           RCP<raw_adder_type> rawAdder =
00367             rcp (new raw_adder_type (numRows, numCols, numEntries,
00368                                      tolerant_, debug_));
00369           // The symmetrizing adder knows about symmetry.  It mediates
00370           // adding entries to the "raw" adder.  We'll use the raw
00371           // adder to compute the CSR arrays.
00372           RCP<adder_type> adder =
00373             rcp (new adder_type (rawAdder, banner->symmType ()));
00374 
00375           // Give the adder to the reader.
00376           reader.setAdder (adder);
00377 
00378           // Read the sparse matrix entries.  "results" just tells us if
00379           // and where there were any bad lines of input.  The actual
00380           // sparse matrix entries are stored in the (raw) Adder object.
00381           std::pair<bool, std::vector<size_t> > results =
00382             reader.read (in, lineNumber, tolerant_, debug_);
00383 
00384           // Report any bad line number(s).
00385           if (! results.first) {
00386             err << "The Matrix Market input stream had syntax error(s)."
00387               "  Here is the error report." << endl;
00388             reportBadness (err, results);
00389             if (tolerant_) {
00390               if (debug_) {
00391                 cerr << err.str() << endl;
00392               }
00393               return false;
00394             }
00395             else {
00396               TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, err.str ());
00397             }
00398           }
00399           // Done reading the sparse matrix; now extract CSR arrays.
00400           size_t numUnique, numRemoved;
00401           ArrayRCP<Ordinal> ptr;
00402           ArrayRCP<Ordinal> ind;
00403           ArrayRCP<Scalar> val;
00404           try {
00405             rawAdder->mergeAndConvertToCSR (numUnique, numRemoved, ptr, ind, val);
00406           }
00407           catch (std::exception& e) {
00408             err << "Failed to convert sparse matrix data to CSR (compressed "
00409               "sparse row) format.  Reported error: " << e.what ();
00410             if (tolerant_) {
00411               if (debug_) {
00412                 cerr << err.str () << endl;
00413               }
00414               return false;
00415             }
00416             else {
00417               TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, err.str ());
00418             }
00419           }
00420           rowptr = ptr;
00421           colind = ind;
00422           values = val;
00423           return true;
00424         }
00425 
00426       private:
        /// Whether to tolerate syntax errors when parsing the Matrix
        /// Market input (the "Parse tolerantly" parameter).
00428         bool tolerant_;
        /// Whether to print debugging output to stderr (the "Debug
        /// mode" parameter).
00430         bool debug_;
00431 
00438         void init () {
00439           using std::cerr;
00440           using std::endl;
00441 
00442           if (debug_) {
00443             cerr << "MatrixMarket::Raw::Reader:" << endl
00444                  << "- Tolerant mode: " << tolerant_ << endl
00445                  << "- Debug mode: " << debug_ << endl;
00446           }
00447         }
00448 
00461         RCP<const Banner>
00462         readBanner (std::istream& in, size_t& lineNumber)
00463         {
00464           using std::cerr;
00465           using std::endl;
00466           std::string line; // The presumed banner line
00467 
00468           // The first line of the Matrix Market file should always be
00469           // the banner line.  In tolerant mode, we allow comment
00470           // lines before the banner line.  This complicates detection
00471           // of comment lines a bit.
00472           if (tolerant_) {
00473             // Keep reading lines until we get a noncomment line.
00474             const bool maybeBannerLine = true;
00475             size_t numLinesRead = 0;
00476             bool commentLine = false;
00477             do {
00478               // Try to read a line from the input stream.
00479               const bool readFailed = ! getline (in, line);
00480               TEUCHOS_TEST_FOR_EXCEPTION(readFailed, std::invalid_argument,
00481                 "Failed to get Matrix Market banner line from input, after reading "
00482                 << numLinesRead << "line" << (numLinesRead != 1 ? "s." : "."));
00483               // We read a line from the input stream.
00484               ++lineNumber;
00485               ++numLinesRead;
00486               size_t start, size; // Output args of checkCommentLine
00487               commentLine = checkCommentLine (line, start, size, lineNumber,
00488                                               tolerant_, maybeBannerLine);
00489             } while (commentLine); // Loop until we find a noncomment line.
00490           }
00491           else {
00492             const bool readFailed = ! getline (in, line);
00493             TEUCHOS_TEST_FOR_EXCEPTION(readFailed, std::invalid_argument,
00494               "Failed to get Matrix Market banner line from input.  This "
00495               "probably means that the file is empty (contains zero lines).");
00496           }
00497 
00498           if (debug_) {
00499             cerr << "Raw::Reader::readBanner: Here is the presumed banner line:"
00500                  << endl << line << endl;
00501           }
00502 
00503           // Assume that the noncomment line we found is the banner line.
00504           RCP<Banner> banner;
00505           try {
00506             banner = rcp (new Banner (line, tolerant_));
00507           } catch (std::exception& e) {
00508             TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument,
00509               "Matrix Market file's banner line contains syntax error(s): "
00510               << e.what ());
00511           }
00512           return rcp_const_cast<const Banner> (banner);
00513         }
00514 
00519         void
00520         reportBadness (std::ostream& out,
00521                        const std::pair<bool, std::vector<size_t> >& results)
00522         {
00523           using std::endl;
00524           const size_t numErrors = results.second.size();
00525           const size_t maxNumErrorsToReport = 20;
00526           out << numErrors << " errors when reading Matrix Market sparse "
00527             "matrix file." << endl;
00528           if (numErrors > maxNumErrorsToReport) {
00529             out << "-- We do not report individual errors when there "
00530               "are more than " << maxNumErrorsToReport << ".";
00531           }
00532           else if (numErrors == 1) {
00533             out << "Error on line " << results.second[0] << endl;
00534           }
00535           else if (numErrors > 1) {
00536             out << "Errors on lines {";
00537             for (size_t k = 0; k < numErrors-1; ++k) {
00538               out << results.second[k] << ", ";
00539             }
00540             out << results.second[numErrors-1] << "}" << endl;
00541           }
00542         }
00543       }; // end of class Reader
00544     } // namespace Raw
00545   } // namespace MatrixMarket
00546 } // namespace Teuchos
00547 
00548 #endif // __Teuchos_MatrixMarket_Raw_Reader_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines