tokenize.hpp

00001 /**********************************************************************
00002  * tokenize() - Tokenizes a string into parts by separators.          *
00003  * Copyright (C) 2000  C. Brandon Forehand                            *
00004  * <b4hand@users.sourceforge.net>                                     *
00005  *                                                                    *
00006  * This code is free software; you can redistribute it and/or         *
00007  * modify it under the terms of the GNU General Public License        *
00008  * as published by the Free Software Foundation; either version 2     *
00009  * of the License, or (at your option) any later version.             *
00010  *                                                                    *
00011  * This program is distributed in the hope that it will be useful,    *
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of     *
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
00014  * GNU General Public License for more details.                       *
00015  **********************************************************************/
00016 
00017 #ifndef STK_IO_UTIL_TOKENIZE_H
00018 #define STK_IO_UTIL_TOKENIZE_H
00019 
#include <algorithm>
#include <cctype>
#include <functional>
#include <string>
#include <vector>
00024 
00025 namespace stk {
00026   namespace io {
00027     namespace util {
00028       //-----------------------------------------------------------------------------
00031       //----------------------------------------------------------------------------
/// Predicate functor that reports whether a character is whitespace,
/// as classified by std::isspace.
struct is_space
{
  // These typedefs were previously inherited from
  // std::unary_function<char,bool>, which was removed in C++17. They are
  // declared directly so adaptors relying on them keep working.
  typedef char argument_type;
  typedef bool result_type;

  /// @param c character to classify
  /// @return true when c is a whitespace character
  bool operator() (char c) const
  {
    // The <cctype> classifiers have undefined behavior for negative
    // arguments other than EOF, so convert through unsigned char first.
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  }
};
00037 
/// Predicate functor that reports whether a character is either whitespace
/// or punctuation, as classified by std::isspace and std::ispunct.
struct is_punct
{
  // These typedefs were previously inherited from
  // std::unary_function<char,bool>, which was removed in C++17. They are
  // declared directly so adaptors relying on them keep working.
  typedef char argument_type;
  typedef bool result_type;

  /// @param c character to classify
  /// @return true when c is a whitespace or punctuation character
  bool operator() (char c) const
  {
    // The <cctype> classifiers have undefined behavior for negative
    // arguments other than EOF, so convert through unsigned char first.
    const unsigned char uc = static_cast<unsigned char>(c);
    return std::isspace(uc) != 0 || std::ispunct(uc) != 0;
  }
};
00043 
00044       //----------------------------------------------------------------------------
00049       //----------------------------------------------------------------------------
/// Predicate functor that reports whether a character belongs to a
/// caller-supplied set of characters.
class recognize
{
public:
  // These typedefs were previously inherited from
  // std::unary_function<char,bool>, which was removed in C++17. They are
  // declared directly so adaptors relying on them keep working.
  typedef char argument_type;
  typedef bool result_type;

  /// @param str the set of characters to recognize; a copy is stored
  explicit recognize(const std::string &str) : mStr (str) {}

  /// @param c character to look up
  /// @return true when c occurs anywhere in the stored character set
  bool operator() (char c) const
  { return mStr.find(c) != std::string::npos; }

private:
  std::string mStr;  ///< Characters this predicate answers true for.
};
00060 
00061       //----------------------------------------------------------------------------
00065       //----------------------------------------------------------------------------
/// Splits a string into tokens, treating every character for which the
/// predicate returns true as a separator.
///
/// Consecutive separators are collapsed and leading/trailing separators are
/// skipped, so the result never contains an empty token.
///
/// @tparam Pred unary predicate callable with a char and returning bool
/// @param s the string to split
/// @param p predicate that returns true for separator characters
/// @return the tokens in order of appearance; empty when s is empty or
///         consists solely of separators
template <class Pred>
inline std::vector<std::string> tokenize (const std::string &s, Pred p)
{
  std::vector<std::string> result;
  const std::string::const_iterator end = s.end();
  std::string::const_iterator i = s.begin();

  while (i != end)
    {
      // Eat separators. The bound is tested first so the end iterator is
      // never dereferenced when the input finishes with separators.
      while (i != end && p(*i))
        ++i;

      // Find the end of the next token
      const std::string::const_iterator tokenEnd = std::find_if(i, end, p);

      // Append token to result (nothing to add once only separators remained)
      if (i != tokenEnd)
        result.push_back(std::string(i, tokenEnd));

      i = tokenEnd;
    }

  return result;
}
00093 
00094       //----------------------------------------------------------------------------
00097       //----------------------------------------------------------------------------
00098       inline std::vector<std::string> tokenize (const std::string &s)
00099       {
00100   return tokenize(s,is_space());
00101       }
00102     }
00103   }
00104 }
00105 #endif
00106 
00107 #if 0
00108 #include <iostream>
00109 using std::cout;
00110 using std::cin;
00111 
00112 typedef std::vector<std::string> TokenList;
00113 
00114 int main()
00115 {
00116   char s[128];
00117   while(!cin.eof()) {
00118     cout << "Enter a string: ";
00119     cin.getline(s,128);
00120     std::string input_line(s);
00121     if (input_line != "quit") {
00122       std::vector<std::string> tokens = tokenize(input_line);
00123       cout << "There were " << tokens.size() << " tokens in the line\n";
00124       TokenList::const_iterator I = tokens.begin();
00125       while (I != tokens.end()) {
00126   cout << "'" << *I++ << "'\t";
00127       }
00128       cout << '\n';
00129     } else {
00130       exit(0);
00131     }
00132   }
00133 }
00134 #endif
Generated on Wed Apr 13 10:05:49 2011 for Sierra Toolkit by  doxygen 1.6.3