Difference between revisions of "CPP/TR1/Regex Tokenising"
From ProgrammingExamples
		< CPP
		
		
|  (Boost/TR1 Regular Expressions - Tokenising on multiple consecutive delimiters) | m (Vertical spacing/alignment) | ||
| (One intermediate revision by the same user not shown) | |||
| Line 1: | Line 1: | ||
| − | <source lang="cpp"> | + | <source lang="cpp">#include <string> | 
| − | #include <string> | + | |
| #include <algorithm>    // copy | #include <algorithm>    // copy | ||
| #include <iterator>     // back_inserter, ostream_iterator | #include <iterator>     // back_inserter, ostream_iterator | ||
| Line 12: | Line 11: | ||
|      static const int submatch_off = -1; |      static const int submatch_off = -1; | ||
| − |      std::string str = "the\t    quick  brown\n\n fox jumped over the lazy dog"; | + |      std::string str = "the\t    quick  brown\n-\n- fox jumped..over,the,lazy,.dog"; | 
|      std::vector<std::string> tokens; |      std::vector<std::string> tokens; | ||
| − |      std::tr1::regex re("[\\s]+"); | + |      std::tr1::regex re("[\\s-,.]+"); | 
|      //start/end points of tokens in str |      //start/end points of tokens in str | ||
| Line 24: | Line 23: | ||
|      std::copy(tokens.begin(), tokens.end(), |      std::copy(tokens.begin(), tokens.end(), | ||
| − | + |               std::ostream_iterator<std::string>(std::cout, "\n")); | |
| } | } | ||
| </source> | </source> | ||
Latest revision as of 11:47, 26 June 2010
#include <algorithm>    // copy
#include <iostream>
#include <iterator>     // back_inserter, ostream_iterator
#include <regex>        // regex, sregex_token_iterator
#include <string>
#include <vector>

// Tokenises a string on runs of consecutive delimiter characters
// (whitespace, comma, full stop, hyphen) and prints one token per line.
//
// Note: this uses the standard std:: names that ship with <regex>.
// TR1-only toolchains spell the same types std::tr1::regex and
// std::tr1::sregex_token_iterator.
int main()
{
    // Submatch index -1 tells the token iterator to yield the text found
    // *between* delimiter matches rather than the matches themselves.
    static const int submatch_off = -1;

    const std::string str = "the\t    quick  brown\n-\n- fox jumped..over,the,lazy,.dog";
    std::vector<std::string> tokens;

    // One or more consecutive delimiter characters count as a single
    // separator. The hyphen is deliberately placed last in the bracket
    // expression so that it is always a literal: writing "\\s-," asks for a
    // range that starts at a character class, which the ECMAScript grammar
    // rejects and which some standard libraries report by throwing
    // std::regex_error when the pattern is compiled.
    const std::regex re("[\\s,.-]+");

    // Start/end points of tokens in str; a default-constructed iterator
    // is the end-of-sequence marker.
    std::sregex_token_iterator begin(str.begin(), str.end(), re, submatch_off), end;

    std::copy(begin, end, std::back_inserter(tokens));

    std::copy(tokens.begin(), tokens.end(),
              std::ostream_iterator<std::string>(std::cout, "\n"));
}
