#include <regex> #include <iostream> using std::tr1::regex; using std::tr1::cmatch; using std::tr1::regex_search; using std::cout; int main() { // demonstrate regular expression search const char *expr = "[ ,.\\t\\n;:]"; regex rgx(expr); cmatch match; const char *tgt = "This is a test."; if (regex_search(tgt, match, rgx)) cout << "Match found after `" << match.prefix() << "`\n"; else cout << "Not found."; return 0; }
#include <regex> #include <algorithm> #include <iomanip> #include <iostream> #include <iterator> #include <map> #include <string> using std::tr1::regex; using std::tr1::cmatch; using std::tr1::regex_constants::match_continuous; using std::tr1::sregex_token_iterator; using std::map; using std::cout; using std::basic_ostream; using std::setw; using std::ostream_iterator; using std::string; using std::copy; static char text[] = "The quality of mercy is not strain'd,\n" "It droppeth as the gentle rain from heaven\n" "Upon the place beneath: it is twice bless'd;\n" "It blesseth him that gives and him that takes:\n" "'Tis mightiest in the mightiest; it becomes\n" "The throned monarch better than his crown;\n" "His sceptre shows the force of temporal power,\n" "The attribute to awe and majesty,\n" "Wherein doth sit the dread and fear of kings\n"; // William Shakespeare, from "The Merchant of Venice" // word separators, as plain text and as regular expression static char separators[] = " ,.\t\n;:"; static char seps_rgx[] = "[ ,.\\t\\n;:]+"; static char words_rgx[] = "([^ ,.\\t\\n;:]+)([ ,.\\t\\n;:]+|$)"; // the important types, and the database typedef map<string, int> counter; typedef counter::value_type pairs; static counter word_count; namespace std { // add inserter to namespace std template <class Elem, class Alloc> basic_ostream<Elem, Alloc>& operator<<( basic_ostream<Elem, Alloc>& out, const pairs& val) { // insert pair<string, int> into stream return out << setw(10) << val.first << ": " << val.second; } } // the search functions //static void use_ad_hoc_parser(); //static void use_strtok(); //static void use_string(); static void use_regex(); //static void use_regex_iter(); static void show_results(const char *title) { // show contents of word_count, then clear cout << title << " --------------------\n"; copy(word_count.begin(), word_count.end(), ostream_iterator<pairs>(cout, "\n")); word_count.clear(); } int main() { // demonstrate various counting techniques use_ad_hoc_parser(); show_results("ad hoc parser"); use_strtok(); show_results("strtok"); use_string(); show_results("string"); use_regex(); show_results("regular expression"); use_regex_iter(); show_results("regular expression iterator"); return 0; }
static void use_ad_hoc_parser() { // count word frequencies with ad hoc parser const char *txt = text; // skip leading whitespace: while (*txt && strchr(separators, *txt)) ++txt; bool inword = false; string word; while (*txt) { // classify current character bool at_sep = strchr(separators, *txt); const char *start; if (!inword && !at_sep) { // at start of word inword = true; start = txt; } else if (inword && at_sep) { // at end of word inword = false; word.assign(start, txt); ++word_count[word]; } ++txt; } }
static void use_strtok() { // count word frequencies with strtok string word; char cpy[sizeof(text)/sizeof(*text)]; strcpy(cpy, text); const char *start = strtok(cpy, separators); while (start) { // at start of word word.assign(start); ++word_count[word]; start = strtok(0, separators); } }
static void use_string() { // count word frequencies with string member functions string cpy(text); string::size_type pos = cpy.find_first_not_of(separators); while (pos != string::npos) { // at start of word string::size_type end = cpy.find_first_of(separators, pos); ++word_count[cpy.substr(pos, end == string::npos ? end : (end - pos))]; pos = cpy.find_first_not_of(separators, end); } }
static void use_regex() { // count word frequencies with regular expression const char *begin = text; const char *end = text + strlen(text); cmatch match; // skip leading white space: regex rgx(seps_rgx); if (regex_search(begin, end, match, rgx, match_continuous)) begin = match[0].second; // start search rgx = words_rgx; while (regex_search(begin, end, match, rgx, match_continuous)) { // found a word ++word_count[match[1].str()]; begin = match[0].second; } }
static void use_regex_iter() { // count word frequencies with regular expression iterator regex word_sep(seps_rgx); sregex_token_iterator words( text, text + strlen(text), word_sep, -1); sregex_token_iterator end; while (words != end) ++word_count[*words++]; }