C/C++

Regular Expressions

By Pete Becker, April 11, 2006

Pete Becker finds a new appreciation for regular expressions as he examines TR1's regex implementation.

#include <regex>
#include <iostream>
using std::tr1::regex; using std::tr1::cmatch;
using std::tr1::regex_search;
using std::cout;

int main()
  { // demonstrate regular expression search
  const char *expr = "[ ,.\\t\\n;:]";
  regex rgx(expr);
  cmatch match;
  const char *tgt = "This is a test.";
  if (regex_search(tgt, match, rgx))
    cout << "Match found after `" << match.prefix() << "`\n";
  else
    cout << "Not found.";
  return 0;
  }

Listing One: A basic regex search.

#include <algorithm>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <regex>
#include <string>
using std::tr1::regex; using std::tr1::cmatch;
using std::tr1::regex_constants::match_continuous;
using std::tr1::cregex_token_iterator;
using std::tr1::sregex_token_iterator;
using std::map;
using std::cout; using std::basic_ostream;
using std::setw; using std::ostream_iterator;
using std::string; using std::copy;
using std::strchr; using std::strcpy;
using std::strlen; using std::strtok;

// Sample input read by every word-counting function in this file.
// Each line ends in '\n', which is one of the characters in separators,
// so the last word of the passage is followed by a separator.
static char text[] =
"The quality of mercy is not strain'd,\n"
"It droppeth as the gentle rain from heaven\n"
"Upon the place beneath: it is twice bless'd;\n"
"It blesseth him that gives and him that takes:\n"
"'Tis mightiest in the mightiest; it becomes\n"
"The throned monarch better than his crown;\n"
"His sceptre shows the force of temporal power,\n"
"The attribute to awe and majesty,\n"
"Wherein doth sit the dread and fear of kings\n";
// William Shakespeare, from "The Merchant of Venice"

    // word separators, as plain text and as regular expression
    // separators: the literal separator characters, passed to strchr,
    //   strtok and the basic_string find_first_of/find_first_not_of calls
static char separators[] = " ,.\t\n;:";
    // seps_rgx: one or more consecutive separator characters
static char seps_rgx[] = "[ ,.\\t\\n;:]+";
    // words_rgx: group 1 is a run of non-separator characters (the word);
    //   group 2 is the run of separators that follows it, or end of input
static char words_rgx[] = "([^ ,.\\t\\n;:]+)([ ,.\\t\\n;:]+|$)";

    // the important types, and the database
    // counter: maps each word to the number of times it has been seen
typedef map<string, int> counter;
    // pairs: the element type of counter, i.e. one (word, count) entry
typedef counter::value_type pairs;
    // word_count: filled by each use_* function, printed and emptied
    //   by show_results
static counter word_count;

namespace std { // add inserter to namespace std
// NOTE(review): presumably placed in std so that argument-dependent lookup
// from inside ostream_iterator can find it for std::pair -- confirm.
template <class CharT, class Traits>
basic_ostream<CharT, Traits>& operator<<(
  basic_ostream<CharT, Traits>& out, const pairs& val)
  {  // write one (word, count) entry as "word: count"
  out << setw(10) << val.first;   // word, padded to a field width of 10
  out << ": " << val.second;      // separator, then the count
  return out;
  }
}
    // the search functions
    // main() calls every one of these before its definition appears, so
    // each needs a forward declaration here for the file to compile
static void use_ad_hoc_parser();
static void use_strtok();
static void use_string();
static void use_regex();
static void use_regex_iter();

static void show_results(const char *title)
  { // show contents of word_count, then clear
  cout << title << " --------------------\n";
  copy(word_count.begin(), word_count.end(),
    ostream_iterator<pairs>(cout, "\n"));
  word_count.clear();
  }
int main()
  { // demonstrate various counting techniques
  struct technique
    { // one counting function and the heading printed above its results
    void (*count)();
    const char *title;
    };
  static const technique techniques[] =
    {
    { use_ad_hoc_parser, "ad hoc parser" },
    { use_strtok, "strtok" },
    { use_string, "string" },
    { use_regex, "regular expression" },
    { use_regex_iter, "regular expression iterator" }
    };
  const int how_many = sizeof(techniques) / sizeof(*techniques);
  for (int i = 0; i < how_many; ++i)
    { // fill word_count, then print and clear it
    techniques[i].count();
    show_results(techniques[i].title);
    }
  return 0;
  }

Listing Two: Counting occurrences.

static void use_ad_hoc_parser()
  { // count word frequencies with ad hoc parser
  // Scans text one character at a time, tracking whether the scan is
  // currently inside a word, and bumps word_count[word] each time a
  // word ends.
  const char *txt = text;
  // skip leading separators:
  while (*txt && strchr(separators, *txt))
    ++txt;
  bool inword = false;
  // Start of the word being scanned. It must live outside the loop: it is
  // set on the iteration that enters a word and read on a later iteration
  // that leaves it, so a loop-local variable would be read uninitialized.
  const char *start = txt;
  string word;
  for (; *txt; ++txt)
    { // classify current character
    const bool at_sep = strchr(separators, *txt) != 0;
    if (!inword && !at_sep)
      { // at start of word
      inword = true;
      start = txt;
      }
    else if (inword && at_sep)
      { // at end of word
      inword = false;
      word.assign(start, txt);
      ++word_count[word];
      }
    }
  if (inword)
    { // input ended without a trailing separator: count the last word too
    word.assign(start, txt);
    ++word_count[word];
    }
  }

Listing Three: Search using an ad hoc parser.

static void use_strtok()
  { // count word frequencies with strtok
  string word;
  char cpy[sizeof(text)/sizeof(*text)];
  strcpy(cpy, text);
  const char *start = strtok(cpy, separators);
  while (start)
    { // at start of word
    word.assign(start);
    ++word_count[word];
    start = strtok(0, separators);
    }
  }

Listing Four: Search using strtok.

static void use_string()
  { // count word frequencies with string member functions
  string cpy(text);
  string::size_type pos = cpy.find_first_not_of(separators);
  while (pos != string::npos)
    { // at start of word
    string::size_type end = cpy.find_first_of(separators, pos);
    ++word_count[cpy.substr(pos, end == string::npos ? end : (end - pos))];
    pos = cpy.find_first_not_of(separators, end);
    }
  }

Listing Five: Search using basic_string member functions.

static void use_regex()
  { // count word frequencies with regular expression
  const char *begin = text;
  const char *end = text + strlen(text);
  cmatch match;
  // skip leading white space:
  regex rgx(seps_rgx);
  if (regex_search(begin, end, match, rgx, match_continuous))
    begin = match[0].second;
  // start search
  rgx = words_rgx;
  while (regex_search(begin, end, match, rgx, match_continuous))
    { // found a word
    ++word_count[match[1].str()];
    begin = match[0].second;
    }
  }

Listing Six: The first search using regular expressions.

static void use_regex_iter()
  { // count word frequencies with regular expression iterator
  regex word_sep(seps_rgx);
  sregex_token_iterator words(
    text, text + strlen(text), word_sep, -1);
  sregex_token_iterator end;
  while (words != end)
    ++word_count[*words++];
  }

Listing Seven: The second search using regular expressions.

Previous 1 2

More Insights

INFO-LINK


	To upload an avatar photo, first complete your Disqus profile. | View the list of supported HTML tags you can use to style comments. | Please read our commenting policy.

C/C++

Regular Expressions

Related Reading

More Insights

Currently we allow the following HTML tags in comments:

Single tags

Matching tags

C/C++ Recent Articles

Most Popular

This month's Dr. Dobb's Journal

Upcoming Events

Featured Reports

Featured Whitepapers

Most Recent Premium Content

C/C++

Regular Expressions

Related Reading

News

Commentary

Slideshow

Video

Most Popular

More Insights

White Papers

Reports

Webcasts

Currently we allow the following HTML tags in comments:

Single tags

Matching tags

C/C++ Recent Articles

Most Popular

This month's Dr. Dobb's Journal

Upcoming Events

Featured Reports

Featured Whitepapers

Most Recent Premium Content