Regular Expressions in C++ by John Maddock Listing One bool validate_card_format(const std::string s) { static const boost::regex e("\\d{15,16}"); return regex_match(s, e); } Listing Two bool validate_card_format(const std::wstring s) { static const boost::wregex e(L"\\d{15,16}"); return regex_match(s, e); } Listing Three // match any format with the regular expression: const boost::regex e("\\A" // asserts start of string "(\\d{3,4})[- ]?" // first group of digits "(\\d{4})[- ]?" // second group of digits "(\\d{4})[- ]?" // third group of digits "(\\d{4})" // forth group of digits "\\z"); // asserts end of string // format strings using sed syntax: const std::string machine_format("\\1\\2\\3\\4"); const std::string human_format("\\1-\\2-\\3-\\4"); std::string machine_readable_card_number(const std::string& s) { std::string result = regex_merge(s, e, machine_format, boost::match_default | boost::format_sed | boost::format_no_copy); if(result.size() == 0) throw std::runtime_error ("String is not a credit card number"); return result; } std::string human_readable_card_number(const std::string& s) { std::string result = regex_merge(s, e, human_format, boost::match_default | boost::format_sed | boost::format_no_copy); if(result.size() == 0) throw std::runtime_error ("String is not a credit card number"); return result; } Listing Four #include #include #include #include #include const char* expression = "<\\s*datamerge" // tag prefix "(?:" // non-marking grouping "\\s+table\\s*=\\s*\"([^\"]*)\"" // $1 = table name "|\\s+item\\s*=\\s*\"([^\"]*)\"" // $2 = item name "|\\s+field\\s*=\\s*\"([^\"]*)\"" // $3 = field name "){1,3}" // grouping repeated 1, 2 or 3 times "\\s*>"; // tag suffix const boost::regex e(expression); std::string::const_iterator endp; std::string lookup_datamerge_string(const std::string& table, const std::string& item, const std::string& field) { // this should carry out a database lookup, // for now just concatonate the names together: std::string result = table + "#" + item + "#" + field; return result; } bool grep_callback(const boost::match_results& in) { // get table name with default if necessary: std::string table = in[1]; if(table.size() == 0) table = "default_table_name"; // get item name (required no defaults): std::string item = in[2]; if(item.size() == 0) throw std::runtime_error("Incomplete datamerge field found"); // get field name with default if necessary: std::string field = in[3]; if(field.size() == 0) field = "default_field_name"; // now carry out output, start by // sending everything from the end of the last match // to the start of this match to output: std::cout << std::string(in[-1]); // output $` std::cout << lookup_datamerge_string(table, item, field); // now save end of what matched for later: endp = in[0].second; return true; // continue grepping } void load_file(std::string& s, std::istream& is) { s.erase(); s.reserve(is.rdbuf()->in_avail()); char c; while(is.get(c)) { if(s.capacity() == s.size()) s.reserve(s.capacity() * 3); s.append(1, c); } } int main(int argc, char * argv[]) { try{ std::filebuf ifs; std::filebuf ofs; std::streambuf* old_in = 0; std::streambuf* old_out = 0; if(argc > 1) { // redirect cin: ifs.open(argv[1], std::ios_base::in); old_in = std::cin.rdbuf(&ifs); } if(argc > 2) { // redirect cout: ofs.open(argv[2], std::ios_base::out); old_out = std::cout.rdbuf(&ofs); } std::string s; load_file(s, std::cin); endp = s.begin(); // perform search and replace with lookup: boost::regex_grep(&grep_callback, s, e); // copy tail of file to output: std::string::const_iterator end = s.end(); std::copy(endp, end, std::ostream_iterator(std::cout)); // reset streams: if(old_in) std::cin.rdbuf(old_in); if(old_out) std::cout.rdbuf(old_out); } catch(const std::exception& e) { std::cerr << "Exception thrown during merge: \"" << e.what() << "\"" << std::endl; } return 0; } 3