Home | History | Annotate | Download | only in libregex
      1 /**
      2  * @file op_regex.h
      3  * This file contains various definitions and interface for a
      4  * lightweight wrapper around libc regex, providing match
      5  * and replace facility.
      6  *
      7  * @remark Copyright 2003 OProfile authors
      8  * @remark Read the file COPYING
      9  * @remark Idea comes from TextFilt project <http://textfilt.sourceforge.net>
     10  *
     11  * @author Philippe Elie
     12  */
     13 
     14 #ifndef OP_REGEX_H
     15 #define OP_REGEX_H
     16 
     17 // required by posix before including regex.h
     18 #include <sys/types.h>
     19 #include <regex.h>
     20 
     21 #include <string>
     22 #include <vector>
     23 #include <map>
     24 
     25 #include "op_exception.h"
     26 
     27 /**
     28  * ill formed regular expression or expression throw such exception
     29  */
     30 struct bad_regex : op_exception {
     31 	bad_regex(std::string const & pattern);
     32 };
     33 
     34 /**
     35  * lightweight encapsulation of regex lib search and replace
     36  *
     37  * See stl.pat for further details and examples of used syntax.
     38  */
     39 class regular_expression_replace {
     40 public:
     41 	/**
     42 	 * @param limit limit on number of search and replace done
     43 	 * @param limit_defs_expansion limit on number of expansion done
     44 	 *  during replacement of regular definition name by their expansion
     45 	 *
     46 	 * build an object holding regular defintion and regular expression
     47 	 * & replace, preparing it for substitution ala sed
     48 	 */
     49 	regular_expression_replace(size_t limit = 100,
     50 				   size_t limit_defs_expansion = 100);
     51 	~regular_expression_replace();
     52 
     53 	/**
     54 	 * @param name a regular definition name
     55 	 * @param replace the string to subsitute in other regular definition
     56 	 * or regular exepression when this regular defintion name is
     57 	 * encoutered.
     58 	 */
     59 	void add_definition(std::string const & name,
     60 			    std::string const & replace);
     61 	/**
     62 	 * @param pattern a regular expression pattern, POSIX extended notation
     63 	 * @param replace the replace string to use when this regular
     64 	 *  expression is matched
     65 	 *
     66 	 * You can imbed regular definition in pattern but not in replace.
     67 	 */
     68 	void add_pattern(std::string const & pattern,
     69 			 std::string const & replace);
     70 
     71 	/**
     72 	 * @param str the input/output string where we search pattern and
     73 	 * replace them.
     74 	 *
     75 	 * Execute loop at max limit time on the set of regular expression
     76 	 *
     77 	 * Return true if too many match occur and replacing has been stopped
     78 	 * due to reach limit_defs_expansion. You can test if some pattern has
     79 	 * been matched by saving the input string and comparing it to the new
     80 	 * value. There is no way to detect s/a/a because the output string
     81 	 * will be identical to the input string.
     82 	 */
     83 	bool execute(std::string & str) const;
     84 private:
     85 	struct replace_t {
     86 		// when this regexp is matched
     87 		regex_t regexp;
     88 		// replace the matched part with this string
     89 		std::string replace;
     90 	};
     91 
     92 	// helper to execute
     93 	bool do_execute(std::string & str, replace_t const & regexp) const;
     94 	void do_replace(std::string & str, std::string const & replace,
     95 			regmatch_t const * match) const;
     96 
     97 	// helper to add_definition() and add_pattern()
     98 	std::string expand_string(std::string const & input);
     99 
    100 	// helper to add_pattern
    101 	std::string substitute_definition(std::string const & pattern);
    102 
    103 	// return the match of throw if idx is invalid
    104 	regmatch_t const & get_match(regmatch_t const * match, char idx) const;
    105 
    106 	// don't increase too, it have direct impact on performance. This limit
    107 	// the number of grouping expression allowed in a regular expression
    108 	// Note than you can use grouping match operator > 9 only in the
    109 	// replace rule not in match regular expression since POSIX don't allow
    110 	// more than \9 in matching sequence.
    111 	static const size_t max_match = 16;
    112 
    113 	size_t limit;
    114 	size_t limit_defs_expansion;
    115 	std::vector<replace_t> regex_replace;
    116 	/// dictionary of regular definition
    117 	typedef std::map<std::string, std::string> defs_dict;
    118 	defs_dict defs;
    119 };
    120 
    121 /**
    122  * @param regex the regular_expression_replace to fill
    123  * @param filename the filename from where the deifnition and pattern are read
    124  *
    125  * add to regex pattern and regular definition read from the given file
    126  */
    127 void setup_regex(regular_expression_replace& regex,
    128 		 std::string const & filename);
    129 
    130 #endif /* !OP_REGEX_H */
    131