Raw File
RegularExpression.hxx.in
/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
   file Copyright.txt or https://cmake.org/licensing#kwsys for details.  */
// Original Copyright notice:
// Copyright (C) 1991 Texas Instruments Incorporated.
//
// Permission is granted to any individual or institution to use, copy, modify,
// and distribute this software, provided that this complete copyright and
// permission notice is maintained, intact, in all copies and supporting
// documentation.
//
// Texas Instruments Incorporated provides this software "as is" without
// express or implied warranty.
//
// Created: MNF 06/13/89  Initial Design and Implementation
// Updated: LGO 08/09/89  Inherit from Generic
// Updated: MBN 09/07/89  Added conditional exception handling
// Updated: MBN 12/15/89  Sprinkled "const" qualifiers all over the place!
// Updated: DLS 03/22/91  New lite version
//

#ifndef @KWSYS_NAMESPACE@_RegularExpression_hxx
#define @KWSYS_NAMESPACE@_RegularExpression_hxx

#include <@KWSYS_NAMESPACE@/Configure.h>
#include <@KWSYS_NAMESPACE@/Configure.hxx>

#include <string>

/* Disable useless Borland warnings.  KWSys tries not to force things
   on its includers, but there is no choice here.  */
#if defined(__BORLANDC__)
#pragma warn - 8027 /* function not inlined.  */
#endif

namespace @KWSYS_NAMESPACE@ {

// Forward declaration
class RegularExpression;

/** \class RegularExpressionMatch
 * \brief Stores the pattern matches of a RegularExpression
 */
class @KWSYS_NAMESPACE@_EXPORT RegularExpressionMatch
{
public:
  RegularExpressionMatch();

  bool isValid() const;
  void clear();

  std::string::size_type start() const;
  std::string::size_type end() const;
  std::string::size_type start(int n) const;
  std::string::size_type end(int n) const;
  std::string match(int n) const;

  enum
  {
    NSUBEXP = 10
  };

private:
  friend class RegularExpression;
  const char* startp[NSUBEXP];
  const char* endp[NSUBEXP];
  const char* searchstring;
};

/**
 * \brief Creates an invalid match object
 */
inline RegularExpressionMatch::RegularExpressionMatch()
{
  startp[0] = 0;
  endp[0] = 0;
  searchstring = 0;
}

/**
 * \brief Returns true if the match pointers are valid
 */
inline bool RegularExpressionMatch::isValid() const
{
  return (this->startp[0] != 0);
}

/**
 * \brief Resets to the (invalid) construction state.
 */
inline void RegularExpressionMatch::clear()
{
  startp[0] = 0;
  endp[0] = 0;
  searchstring = 0;
}

/**
 * \brief Returns the start index of the full match.
 */
inline std::string::size_type RegularExpressionMatch::start() const
{
  return static_cast<std::string::size_type>(this->startp[0] - searchstring);
}

/**
 * \brief Returns the end index of the full match.
 */
inline std::string::size_type RegularExpressionMatch::end() const
{
  return static_cast<std::string::size_type>(this->endp[0] - searchstring);
}

/**
 * \brief Returns the start index of nth submatch.
 *        start(0) is the start of the full match.
 */
inline std::string::size_type RegularExpressionMatch::start(int n) const
{
  return static_cast<std::string::size_type>(this->startp[n] -
                                             this->searchstring);
}

/**
 * \brief Returns the end index of nth submatch.
 *        end(0) is the end of the full match.
 */
inline std::string::size_type RegularExpressionMatch::end(int n) const
{
  return static_cast<std::string::size_type>(this->endp[n] -
                                             this->searchstring);
}

/**
 * \brief Returns the nth submatch as a string.
 */
inline std::string RegularExpressionMatch::match(int n) const
{
  if (this->startp[n] == 0) {
    return std::string();
  } else {
    return std::string(this->startp[n], static_cast<std::string::size_type>(
                                          this->endp[n] - this->startp[n]));
  }
}

/** \class RegularExpression
 * \brief Implements pattern matching with regular expressions.
 *
 * This is the header file for the regular expression class.  An object of
 * this class contains a regular expression, in a special "compiled" format.
 * This compiled format consists of several slots all kept as the objects
 * private data.  The RegularExpression class provides a convenient way to
 * represent regular expressions.  It makes it easy to search for the same
 * regular expression in many different strings without having to compile a
 * string to regular expression format more than necessary.
 *
 * This class implements pattern matching via regular expressions.
 * A regular expression allows a programmer to specify  complex
 * patterns  that  can  be searched for and matched against the
 * character string of a string object. In its simplest form, a
 * regular  expression  is  a  sequence  of  characters used to
 * search for exact character matches. However, many times  the
 * exact  sequence to be found is not known, or only a match at
 * the beginning or end of a string is desired. The RegularExpression regu-
 * lar  expression  class implements regular expression pattern
 * matching as is found and implemented in many  UNIX  commands
 * and utilities.
 *
 * Example: The perl code
 *
 *    $filename =~ m"([a-z]+)\.cc";
 *    print $1;
 *
 * Is written as follows in C++
 *
 *    RegularExpression re("([a-z]+)\\.cc");
 *    re.find(filename);
 *    cerr << re.match(1);
 *
 *
 * The regular expression class provides a convenient mechanism
 * for  specifying  and  manipulating  regular expressions. The
 * regular expression object allows specification of such  pat-
 * terns  by using the following regular expression metacharac-
 * ters:
 *
 *  ^        Matches at beginning of a line
 *
 *  $        Matches at end of a line
 *
 * .         Matches any single character
 *
 * [ ]       Matches any character(s) inside the brackets
 *
 * [^ ]      Matches any character(s) not inside the brackets
 *
 *  -        Matches any character in range on either side of a dash
 *
 *  *        Matches preceding pattern zero or more times
 *
 *  +        Matches preceding pattern one or more times
 *
 *  ?        Matches preceding pattern zero or once only
 *
 * ()        Saves a matched expression and uses it in a later match
 *
 * Note that more than one of these metacharacters can be  used
 * in  a  single  regular expression in order to create complex
 * search patterns. For example, the pattern [^ab1-9]  says  to
 * match  any  character  sequence that does not begin with the
 * characters "ab"  followed  by  numbers  in  the  series  one
 * through nine.
 *
 * There are three constructors for RegularExpression.  One just creates an
 * empty RegularExpression object.  Another creates a RegularExpression
 * object and initializes it with a regular expression that is given in the
 * form of a char*.  The third takes a reference to a RegularExpression
 * object as an argument and creates an object initialized with the
 * information from the given RegularExpression object.
 *
 * The  find  member function  finds   the  first  occurrence   of  the regular
 * expression of that object in the string given to find as an argument.  Find
 * returns a boolean, and  if true,  mutates  the private  data appropriately.
 * Find sets pointers to the beginning and end of  the thing last  found, they
 * are pointers into the actual string  that was searched.   The start and end
 * member functions return indices  into the searched string that  correspond
 * to the beginning   and  end pointers  respectively.   The    compile member
 * function takes a char* and puts the  compiled version of the char* argument
 * into the object's private data fields.  The == and  != operators only check
 * the  to see  if   the compiled  regular  expression   is the same, and  the
 * deep_equal functions also checks  to see if the  start and end pointers are
 * the same.  The is_valid  function returns false if  program is set to NULL,
 * (i.e. there is no valid compiled exression).  The set_invalid function sets
 * the  program to NULL  (Warning: this deletes the compiled  expression). The
 * following examples may help clarify regular expression usage:
 *
 *   *  The regular expression  "^hello" matches  a "hello"  only at  the
 *      beginning of a  line.  It would match "hello  there" but not "hi,
 *      hello there".
 *
 *   *  The regular expression "long$" matches a  "long"  only at the end
 *      of a line. It would match "so long\0", but not "long ago".
 *
 *   *  The regular expression "t..t..g"  will match anything that  has a
 *      "t" then any two characters, another "t", any  two characters and
 *      then a "g".   It will match  "testing", or "test again" but would
 *      not match "toasting"
 *
 *   *  The regular  expression "[1-9ab]" matches any  number one through
 *      nine, and the characters  "a" and  "b".  It would match "hello 1"
 *      or "begin", but would not match "no-match".
 *
 *   *  The  regular expression "[^1-9ab]"  matches any character that is
 *      not a number one  through nine, or  an "a" or "b".   It would NOT
 *      match "hello 1" or "begin", but would match "no-match".
 *
 *   *  The regular expression "br* " matches  something that begins with
 *      a "b", is followed by zero or more "r"s, and ends in a space.  It
 *      would match "brrrrr ", and "b ", but would not match "brrh ".
 *
 *   *  The regular expression "br+ " matches something  that begins with
 *      a "b", is followed by one or more "r"s, and ends in  a space.  It
 *      would match "brrrrr ",  and  "br ", but would not  match "b  " or
 *      "brrh ".
 *
 *   *  The regular expression "br? " matches  something that begins with
 *      a "b", is followed by zero or one "r"s, and ends in  a space.  It
 *      would  match  "br ", and "b  ", but would not match  "brrrr "  or
 *      "brrh ".
 *
 *   *  The regular expression "(..p)b" matches  something ending with pb
 *      and beginning with whatever the two characters before the first p
 *      encounterd in the line were.  It would find  "repb" in "rep drepa
 *      qrepb".  The regular expression "(..p)a"  would find "repa qrepb"
 *      in "rep drepa qrepb"
 *
 *   *  The regular expression "d(..p)" matches something ending  with p,
 *      beginning with d, and having  two characters  in between that are
 *      the same as the two characters before  the first p  encounterd in
 *      the line.  It would match "drepa qrepb" in "rep drepa qrepb".
 *
 * All methods of RegularExpression can be called simultaneously from
 * different threads but only if each invocation uses an own instance of
 * RegularExpression.
 */
class @KWSYS_NAMESPACE@_EXPORT RegularExpression
{
public:
  /**
   * Instantiate RegularExpression with program=NULL.
   */
  inline RegularExpression();

  /**
   * Instantiate RegularExpression with compiled char*.
   */
  inline RegularExpression(char const*);

  /**
   * Instantiate RegularExpression as a copy of another regular expression.
   */
  RegularExpression(RegularExpression const&);

  /**
   * Instantiate RegularExpression with compiled string.
   */
  inline RegularExpression(std::string const&);

  /**
   * Destructor.
   */
  inline ~RegularExpression();

  /**
   * Compile a regular expression into internal code
   * for later pattern matching.
   */
  bool compile(char const*);

  /**
   * Compile a regular expression into internal code
   * for later pattern matching.
   */
  inline bool compile(std::string const&);

  /**
   * Matches the regular expression to the given string.
   * Returns true if found, and sets start and end indexes
   * in the RegularExpressionMatch instance accordingly.
   *
   * This method is thread safe when called with different
   * RegularExpressionMatch instances.
   */
  bool find(char const*, RegularExpressionMatch&) const;

  /**
   * Matches the regular expression to the given string.
   * Returns true if found, and sets start and end indexes accordingly.
   */
  inline bool find(char const*);

  /**
   * Matches the regular expression to the given std string.
   * Returns true if found, and sets start and end indexes accordingly.
   */
  inline bool find(std::string const&);

  /**
   * Match indices
   */
  inline RegularExpressionMatch const& regMatch() const;
  inline std::string::size_type start() const;
  inline std::string::size_type end() const;
  inline std::string::size_type start(int n) const;
  inline std::string::size_type end(int n) const;

  /**
   * Match strings
   */
  inline std::string match(int n) const;

  /**
   * Copy the given regular expression.
   */
  RegularExpression& operator=(const RegularExpression& rxp);

  /**
   * Returns true if two regular expressions have the same
   * compiled program for pattern matching.
   */
  bool operator==(RegularExpression const&) const;

  /**
   * Returns true if two regular expressions have different
   * compiled program for pattern matching.
   */
  inline bool operator!=(RegularExpression const&) const;

  /**
   * Returns true if have the same compiled regular expressions
   * and the same start and end pointers.
   */
  bool deep_equal(RegularExpression const&) const;

  /**
   * True if the compiled regexp is valid.
   */
  inline bool is_valid() const;

  /**
   * Marks the regular expression as invalid.
   */
  inline void set_invalid();

private:
  RegularExpressionMatch regmatch;
  char regstart;                  // Internal use only
  char reganch;                   // Internal use only
  const char* regmust;            // Internal use only
  std::string::size_type regmlen; // Internal use only
  char* program;
  int progsize;
};

/**
 * Create an empty regular expression.
 */
inline RegularExpression::RegularExpression()
{
  this->program = 0;
}

/**
 * Creates a regular expression from string s, and
 * compiles s.
 */
inline RegularExpression::RegularExpression(const char* s)
{
  this->program = 0;
  if (s) {
    this->compile(s);
  }
}

/**
 * Creates a regular expression from string s, and
 * compiles s.
 */
inline RegularExpression::RegularExpression(const std::string& s)
{
  this->program = 0;
  this->compile(s);
}

/**
 * Destroys and frees space allocated for the regular expression.
 */
inline RegularExpression::~RegularExpression()
{
  //#ifndef _WIN32
  delete[] this->program;
  //#endif
}

/**
 * Compile a regular expression into internal code
 * for later pattern matching.
 */
inline bool RegularExpression::compile(std::string const& s)
{
  return this->compile(s.c_str());
}

/**
 * Matches the regular expression to the given std string.
 * Returns true if found, and sets start and end indexes accordingly.
 */
inline bool RegularExpression::find(const char* s)
{
  return this->find(s, this->regmatch);
}

/**
 * Matches the regular expression to the given std string.
 * Returns true if found, and sets start and end indexes accordingly.
 */
inline bool RegularExpression::find(std::string const& s)
{
  return this->find(s.c_str());
}

/**
 * Returns the internal match object
 */
inline RegularExpressionMatch const& RegularExpression::regMatch() const
{
  return this->regmatch;
}

/**
 * Returns the start index of the full match.
 */
inline std::string::size_type RegularExpression::start() const
{
  return regmatch.start();
}

/**
 * Returns the end index of the full match.
 */
inline std::string::size_type RegularExpression::end() const
{
  return regmatch.end();
}

/**
 * Return start index of nth submatch. start(0) is the start of the full match.
 */
inline std::string::size_type RegularExpression::start(int n) const
{
  return regmatch.start(n);
}

/**
 * Return end index of nth submatch. end(0) is the end of the full match.
 */
inline std::string::size_type RegularExpression::end(int n) const
{
  return regmatch.end(n);
}

/**
 * Return nth submatch as a string.
 */
inline std::string RegularExpression::match(int n) const
{
  return regmatch.match(n);
}

/**
 * Returns true if two regular expressions have different
 * compiled program for pattern matching.
 */
inline bool RegularExpression::operator!=(const RegularExpression& r) const
{
  return (!(*this == r));
}

/**
 * Returns true if a valid regular expression is compiled
 * and ready for pattern matching.
 */
inline bool RegularExpression::is_valid() const
{
  return (this->program != 0);
}

inline void RegularExpression::set_invalid()
{
  //#ifndef _WIN32
  delete[] this->program;
  //#endif
  this->program = 0;
}

} // namespace @KWSYS_NAMESPACE@

#endif
back to top