https://github.com/mozilla/gecko-dev
Raw File
Tip revision: ed635421756a45e0fddd91f20f0cc7fa68aab395 authored by Mihai Tabara on 22 September 2020, 12:14:25 UTC
Bug 1666434 - remove all cron jobs because EOL.DONTBUILD CLOSED TREE r=bhearsum a=release
Tip revision: ed63542
RegExpAST.h
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80: */

// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef V8_REGEXP_AST_H_
#define V8_REGEXP_AST_H_

// Prevent msvc build failures as indicated in bug 1205328
#ifdef min
# undef min
#endif
#ifdef max
# undef max
#endif

#include "irregexp/RegExpEngine.h"

namespace js {
namespace irregexp {

class RegExpCompiler;
class RegExpNode;

class RegExpVisitor
{
  public:
    virtual ~RegExpVisitor() { }
#define MAKE_CASE(Name)                                         \
    virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
    FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
#undef MAKE_CASE
};

class RegExpTree
{
  public:
    static const int kInfinity = INT32_MAX;
    virtual ~RegExpTree() {}
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success) = 0;
    virtual bool IsTextElement() { return false; }
    virtual bool IsAnchoredAtStart() { return false; }
    virtual bool IsAnchoredAtEnd() { return false; }
    virtual int min_match() = 0;
    virtual int max_match() = 0;
    // Returns the interval of registers used for captures within this
    // expression.
    virtual Interval CaptureRegisters() { return Interval::Empty(); }
    virtual void AppendToText(RegExpText* text) {
        MOZ_CRASH("Bad call");
    }
#define MAKE_ASTYPE(Name)                                               \
    virtual RegExp##Name* As##Name();                                   \
    virtual bool Is##Name();
    FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
#undef MAKE_ASTYPE
};

typedef InfallibleVector<RegExpTree*, 1> RegExpTreeVector;

class RegExpDisjunction : public RegExpTree
{
  public:
    explicit RegExpDisjunction(RegExpTreeVector* alternatives);
    virtual void* Accept(RegExpVisitor* visitor, void* data);
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success) override;
    virtual RegExpDisjunction* AsDisjunction() override;
    virtual Interval CaptureRegisters() override;
    virtual bool IsDisjunction() override;
    virtual bool IsAnchoredAtStart() override;
    virtual bool IsAnchoredAtEnd() override;
    virtual int min_match() override { return min_match_; }
    virtual int max_match() override { return max_match_; }

    const RegExpTreeVector& alternatives() { return *alternatives_; }

  private:
    RegExpTreeVector* alternatives_;
    int min_match_;
    int max_match_;
};

class RegExpAlternative : public RegExpTree
{
  public:
    explicit RegExpAlternative(RegExpTreeVector* nodes);
    virtual void* Accept(RegExpVisitor* visitor, void* data);
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success) override;
    virtual RegExpAlternative* AsAlternative() override;
    virtual Interval CaptureRegisters() override;
    virtual bool IsAlternative() override;
    virtual bool IsAnchoredAtStart() override;
    virtual bool IsAnchoredAtEnd() override;
    virtual int min_match() override { return min_match_; }
    virtual int max_match() override { return max_match_; }

    const RegExpTreeVector& nodes() { return *nodes_; }

  private:
    RegExpTreeVector* nodes_;
    int min_match_;
    int max_match_;
};

class RegExpAssertion : public RegExpTree {
 public:
  enum AssertionType {
    START_OF_LINE,
    START_OF_INPUT,
    END_OF_LINE,
    END_OF_INPUT,
    BOUNDARY,
    NON_BOUNDARY,
    NOT_AFTER_LEAD_SURROGATE,
    NOT_IN_SURROGATE_PAIR
  };
  explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { }
  virtual void* Accept(RegExpVisitor* visitor, void* data);
  virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                             RegExpNode* on_success) override;
  virtual RegExpAssertion* AsAssertion() override;
  virtual bool IsAssertion() override;
  virtual bool IsAnchoredAtStart() override;
  virtual bool IsAnchoredAtEnd() override;
  virtual int min_match() override { return 0; }
  virtual int max_match() override { return 0; }
  AssertionType assertion_type() { return assertion_type_; }
 private:
  AssertionType assertion_type_;
};

class CharacterSet
{
  public:
    explicit CharacterSet(char16_t standard_set_type)
      : ranges_(nullptr),
        standard_set_type_(standard_set_type)
    {}
    explicit CharacterSet(CharacterRangeVector* ranges)
      : ranges_(ranges),
        standard_set_type_(0)
    {}

    CharacterRangeVector& ranges(LifoAlloc* alloc);
    char16_t standard_set_type() { return standard_set_type_; }
    void set_standard_set_type(char16_t special_set_type) {
        standard_set_type_ = special_set_type;
    }
    bool is_standard() { return standard_set_type_ != 0; }
    void Canonicalize();

  private:
    CharacterRangeVector* ranges_;

    // If non-zero, the value represents a standard set (e.g., all whitespace
    // characters) without having to expand the ranges.
    char16_t standard_set_type_;
};

class RegExpCharacterClass : public RegExpTree
{
  public:
    RegExpCharacterClass(CharacterRangeVector* ranges, bool is_negated)
      : set_(ranges),
        is_negated_(is_negated)
    {}

    explicit RegExpCharacterClass(char16_t type)
      : set_(type),
        is_negated_(false)
    {}

    virtual void* Accept(RegExpVisitor* visitor, void* data);
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success) override;
    virtual RegExpCharacterClass* AsCharacterClass() override;
    virtual bool IsCharacterClass() override;
    virtual bool IsTextElement() override { return true; }
    virtual int min_match() override { return 1; }
    virtual int max_match() override { return 1; }
    virtual void AppendToText(RegExpText* text) override;

    CharacterSet character_set() { return set_; }

    // TODO(lrn): Remove need for complex version if is_standard that
    // recognizes a mangled standard set and just do { return set_.is_special(); }
    bool is_standard(LifoAlloc* alloc);

    // Returns a value representing the standard character set if is_standard()
    // returns true.
    // Currently used values are:
    // s : unicode whitespace
    // S : unicode non-whitespace
    // w : ASCII word character (digit, letter, underscore)
    // W : non-ASCII word character
    // d : ASCII digit
    // D : non-ASCII digit
    // . : non-unicode non-newline
    // * : All characters
    char16_t standard_type() { return set_.standard_set_type(); }

    CharacterRangeVector& ranges(LifoAlloc* alloc) { return set_.ranges(alloc); }
    bool is_negated() { return is_negated_; }

  private:
    CharacterSet set_;
    bool is_negated_;
};

typedef InfallibleVector<char16_t, 10> CharacterVector;

class RegExpAtom : public RegExpTree
{
  public:
    explicit RegExpAtom(CharacterVector* data)
      : data_(data)
    {}

    virtual void* Accept(RegExpVisitor* visitor, void* data);
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success) override;
    virtual RegExpAtom* AsAtom() override;
    virtual bool IsAtom() override;
    virtual bool IsTextElement() override { return true; }
    virtual int min_match() override { return data_->length(); }
    virtual int max_match() override { return data_->length(); }
    virtual void AppendToText(RegExpText* text) override;

    const CharacterVector& data() { return *data_; }
    int length() { return data_->length(); }

  private:
    CharacterVector* data_;
};

class RegExpText : public RegExpTree
{
  public:
    explicit RegExpText(LifoAlloc* alloc)
      : elements_(*alloc), length_(0)
    {}

    virtual void* Accept(RegExpVisitor* visitor, void* data);
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success) override;
    virtual RegExpText* AsText() override;
    virtual bool IsText() override;
    virtual bool IsTextElement() override { return true; }
    virtual int min_match() override { return length_; }
    virtual int max_match() override { return length_; }
    void AppendToText(RegExpText* text) override;

    void AddElement(TextElement elm)  {
        elements_.append(elm);
        length_ += elm.length();
    }
    const TextElementVector& elements() { return elements_; }

  private:
    TextElementVector elements_;
    int length_;
};

class RegExpQuantifier : public RegExpTree
{
  public:
    enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
    RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body)
      : body_(body),
        min_(min),
        max_(max),
        min_match_(min * body->min_match()),
        quantifier_type_(type)
    {
        if (max > 0 && body->max_match() > kInfinity / max) {
            max_match_ = kInfinity;
        } else {
            max_match_ = max * body->max_match();
        }
    }

    virtual void* Accept(RegExpVisitor* visitor, void* data);
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success) override;
    static RegExpNode* ToNode(int min,
                              int max,
                              bool is_greedy,
                              RegExpTree* body,
                              RegExpCompiler* compiler,
                              RegExpNode* on_success,
                              bool not_at_start = false);
    virtual RegExpQuantifier* AsQuantifier() override;
    virtual Interval CaptureRegisters() override;
    virtual bool IsQuantifier() override;
    virtual int min_match() override { return min_match_; }
    virtual int max_match() override { return max_match_; }
    int min() { return min_; }
    int max() { return max_; }
    bool is_possessive() { return quantifier_type_ == POSSESSIVE; }
    bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; }
    bool is_greedy() { return quantifier_type_ == GREEDY; }
    RegExpTree* body() { return body_; }

  private:
    RegExpTree* body_;
    int min_;
    int max_;
    int min_match_;
    int max_match_;
    QuantifierType quantifier_type_;
};

class RegExpCapture : public RegExpTree
{
  public:
    explicit RegExpCapture(RegExpTree* body, int index)
      : body_(body), index_(index)
    {}

    virtual void* Accept(RegExpVisitor* visitor, void* data);
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success) override;
    static RegExpNode* ToNode(RegExpTree* body,
                              int index,
                              RegExpCompiler* compiler,
                              RegExpNode* on_success);
    virtual RegExpCapture* AsCapture() override;
    virtual bool IsAnchoredAtStart() override;
    virtual bool IsAnchoredAtEnd() override;
    virtual Interval CaptureRegisters() override;
    virtual bool IsCapture() override;
    virtual int min_match() override { return body_->min_match(); }
    virtual int max_match() override { return body_->max_match(); }
    RegExpTree* body() { return body_; }
    int index() { return index_; }
    static int StartRegister(int index) { return index * 2; }
    static int EndRegister(int index) { return index * 2 + 1; }

  private:
    RegExpTree* body_;
    int index_;
};

class RegExpLookahead : public RegExpTree
{
  public:
    RegExpLookahead(RegExpTree* body,
                    bool is_positive,
                    int capture_count,
                    int capture_from)
      : body_(body),
        is_positive_(is_positive),
        capture_count_(capture_count),
        capture_from_(capture_from)
    {}

    virtual void* Accept(RegExpVisitor* visitor, void* data);
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success) override;
    virtual RegExpLookahead* AsLookahead() override;
    virtual Interval CaptureRegisters() override;
    virtual bool IsLookahead() override;
    virtual bool IsAnchoredAtStart() override;
    virtual int min_match() override { return 0; }
    virtual int max_match() override { return 0; }
    RegExpTree* body() { return body_; }
    bool is_positive() { return is_positive_; }
    int capture_count() { return capture_count_; }
    int capture_from() { return capture_from_; }

  private:
    RegExpTree* body_;
    bool is_positive_;
    int capture_count_;
    int capture_from_;
};

typedef InfallibleVector<RegExpCapture*, 1> RegExpCaptureVector;

class RegExpBackReference : public RegExpTree
{
  public:
    explicit RegExpBackReference(RegExpCapture* capture)
      : capture_(capture)
    {}

    virtual void* Accept(RegExpVisitor* visitor, void* data);
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success) override;
    virtual RegExpBackReference* AsBackReference() override;
    virtual bool IsBackReference() override;
    virtual int min_match() override { return 0; }
    virtual int max_match() override { return capture_->max_match(); }
    int index() { return capture_->index(); }
    RegExpCapture* capture() { return capture_; }
  private:
    RegExpCapture* capture_;
};

class RegExpEmpty : public RegExpTree
{
  public:
    RegExpEmpty()
    {}

    virtual void* Accept(RegExpVisitor* visitor, void* data);
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success) override;
    virtual RegExpEmpty* AsEmpty() override;
    virtual bool IsEmpty() override;
    virtual int min_match() override { return 0; }
    virtual int max_match() override { return 0; }
    static RegExpEmpty* GetInstance() {
        static RegExpEmpty instance;
        return &instance;
    }
};

} } // namespace js::irregexp

#endif  // V8_REGEXP_AST_H_
back to top