https://github.com/google/cayley
Tip revision: 6fb42084bad95ecdb98a4bf9eb354c82558c7506 authored by 3pCode on 07 January 2019, 16:42:23 UTC
Merge pull request #1 from cayleygraph/master
Merge pull request #1 from cayleygraph/master
Tip revision: 6fb4208
regex.go
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
import (
"context"
"fmt"
"regexp"
"github.com/cayleygraph/cayley/graph"
"github.com/cayleygraph/cayley/quad"
)
// Compile-time check that *Regex satisfies graph.Iterator.
var _ graph.Iterator = (*Regex)(nil)
// Regex is a unary filter iterator: it wraps a single subiterator and only
// lets through the values whose string representation matches a regular
// expression. It is comparable to gremlin's filter{it.matches('exp')}.
//
// Only string-like values are tested by default; IRIs and BNodes are tested
// as well once AllowRefs(true) has been called.
type Regex struct {
	// uid is the identifier assigned by NextUID at construction time.
	uid uint64
	// tags holds the tags attached to this iterator.
	tags graph.Tagger
	// subIt is the iterator whose values are being filtered.
	subIt graph.Iterator
	// re is the pattern every candidate value is matched against.
	re *regexp.Regexp
	// qs resolves a graph.Value into its quad value before matching.
	qs graph.QuadStore
	// result is the most recent value accepted by Next or NextPath.
	result graph.Value
	// err is the last error copied from the subiterator.
	err error
	// allowRefs enables matching of quad.IRI and quad.BNode values.
	allowRefs bool
}
// NewRegex builds a Regex iterator that filters the values produced by sub,
// keeping those that match re. qs is used to resolve each value before it is
// tested. Matching of IRIs and BNodes stays disabled until AllowRefs(true)
// is called on the returned iterator.
func NewRegex(sub graph.Iterator, re *regexp.Regexp, qs graph.QuadStore) *Regex {
	it := new(Regex)
	it.uid = NextUID()
	it.subIt = sub
	it.re = re
	it.qs = qs
	return it
}
// AllowRefs controls whether the iterator also tests IRIs and BNodes against
// the regular expression (v == true) or ignores them (v == false).
//
// Use it with care. Regexp matching is slow and cannot be optimized, so in
// most cases it is better to revisit the graph structure than to rely on it.
//
// An example of incorrect usage is matching IRIs such as:
//	<http://example.org/page>
//	<http://example.org/page/foo>
// with a regexp like:
//	http://example.org/page.*
//
// The right approach is to link the graph nodes explicitly and query them
// through that relation:
//	<http://example.org/page/foo> <type> <http://example.org/page>
func (it *Regex) AllowRefs(v bool) {
	it.allowRefs = v
}
// testRegex reports whether val, once resolved through the quad store,
// matches the iterator's regular expression.
//
// A type switch is used so that only textual values are tested: plain and
// typed strings always, BNodes and IRIs only when allowRefs is set. Every
// other kind of value (numeric types, for instance) is rejected without
// being coerced to a string.
func (it *Regex) testRegex(val graph.Value) bool {
	var text string
	switch name := it.qs.NameOf(val).(type) {
	case quad.String:
		text = string(name)
	case quad.TypedString:
		text = string(name.Value)
	case quad.BNode:
		if !it.allowRefs {
			return false
		}
		text = string(name)
	case quad.IRI:
		if !it.allowRefs {
			return false
		}
		text = string(name)
	default:
		return false
	}
	return it.re.MatchString(text)
}
// UID returns the identifier assigned to this iterator when it was created.
func (it *Regex) UID() uint64 {
	return it.uid
}
// Close closes the wrapped subiterator and returns whatever error it reports.
func (it *Regex) Close() error {
	return it.subIt.Close()
}
// Reset resets the subiterator and clears the stored error and result.
func (it *Regex) Reset() {
	it.subIt.Reset()
	it.err, it.result = nil, nil
}
// Tagger returns a pointer to this iterator's tagger so that callers can
// attach tags to it.
func (it *Regex) Tagger() *graph.Tagger {
	return &it.tags
}
// Clone returns a new Regex iterator wrapping a clone of the subiterator.
// The clone shares the compiled pattern and quad store, and copies the tags
// and the AllowRefs setting, so it accepts the same values as the original.
func (it *Regex) Clone() graph.Iterator {
	out := NewRegex(it.subIt.Clone(), it.re, it.qs)
	// NewRegex leaves allowRefs at its zero value; carry the setting over so
	// the clone does not silently stop matching IRIs and BNodes.
	out.allowRefs = it.allowRefs
	out.tags.CopyFrom(it)
	return out
}
// Next advances the subiterator until it yields a value that matches the
// regular expression. On a match the value becomes the current result and
// Next returns true. When the subiterator is exhausted, its error (if any)
// is recorded and Next returns false.
func (it *Regex) Next(ctx context.Context) bool {
	for {
		if !it.subIt.Next(ctx) {
			it.err = it.subIt.Err()
			return false
		}
		if candidate := it.subIt.Result(); it.testRegex(candidate) {
			it.result = candidate
			return true
		}
	}
}
// Err returns the last error recorded from the subiterator, or nil.
func (it *Regex) Err() error {
	return it.err
}
// Result returns the value most recently stored as this iterator's result.
func (it *Regex) Result() graph.Value {
	return it.result
}
// NextPath advances the subiterator along its alternative paths until its
// current result matches the regular expression. On a match that result is
// stored and NextPath returns true. When the subiterator has no more paths,
// its error (if any) is recorded and NextPath returns false.
func (it *Regex) NextPath(ctx context.Context) bool {
	for it.subIt.NextPath(ctx) {
		if !it.testRegex(it.subIt.Result()) {
			continue
		}
		it.result = it.subIt.Result()
		return true
	}
	it.err = it.subIt.Err()
	return false
}
// SubIterators returns a one-element slice holding the wrapped subiterator.
func (it *Regex) SubIterators() []graph.Iterator {
	subs := []graph.Iterator{it.subIt}
	return subs
}
// Contains reports whether val both matches the regular expression and is
// contained in the subiterator.
//
// The regexp test runs first, so the subiterator is not consulted for values
// that fail it. On success val becomes the current result, so that Result
// and TagResults refer to the value that was just checked rather than to a
// stale one. If the subiterator rejects val, its error (if any) is recorded.
func (it *Regex) Contains(ctx context.Context, val graph.Value) bool {
	if !it.testRegex(val) {
		return false
	}
	if !it.subIt.Contains(ctx, val) {
		it.err = it.subIt.Err()
		return false
	}
	it.result = val
	return true
}
// Type returns the iterator type identifier, graph.Regex.
func (it *Regex) Type() graph.Type {
	return graph.Regex
}
// String returns a short description of the iterator in the form
// "Regexp(<pattern>)".
func (it *Regex) String() string {
	pattern := it.re.String()
	return fmt.Sprintf("Regexp(%v)", pattern)
}
// Optimize has nothing to optimize locally for a Regex iterator. It only
// asks the subiterator to optimize itself and, if that produced a
// replacement, closes the old subiterator and adopts the new one. The
// receiver itself is always returned, with false as the second result.
func (it *Regex) Optimize() (graph.Iterator, bool) {
	if optimized, replaced := it.subIt.Optimize(); replaced {
		// The error from Close is dropped here; Optimize has no error return.
		it.subIt.Close()
		it.subIt = optimized
	}
	return it, false
}
// Stats returns the subiterator's statistics unchanged: the filter is
// treated as being only as expensive as the iterator it wraps.
func (it *Regex) Stats() graph.IteratorStats {
	stats := it.subIt.Stats()
	return stats
}
// TagResults records the current result in dst under this iterator's tags,
// then lets the subiterator add its own tags to dst.
func (it *Regex) TagResults(dst map[string]graph.Value) {
	current := it.Result()
	it.tags.TagResult(dst, current)
	it.subIt.TagResults(dst)
}
// Size returns half of the subiterator's reported size. The second result is
// always false, and the subiterator's own second result is ignored.
func (it *Regex) Size() (int64, bool) {
	subSize, _ := it.subIt.Size()
	estimate := subSize / 2
	return estimate, false
}