Revision 2bbb310544293fd977c454eaf0ce48c995bcc095 authored by Denys Smirnov on 08 February 2019, 22:09:34 UTC, committed by Denys Smirnov on 08 February 2019, 22:09:34 UTC
1 parent 807b7b8
Raw File
optimizer.go
// Copyright 2017 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package sql

import (
	"fmt"
	"sort"
	"strings"

	"github.com/cayleygraph/cayley/graph/iterator"
	"github.com/cayleygraph/cayley/graph/shape"
	"github.com/cayleygraph/cayley/quad"
)

func NewOptimizer() *Optimizer {
	return &Optimizer{}
}

type Optimizer struct {
	tableInd int

	regexpOp             CmpOp
	noOffsetWithoutLimit bool // blame mysql
}

func (opt *Optimizer) SetRegexpOp(op CmpOp) {
	opt.regexpOp = op
}

func (opt *Optimizer) NoOffsetWithoutLimit() {
	opt.noOffsetWithoutLimit = true
}

func (opt *Optimizer) nextTable() string {
	opt.tableInd++
	return fmt.Sprintf("t_%d", opt.tableInd)
}

func (opt *Optimizer) ensureAliases(s *Select) {
	for i, src := range s.From {
		if t, ok := src.(Table); ok && t.Alias == "" {
			t.Alias = opt.nextTable()
			s.From[i] = t
			// TODO: copy slice
			for j := range s.Fields {
				f := &s.Fields[j]
				if f.Table == "" {
					f.Table = t.Alias
				}
			}
			for j := range s.Where {
				w := &s.Where[j]
				if w.Table == "" {
					w.Table = t.Alias
				}
			}
		}
	}
}

func sortDirs(dirs []quad.Direction) {
	sort.Slice(dirs, func(i, j int) bool {
		return dirs[i] < dirs[j]
	})
}

func (opt *Optimizer) OptimizeShape(s shape.Shape) (shape.Shape, bool) {
	switch s := s.(type) {
	case shape.AllNodes:
		return AllNodes(), true
	case shape.Lookup:
		return opt.optimizeLookup(s)
	case shape.Filter:
		return opt.optimizeFilters(s)
	case shape.Intersect:
		return opt.optimizeIntersect(s)
	case shape.Quads:
		return opt.optimizeQuads(s)
	case shape.NodesFrom:
		return opt.optimizeNodesFrom(s)
	case shape.QuadsAction:
		return opt.optimizeQuadsAction(s)
	case shape.Save:
		return opt.optimizeSave(s)
	case shape.Page:
		return opt.optimizePage(s)
	default:
		return s, false
	}
}

func selectValueQuery(v quad.Value, op CmpOp) ([]Where, []Value, bool) {
	if op == OpEqual {
		// we can use hash to check equality
		return []Where{
				{Field: "hash", Op: op, Value: Placeholder{}},
			}, []Value{
				HashOf(v),
			}, true
	}
	var (
		where  []Where
		params []Value
	)
	switch v := v.(type) {
	case quad.IRI:
		where = []Where{
			{Field: "value_string", Op: op, Value: Placeholder{}},
			{Field: "iri", Op: OpIsTrue},
		}
		params = []Value{
			StringVal(v),
		}
	case quad.BNode:
		where = []Where{
			{Field: "value_string", Op: op, Value: Placeholder{}},
			{Field: "bnode", Op: OpIsTrue},
		}
		params = []Value{
			StringVal(v),
		}
	case quad.String:
		where = []Where{
			{Field: "value_string", Op: op, Value: Placeholder{}},
			{Field: "iri", Op: OpIsNull},
			{Field: "bnode", Op: OpIsNull},
			{Field: "datatype", Op: OpIsNull},
			{Field: "language", Op: OpIsNull},
		}
		params = []Value{
			StringVal(v),
		}
	case quad.LangString:
		where = []Where{
			{Field: "value_string", Op: op, Value: Placeholder{}},
			{Field: "language", Op: OpEqual, Value: Placeholder{}},
		}
		params = []Value{
			StringVal(v.Value),
			StringVal(v.Lang),
		}
	case quad.TypedString:
		where = []Where{
			{Field: "value_string", Op: op, Value: Placeholder{}},
			{Field: "datatype", Op: OpEqual, Value: Placeholder{}},
		}
		params = []Value{
			StringVal(v.Value),
			StringVal(v.Type),
		}
	case quad.Int:
		where = []Where{
			{Field: "value_int", Op: op, Value: Placeholder{}},
		}
		params = []Value{
			IntVal(v),
		}
	case quad.Float:
		where = []Where{
			{Field: "value_float", Op: op, Value: Placeholder{}},
		}
		params = []Value{
			FloatVal(v),
		}
	case quad.Bool:
		where = []Where{
			{Field: "value_bool", Op: op, Value: Placeholder{}},
		}
		params = []Value{
			BoolVal(v),
		}
	case quad.Time:
		where = []Where{
			{Field: "value_time", Op: op, Value: Placeholder{}},
		}
		params = []Value{
			TimeVal(v),
		}
	default:
		return nil, nil, false
	}
	return where, params, true
}

func SelectValue(v quad.Value, op CmpOp) *Select {
	where, params, ok := selectValueQuery(v, op)
	if !ok {
		return nil
	}
	sel := Nodes(where, params)
	return &sel
}

func (opt *Optimizer) optimizeLookup(s shape.Lookup) (shape.Shape, bool) {
	if len(s) != 1 {
		// TODO: support for IN
		return s, false
	}
	sel := SelectValue(s[0], OpEqual)
	if sel == nil {
		return s, false
	}
	return *sel, true
}

func convRegexp(re string) string {
	return re // TODO: convert regular expression
}

func (opt *Optimizer) optimizeFilter(from shape.Shape, f shape.ValueFilter) ([]Where, []Value, bool) {
	switch f := f.(type) {
	case shape.Comparison:
		var cmp CmpOp
		switch f.Op {
		case iterator.CompareGT:
			cmp = OpGT
		case iterator.CompareGTE:
			cmp = OpGTE
		case iterator.CompareLT:
			cmp = OpLT
		case iterator.CompareLTE:
			cmp = OpLTE
		default:
			return nil, nil, false
		}
		return selectValueQuery(f.Val, cmp)
	case shape.Wildcard:
		if opt.regexpOp == "" {
			return nil, nil, false
		}
		return []Where{
				{Field: "value_string", Op: opt.regexpOp, Value: Placeholder{}},
			}, []Value{
				StringVal(convRegexp(f.Regexp())),
			}, true
	case shape.Regexp:
		if opt.regexpOp == "" {
			return nil, nil, false
		}
		where := []Where{
			{Field: "value_string", Op: opt.regexpOp, Value: Placeholder{}},
		}
		if !f.Refs {
			where = append(where, []Where{
				{Field: "iri", Op: OpIsNull},
				{Field: "bnode", Op: OpIsNull},
			}...)
		}
		return where, []Value{
			StringVal(convRegexp(f.Re.String())),
		}, true
	default:
		return nil, nil, false
	}
}
func (opt *Optimizer) optimizeFilters(s shape.Filter) (shape.Shape, bool) {
	switch from := s.From.(type) {
	case shape.AllNodes:
	case Select:
		if !from.isAll() {
			return s, false
		}
		t, ok := from.From[0].(Table)
		if !ok || t.Name != "nodes" {
			return s, false
		}
	default:
		return s, false
	}
	var (
		where  []Where
		params []Value
	)
	left := shape.Filter{
		From: s.From,
	}
	for _, f := range s.Filters {
		if w, p, ok := opt.optimizeFilter(s.From, f); ok {
			where = append(where, w...)
			params = append(params, p...)
		} else {
			left.Filters = append(left.Filters, f)
		}
	}
	if len(where) == 0 {
		return s, false
	}
	sel := Nodes(where, params)
	if len(left.Filters) == 0 {
		return sel, true
	}
	left.From = sel
	return left, true
}

func (opt *Optimizer) optimizeQuads(s shape.Quads) (shape.Shape, bool) {
	t1 := opt.nextTable()
	sel := AllQuads(t1)
	for _, f := range s {
		wr := Where{
			Table: t1,
			Field: dirField(f.Dir),
			Op:    OpEqual,
		}
		switch fv := f.Values.(type) {
		case shape.Fixed:
			if len(fv) != 1 {
				// TODO: support IN, or generate SELECT equivalent
				return s, false
			}
			wr.Value = sel.AppendParam(fv[0].(Value))
			sel.Where = append(sel.Where, wr)
		case Select:
			if len(fv.Fields) == 1 {
				// simple case - just add subquery to FROM
				tbl := opt.nextTable()
				sel.From = append(sel.From, Subquery{
					Query: fv,
					Alias: tbl,
				})
				wr.Value = FieldName{
					Name:  fv.Fields[0].NameOrAlias(),
					Table: tbl,
				}
				sel.Where = append(sel.Where, wr)
				continue
			} else if fv.onlyAsSubquery() {
				// TODO: generic subquery: pass all tags to main query, set WHERE on specific direction, drop __* tags
				return s, false
			}
			opt.ensureAliases(&fv)
			// add all tables from subquery to the main one, but skip __node field - we should add it to WHERE
			var head Field
			for _, f := range fv.Fields {
				if f.Alias == tagNode {
					for _, w := range fv.Where {
						if w.Table == f.Table && w.Field == f.Alias {
							// TODO: if __node was used in WHERE of subquery, we should rewrite it
							return s, false
						}
					}
					f.Alias = ""
					head = f
					continue
				}
				sel.Fields = append(sel.Fields, f)
			}
			if head.Table == "" {
				// something is wrong
				return s, false
			}
			sel.From = append(sel.From, fv.From...)
			sel.Where = append(sel.Where, fv.Where...)
			sel.Params = append(sel.Params, fv.Params...)
			wr.Value = FieldName{
				Name:  head.Name,
				Table: head.Table,
			}
			sel.Where = append(sel.Where, wr)
		default:
			return s, false
		}
	}
	return sel, true
}

func (opt *Optimizer) optimizeNodesFrom(s shape.NodesFrom) (shape.Shape, bool) {
	sel, ok := s.Quads.(Select)
	if !ok {
		return s, false
	}
	sel.Fields = append([]Field{}, sel.Fields...)

	// all we need is to remove all quad-related tags and preserve one with matching direction
	dir := dirTag(s.Dir)
	found := false
	for i := 0; i < len(sel.Fields); i++ {
		f := &sel.Fields[i]
		if f.Alias == dir {
			f.Alias = tagNode
			found = true
		} else if strings.HasPrefix(f.Alias, tagPref) {
			sel.Fields = append(sel.Fields[:i], sel.Fields[i+1:]...)
			i--
		}
	}
	if !found {
		return s, false
	}
	return sel, true
}

func (opt *Optimizer) optimizeQuadsAction(s shape.QuadsAction) (shape.Shape, bool) {
	sel := Select{
		Fields: []Field{
			{Name: dirField(s.Result), Alias: tagNode},
		},
		From: []Source{
			Table{Name: "quads"},
		},
	}
	var dirs []quad.Direction
	for d := range s.Save {
		dirs = append(dirs, d)
	}
	sortDirs(dirs)
	for _, d := range dirs {
		for _, t := range s.Save[d] {
			sel.Fields = append(sel.Fields, Field{
				Name: dirField(d), Alias: t,
			})
		}
	}
	dirs = nil
	for d := range s.Filter {
		dirs = append(dirs, d)
	}
	sortDirs(dirs)
	for _, d := range dirs {
		v := s.Filter[d]
		sel.WhereEq("", dirField(d), v.(Value))
	}
	return sel, true
}

func (opt *Optimizer) optimizeSave(s shape.Save) (shape.Shape, bool) {
	sel, ok := s.From.(Select)
	if !ok {
		return s, false
	}
	// find primary value used by iterators
	fi := -1
	for i, f := range sel.Fields {
		if f.Alias == tagNode {
			fi = i
			break
		}
	}
	if fi < 0 {
		return s, false
	}
	// add SELECT fields as aliases for primary field
	f := sel.Fields[fi]
	fields := make([]Field, 0, len(s.Tags)+len(sel.Fields))
	for _, tag := range s.Tags {
		f.Alias = tag
		fields = append(fields, f)
	}
	// add other fields
	fields = append(fields, sel.Fields...)
	sel.Fields = fields
	return sel, true
}

func (opt *Optimizer) optimizePage(s shape.Page) (shape.Shape, bool) {
	sel, ok := s.From.(Select)
	if !ok {
		return s, false
	}
	// do not optimize if db only can use offset with limit, and we have no limits set
	if opt.noOffsetWithoutLimit && sel.Limit == 0 && s.Limit == 0 {
		return s, false
	}
	// call shapes optimizer to calculate correct skip and limit
	p := shape.Page{
		Skip:  sel.Offset,
		Limit: sel.Limit,
	}.ApplyPage(s)
	if p == nil {
		// no intersection - no results
		return nil, true
	}
	sel.Limit = p.Limit
	sel.Offset = p.Skip
	return sel, true
}

func (opt *Optimizer) optimizeIntersect(s shape.Intersect) (shape.Shape, bool) {
	var (
		sels  []Select
		other shape.Intersect
	)
	// we will add our merged Select to this slot
	other = append(other, nil)
	for _, sub := range s {
		// TODO: sort by onlySubquery flag first
		if sel, ok := sub.(Select); ok && !sel.onlyAsSubquery() {
			sels = append(sels, sel)
		} else {
			other = append(other, sub)
		}
	}
	if len(sels) <= 1 {
		return s, false
	}
	for i := range sels {
		sels[i] = sels[i].Clone()
		opt.ensureAliases(&sels[i])
	}
	pri := sels[0]
	var head *Field
	for i, f := range pri.Fields {
		if f.Alias == tagNode {
			head = &pri.Fields[i]
			break
		}
	}
	if head == nil {
		return s, false
	}
	sec := sels[1:]

	for _, s2 := range sec {
		// merge From, Where and Params
		pri.From = append(pri.From, s2.From...)
		pri.Where = append(pri.Where, s2.Where...)
		pri.Params = append(pri.Params, s2.Params...)
		// also find and remove primary tag, but add the same field to WHERE
		ok := false
		for _, f := range s2.Fields {
			if f.Alias == tagNode {
				ok = true
				pri.Where = append(pri.Where, Where{
					Table: head.Table,
					Field: head.Name,
					Op:    OpEqual,
					Value: FieldName{
						Table: f.Table,
						Name:  f.Name,
					},
				})
			} else {
				pri.Fields = append(pri.Fields, f)
			}
		}
		if !ok {
			return s, false
		}
	}
	if len(other) == 1 {
		return pri, true
	}
	other[0] = pri
	return other, true
}
back to top