https://github.com/postgres/postgres
Revision 861670369093b0bd5ddae336aed6e1e3c4ce8a13 authored by Andrew Gierth on 28 August 2018, 08:52:25 UTC, committed by Andrew Gierth on 28 August 2018, 10:48:43 UTC
regexp_matches, regexp_split_to_table and regexp_split_to_array all
work by compiling a list of match positions as character offsets (NOT
byte positions) in the source string.

Formerly, they then used text_substr to extract the matched text; but
in a multi-byte encoding, that counts the characters in the string,
and the characters needed to reach the starting byte position, on
every call. Accordingly, the performance degraded as the product of
the input string length and the number of match positions, such that
splitting a string of a few hundred kbytes could take many minutes.

Repair by keeping the wide-character copy of the input string
available (only in the case where encoding_max_length is not 1) after
performing the match operation, and extracting substrings from that
instead. This reduces the complexity to being linear in the number of
result bytes, discounting the actual regexp match itself (which is not
affected by this patch).

In passing, remove cleanup using retail pfree() which was obsoleted by
commit ff428cded (Feb 2008) which made cleanup of SRF multi-call
contexts automatic. Also increase (to ~134 million) the maximum number
of matches and provide an error message when it is reached.

Backpatch all the way because this has been wrong forever.

Analysis and patch by me; review by Kaiting Chen.

Discussion: https://postgr.es/m/87pnyn55qh.fsf@news-spur.riddles.org.uk

see also https://postgr.es/m/87lg996g4r.fsf@news-spur.riddles.org.uk
1 parent 23f21e0
Raw File
Tip revision: 861670369093b0bd5ddae336aed6e1e3c4ce8a13 authored by Andrew Gierth on 28 August 2018, 08:52:25 UTC
Avoid quadratic slowdown in regexp match/split functions.
Tip revision: 8616703
GNUmakefile.in
#
# PostgreSQL top level makefile
#
# GNUmakefile.in
#

subdir =
top_builddir = .
include $(top_builddir)/src/Makefile.global

$(call recurse,all install,src config)

all:
	+@echo "All of PostgreSQL successfully made. Ready to install."

docs:
	$(MAKE) -C doc all

$(call recurse,world,doc src config contrib,all)
world:
	+@echo "PostgreSQL, contrib, and documentation successfully made. Ready to install."

# build src/ before contrib/
world-contrib-recurse: world-src-recurse

html man:
	$(MAKE) -C doc $@

install:
	+@echo "PostgreSQL installation complete."

install-docs:
	$(MAKE) -C doc install

$(call recurse,install-world,doc src config contrib,install)
install-world:
	+@echo "PostgreSQL, contrib, and documentation installation complete."

# build src/ before contrib/
install-world-contrib-recurse: install-world-src-recurse

$(call recurse,installdirs uninstall coverage init-po update-po,doc src config)

$(call recurse,distprep,doc src config contrib)

# clean, distclean, etc should apply to contrib too, even though
# it's not built by default
$(call recurse,clean,doc contrib src config)
clean:
# Garbage from autoconf:
	@rm -rf autom4te.cache/

# Important: distclean `src' last, otherwise Makefile.global
# will be gone too soon.
distclean maintainer-clean:
	$(MAKE) -C doc $@
	$(MAKE) -C contrib $@
	$(MAKE) -C config $@
	$(MAKE) -C src $@
	rm -f config.cache config.log config.status GNUmakefile
# Garbage from autoconf:
	@rm -rf autom4te.cache/

check: all

check installcheck installcheck-parallel:
	$(MAKE) -C src/test/regress $@

$(call recurse,check-world,src/test src/pl src/interfaces/ecpg contrib,check)

$(call recurse,installcheck-world,src/test src/pl src/interfaces/ecpg contrib,installcheck)

$(call recurse,maintainer-check,doc src config contrib)

GNUmakefile: GNUmakefile.in $(top_builddir)/config.status
	./config.status $@


##########################################################################

distdir	= postgresql-$(VERSION)
dummy	= =install=
garbage = =*  "#"*  ."#"*  *~*  *.orig  *.rej  core  postgresql-*

dist: $(distdir).tar.gz $(distdir).tar.bz2
	rm -rf $(distdir)

$(distdir).tar: distdir
	$(TAR) chf $@ $(distdir)

.INTERMEDIATE: $(distdir).tar

distdir-location:
	@echo $(distdir)

distdir:
	rm -rf $(distdir)* $(dummy)
	for x in `cd $(top_srcdir) && find . \( -name CVS -prune \) -o \( -name .git -prune \) -o -print`; do \
	  file=`expr X$$x : 'X\./\(.*\)'`; \
	  if test -d "$(top_srcdir)/$$file" ; then \
	    mkdir "$(distdir)/$$file" && chmod 777 "$(distdir)/$$file";	\
	  else \
	    ln "$(top_srcdir)/$$file" "$(distdir)/$$file" >/dev/null 2>&1 \
	      || cp "$(top_srcdir)/$$file" "$(distdir)/$$file"; \
	  fi || exit; \
	done
	$(MAKE) -C $(distdir) distprep
	$(MAKE) -C $(distdir)/doc/src/sgml/ INSTALL
	cp $(distdir)/doc/src/sgml/INSTALL $(distdir)/
	$(MAKE) -C $(distdir) distclean
	rm -f $(distdir)/README.git

distcheck: dist
	rm -rf $(dummy)
	mkdir $(dummy)
	$(GZIP) -d -c $(distdir).tar.gz | $(TAR) xf -
	install_prefix=`cd $(dummy) && pwd`; \
	cd $(distdir) \
	&& ./configure --prefix="$$install_prefix"
	$(MAKE) -C $(distdir) -q distprep
	$(MAKE) -C $(distdir)
	$(MAKE) -C $(distdir) install
	$(MAKE) -C $(distdir) uninstall
	@echo "checking whether \`$(MAKE) uninstall' works"
	test `find $(dummy) ! -type d | wc -l` -eq 0
	$(MAKE) -C $(distdir) dist
# Room for improvement: Check here whether this distribution tarball
# is sufficiently similar to the original one.
	rm -rf $(distdir) $(dummy)
	@echo "Distribution integrity checks out."

.PHONY: dist distdir distcheck docs install-docs world check-world install-world installcheck-world
back to top