Raw File
search.jl
# This file is a part of Julia. License is MIT: http://julialang.org/license

# some test strings
# astr:  ASCII-only text that ends with a newline.
# u8str: text mixing ASCII with non-ASCII characters (∀, ε, ∃, δ, ⇒); the tests
#        below rely on some of its indices not being valid character starts.
astr = "Hello, world.\n"
u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"

# Every search variant must raise BoundsError for a start index of 0 or 5 when
# the haystack is "foo" and the needle is the empty SubString("",1,1).
# I think these should give error on 4 also, and "" is not treated
# consistently with SubString("",1,1), nor with Char[]
for pos in (0, 5), finder in (search, rsearch, searchindex, rsearchindex)
    @test_throws BoundsError finder("foo", SubString("",1,1), pos)
end

# Bounds checks with an empty Char[] as the second argument.
# Note: the commented out tests will be enabled after fixes to make
# sure that search/rsearch/searchindex/rsearchindex are consistent
# no matter what type of AbstractString the second argument is
for pos in (0, 5)
    @test_throws BoundsError search("foo", Char[], pos)
end
# @test_throws BoundsError rsearch("foo", Char[], 0)
@test_throws BoundsError rsearch("foo", Char[], 5)

# @test_throws BoundsError searchindex("foo", Char[], 0)
# @test_throws BoundsError searchindex("foo", Char[], 5)
# @test_throws BoundsError rsearchindex("foo", Char[], 0)
# @test_throws BoundsError rsearchindex("foo", Char[], 5)

# @test_throws ErrorException in("foobar","bar")
# Byte-array haystacks: start index 0 is rejected by search, while rsearchindex
# with start index 0 reports "not found" (0).
@test_throws BoundsError search(b"\x1\x2",0x1,0)
@test 0 == rsearchindex(b"foo",b"o",0)
@test 1 == rsearchindex(SubString("",1,1),SubString("",1,1))

# Searching a byte array for a Char, forwards and backwards.
for (bytes, ch, fwd, bwd) in ((b"foo", 'o', 2, 3), (b"foó", 'ó', 3, 3))
    @test search(bytes, ch) == fwd
    @test rsearch(bytes, ch) == bwd
end

# ascii search
# Runs the same Char-search expectations against astr both as a plain string
# and wrapped in GenericString.
for str in [astr, GenericString(astr)]
    beyond = nextind(str,endof(str))+1

    # start index 0, or one past nextind(str,endof(str)), must be rejected
    for ch in ('z', '∀')
        @test_throws BoundsError search(str, ch, 0)
    end
    for ch in ('ε', 'a')
        @test_throws BoundsError search(str, ch, beyond)
    end

    # characters that do not occur in the string
    for ch in ('x', '\0', '\u80', '∀')
        @test search(str, ch) == 0
    end

    # (arguments following the haystack, expected index)
    for (args, expected) in (
            (('H',), 1),
            (('l',), 3),
            (('l', 4), 4),
            (('l', 5), 11),
            (('l', 12), 0),
            ((',',), 6),
            ((',', 7), 0),
            (('\n',), 14),
            (('\n', 15), 0))
        @test search(str, args...) == expected
    end
end

# ascii rsearch
# Backward Char search over astr; an optional third argument is the start index.
for str in [astr]
    # characters that do not occur in the string
    for ch in ('x', '\0', '\u80', '∀')
        @test rsearch(str, ch) == 0
    end

    # (arguments following the haystack, expected index)
    for (args, expected) in (
            (('H',), 1),
            (('H', 0), 0),
            (('l',), 11),
            (('l', 5), 4),
            (('l', 4), 4),
            (('l', 3), 3),
            (('l', 2), 0),
            ((',',), 6),
            ((',', 5), 0),
            (('\n',), 14))
        @test rsearch(str, args...) == expected
    end
end

# utf-8 search
# Runs the same Char-search expectations against u8str both as a plain string
# and wrapped in GenericString.
for str in (u8str, GenericString(u8str))
    after_last = nextind(str,endof(str))

    # start index 0 is rejected
    for ch in ('z', '∀')
        @test_throws BoundsError search(str, ch, 0)
    end
    # starting at nextind(str,endof(str)) finds nothing; one further is rejected
    for ch in ('ε', 'a')
        @test search(str, ch, after_last) == 0
        @test_throws BoundsError search(str, ch, after_last+1)
    end
    # these start indices are expected to raise UnicodeError
    for (ch, pos) in (('∀', 2), ('∃', 15), ('δ', 18))
        @test_throws UnicodeError search(str, ch, pos)
    end

    # characters that do not occur in the string
    for ch in ('z', '\0', '\u80', '∄')
        @test search(str, ch) == 0
    end

    # (arguments following the haystack, expected index)
    for (args, expected) in (
            (('∀',), 1),
            (('∀', 4), 0),
            (('∃',), 13),
            (('∃', 16), 0),
            (('x',), 26),
            (('x', 27), 43),
            (('x', 44), 0),
            (('δ',), 17),
            (('δ', nextind(str,17)), 33),
            (('δ', nextind(str,33)), 0),
            (('ε',), 5),
            (('ε', nextind(str,5)), 54),
            (('ε', nextind(str,54)), 0))
        @test search(str, args...) == expected
    end
end

# utf-8 rsearch
# Backward Char search over u8str; an optional third argument is the start index.
for str in [u8str]
    # characters that do not occur in the string
    for ch in ('z', '\0', '\u80', '∄')
        @test rsearch(str, ch) == 0
    end

    # (arguments following the haystack, expected index)
    for (args, expected) in (
            (('∀',), 1),
            (('∀', 0), 0),
            (('∃',), 13),
            (('∃', 14), 13),
            (('∃', 13), 13),
            (('∃', 12), 0),
            (('x',), 43),
            (('x', 42), 26),
            (('x', 25), 0),
            (('δ',), 33),
            (('δ', 32), 17),
            (('δ', 16), 0),
            (('ε',), 54),
            (('ε', 53), 5),
            (('ε', 4), 0))
        @test rsearch(str, args...) == expected
    end
end

# string search with a single-char string
# Each case is (arguments following the haystack, expected range); 0:-1 is the
# result expected when nothing is found from the given start.
for (args, expected) in (
        (("x",), 0:-1),
        (("H",), 1:1),
        (("H", 2), 0:-1),
        (("l",), 3:3),
        (("l", 4), 4:4),
        (("l", 5), 11:11),
        (("l", 12), 0:-1),
        (("\n",), 14:14),
        (("\n", 15), 0:-1))
    @test search(astr, args...) == expected
end

for (args, expected) in (
        (("z",), 0:-1),
        (("∄",), 0:-1),
        (("∀",), 1:1),
        (("∀", 4), 0:-1),
        (("∃",), 13:13),
        (("∃", 16), 0:-1),
        (("x",), 26:26),
        (("x", 27), 43:43),
        (("x", 44), 0:-1),
        (("ε",), 5:5),
        (("ε", 7), 54:54),
        (("ε", 56), 0:-1))
    @test search(u8str, args...) == expected
end

# string rsearch with a single-char string
# Each case is (arguments following the haystack, expected range); 0:-1 is the
# result expected when nothing is found from the given start.
for (args, expected) in (
        (("x",), 0:-1),
        (("H",), 1:1),
        (("H", 2), 1:1),
        (("H", 0), 0:-1),
        (("l",), 11:11),
        (("l", 10), 4:4),
        (("l", 4), 4:4),
        (("l", 3), 3:3),
        (("l", 2), 0:-1),
        (("\n",), 14:14),
        (("\n", 13), 0:-1))
    @test rsearch(astr, args...) == expected
end

for (args, expected) in (
        (("z",), 0:-1),
        (("∄",), 0:-1),
        (("∀",), 1:1),
        (("∀", 0), 0:-1),
        (("∃",), 13:13),
        (("∃", 12), 0:-1),
        (("x",), 43:43),
        (("x", 42), 26:26),
        (("x", 25), 0:-1),
        (("ε",), 54:54),
        (("ε", 53), 5:5),
        (("ε", 4), 0:-1))
    @test rsearch(u8str, args...) == expected
end
#TODO: setting the limit in the middle of a wide char
#      makes search fail but rsearch succeed.
#      Should rsearch fail as well?
#@test rsearch(u8str, "∀", 2) == 0:-1 # gives 1:3

# string search with a single-char regex
# Each case is (arguments following the haystack, expected range); 0:-1 is the
# result expected when nothing is found from the given start.
for (args, expected) in (
        ((r"x",), 0:-1),
        ((r"H",), 1:1),
        ((r"H", 2), 0:-1),
        ((r"l",), 3:3),
        ((r"l", 4), 4:4),
        ((r"l", 5), 11:11),
        ((r"l", 12), 0:-1),
        ((r"\n",), 14:14),
        ((r"\n", 15), 0:-1))
    @test search(astr, args...) == expected
end
for (args, expected) in (
        ((r"z",), 0:-1),
        ((r"∄",), 0:-1),
        ((r"∀",), 1:1),
        ((r"∀", 4), 0:-1),
        ((r"∃",), 13:13),
        ((r"∃", 16), 0:-1),
        ((r"x",), 26:26),
        ((r"x", 27), 43:43),
        ((r"x", 44), 0:-1),
        ((r"ε",), 5:5),
        ((r"ε", 7), 54:54),
        ((r"ε", 56), 0:-1))
    @test search(u8str, args...) == expected
end
# the pattern written as r"∀" and as r"\u2200" must give the same result,
# both without a start index and starting at 4
for from in ((), (4,))
    @test search(u8str, r"∀", from...) == search(u8str, r"\u2200", from...)
end
# r"."s started at index i must match exactly the one character at i.
for pos in 1:endof(astr)
    @test search(astr, r"."s, pos) == pos:pos
end
for pos in 1:endof(u8str)
    # only indices that isvalid accepts are exercised for u8str
    isvalid(u8str,pos) || continue
    @test search(u8str, r"."s, pos) == pos:pos
end

# string search with a zero-char string
# An empty needle is expected to match as the empty range pos:pos-1 at every
# start index from 1 to endof of the haystack.
for haystack in (astr, u8str)
    for pos in 1:endof(haystack)
        @test search(haystack, "", pos) == pos:pos-1
    end
end
@test 1:0 == search("", "")

# string rsearch with a zero-char string
# An empty needle is expected to match as the empty range pos:pos-1 at every
# start index from 1 to endof of the haystack.
for haystack in (astr, u8str)
    for pos in 1:endof(haystack)
        @test rsearch(haystack, "", pos) == pos:pos-1
    end
end
@test 1:0 == rsearch("", "")

# string search with a zero-char regex
# An empty pattern is expected to match as the empty range pos:pos-1.
for pos in 1:endof(astr)
    @test search(astr, r"", pos) == pos:pos-1
end
for pos in 1:endof(u8str)
    # TODO: should regex search fast-forward invalid indices?
    isvalid(u8str,pos) || continue
    @test search(u8str, r""s, pos) == pos:pos-1
end

# string search with a two-char string literal
# Each case is (arguments following the haystack "foo,bar,baz", expected range).
for (args, expected) in (
        (("xx",), 0:-1),
        (("fo",), 1:2),
        (("fo", 3), 0:-1),
        (("oo",), 2:3),
        (("oo", 4), 0:-1),
        (("o,",), 3:4),
        (("o,", 5), 0:-1),
        ((",b",), 4:5),
        ((",b", 6), 8:9),
        ((",b", 10), 0:-1),
        (("az",), 10:11),
        (("az", 12), 0:-1))
    @test search("foo,bar,baz", args...) == expected
end

# issue #9365
# string search with a two-char UTF-8 string literal, using characters written
# with 2, 3 and 4 bytes; the haystack is first three and then two copies of the
# character. Each case is checked with and without an explicit start index of 1.
for (haystack, needle, hit) in (
        ("ééé", "éé", 1:3),
        ("€€€", "€€", 1:4),
        ("\U1f596\U1f596\U1f596", "\U1f596\U1f596", 1:5),
        ("éé", "éé", 1:3),
        ("€€", "€€", 1:4),
        ("\U1f596\U1f596", "\U1f596\U1f596", 1:5))
    @test search(haystack, needle) == hit
    @test search(haystack, needle, 1) == hit
end

# string rsearch with a two-char UTF-8 string literal, using characters written
# with 2, 3 and 4 bytes; the haystack is three copies and then two copies of the
# character.
# The expectations for the two-character haystacks record what is currently
# returned: they should really be 1:4, 1:6 and 1:8 respectively!
# NOTE(review): the explicit start index for the two-character haystacks is
# endof of the three-character string, exactly as in the original assertions —
# confirm this is intentional.
for (triple, pair, triple_hit, pair_hit) in (
        ("ééé", "éé", 3:5, 1:3),
        ("€€€", "€€", 4:7, 1:4),
        ("\U1f596\U1f596\U1f596", "\U1f596\U1f596", 5:9, 1:5))
    @test rsearch(triple, pair) == triple_hit
    @test rsearch(triple, pair, endof(triple)) == triple_hit
    @test rsearch(pair, pair) == pair_hit
    @test rsearch(pair, pair, endof(triple)) == pair_hit
end

# string rsearch with a two-char string literal
# Each case is (arguments following the haystack "foo,bar,baz", expected range).
for (args, expected) in (
        (("xx",), 0:-1),
        (("fo",), 1:2),
        (("fo", 1), 0:-1),
        (("oo",), 2:3),
        (("oo", 2), 0:-1),
        (("o,",), 3:4),
        (("o,", 1), 0:-1),
        ((",b",), 8:9),
        ((",b", 6), 4:5),
        ((",b", 3), 0:-1),
        (("az",), 10:11),
        (("az", 10), 0:-1))
    @test rsearch("foo,bar,baz", args...) == expected
end

# array rsearch
# Backward search for the byte sequence [2,3] in [1,2,3] from two start indices.
for (upto, expected) in ((3, 2:3), (1, 0:-1))
    @test rsearch(UInt8[1,2,3],UInt8[2,3],upto) == expected
end

# string search with a two-char regex
# Each case is (arguments following the haystack "foo,bar,baz", expected range).
for (args, expected) in (
        ((r"xx",), 0:-1),
        ((r"fo",), 1:2),
        ((r"fo", 3), 0:-1),
        ((r"oo",), 2:3),
        ((r"oo", 4), 0:-1),
        ((r"o,",), 3:4),
        ((r"o,", 5), 0:-1),
        ((r",b",), 4:5),
        ((r",b", 6), 8:9),
        ((r",b", 10), 0:-1),
        ((r"az",), 10:11),
        ((r"az", 12), 0:-1))
    @test search("foo,bar,baz", args...) == expected
end

# searchindex with a Char needle, without and with an explicit start index
for (args, expected) in ((('o',), 2), (('o', 3), 3))
    @test searchindex("foo", args...) == expected
end

# string searchindex with a two-char UTF-8 string literal, using characters
# written with 2, 3 and 4 bytes; the haystack is three copies and then two
# copies of the character. Every case is expected to report index 1, with and
# without an explicit start index of 1.
for (haystack, needle) in (
        ("ééé", "éé"),
        ("€€€", "€€"),
        ("\U1f596\U1f596\U1f596", "\U1f596\U1f596"),
        ("éé", "éé"),
        ("€€", "€€"),
        ("\U1f596\U1f596", "\U1f596\U1f596"))
    @test searchindex(haystack, needle) == 1
    @test searchindex(haystack, needle, 1) == 1
end

# string rsearchindex with a two-char UTF-8 string literal, using characters
# written with 2, 3 and 4 bytes; the haystack is three copies and then two
# copies of the character (the two-copy haystack always expects index 1).
# NOTE(review): the explicit start index for the two-character haystacks is
# endof of the three-character string, exactly as in the original assertions —
# confirm this is intentional.
for (triple, pair, triple_idx) in (
        ("ééé", "éé", 3),
        ("€€€", "€€", 4),
        ("\U1f596\U1f596\U1f596", "\U1f596\U1f596", 5))
    @test rsearchindex(triple, pair) == triple_idx
    @test rsearchindex(triple, pair, endof(triple)) == triple_idx
    @test rsearchindex(pair, pair) == 1
    @test rsearchindex(pair, pair, endof(triple)) == 1
end

# Membership-testing one string inside another is expected to raise ErrorException.
@test_throws ErrorException in("ab", "abc")
back to top