diff --git a/ext/regexp/regexp.go b/ext/regexp/regexp.go index 6210147f..1d0aa306 100644 --- a/ext/regexp/regexp.go +++ b/ext/regexp/regexp.go @@ -32,11 +32,14 @@ func Register(db *sqlite3.Conn) error { db.CreateFunction("regexp_instr", 3, flags, regexInstr), db.CreateFunction("regexp_instr", 4, flags, regexInstr), db.CreateFunction("regexp_instr", 5, flags, regexInstr), + db.CreateFunction("regexp_instr", 6, flags, regexInstr), db.CreateFunction("regexp_substr", 2, flags, regexSubstr), db.CreateFunction("regexp_substr", 3, flags, regexSubstr), db.CreateFunction("regexp_substr", 4, flags, regexSubstr), + db.CreateFunction("regexp_substr", 5, flags, regexSubstr), db.CreateFunction("regexp_replace", 3, flags, regexReplace), - db.CreateFunction("regexp_replace", 4, flags, regexReplace)) + db.CreateFunction("regexp_replace", 4, flags, regexReplace), + db.CreateFunction("regexp_replace", 5, flags, regexReplace)) } func load(ctx sqlite3.Context, i int, expr string) (*regexp.Regexp, error) { @@ -68,6 +71,7 @@ func regexLike(ctx sqlite3.Context, arg ...sqlite3.Value) { ctx.ResultError(err) return // notest } + text := arg[0].RawText() ctx.ResultBool(re.Match(text)) } @@ -78,10 +82,11 @@ func regexCount(ctx sqlite3.Context, arg ...sqlite3.Value) { ctx.ResultError(err) return // notest } + text := arg[0].RawText() if len(arg) > 2 { pos := arg[2].Int() - _, text = split(text, pos) + text = text[skip(text, pos):] } ctx.ResultInt(len(re.FindAll(text, -1))) } @@ -92,26 +97,23 @@ func regexSubstr(ctx sqlite3.Context, arg ...sqlite3.Value) { ctx.ResultError(err) return // notest } + text := arg[0].RawText() + var pos, n, subexpr int if len(arg) > 2 { - pos := arg[2].Int() - _, text = split(text, pos) + pos = arg[2].Int() } - n := 0 if len(arg) > 3 { n = arg[3].Int() } + if len(arg) > 4 { + subexpr = arg[4].Int() + } - var res []byte - if n <= 1 { - res = re.Find(text) - } else { - all := re.FindAll(text, n) - if n <= len(all) { - res = all[n-1] - } + loc := regexFind(re, text, pos, n, subexpr) + if loc != nil { + ctx.ResultRawText(text[loc[0]:loc[1]]) } - ctx.ResultRawText(res) } func regexInstr(ctx sqlite3.Context, arg ...sqlite3.Value) { @@ -120,35 +122,26 @@ func regexInstr(ctx sqlite3.Context, arg ...sqlite3.Value) { ctx.ResultError(err) return // notest } - pos := 1 + text := arg[0].RawText() + var pos, n, end, subexpr int if len(arg) > 2 { pos = arg[2].Int() - _, text = split(text, pos) } - n := 0 if len(arg) > 3 { n = arg[3].Int() } - - var loc []int - if n <= 1 { - loc = re.FindIndex(text) - } else { - all := re.FindAllIndex(text, n) - if n <= len(all) { - loc = all[n-1] - } + if len(arg) > 4 && arg[4].Bool() { + end = 1 } - if loc == nil { - return + if len(arg) > 5 { + subexpr = arg[5].Int() } - end := 0 - if len(arg) > 4 && arg[4].Bool() { - end = 1 + loc := regexFind(re, text, pos, n, subexpr) + if loc != nil { + ctx.ResultInt(loc[end] + 1) } - ctx.ResultInt(pos + loc[end]) } func regexReplace(ctx sqlite3.Context, arg ...sqlite3.Value) { @@ -157,24 +150,71 @@ func regexReplace(ctx sqlite3.Context, arg ...sqlite3.Value) { ctx.ResultError(err) return // notest } - var head, tail []byte - tail = arg[0].RawText() + + text := arg[0].RawText() + repl := arg[2].RawText() + var pos, n int if len(arg) > 3 { - pos := arg[3].Int() - head, tail = split(tail, pos) + pos = arg[3].Int() + } + if len(arg) > 4 { + n = arg[4].Int() + } + + res := text + pos = skip(text, pos) + if n > 0 { + all := re.FindAllSubmatchIndex(text[pos:], n) + if n <= len(all) { + loc := all[n-1] + res = text[:pos+loc[0]] + res = re.Expand(res, repl, text[pos:], loc) + res = append(res, text[pos+loc[1]:]...) + } + } else { + res = append(text[:pos], re.ReplaceAll(text[pos:], repl)...) } - tail = re.ReplaceAll(tail, arg[2].RawText()) - if head != nil { - tail = append(head, tail...) + ctx.ResultRawText(res) +} + +func regexFind(re *regexp.Regexp, text []byte, pos, n, subexpr int) (loc []int) { + pos = skip(text, pos) + text = text[pos:] + + if n <= 1 { + if subexpr == 0 { + loc = re.FindIndex(text) + } else { + loc = re.FindSubmatchIndex(text) + } + } else { + if subexpr == 0 { + all := re.FindAllIndex(text, n) + if n <= len(all) { + loc = all[n-1] + } + } else { + all := re.FindAllSubmatchIndex(text, n) + if n <= len(all) { + loc = all[n-1] + } + } + } + + if 2+2*subexpr <= len(loc) { + loc = loc[2*subexpr : 2+2*subexpr] + loc[0] += pos + loc[1] += pos + return loc } - ctx.ResultRawText(tail) + return nil } -func split(s []byte, i int) (head, tail []byte) { - for pos := range string(s) { - if i--; i <= 0 { - return s[:pos:pos], s[pos:] +func skip(text []byte, start int) int { + for pos := range string(text) { + if start--; start <= 0 { + return pos } } - return s, nil + return len(text) } diff --git a/ext/regexp/regexp_test.go b/ext/regexp/regexp_test.go index e9d86a79..d8dc83ed 100644 --- a/ext/regexp/regexp_test.go +++ b/ext/regexp/regexp_test.go @@ -34,13 +34,33 @@ func TestRegister(t *testing.T) { {`regexp_instr('Hello', 'el.')`, "2"}, {`regexp_instr('Hello', '.', 6)`, ""}, {`regexp_substr('Hello', 'el.')`, "ell"}, - {`regexp_substr('Hello', 'l', 2, 2)`, "l"}, {`regexp_replace('Hello', 'llo', 'll')`, "Hell"}, - + // https://www.postgresql.org/docs/current/functions-matching.html + {`regexp_count('ABCABCAXYaxy', 'A.')`, "3"}, + {`regexp_count('ABCABCAXYaxy', '(?i)A.', 1)`, "4"}, + {`regexp_instr('number of your street, town zip, FR', '[^,]+', 1, 2)`, "23"}, + {`regexp_instr('ABCDEFGHI', '(?i)(c..)(...)', 1, 1, 0, 2)`, "6"}, + {`regexp_substr('number of your street, town zip, FR', '[^,]+', 1, 2)`, " town zip"}, + {`regexp_substr('ABCDEFGHI', '(?i)(c..)(...)', 1, 1, 2)`, "FGH"}, + {`regexp_replace('foobarbaz', 'b..', 'X', 1, 1)`, "fooXbaz"}, + {`regexp_replace('foobarbaz', 'b..', 'X')`, "fooXX"}, + {`regexp_replace('foobarbaz', 'b(..)', 'X${1}Y')`, "fooXarYXazY"}, + {`regexp_replace('A PostgreSQL function', '(?i)a|e|i|o|u', 'X', 1, 0)`, "X PXstgrXSQL fXnctXXn"}, + {`regexp_replace('A PostgreSQL function', '(?i)a|e|i|o|u', 'X', 1, 3)`, "A PostgrXSQL function"}, + // https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/REGEXP_COUNT.html {`regexp_count('123123123123123', '(12)3', 1)`, "5"}, + {`regexp_count('123123123123', '123', 3)`, "3"}, + {`regexp_instr('500 Oracle Parkway, Redwood Shores, CA', '[^ ]+', 1, 6)`, "37"}, {`regexp_instr('500 Oracle Parkway, Redwood Shores, CA', '(?i)[s|r|p][[:alpha:]]{6}', 3, 2, 1)`, "28"}, - {`regexp_substr('500 Oracle Parkway, Redwood Shores, CA', ',[^,]+,', 3, 1)`, ", Redwood Shores,"}, - {`regexp_replace('500 Oracle Parkway, Redwood Shores, CA', '( ){2,}', ' ', 3)`, "500 Oracle Parkway, Redwood Shores, CA"}, + {`regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 1)`, "1"}, + {`regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 2)`, "4"}, + {`regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 4)`, "7"}, + {`regexp_substr('500 Oracle Parkway, Redwood Shores, CA', ',[^,]+,')`, ", Redwood Shores,"}, + {`regexp_substr('http://www.example.com/products', 'http://([[:alnum:]]+\.?){3,4}/?')`, "http://www.example.com/"}, + {`regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 1)`, "123"}, + {`regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 4)`, "78"}, + {`regexp_substr('123123123123', '1(.)3', 3, 2, 1)`, "2"}, + {`regexp_replace('500 Oracle Parkway, Redwood Shores, CA', '( ){2,}', ' ')`, "500 Oracle Parkway, Redwood Shores, CA"}, } for _, tt := range tests {