Implement REGEXMATCH function (#15)

Co-authored-by: Cursor Agent <cursoragent@cursor.com>
This commit is contained in:
Pijus Kamandulis
2026-05-30 21:31:45 +03:00
committed by GitHub
parent c3726a6633
commit 05e8cd2842
8 changed files with 1160 additions and 922 deletions
@@ -0,0 +1,73 @@
package tests_test
import (
"fmt"
"testing"
"github.com/Azure/azure-sdk-for-go/sdk/data/azcosmos"
"github.com/pikami/cosmium/api/config"
"github.com/pikami/cosmium/internal/datastore"
"github.com/stretchr/testify/assert"
)
// documents_InitializeSingleDocumentDb prepares the fixture used by the
// REGEXMATCH integration test: it creates the test database, a collection
// partitioned on "/pk", and a single document, all directly through the test
// server's data store. It then builds an azcosmos client from a connection
// string pointing at ts.URL and returns a container client for that
// collection.
func documents_InitializeSingleDocumentDb(t *testing.T, ts *TestServer) *azcosmos.ContainerClient {
	// Only the partition key paths are populated; Kind and Version keep their
	// zero values.
	collection := datastore.Collection{ID: testCollectionName}
	collection.PartitionKey.Paths = []string{"/pk"}

	seedDocument := map[string]interface{}{"id": "regexmatch-test", "pk": "regexmatch-test"}

	ts.DataStore.CreateDatabase(datastore.Database{ID: testDatabaseName})
	ts.DataStore.CreateCollection(testDatabaseName, collection)
	ts.DataStore.CreateDocument(testDatabaseName, testCollectionName, seedDocument)

	connectionString := fmt.Sprintf("AccountEndpoint=%s;AccountKey=%s", ts.URL, config.DefaultAccountKey)
	cosmosClient, err := azcosmos.NewClientFromConnectionString(connectionString, &azcosmos.ClientOptions{})
	assert.Nil(t, err)

	containerClient, err := cosmosClient.NewContainer(testDatabaseName, testCollectionName)
	assert.Nil(t, err)

	return containerClient
}
// Test_Documents_RegexMatch sends a single SELECT VALUE query containing
// several REGEXMATCH calls through the Cosmos client, once per store preset,
// and checks the boolean produced for each modifier combination.
func Test_Documents_RegexMatch(t *testing.T) {
	storePresets := []testPreset{PresetJsonStore, PresetBadgerStore}

	runTestsWithPresets(t, "Test_Documents_RegexMatch", storePresets, func(t *testing.T, ts *TestServer, client *azcosmos.Client) {
		container := documents_InitializeSingleDocumentDb(t, ts)

		t.Run("Should execute REGEXMATCH()", func(t *testing.T) {
			// Each key of the projected object names the scenario it covers.
			const query = `SELECT VALUE {
noModifiers: REGEXMATCH("abcd", "ABC"),
caseInsensitive: REGEXMATCH("abcd", "ABC", "i"),
wildcardCharacter: REGEXMATCH("abcd", "ab.", ""),
ignoreWhiteSpace: REGEXMATCH("abcd", "ab c", "x"),
caseInsensitiveAndIgnoreWhiteSpace: REGEXMATCH("abcd", "aB c", "ix"),
containNumberBetweenZeroAndNine: REGEXMATCH("03a", "[0-9]"),
containPrefix: REGEXMATCH("salt3824908", "salt{1}"),
containsFiveLetterWordStartingWithS: REGEXMATCH("shame", "s....", "i")
}`

			// Only the modifier-free, case-mismatched call is expected to fail.
			expectedRow := map[string]interface{}{
				"noModifiers":                         false,
				"caseInsensitive":                     true,
				"wildcardCharacter":                   true,
				"ignoreWhiteSpace":                    true,
				"caseInsensitiveAndIgnoreWhiteSpace":  true,
				"containNumberBetweenZeroAndNine":     true,
				"containPrefix":                       true,
				"containsFiveLetterWordStartingWithS": true,
			}

			testCosmosQuery(t, container, query, nil, []interface{}{expectedRow})
		})
	})
}
+1
View File
@@ -107,6 +107,7 @@ const (
FunctionCallContains FunctionCallType = "Contains"
FunctionCallEndsWith FunctionCallType = "EndsWith"
FunctionCallStartsWith FunctionCallType = "StartsWith"
FunctionCallRegexMatch FunctionCallType = "RegexMatch"
FunctionCallIndexOf FunctionCallType = "IndexOf"
FunctionCallToString FunctionCallType = "ToString"
FunctionCallUpper FunctionCallType = "Upper"
+929 -921
View File
File diff suppressed because it is too large Load Diff
+3 -1
View File
@@ -681,6 +681,8 @@ ThreeArgumentStringFunctionExpression <- function:ThreeArgumentStringFunction ws
functionType = parsers.FunctionCallEndsWith
case "STARTSWITH":
functionType = parsers.FunctionCallStartsWith
case "REGEXMATCH":
functionType = parsers.FunctionCallRegexMatch
case "INDEX_OF":
functionType = parsers.FunctionCallIndexOf
}
@@ -688,7 +690,7 @@ ThreeArgumentStringFunctionExpression <- function:ThreeArgumentStringFunction ws
return createFunctionCall(functionType, []interface{}{ex1, ex2, ignoreCase})
}
ThreeArgumentStringFunction <- ("CONTAINS"i / "ENDSWITH"i / "STARTSWITH"i / "INDEX_OF"i) {
ThreeArgumentStringFunction <- ("CONTAINS"i / "ENDSWITH"i / "STARTSWITH"i / "REGEXMATCH"i / "INDEX_OF"i) {
return string(c.text), nil
}
+26
View File
@@ -168,6 +168,32 @@ func Test_Execute_StringFunctions(t *testing.T) {
)
})
// Parser check for the three-argument form of REGEXMATCH: the statement must
// parse into a single RegexMatch function-call select item whose arguments
// are, in order, the field path c.id, the pattern constant and the modifier
// constant.
t.Run("Should parse function REGEXMATCH()", func(t *testing.T) {
testQueryParse(
t,
`SELECT REGEXMATCH(c.id, "aB c", "ix") FROM c`,
parsers.SelectStmt{
SelectItems: []parsers.SelectItem{
{
Type: parsers.SelectItemTypeFunctionCall,
Value: parsers.FunctionCall{
Type: parsers.FunctionCallRegexMatch,
Arguments: []interface{}{
parsers.SelectItem{
Path: []string{"c", "id"},
Type: parsers.SelectItemTypeField,
},
// Pattern and modifiers are passed through as plain string constants.
testutils.SelectItem_Constant_String("aB c"),
testutils.SelectItem_Constant_String("ix"),
},
},
},
},
Table: parsers.Table{SelectItem: testutils.SelectItem_Path("c")},
},
)
})
t.Run("Should parse function INDEX_OF()", func(t *testing.T) {
testQueryParse(
t,
@@ -162,6 +162,8 @@ func (r rowContext) selectItem_SelectItemTypeFunctionCall(functionCall parsers.F
return r.strings_EndsWith(functionCall.Arguments)
case parsers.FunctionCallStartsWith:
return r.strings_StartsWith(functionCall.Arguments)
case parsers.FunctionCallRegexMatch:
return r.strings_RegexMatch(functionCall.Arguments)
case parsers.FunctionCallConcat:
return r.strings_Concat(functionCall.Arguments)
case parsers.FunctionCallIndexOf:
@@ -2,6 +2,7 @@ package memoryexecutor
import (
"fmt"
"regexp"
"strings"
"github.com/pikami/cosmium/internal/logger"
@@ -75,6 +76,46 @@ func (r rowContext) strings_StartsWith(arguments []interface{}) bool {
return strings.HasPrefix(str1, str2)
}
// strings_RegexMatch evaluates REGEXMATCH(value, pattern [, modifiers]) for
// the current row.
//
// arguments[0] and arguments[1] are resolved to the subject string and the
// pattern; the optional arguments[2] is resolved to a modifier string. The
// result is false when any of them does not resolve to a string, or when the
// resulting expression fails to compile (the compile error is logged).
//
// Recognised modifiers: "x" strips unescaped whitespace from the pattern via
// stripRegexIgnoredWhitespace; "i", "m" and "s" are forwarded to the regexp
// engine as an inline flag group. Any other modifier character is ignored.
func (r rowContext) strings_RegexMatch(arguments []interface{}) bool {
	subject, subjectOk := r.parseString(arguments[0])
	pattern, patternOk := r.parseString(arguments[1])
	if !(subjectOk && patternOk) {
		return false
	}

	modifiers, modifiersOk := r.getStringFlag(arguments)
	if !modifiersOk {
		return false
	}

	expression := pattern
	if strings.Contains(modifiers, "x") {
		expression = stripRegexIgnoredWhitespace(expression)
	}

	// Collect the supported flags in a fixed order (i, m, s), independent of
	// the order in which the caller listed them.
	inlineFlags := ""
	for _, flag := range []string{"i", "m", "s"} {
		if strings.Contains(modifiers, flag) {
			inlineFlags += flag
		}
	}
	if inlineFlags != "" {
		expression = "(?" + inlineFlags + ")" + expression
	}

	matched, err := regexp.MatchString(expression, subject)
	if err != nil {
		// The log reports the pattern as supplied by the query, not the
		// rewritten expression.
		logger.Errorf("strings_RegexMatch - invalid pattern %q: %v", pattern, err)
		return false
	}

	return matched
}
func (r rowContext) strings_Concat(arguments []interface{}) string {
result := ""
@@ -318,6 +359,20 @@ func (r rowContext) getBoolFlag(arguments []interface{}) bool {
return ignoreCase
}
// getStringFlag resolves the optional third function argument to a string.
//
// It returns ("", true) when the argument is absent or nil, (value, true)
// when it resolves to a string, and ("", false) after logging an error when
// it resolves to any other type.
func (r rowContext) getStringFlag(arguments []interface{}) (string, bool) {
	flagSupplied := len(arguments) > 2 && arguments[2] != nil
	if !flagSupplied {
		return "", true
	}

	resolved := r.resolveSelectItem(arguments[2].(parsers.SelectItem))
	flag, isString := resolved.(string)
	if !isString {
		logger.ErrorLn("getStringFlag - got parameters of wrong type")
		return "", false
	}

	return flag, true
}
func (r rowContext) parseString(argument interface{}) (value string, ok bool) {
exItem := argument.(parsers.SelectItem)
ex := r.resolveSelectItem(exItem)
@@ -329,6 +384,41 @@ func (r rowContext) parseString(argument interface{}) (value string, ok bool) {
return "", false
}
// stripRegexIgnoredWhitespace removes the whitespace that the "x" modifier
// tells the regex engine to ignore, returning the rewritten pattern.
//
// Space, tab, newline, carriage return and form feed are dropped unless they
// are escaped with a backslash or appear inside a bracketed character class.
// Everything else is copied through unchanged.
//
// Character-class boundaries follow Go's regexp syntax: a ']' directly after
// the opening '[' (or after "[^") is a literal member of the class rather
// than its terminator, and a POSIX class such as "[:alpha:]" nested in a
// bracket expression is copied as a unit so that its own ']' does not end
// the enclosing class.
func stripRegexIgnoredWhitespace(pattern string) string {
	var result strings.Builder
	result.Grow(len(pattern))

	inCharClass := false
	escaped := false
	// Bytes before this offset are copied without interpretation. All
	// delimiters involved are ASCII, so byte offsets are safe to compare with
	// the rune offsets produced by range.
	verbatimUntil := 0

	for i, r := range pattern {
		switch {
		case i < verbatimUntil:
			result.WriteRune(r)

		case escaped:
			// The character after a backslash is always kept, including
			// whitespace and brackets.
			escaped = false
			result.WriteRune(r)

		case r == '\\':
			escaped = true
			result.WriteRune(r)

		case inCharClass:
			if r == ']' {
				inCharClass = false
			} else if strings.HasPrefix(pattern[i:], "[:") {
				// Skip over a POSIX class name so its closing "]" is not
				// mistaken for the end of the bracket expression.
				if end := strings.Index(pattern[i+2:], ":]"); end >= 0 {
					verbatimUntil = i + 2 + end + 2
				}
			}
			result.WriteRune(r)

		case r == '[':
			inCharClass = true
			// An optional '^' followed by an optional ']' form the class
			// prefix; a ']' in that position is a literal.
			next := i + 1
			if next < len(pattern) && pattern[next] == '^' {
				next++
			}
			if next < len(pattern) && pattern[next] == ']' {
				next++
			}
			verbatimUntil = next
			result.WriteRune(r)

		case r == ' ' || r == '\t' || r == '\n' || r == '\r' || r == '\f':
			// Ignored whitespace outside any class: drop it.

		default:
			result.WriteRune(r)
		}
	}

	return result.String()
}
func convertToString(value interface{}) string {
switch v := value.(type) {
case string:
@@ -231,6 +231,42 @@ func Test_Execute_StringFunctions(t *testing.T) {
)
})
// Executor check for REGEXMATCH: for every row of mockData the statement
// projects c.id together with REGEXMATCH(c.str, "COOL WORLD", "i") under the
// alias "regexMatch". mockData is defined outside this hunk; per the expected
// rows below, only the row with id "789" is expected to match.
t.Run("Should execute function REGEXMATCH()", func(t *testing.T) {
testQueryExecute(
t,
parsers.SelectStmt{
SelectItems: []parsers.SelectItem{
{
Path: []string{"c", "id"},
Type: parsers.SelectItemTypeField,
},
{
Alias: "regexMatch",
Type: parsers.SelectItemTypeFunctionCall,
Value: parsers.FunctionCall{
Type: parsers.FunctionCallRegexMatch,
Arguments: []interface{}{
parsers.SelectItem{
Path: []string{"c", "str"},
Type: parsers.SelectItemTypeField,
},
// Upper-case pattern combined with the "i" modifier.
testutils.SelectItem_Constant_String("COOL WORLD"),
testutils.SelectItem_Constant_String("i"),
},
},
},
},
Table: parsers.Table{SelectItem: testutils.SelectItem_Path("c")},
},
mockData,
[]memoryexecutor.RowType{
map[string]interface{}{"id": "123", "regexMatch": false},
map[string]interface{}{"id": "456", "regexMatch": false},
map[string]interface{}{"id": "789", "regexMatch": true},
},
)
})
t.Run("Should execute function INDEX_OF()", func(t *testing.T) {
testQueryExecute(
t,