// LogicalExpressionType identifies the boolean operator that joins the
// operands of a LogicalExpression.
type LogicalExpressionType int

const (
	// LogicalExpressionTypeOr marks operands joined with OR.
	LogicalExpressionTypeOr LogicalExpressionType = iota
	// LogicalExpressionTypeAnd marks operands joined with AND.
	LogicalExpressionTypeAnd
)

// ConstantType identifies the kind of literal stored in a Constant.
type ConstantType int

const (
	// ConstantTypeString marks a string literal.
	ConstantTypeString ConstantType = iota
	// ConstantTypeInteger marks an integer literal.
	ConstantTypeInteger
	// ConstantTypeFloat marks a floating point literal.
	ConstantTypeFloat
	// ConstantTypeBoolean marks a boolean literal.
	ConstantTypeBoolean
)

// SelectStmt is the parsed form of a SELECT query.
type SelectStmt struct {
	// Columns lists the selected field paths in source order.
	Columns []FieldPath
	// Table is the collection named in the FROM clause.
	Table Table
	// Filters holds the WHERE condition as a ComparisonExpression or a
	// LogicalExpression. It is nil when the query has no WHERE clause.
	Filters interface{}
}

// Table names the collection a query reads from.
type Table struct {
	Value string
}

// FieldPath is a possibly aliased, dot-separated reference to a field.
type FieldPath struct {
	// Alias is the name given with AS; empty when no alias was supplied.
	Alias string
	// Path holds the identifiers of the reference, one per segment.
	Path []string
}

// LogicalExpression combines several expressions with a single operator.
type LogicalExpression struct {
	Expressions []interface{}
	Operation   LogicalExpressionType
}

// ComparisonExpression compares two operands with the operator whose
// source text is stored in Operation.
type ComparisonExpression struct {
	Left      interface{}
	Right     interface{}
	Operation string
}

// Constant is a literal value together with its kind.
type Constant struct {
	Type  ConstantType
	Value interface{}
}

// makeSelectStmt assembles a SelectStmt from the values produced by the
// SelectStmt grammar rule.
//
// columns must be a []FieldPath and table must be a Table. whereClause is
// nil when the query has no WHERE clause; otherwise it must be a
// ComparisonExpression or a LogicalExpression.
//
// An error is returned, instead of panicking on a failed type assertion,
// when any argument has an unexpected type. In particular an unsupported
// non-nil whereClause is rejected rather than silently dropped, because
// dropping it would turn a filtered query into an unfiltered one.
func makeSelectStmt(columns, table, whereClause interface{}) (SelectStmt, error) {
	cols, ok := columns.([]FieldPath)
	if !ok {
		return SelectStmt{}, fmt.Errorf("select statement: unexpected columns type %T", columns)
	}

	tbl, ok := table.(Table)
	if !ok {
		return SelectStmt{}, fmt.Errorf("select statement: unexpected table type %T", table)
	}

	stmt := SelectStmt{Columns: cols, Table: tbl}

	switch filters := whereClause.(type) {
	case nil:
		// No WHERE clause: leave Filters unset.
	case ComparisonExpression, LogicalExpression:
		stmt.Filters = filters
	default:
		return SelectStmt{}, fmt.Errorf("select statement: unsupported where clause type %T", whereClause)
	}

	return stmt, nil
}
+func makeFieldPath(name interface{}, path interface{}, alias interface{}) (FieldPath, error) { + ps := path.([]interface{}) + + paths := make([]string, 1) + paths[0] = name.(string) + for _, p := range ps { + pa := p.([]interface{}) + px := pa[1:] + for _, pi := range px { + paths = append(paths, pi.(string)) + } + } + + fieldPath := FieldPath{Path: paths} + if aliasValue, ok := alias.(string); ok { + fieldPath.Alias = aliasValue + } + + return fieldPath, nil +} + +func makeColumnList(column interface{}, other_columns interface{}) ([]FieldPath, error) { + collsAsArray := other_columns.([]interface{}) + columnList := make([]FieldPath, len(collsAsArray)+1) + columnList[0] = column.(FieldPath) + + for i, v := range collsAsArray { + if col, ok := v.(FieldPath); ok { + columnList[i+1] = col + } + } + + return columnList, nil +} + +func joinStrings(array []interface{}) string { + var stringsArray []string + for _, elem := range array { + str, ok := elem.(string) + if !ok { + continue + } + stringsArray = append(stringsArray, str) + } + + return strings.Join(stringsArray, "") +} + +func combineExpressions(ex1 interface{}, exs interface{}, operation LogicalExpressionType) (interface{}, error) { + if exs == nil || len(exs.([]interface{})) < 1 { + return ex1, nil + } + + return LogicalExpression{ + Expressions: append([]interface{}{ex1}, exs.([]interface{})...), + Operation: operation, + }, nil +} + +var g = &grammar{ + rules: []*rule{ + { + name: "Input", + pos: position{line: 127, col: 1, offset: 2636}, + expr: &actionExpr{ + pos: position{line: 127, col: 10, offset: 2645}, + run: (*parser).callonInput1, + expr: &labeledExpr{ + pos: position{line: 127, col: 10, offset: 2645}, + label: "selectStmt", + expr: &ruleRefExpr{ + pos: position{line: 127, col: 21, offset: 2656}, + name: "SelectStmt", + }, + }, + }, + }, + { + name: "SelectStmt", + pos: position{line: 131, col: 1, offset: 2699}, + expr: &actionExpr{ + pos: position{line: 131, col: 15, offset: 2713}, + run: 
(*parser).callonSelectStmt1, + expr: &seqExpr{ + pos: position{line: 131, col: 15, offset: 2713}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 131, col: 15, offset: 2713}, + name: "Select", + }, + &ruleRefExpr{ + pos: position{line: 131, col: 22, offset: 2720}, + name: "ws", + }, + &labeledExpr{ + pos: position{line: 131, col: 25, offset: 2723}, + label: "columns", + expr: &ruleRefExpr{ + pos: position{line: 131, col: 33, offset: 2731}, + name: "ColumnList", + }, + }, + &ruleRefExpr{ + pos: position{line: 131, col: 44, offset: 2742}, + name: "ws", + }, + &ruleRefExpr{ + pos: position{line: 132, col: 5, offset: 2749}, + name: "From", + }, + &ruleRefExpr{ + pos: position{line: 132, col: 10, offset: 2754}, + name: "ws", + }, + &labeledExpr{ + pos: position{line: 132, col: 13, offset: 2757}, + label: "table", + expr: &ruleRefExpr{ + pos: position{line: 132, col: 19, offset: 2763}, + name: "TableName", + }, + }, + &ruleRefExpr{ + pos: position{line: 132, col: 29, offset: 2773}, + name: "ws", + }, + &labeledExpr{ + pos: position{line: 133, col: 5, offset: 2780}, + label: "whereClause", + expr: &zeroOrOneExpr{ + pos: position{line: 133, col: 17, offset: 2792}, + expr: &actionExpr{ + pos: position{line: 133, col: 18, offset: 2793}, + run: (*parser).callonSelectStmt15, + expr: &seqExpr{ + pos: position{line: 133, col: 18, offset: 2793}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 133, col: 18, offset: 2793}, + name: "ws", + }, + &ruleRefExpr{ + pos: position{line: 133, col: 21, offset: 2796}, + name: "Where", + }, + &ruleRefExpr{ + pos: position{line: 133, col: 27, offset: 2802}, + name: "ws", + }, + &labeledExpr{ + pos: position{line: 133, col: 30, offset: 2805}, + label: "condition", + expr: &ruleRefExpr{ + pos: position{line: 133, col: 40, offset: 2815}, + name: "Condition", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "ColumnList", + pos: position{line: 137, col: 1, offset: 2913}, + expr: &actionExpr{ + pos: position{line: 137, 
col: 15, offset: 2927}, + run: (*parser).callonColumnList1, + expr: &seqExpr{ + pos: position{line: 137, col: 15, offset: 2927}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 137, col: 15, offset: 2927}, + label: "column", + expr: &ruleRefExpr{ + pos: position{line: 137, col: 22, offset: 2934}, + name: "FieldPath", + }, + }, + &labeledExpr{ + pos: position{line: 137, col: 32, offset: 2944}, + label: "other_columns", + expr: &zeroOrMoreExpr{ + pos: position{line: 137, col: 46, offset: 2958}, + expr: &actionExpr{ + pos: position{line: 137, col: 47, offset: 2959}, + run: (*parser).callonColumnList7, + expr: &seqExpr{ + pos: position{line: 137, col: 47, offset: 2959}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 137, col: 47, offset: 2959}, + name: "ws", + }, + &litMatcher{ + pos: position{line: 137, col: 50, offset: 2962}, + val: ",", + ignoreCase: false, + want: "\",\"", + }, + &ruleRefExpr{ + pos: position{line: 137, col: 54, offset: 2966}, + name: "ws", + }, + &labeledExpr{ + pos: position{line: 137, col: 57, offset: 2969}, + label: "coll", + expr: &ruleRefExpr{ + pos: position{line: 137, col: 62, offset: 2974}, + name: "FieldPath", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "TableName", + pos: position{line: 141, col: 1, offset: 3060}, + expr: &actionExpr{ + pos: position{line: 141, col: 14, offset: 3073}, + run: (*parser).callonTableName1, + expr: &labeledExpr{ + pos: position{line: 141, col: 14, offset: 3073}, + label: "key", + expr: &ruleRefExpr{ + pos: position{line: 141, col: 18, offset: 3077}, + name: "Identifier", + }, + }, + }, + }, + { + name: "FieldPath", + pos: position{line: 145, col: 1, offset: 3136}, + expr: &actionExpr{ + pos: position{line: 145, col: 14, offset: 3149}, + run: (*parser).callonFieldPath1, + expr: &seqExpr{ + pos: position{line: 145, col: 14, offset: 3149}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 145, col: 14, offset: 3149}, + label: "name", + expr: &ruleRefExpr{ + pos: 
position{line: 145, col: 19, offset: 3154}, + name: "Identifier", + }, + }, + &labeledExpr{ + pos: position{line: 145, col: 30, offset: 3165}, + label: "path", + expr: &zeroOrMoreExpr{ + pos: position{line: 145, col: 35, offset: 3170}, + expr: &seqExpr{ + pos: position{line: 145, col: 36, offset: 3171}, + exprs: []any{ + &litMatcher{ + pos: position{line: 145, col: 36, offset: 3171}, + val: ".", + ignoreCase: false, + want: "\".\"", + }, + &ruleRefExpr{ + pos: position{line: 145, col: 40, offset: 3175}, + name: "Identifier", + }, + }, + }, + }, + }, + &labeledExpr{ + pos: position{line: 146, col: 5, offset: 3192}, + label: "asClause", + expr: &zeroOrOneExpr{ + pos: position{line: 146, col: 14, offset: 3201}, + expr: &actionExpr{ + pos: position{line: 146, col: 15, offset: 3202}, + run: (*parser).callonFieldPath12, + expr: &seqExpr{ + pos: position{line: 146, col: 15, offset: 3202}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 146, col: 15, offset: 3202}, + name: "ws", + }, + &litMatcher{ + pos: position{line: 146, col: 18, offset: 3205}, + val: "AS", + ignoreCase: false, + want: "\"AS\"", + }, + &ruleRefExpr{ + pos: position{line: 146, col: 23, offset: 3210}, + name: "ws", + }, + &labeledExpr{ + pos: position{line: 146, col: 26, offset: 3213}, + label: "alias", + expr: &ruleRefExpr{ + pos: position{line: 146, col: 32, offset: 3219}, + name: "Identifier", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "Identifier", + pos: position{line: 150, col: 1, offset: 3306}, + expr: &actionExpr{ + pos: position{line: 150, col: 15, offset: 3320}, + run: (*parser).callonIdentifier1, + expr: &seqExpr{ + pos: position{line: 150, col: 15, offset: 3320}, + exprs: []any{ + &charClassMatcher{ + pos: position{line: 150, col: 15, offset: 3320}, + val: "[a-zA-Z_]", + chars: []rune{'_'}, + ranges: []rune{'a', 'z', 'A', 'Z'}, + ignoreCase: false, + inverted: false, + }, + &zeroOrMoreExpr{ + pos: position{line: 150, col: 24, offset: 3329}, + expr: 
&charClassMatcher{ + pos: position{line: 150, col: 24, offset: 3329}, + val: "[a-zA-Z0-9_]", + chars: []rune{'_'}, + ranges: []rune{'a', 'z', 'A', 'Z', '0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + }, + }, + }, + }, + { + name: "Condition", + pos: position{line: 154, col: 1, offset: 3379}, + expr: &actionExpr{ + pos: position{line: 154, col: 14, offset: 3392}, + run: (*parser).callonCondition1, + expr: &labeledExpr{ + pos: position{line: 154, col: 14, offset: 3392}, + label: "expression", + expr: &ruleRefExpr{ + pos: position{line: 154, col: 25, offset: 3403}, + name: "OrExpression", + }, + }, + }, + }, + { + name: "OrExpression", + pos: position{line: 158, col: 1, offset: 3448}, + expr: &actionExpr{ + pos: position{line: 158, col: 17, offset: 3464}, + run: (*parser).callonOrExpression1, + expr: &seqExpr{ + pos: position{line: 158, col: 17, offset: 3464}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 158, col: 17, offset: 3464}, + label: "ex1", + expr: &ruleRefExpr{ + pos: position{line: 158, col: 21, offset: 3468}, + name: "AndExpression", + }, + }, + &labeledExpr{ + pos: position{line: 158, col: 35, offset: 3482}, + label: "ex2", + expr: &zeroOrMoreExpr{ + pos: position{line: 158, col: 39, offset: 3486}, + expr: &actionExpr{ + pos: position{line: 158, col: 40, offset: 3487}, + run: (*parser).callonOrExpression7, + expr: &seqExpr{ + pos: position{line: 158, col: 40, offset: 3487}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 158, col: 40, offset: 3487}, + name: "ws", + }, + &litMatcher{ + pos: position{line: 158, col: 43, offset: 3490}, + val: "OR", + ignoreCase: false, + want: "\"OR\"", + }, + &ruleRefExpr{ + pos: position{line: 158, col: 48, offset: 3495}, + name: "ws", + }, + &labeledExpr{ + pos: position{line: 158, col: 51, offset: 3498}, + label: "ex", + expr: &ruleRefExpr{ + pos: position{line: 158, col: 54, offset: 3501}, + name: "AndExpression", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: 
"AndExpression", + pos: position{line: 162, col: 1, offset: 3606}, + expr: &actionExpr{ + pos: position{line: 162, col: 18, offset: 3623}, + run: (*parser).callonAndExpression1, + expr: &seqExpr{ + pos: position{line: 162, col: 18, offset: 3623}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 162, col: 18, offset: 3623}, + label: "ex1", + expr: &ruleRefExpr{ + pos: position{line: 162, col: 22, offset: 3627}, + name: "ComparisonExpression", + }, + }, + &labeledExpr{ + pos: position{line: 162, col: 43, offset: 3648}, + label: "ex2", + expr: &zeroOrMoreExpr{ + pos: position{line: 162, col: 47, offset: 3652}, + expr: &actionExpr{ + pos: position{line: 162, col: 48, offset: 3653}, + run: (*parser).callonAndExpression7, + expr: &seqExpr{ + pos: position{line: 162, col: 48, offset: 3653}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 162, col: 48, offset: 3653}, + name: "ws", + }, + &litMatcher{ + pos: position{line: 162, col: 51, offset: 3656}, + val: "AND", + ignoreCase: false, + want: "\"AND\"", + }, + &ruleRefExpr{ + pos: position{line: 162, col: 57, offset: 3662}, + name: "ws", + }, + &labeledExpr{ + pos: position{line: 162, col: 60, offset: 3665}, + label: "ex", + expr: &ruleRefExpr{ + pos: position{line: 162, col: 63, offset: 3668}, + name: "ComparisonExpression", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "ComparisonExpression", + pos: position{line: 166, col: 1, offset: 3781}, + expr: &actionExpr{ + pos: position{line: 166, col: 25, offset: 3805}, + run: (*parser).callonComparisonExpression1, + expr: &seqExpr{ + pos: position{line: 166, col: 25, offset: 3805}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 166, col: 25, offset: 3805}, + label: "left", + expr: &choiceExpr{ + pos: position{line: 166, col: 31, offset: 3811}, + alternatives: []any{ + &ruleRefExpr{ + pos: position{line: 166, col: 31, offset: 3811}, + name: "Literal", + }, + &ruleRefExpr{ + pos: position{line: 166, col: 41, offset: 3821}, + name: 
"FieldPath", + }, + }, + }, + }, + &ruleRefExpr{ + pos: position{line: 166, col: 52, offset: 3832}, + name: "ws", + }, + &labeledExpr{ + pos: position{line: 166, col: 55, offset: 3835}, + label: "op", + expr: &ruleRefExpr{ + pos: position{line: 166, col: 58, offset: 3838}, + name: "ComparisonOperator", + }, + }, + &ruleRefExpr{ + pos: position{line: 166, col: 77, offset: 3857}, + name: "ws", + }, + &labeledExpr{ + pos: position{line: 166, col: 80, offset: 3860}, + label: "right", + expr: &choiceExpr{ + pos: position{line: 166, col: 87, offset: 3867}, + alternatives: []any{ + &ruleRefExpr{ + pos: position{line: 166, col: 87, offset: 3867}, + name: "Literal", + }, + &ruleRefExpr{ + pos: position{line: 166, col: 97, offset: 3877}, + name: "FieldPath", + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "Select", + pos: position{line: 170, col: 1, offset: 3984}, + expr: &choiceExpr{ + pos: position{line: 170, col: 12, offset: 3995}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 170, col: 12, offset: 3995}, + val: "select", + ignoreCase: false, + want: "\"select\"", + }, + &litMatcher{ + pos: position{line: 170, col: 23, offset: 4006}, + val: "SELECT", + ignoreCase: false, + want: "\"SELECT\"", + }, + }, + }, + }, + { + name: "From", + pos: position{line: 172, col: 1, offset: 4017}, + expr: &choiceExpr{ + pos: position{line: 172, col: 10, offset: 4026}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 172, col: 10, offset: 4026}, + val: "from", + ignoreCase: false, + want: "\"from\"", + }, + &litMatcher{ + pos: position{line: 172, col: 19, offset: 4035}, + val: "FROM", + ignoreCase: false, + want: "\"FROM\"", + }, + }, + }, + }, + { + name: "Where", + pos: position{line: 174, col: 1, offset: 4044}, + expr: &choiceExpr{ + pos: position{line: 174, col: 11, offset: 4054}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 174, col: 11, offset: 4054}, + val: "where", + ignoreCase: false, + want: "\"where\"", + }, + &litMatcher{ + pos: 
position{line: 174, col: 21, offset: 4064}, + val: "WHERE", + ignoreCase: false, + want: "\"WHERE\"", + }, + }, + }, + }, + { + name: "ComparisonOperator", + pos: position{line: 176, col: 1, offset: 4074}, + expr: &choiceExpr{ + pos: position{line: 176, col: 23, offset: 4096}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 176, col: 23, offset: 4096}, + val: "=", + ignoreCase: false, + want: "\"=\"", + }, + &litMatcher{ + pos: position{line: 176, col: 29, offset: 4102}, + val: "!=", + ignoreCase: false, + want: "\"!=\"", + }, + &litMatcher{ + pos: position{line: 176, col: 36, offset: 4109}, + val: "<", + ignoreCase: false, + want: "\"<\"", + }, + &litMatcher{ + pos: position{line: 176, col: 42, offset: 4115}, + val: "<=", + ignoreCase: false, + want: "\"<=\"", + }, + &litMatcher{ + pos: position{line: 176, col: 49, offset: 4122}, + val: ">", + ignoreCase: false, + want: "\">\"", + }, + &actionExpr{ + pos: position{line: 176, col: 55, offset: 4128}, + run: (*parser).callonComparisonOperator7, + expr: &litMatcher{ + pos: position{line: 176, col: 55, offset: 4128}, + val: ">=", + ignoreCase: false, + want: "\">=\"", + }, + }, + }, + }, + }, + { + name: "Literal", + pos: position{line: 180, col: 1, offset: 4169}, + expr: &choiceExpr{ + pos: position{line: 180, col: 12, offset: 4180}, + alternatives: []any{ + &ruleRefExpr{ + pos: position{line: 180, col: 12, offset: 4180}, + name: "FloatLiteral", + }, + &ruleRefExpr{ + pos: position{line: 180, col: 27, offset: 4195}, + name: "IntegerLiteral", + }, + &ruleRefExpr{ + pos: position{line: 180, col: 44, offset: 4212}, + name: "StringLiteral", + }, + &ruleRefExpr{ + pos: position{line: 180, col: 60, offset: 4228}, + name: "BooleanLiteral", + }, + }, + }, + }, + { + name: "IntegerLiteral", + pos: position{line: 182, col: 1, offset: 4244}, + expr: &actionExpr{ + pos: position{line: 182, col: 19, offset: 4262}, + run: (*parser).callonIntegerLiteral1, + expr: &oneOrMoreExpr{ + pos: position{line: 182, col: 19, 
offset: 4262}, + expr: &charClassMatcher{ + pos: position{line: 182, col: 19, offset: 4262}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + }, + }, + { + name: "StringLiteral", + pos: position{line: 186, col: 1, offset: 4390}, + expr: &actionExpr{ + pos: position{line: 186, col: 18, offset: 4407}, + run: (*parser).callonStringLiteral1, + expr: &seqExpr{ + pos: position{line: 186, col: 18, offset: 4407}, + exprs: []any{ + &litMatcher{ + pos: position{line: 186, col: 18, offset: 4407}, + val: "\"", + ignoreCase: false, + want: "\"\\\"\"", + }, + &labeledExpr{ + pos: position{line: 186, col: 23, offset: 4412}, + label: "chars", + expr: &zeroOrMoreExpr{ + pos: position{line: 186, col: 29, offset: 4418}, + expr: &ruleRefExpr{ + pos: position{line: 186, col: 29, offset: 4418}, + name: "StringCharacter", + }, + }, + }, + &litMatcher{ + pos: position{line: 186, col: 46, offset: 4435}, + val: "\"", + ignoreCase: false, + want: "\"\\\"\"", + }, + }, + }, + }, + }, + { + name: "FloatLiteral", + pos: position{line: 189, col: 1, offset: 4537}, + expr: &actionExpr{ + pos: position{line: 189, col: 17, offset: 4553}, + run: (*parser).callonFloatLiteral1, + expr: &seqExpr{ + pos: position{line: 189, col: 17, offset: 4553}, + exprs: []any{ + &oneOrMoreExpr{ + pos: position{line: 189, col: 17, offset: 4553}, + expr: &charClassMatcher{ + pos: position{line: 189, col: 17, offset: 4553}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + &litMatcher{ + pos: position{line: 189, col: 23, offset: 4559}, + val: ".", + ignoreCase: false, + want: "\".\"", + }, + &oneOrMoreExpr{ + pos: position{line: 189, col: 26, offset: 4562}, + expr: &charClassMatcher{ + pos: position{line: 189, col: 26, offset: 4562}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + }, + }, + }, + }, + { + name: "BooleanLiteral", + pos: position{line: 193, col: 1, offset: 4702}, + 
expr: &actionExpr{ + pos: position{line: 193, col: 19, offset: 4720}, + run: (*parser).callonBooleanLiteral1, + expr: &choiceExpr{ + pos: position{line: 193, col: 20, offset: 4721}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 193, col: 20, offset: 4721}, + val: "true", + ignoreCase: false, + want: "\"true\"", + }, + &litMatcher{ + pos: position{line: 193, col: 29, offset: 4730}, + val: "false", + ignoreCase: false, + want: "\"false\"", + }, + }, + }, + }, + }, + { + name: "StringCharacter", + pos: position{line: 198, col: 1, offset: 4868}, + expr: &choiceExpr{ + pos: position{line: 198, col: 20, offset: 4887}, + alternatives: []any{ + &actionExpr{ + pos: position{line: 198, col: 20, offset: 4887}, + run: (*parser).callonStringCharacter2, + expr: &seqExpr{ + pos: position{line: 198, col: 20, offset: 4887}, + exprs: []any{ + &notExpr{ + pos: position{line: 198, col: 20, offset: 4887}, + expr: &choiceExpr{ + pos: position{line: 198, col: 22, offset: 4889}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 198, col: 22, offset: 4889}, + val: "\"", + ignoreCase: false, + want: "\"\\\"\"", + }, + &litMatcher{ + pos: position{line: 198, col: 28, offset: 4895}, + val: "\\", + ignoreCase: false, + want: "\"\\\\\"", + }, + }, + }, + }, + &anyMatcher{ + line: 198, col: 34, offset: 4901, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 199, col: 5, offset: 4938}, + run: (*parser).callonStringCharacter9, + expr: &seqExpr{ + pos: position{line: 199, col: 5, offset: 4938}, + exprs: []any{ + &litMatcher{ + pos: position{line: 199, col: 5, offset: 4938}, + val: "\\", + ignoreCase: false, + want: "\"\\\\\"", + }, + &labeledExpr{ + pos: position{line: 199, col: 10, offset: 4943}, + label: "seq", + expr: &ruleRefExpr{ + pos: position{line: 199, col: 14, offset: 4947}, + name: "EscapeSequenceCharacter", + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "EscapeSequenceCharacter", + pos: position{line: 201, col: 1, offset: 4992}, + expr: &labeledExpr{ + 
pos: position{line: 201, col: 28, offset: 5019}, + label: "char", + expr: &ruleRefExpr{ + pos: position{line: 201, col: 33, offset: 5024}, + name: "EscapeCharacter", + }, + }, + }, + { + name: "EscapeCharacter", + pos: position{line: 203, col: 1, offset: 5041}, + expr: &choiceExpr{ + pos: position{line: 203, col: 20, offset: 5060}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 203, col: 20, offset: 5060}, + val: "'", + ignoreCase: false, + want: "\"'\"", + }, + &litMatcher{ + pos: position{line: 204, col: 5, offset: 5068}, + val: "\"", + ignoreCase: false, + want: "\"\\\"\"", + }, + &litMatcher{ + pos: position{line: 205, col: 5, offset: 5076}, + val: "\\", + ignoreCase: false, + want: "\"\\\\\"", + }, + &actionExpr{ + pos: position{line: 206, col: 5, offset: 5085}, + run: (*parser).callonEscapeCharacter5, + expr: &litMatcher{ + pos: position{line: 206, col: 5, offset: 5085}, + val: "b", + ignoreCase: false, + want: "\"b\"", + }, + }, + &actionExpr{ + pos: position{line: 207, col: 5, offset: 5114}, + run: (*parser).callonEscapeCharacter7, + expr: &litMatcher{ + pos: position{line: 207, col: 5, offset: 5114}, + val: "f", + ignoreCase: false, + want: "\"f\"", + }, + }, + &actionExpr{ + pos: position{line: 208, col: 5, offset: 5143}, + run: (*parser).callonEscapeCharacter9, + expr: &litMatcher{ + pos: position{line: 208, col: 5, offset: 5143}, + val: "n", + ignoreCase: false, + want: "\"n\"", + }, + }, + &actionExpr{ + pos: position{line: 209, col: 5, offset: 5172}, + run: (*parser).callonEscapeCharacter11, + expr: &litMatcher{ + pos: position{line: 209, col: 5, offset: 5172}, + val: "r", + ignoreCase: false, + want: "\"r\"", + }, + }, + &actionExpr{ + pos: position{line: 210, col: 5, offset: 5201}, + run: (*parser).callonEscapeCharacter13, + expr: &litMatcher{ + pos: position{line: 210, col: 5, offset: 5201}, + val: "t", + ignoreCase: false, + want: "\"t\"", + }, + }, + }, + }, + }, + { + name: "non_escape_character", + pos: position{line: 212, col: 1, 
offset: 5227}, + expr: &actionExpr{ + pos: position{line: 212, col: 25, offset: 5251}, + run: (*parser).callonnon_escape_character1, + expr: &seqExpr{ + pos: position{line: 212, col: 25, offset: 5251}, + exprs: []any{ + &notExpr{ + pos: position{line: 212, col: 25, offset: 5251}, + expr: &ruleRefExpr{ + pos: position{line: 212, col: 27, offset: 5253}, + name: "escape_character", + }, + }, + &labeledExpr{ + pos: position{line: 212, col: 45, offset: 5271}, + label: "char", + expr: &anyMatcher{ + line: 212, col: 50, offset: 5276, + }, + }, + }, + }, + }, + }, + { + name: "ws", + pos: position{line: 215, col: 1, offset: 5315}, + expr: &zeroOrMoreExpr{ + pos: position{line: 215, col: 7, offset: 5321}, + expr: &charClassMatcher{ + pos: position{line: 215, col: 7, offset: 5321}, + val: "[ \\t\\n\\r]", + chars: []rune{' ', '\t', '\n', '\r'}, + ignoreCase: false, + inverted: false, + }, + }, + }, + { + name: "EOF", + pos: position{line: 217, col: 1, offset: 5333}, + expr: &notExpr{ + pos: position{line: 217, col: 8, offset: 5340}, + expr: &anyMatcher{ + line: 217, col: 9, offset: 5341, + }, + }, + }, + }, +} + +func (c *current) onInput1(selectStmt any) (any, error) { + return selectStmt, nil +} + +func (p *parser) callonInput1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onInput1(stack["selectStmt"]) +} + +func (c *current) onSelectStmt15(condition any) (any, error) { + return condition, nil +} + +func (p *parser) callonSelectStmt15() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onSelectStmt15(stack["condition"]) +} + +func (c *current) onSelectStmt1(columns, table, whereClause any) (any, error) { + return makeSelectStmt(columns, table, whereClause) +} + +func (p *parser) callonSelectStmt1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onSelectStmt1(stack["columns"], stack["table"], stack["whereClause"]) +} + +func (c *current) onColumnList7(coll any) (any, error) { + return 
coll, nil +} + +func (p *parser) callonColumnList7() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onColumnList7(stack["coll"]) +} + +func (c *current) onColumnList1(column, other_columns any) (any, error) { + return makeColumnList(column, other_columns) +} + +func (p *parser) callonColumnList1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onColumnList1(stack["column"], stack["other_columns"]) +} + +func (c *current) onTableName1(key any) (any, error) { + return Table{Value: key.(string)}, nil +} + +func (p *parser) callonTableName1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onTableName1(stack["key"]) +} + +func (c *current) onFieldPath12(alias any) (any, error) { + return alias, nil +} + +func (p *parser) callonFieldPath12() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onFieldPath12(stack["alias"]) +} + +func (c *current) onFieldPath1(name, path, asClause any) (any, error) { + return makeFieldPath(name, path, asClause) +} + +func (p *parser) callonFieldPath1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onFieldPath1(stack["name"], stack["path"], stack["asClause"]) +} + +func (c *current) onIdentifier1() (any, error) { + return string(c.text), nil +} + +func (p *parser) callonIdentifier1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onIdentifier1() +} + +func (c *current) onCondition1(expression any) (any, error) { + return expression, nil +} + +func (p *parser) callonCondition1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onCondition1(stack["expression"]) +} + +func (c *current) onOrExpression7(ex any) (any, error) { + return ex, nil +} + +func (p *parser) callonOrExpression7() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onOrExpression7(stack["ex"]) +} + +func (c *current) 
onOrExpression1(ex1, ex2 any) (any, error) { + return combineExpressions(ex1, ex2, LogicalExpressionTypeOr) +} + +func (p *parser) callonOrExpression1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onOrExpression1(stack["ex1"], stack["ex2"]) +} + +func (c *current) onAndExpression7(ex any) (any, error) { + return ex, nil +} + +func (p *parser) callonAndExpression7() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onAndExpression7(stack["ex"]) +} + +func (c *current) onAndExpression1(ex1, ex2 any) (any, error) { + return combineExpressions(ex1, ex2, LogicalExpressionTypeAnd) +} + +func (p *parser) callonAndExpression1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onAndExpression1(stack["ex1"], stack["ex2"]) +} + +func (c *current) onComparisonExpression1(left, op, right any) (any, error) { + return ComparisonExpression{Left: left, Right: right, Operation: string(op.([]uint8))}, nil +} + +func (p *parser) callonComparisonExpression1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onComparisonExpression1(stack["left"], stack["op"], stack["right"]) +} + +func (c *current) onComparisonOperator7() (any, error) { + return string(c.text), nil +} + +func (p *parser) callonComparisonOperator7() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onComparisonOperator7() +} + +func (c *current) onIntegerLiteral1() (any, error) { + intValue, _ := strconv.Atoi(string(c.text)) + return Constant{Type: ConstantTypeInteger, Value: intValue}, nil +} + +func (p *parser) callonIntegerLiteral1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onIntegerLiteral1() +} + +func (c *current) onStringLiteral1(chars any) (any, error) { + return Constant{Type: ConstantTypeString, Value: joinStrings(chars.([]interface{}))}, nil +} + +func (p *parser) callonStringLiteral1() (any, error) { + stack := 
p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onStringLiteral1(stack["chars"]) +} + +func (c *current) onFloatLiteral1() (any, error) { + floatValue, _ := strconv.ParseFloat(string(c.text), 64) + return Constant{Type: ConstantTypeFloat, Value: floatValue}, nil +} + +func (p *parser) callonFloatLiteral1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onFloatLiteral1() +} + +func (c *current) onBooleanLiteral1() (any, error) { + boolValue, _ := strconv.ParseBool(string(c.text)) + return Constant{Type: ConstantTypeBoolean, Value: boolValue}, nil +} + +func (p *parser) callonBooleanLiteral1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onBooleanLiteral1() +} + +func (c *current) onStringCharacter2() (any, error) { + return string(c.text), nil +} + +func (p *parser) callonStringCharacter2() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onStringCharacter2() +} + +func (c *current) onStringCharacter9(seq any) (any, error) { + return seq, nil +} + +func (p *parser) callonStringCharacter9() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onStringCharacter9(stack["seq"]) +} + +func (c *current) onEscapeCharacter5() (any, error) { + return "\b", nil +} + +func (p *parser) callonEscapeCharacter5() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onEscapeCharacter5() +} + +func (c *current) onEscapeCharacter7() (any, error) { + return "\f", nil +} + +func (p *parser) callonEscapeCharacter7() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onEscapeCharacter7() +} + +func (c *current) onEscapeCharacter9() (any, error) { + return "\n", nil +} + +func (p *parser) callonEscapeCharacter9() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onEscapeCharacter9() +} + +func (c *current) onEscapeCharacter11() (any, error) { + return "\r", nil +} + +func (p 
*parser) callonEscapeCharacter11() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onEscapeCharacter11() +} + +func (c *current) onEscapeCharacter13() (any, error) { + return "\t", nil +} + +func (p *parser) callonEscapeCharacter13() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onEscapeCharacter13() +} + +func (c *current) onnon_escape_character1(char any) (any, error) { + return string(c.text), nil +} + +func (p *parser) callonnon_escape_character1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onnon_escape_character1(stack["char"]) +} + +var ( + // errNoRule is returned when the grammar to parse has no rule. + errNoRule = errors.New("grammar has no rule") + + // errInvalidEntrypoint is returned when the specified entrypoint rule + // does not exist. + errInvalidEntrypoint = errors.New("invalid entrypoint") + + // errInvalidEncoding is returned when the source is not properly + // utf8-encoded. + errInvalidEncoding = errors.New("invalid encoding") + + // errMaxExprCnt is used to signal that the maximum number of + // expressions have been parsed. + errMaxExprCnt = errors.New("max number of expressions parsed") +) + +// Option is a function that can set an option on the parser. It returns +// the previous setting as an Option. +type Option func(*parser) Option + +// MaxExpressions creates an Option to stop parsing after the provided +// number of expressions have been parsed, if the value is 0 then the parser will +// parse for as many steps as needed (possibly an infinite number). +// +// The default for maxExprCnt is 0. +func MaxExpressions(maxExprCnt uint64) Option { + return func(p *parser) Option { + oldMaxExprCnt := p.maxExprCnt + p.maxExprCnt = maxExprCnt + return MaxExpressions(oldMaxExprCnt) + } +} + +// Entrypoint creates an Option to set the rule name to use as entrypoint. 
+// The rule name must have been specified in the -alternate-entrypoints +// if generating the parser with the -optimize-grammar flag, otherwise +// it may have been optimized out. Passing an empty string sets the +// entrypoint to the first rule in the grammar. +// +// The default is to start parsing at the first rule in the grammar. +func Entrypoint(ruleName string) Option { + return func(p *parser) Option { + oldEntrypoint := p.entrypoint + p.entrypoint = ruleName + if ruleName == "" { + p.entrypoint = g.rules[0].name + } + return Entrypoint(oldEntrypoint) + } +} + +// Statistics adds a user provided Stats struct to the parser to allow +// the user to process the results after the parsing has finished. +// Also the key for the "no match" counter is set. +// +// Example usage: +// +// input := "input" +// stats := Stats{} +// _, err := Parse("input-file", []byte(input), Statistics(&stats, "no match")) +// if err != nil { +// log.Panicln(err) +// } +// b, err := json.MarshalIndent(stats.ChoiceAltCnt, "", " ") +// if err != nil { +// log.Panicln(err) +// } +// fmt.Println(string(b)) +func Statistics(stats *Stats, choiceNoMatch string) Option { + return func(p *parser) Option { + oldStats := p.Stats + p.Stats = stats + oldChoiceNoMatch := p.choiceNoMatch + p.choiceNoMatch = choiceNoMatch + if p.Stats.ChoiceAltCnt == nil { + p.Stats.ChoiceAltCnt = make(map[string]map[string]int) + } + return Statistics(oldStats, oldChoiceNoMatch) + } +} + +// Debug creates an Option to set the debug flag to b. When set to true, +// debugging information is printed to stdout while parsing. +// +// The default is false. +func Debug(b bool) Option { + return func(p *parser) Option { + old := p.debug + p.debug = b + return Debug(old) + } +} + +// Memoize creates an Option to set the memoize flag to b. When set to true, +// the parser will cache all results so each expression is evaluated only +// once. 
This guarantees linear parsing time even for pathological cases, +// at the expense of more memory and slower times for typical cases. +// +// The default is false. +func Memoize(b bool) Option { + return func(p *parser) Option { + old := p.memoize + p.memoize = b + return Memoize(old) + } +} + +// AllowInvalidUTF8 creates an Option to allow invalid UTF-8 bytes. +// Every invalid UTF-8 byte is treated as a utf8.RuneError (U+FFFD) +// by character class matchers and is matched by the any matcher. +// The returned matched value, c.text and c.offset are NOT affected. +// +// The default is false. +func AllowInvalidUTF8(b bool) Option { + return func(p *parser) Option { + old := p.allowInvalidUTF8 + p.allowInvalidUTF8 = b + return AllowInvalidUTF8(old) + } +} + +// Recover creates an Option to set the recover flag to b. When set to +// true, this causes the parser to recover from panics and convert it +// to an error. Setting it to false can be useful while debugging to +// access the full stack trace. +// +// The default is true. +func Recover(b bool) Option { + return func(p *parser) Option { + old := p.recover + p.recover = b + return Recover(old) + } +} + +// GlobalStore creates an Option to set a key to a certain value in +// the globalStore. +func GlobalStore(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.globalStore[key] + p.cur.globalStore[key] = value + return GlobalStore(key, old) + } +} + +// InitState creates an Option to set a key to a certain value in +// the global "state" store. +func InitState(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.state[key] + p.cur.state[key] = value + return InitState(key, old) + } +} + +// ParseFile parses the file identified by filename. 
+func ParseFile(filename string, opts ...Option) (i any, err error) { + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer func() { + if closeErr := f.Close(); closeErr != nil { + err = closeErr + } + }() + return ParseReader(filename, f, opts...) +} + +// ParseReader parses the data from r using filename as information in the +// error messages. +func ParseReader(filename string, r io.Reader, opts ...Option) (any, error) { + b, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + return Parse(filename, b, opts...) +} + +// Parse parses the data from b using filename as information in the +// error messages. +func Parse(filename string, b []byte, opts ...Option) (any, error) { + return newParser(filename, b, opts...).parse(g) +} + +// position records a position in the text. +type position struct { + line, col, offset int +} + +func (p position) String() string { + return strconv.Itoa(p.line) + ":" + strconv.Itoa(p.col) + " [" + strconv.Itoa(p.offset) + "]" +} + +// savepoint stores all state required to go back to this point in the +// parser. +type savepoint struct { + position + rn rune + w int +} + +type current struct { + pos position // start position of the match + text []byte // raw text of the match + + // state is a store for arbitrary key,value pairs that the user wants to be + // tied to the backtracking of the parser. + // This is always rolled back if a parsing rule fails. + state storeDict + + // globalStore is a general store for the user to store arbitrary key-value + // pairs that they need to manage and that they do not want tied to the + // backtracking of the parser. This is only modified by the user and never + // rolled back by the parser. It is always up to the user to keep this in a + // consistent state. + globalStore storeDict +} + +type storeDict map[string]any + +// the AST types... 
+ +type grammar struct { + pos position + rules []*rule +} + +type rule struct { + pos position + name string + displayName string + expr any +} + +type choiceExpr struct { + pos position + alternatives []any +} + +type actionExpr struct { + pos position + expr any + run func(*parser) (any, error) +} + +type recoveryExpr struct { + pos position + expr any + recoverExpr any + failureLabel []string +} + +type seqExpr struct { + pos position + exprs []any +} + +type throwExpr struct { + pos position + label string +} + +type labeledExpr struct { + pos position + label string + expr any +} + +type expr struct { + pos position + expr any +} + +type ( + andExpr expr + notExpr expr + zeroOrOneExpr expr + zeroOrMoreExpr expr + oneOrMoreExpr expr +) + +type ruleRefExpr struct { + pos position + name string +} + +type stateCodeExpr struct { + pos position + run func(*parser) error +} + +type andCodeExpr struct { + pos position + run func(*parser) (bool, error) +} + +type notCodeExpr struct { + pos position + run func(*parser) (bool, error) +} + +type litMatcher struct { + pos position + val string + ignoreCase bool + want string +} + +type charClassMatcher struct { + pos position + val string + basicLatinChars [128]bool + chars []rune + ranges []rune + classes []*unicode.RangeTable + ignoreCase bool + inverted bool +} + +type anyMatcher position + +// errList cumulates the errors found by the parser. 
+type errList []error + +func (e *errList) add(err error) { + *e = append(*e, err) +} + +func (e errList) err() error { + if len(e) == 0 { + return nil + } + e.dedupe() + return e +} + +func (e *errList) dedupe() { + var cleaned []error + set := make(map[string]bool) + for _, err := range *e { + if msg := err.Error(); !set[msg] { + set[msg] = true + cleaned = append(cleaned, err) + } + } + *e = cleaned +} + +func (e errList) Error() string { + switch len(e) { + case 0: + return "" + case 1: + return e[0].Error() + default: + var buf bytes.Buffer + + for i, err := range e { + if i > 0 { + buf.WriteRune('\n') + } + buf.WriteString(err.Error()) + } + return buf.String() + } +} + +// parserError wraps an error with a prefix indicating the rule in which +// the error occurred. The original error is stored in the Inner field. +type parserError struct { + Inner error + pos position + prefix string + expected []string +} + +// Error returns the error message. +func (p *parserError) Error() string { + return p.prefix + ": " + p.Inner.Error() +} + +// newParser creates a parser with the specified input source and options. +func newParser(filename string, b []byte, opts ...Option) *parser { + stats := Stats{ + ChoiceAltCnt: make(map[string]map[string]int), + } + + p := &parser{ + filename: filename, + errs: new(errList), + data: b, + pt: savepoint{position: position{line: 1}}, + recover: true, + cur: current{ + state: make(storeDict), + globalStore: make(storeDict), + }, + maxFailPos: position{col: 1, line: 1}, + maxFailExpected: make([]string, 0, 20), + Stats: &stats, + // start rule is rule [0] unless an alternate entrypoint is specified + entrypoint: g.rules[0].name, + } + p.setOptions(opts) + + if p.maxExprCnt == 0 { + p.maxExprCnt = math.MaxUint64 + } + + return p +} + +// setOptions applies the options to the parser. 
+func (p *parser) setOptions(opts []Option) { + for _, opt := range opts { + opt(p) + } +} + +type resultTuple struct { + v any + b bool + end savepoint +} + +const choiceNoMatch = -1 + +// Stats stores some statistics, gathered during parsing +type Stats struct { + // ExprCnt counts the number of expressions processed during parsing + // This value is compared to the maximum number of expressions allowed + // (set by the MaxExpressions option). + ExprCnt uint64 + + // ChoiceAltCnt is used to count for each ordered choice expression, + // which alternative is used how may times. + // These numbers allow to optimize the order of the ordered choice expression + // to increase the performance of the parser + // + // The outer key of ChoiceAltCnt is composed of the name of the rule as well + // as the line and the column of the ordered choice. + // The inner key of ChoiceAltCnt is the number (one-based) of the matching alternative. + // For each alternative the number of matches are counted. If an ordered choice does not + // match, a special counter is incremented. The name of this counter is set with + // the parser option Statistics. + // For an alternative to be included in ChoiceAltCnt, it has to match at least once. 
+ ChoiceAltCnt map[string]map[string]int +} + +type parser struct { + filename string + pt savepoint + cur current + + data []byte + errs *errList + + depth int + recover bool + debug bool + + memoize bool + // memoization table for the packrat algorithm: + // map[offset in source] map[expression or rule] {value, match} + memo map[int]map[any]resultTuple + + // rules table, maps the rule identifier to the rule node + rules map[string]*rule + // variables stack, map of label to value + vstack []map[string]any + // rule stack, allows identification of the current rule in errors + rstack []*rule + + // parse fail + maxFailPos position + maxFailExpected []string + maxFailInvertExpected bool + + // max number of expressions to be parsed + maxExprCnt uint64 + // entrypoint for the parser + entrypoint string + + allowInvalidUTF8 bool + + *Stats + + choiceNoMatch string + // recovery expression stack, keeps track of the currently available recovery expression, these are traversed in reverse + recoveryStack []map[string]any +} + +// push a variable set on the vstack. +func (p *parser) pushV() { + if cap(p.vstack) == len(p.vstack) { + // create new empty slot in the stack + p.vstack = append(p.vstack, nil) + } else { + // slice to 1 more + p.vstack = p.vstack[:len(p.vstack)+1] + } + + // get the last args set + m := p.vstack[len(p.vstack)-1] + if m != nil && len(m) == 0 { + // empty map, all good + return + } + + m = make(map[string]any) + p.vstack[len(p.vstack)-1] = m +} + +// pop a variable set from the vstack. 
+func (p *parser) popV() { + // if the map is not empty, clear it + m := p.vstack[len(p.vstack)-1] + if len(m) > 0 { + // GC that map + p.vstack[len(p.vstack)-1] = nil + } + p.vstack = p.vstack[:len(p.vstack)-1] +} + +// push a recovery expression with its labels to the recoveryStack +func (p *parser) pushRecovery(labels []string, expr any) { + if cap(p.recoveryStack) == len(p.recoveryStack) { + // create new empty slot in the stack + p.recoveryStack = append(p.recoveryStack, nil) + } else { + // slice to 1 more + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)+1] + } + + m := make(map[string]any, len(labels)) + for _, fl := range labels { + m[fl] = expr + } + p.recoveryStack[len(p.recoveryStack)-1] = m +} + +// pop a recovery expression from the recoveryStack +func (p *parser) popRecovery() { + // GC that map + p.recoveryStack[len(p.recoveryStack)-1] = nil + + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)-1] +} + +func (p *parser) print(prefix, s string) string { + if !p.debug { + return s + } + + fmt.Printf("%s %d:%d:%d: %s [%#U]\n", + prefix, p.pt.line, p.pt.col, p.pt.offset, s, p.pt.rn) + return s +} + +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + +func (p *parser) in(s string) string { + res := p.printIndent(">", s) + p.depth++ + return res +} + +func (p *parser) out(s string) string { + p.depth-- + return p.printIndent("<", s) +} + +func (p *parser) addErr(err error) { + p.addErrAt(err, p.pt.position, []string{}) +} + +func (p *parser) addErrAt(err error, pos position, expected []string) { + var buf bytes.Buffer + if p.filename != "" { + buf.WriteString(p.filename) + } + if buf.Len() > 0 { + buf.WriteString(":") + } + buf.WriteString(fmt.Sprintf("%d:%d (%d)", pos.line, pos.col, pos.offset)) + if len(p.rstack) > 0 { + if buf.Len() > 0 { + buf.WriteString(": ") + } + rule := p.rstack[len(p.rstack)-1] + if rule.displayName != "" { + buf.WriteString("rule " + 
rule.displayName) + } else { + buf.WriteString("rule " + rule.name) + } + } + pe := &parserError{Inner: err, pos: pos, prefix: buf.String(), expected: expected} + p.errs.add(pe) +} + +func (p *parser) failAt(fail bool, pos position, want string) { + // process fail if parsing fails and not inverted or parsing succeeds and invert is set + if fail == p.maxFailInvertExpected { + if pos.offset < p.maxFailPos.offset { + return + } + + if pos.offset > p.maxFailPos.offset { + p.maxFailPos = pos + p.maxFailExpected = p.maxFailExpected[:0] + } + + if p.maxFailInvertExpected { + want = "!" + want + } + p.maxFailExpected = append(p.maxFailExpected, want) + } +} + +// read advances the parser to the next rune. +func (p *parser) read() { + p.pt.offset += p.pt.w + rn, n := utf8.DecodeRune(p.data[p.pt.offset:]) + p.pt.rn = rn + p.pt.w = n + p.pt.col++ + if rn == '\n' { + p.pt.line++ + p.pt.col = 0 + } + + if rn == utf8.RuneError && n == 1 { // see utf8.DecodeRune + if !p.allowInvalidUTF8 { + p.addErr(errInvalidEncoding) + } + } +} + +// restore parser position to the savepoint pt. +func (p *parser) restore(pt savepoint) { + if p.debug { + defer p.out(p.in("restore")) + } + if pt.offset == p.pt.offset { + return + } + p.pt = pt +} + +// Cloner is implemented by any value that has a Clone method, which returns a +// copy of the value. This is mainly used for types which are not passed by +// value (e.g map, slice, chan) or structs that contain such types. +// +// This is used in conjunction with the global state feature to create proper +// copies of the state to allow the parser to properly restore the state in +// the case of backtracking. +type Cloner interface { + Clone() any +} + +var statePool = &sync.Pool{ + New: func() any { return make(storeDict) }, +} + +func (sd storeDict) Discard() { + for k := range sd { + delete(sd, k) + } + statePool.Put(sd) +} + +// clone and return parser current state. 
+func (p *parser) cloneState() storeDict { + if p.debug { + defer p.out(p.in("cloneState")) + } + + state := statePool.Get().(storeDict) + for k, v := range p.cur.state { + if c, ok := v.(Cloner); ok { + state[k] = c.Clone() + } else { + state[k] = v + } + } + return state +} + +// restore parser current state to the state storeDict. +// every restoreState should applied only one time for every cloned state +func (p *parser) restoreState(state storeDict) { + if p.debug { + defer p.out(p.in("restoreState")) + } + p.cur.state.Discard() + p.cur.state = state +} + +// get the slice of bytes from the savepoint start to the current position. +func (p *parser) sliceFrom(start savepoint) []byte { + return p.data[start.position.offset:p.pt.position.offset] +} + +func (p *parser) getMemoized(node any) (resultTuple, bool) { + if len(p.memo) == 0 { + return resultTuple{}, false + } + m := p.memo[p.pt.offset] + if len(m) == 0 { + return resultTuple{}, false + } + res, ok := m[node] + return res, ok +} + +func (p *parser) setMemoized(pt savepoint, node any, tuple resultTuple) { + if p.memo == nil { + p.memo = make(map[int]map[any]resultTuple) + } + m := p.memo[pt.offset] + if m == nil { + m = make(map[any]resultTuple) + p.memo[pt.offset] = m + } + m[node] = tuple +} + +func (p *parser) buildRulesTable(g *grammar) { + p.rules = make(map[string]*rule, len(g.rules)) + for _, r := range g.rules { + p.rules[r.name] = r + } +} + +func (p *parser) parse(g *grammar) (val any, err error) { + if len(g.rules) == 0 { + p.addErr(errNoRule) + return nil, p.errs.err() + } + + // TODO : not super critical but this could be generated + p.buildRulesTable(g) + + if p.recover { + // panic can be used in action code to stop parsing immediately + // and return the panic as an error. 
+ defer func() { + if e := recover(); e != nil { + if p.debug { + defer p.out(p.in("panic handler")) + } + val = nil + switch e := e.(type) { + case error: + p.addErr(e) + default: + p.addErr(fmt.Errorf("%v", e)) + } + err = p.errs.err() + } + }() + } + + startRule, ok := p.rules[p.entrypoint] + if !ok { + p.addErr(errInvalidEntrypoint) + return nil, p.errs.err() + } + + p.read() // advance to first rune + val, ok = p.parseRuleWrap(startRule) + if !ok { + if len(*p.errs) == 0 { + // If parsing fails, but no errors have been recorded, the expected values + // for the farthest parser position are returned as error. + maxFailExpectedMap := make(map[string]struct{}, len(p.maxFailExpected)) + for _, v := range p.maxFailExpected { + maxFailExpectedMap[v] = struct{}{} + } + expected := make([]string, 0, len(maxFailExpectedMap)) + eof := false + if _, ok := maxFailExpectedMap["!."]; ok { + delete(maxFailExpectedMap, "!.") + eof = true + } + for k := range maxFailExpectedMap { + expected = append(expected, k) + } + sort.Strings(expected) + if eof { + expected = append(expected, "EOF") + } + p.addErrAt(errors.New("no match found, expected: "+listJoin(expected, ", ", "or")), p.maxFailPos, expected) + } + + return nil, p.errs.err() + } + return val, p.errs.err() +} + +func listJoin(list []string, sep string, lastSep string) string { + switch len(list) { + case 0: + return "" + case 1: + return list[0] + default: + return strings.Join(list[:len(list)-1], sep) + " " + lastSep + " " + list[len(list)-1] + } +} + +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + if p.debug { + defer p.out(p.in("parseRule " + rule.name)) + } + var ( + val any + ok bool + startMark = p.pt + ) + + if 
p.memoize { + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) + } + + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { + p.rstack = append(p.rstack, rule) + p.pushV() + val, ok := p.parseExprWrap(rule.expr) + p.popV() + p.rstack = p.rstack[:len(p.rstack)-1] + return val, ok +} + +func (p *parser) parseExprWrap(expr any) (any, bool) { + var pt savepoint + + if p.memoize { + res, ok := p.getMemoized(expr) + if ok { + p.restore(res.end) + return res.v, res.b + } + pt = p.pt + } + + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +func (p *parser) parseExpr(expr any) (any, bool) { + p.ExprCnt++ + if p.ExprCnt > p.maxExprCnt { + panic(errMaxExprCnt) + } + + var val any + var ok bool + switch expr := expr.(type) { + case *actionExpr: + val, ok = p.parseActionExpr(expr) + case *andCodeExpr: + val, ok = p.parseAndCodeExpr(expr) + case *andExpr: + val, ok = p.parseAndExpr(expr) + case *anyMatcher: + val, ok = p.parseAnyMatcher(expr) + case *charClassMatcher: + val, ok = p.parseCharClassMatcher(expr) + case *choiceExpr: + val, ok = p.parseChoiceExpr(expr) + case *labeledExpr: + val, ok = p.parseLabeledExpr(expr) + case *litMatcher: + val, ok = p.parseLitMatcher(expr) + case *notCodeExpr: + val, ok = p.parseNotCodeExpr(expr) + case *notExpr: + val, ok = p.parseNotExpr(expr) + case *oneOrMoreExpr: + val, ok = p.parseOneOrMoreExpr(expr) + case *recoveryExpr: + val, ok = p.parseRecoveryExpr(expr) + case *ruleRefExpr: + val, ok = p.parseRuleRefExpr(expr) + case *seqExpr: + val, ok = p.parseSeqExpr(expr) + case *stateCodeExpr: + val, ok = p.parseStateCodeExpr(expr) + case *throwExpr: + val, ok = p.parseThrowExpr(expr) + case *zeroOrMoreExpr: + val, ok = p.parseZeroOrMoreExpr(expr) + case *zeroOrOneExpr: + val, ok = p.parseZeroOrOneExpr(expr) + default: + 
panic(fmt.Sprintf("unknown expression type %T", expr)) + } + return val, ok +} + +func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseActionExpr")) + } + + start := p.pt + val, ok := p.parseExprWrap(act.expr) + if ok { + p.cur.pos = start.position + p.cur.text = p.sliceFrom(start) + state := p.cloneState() + actVal, err := act.run(p) + if err != nil { + p.addErrAt(err, start.position, []string{}) + } + p.restoreState(state) + + val = actVal + } + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(start))) + } + return val, ok +} + +func (p *parser) parseAndCodeExpr(and *andCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseAndCodeExpr")) + } + + state := p.cloneState() + + ok, err := and.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, ok +} + +func (p *parser) parseAndExpr(and *andExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseAndExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + _, ok := p.parseExprWrap(and.expr) + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, ok +} + +func (p *parser) parseAnyMatcher(any *anyMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseAnyMatcher")) + } + + if p.pt.rn == utf8.RuneError && p.pt.w == 0 { + // EOF - see utf8.DecodeRune + p.failAt(false, p.pt.position, ".") + return nil, false + } + start := p.pt + p.read() + p.failAt(true, start.position, ".") + return p.sliceFrom(start), true +} + +func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseCharClassMatcher")) + } + + cur := p.pt.rn + start := p.pt + + // can't match EOF + if cur == utf8.RuneError && p.pt.w == 0 { // see utf8.DecodeRune + p.failAt(false, start.position, chr.val) + return nil, false + } + + if chr.ignoreCase { + cur = unicode.ToLower(cur) + } + + // try to match in the list of available chars + for _, rn := range chr.chars { + if rn 
== cur { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of ranges + for i := 0; i < len(chr.ranges); i += 2 { + if cur >= chr.ranges[i] && cur <= chr.ranges[i+1] { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of Unicode classes + for _, cl := range chr.classes { + if unicode.Is(cl, cur) { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + if chr.inverted { + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + p.failAt(false, start.position, chr.val) + return nil, false +} + +func (p *parser) incChoiceAltCnt(ch *choiceExpr, altI int) { + choiceIdent := fmt.Sprintf("%s %d:%d", p.rstack[len(p.rstack)-1].name, ch.pos.line, ch.pos.col) + m := p.ChoiceAltCnt[choiceIdent] + if m == nil { + m = make(map[string]int) + p.ChoiceAltCnt[choiceIdent] = m + } + // We increment altI by 1, so the keys do not start at 0 + alt := strconv.Itoa(altI + 1) + if altI == choiceNoMatch { + alt = p.choiceNoMatch + } + m[alt]++ +} + +func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseChoiceExpr")) + } + + for altI, alt := range ch.alternatives { + // dummy assignment to prevent compile error if optimized + _ = altI + + state := p.cloneState() + + p.pushV() + val, ok := p.parseExprWrap(alt) + p.popV() + if ok { + p.incChoiceAltCnt(ch, altI) + return val, ok + } + p.restoreState(state) + } + p.incChoiceAltCnt(ch, choiceNoMatch) + return nil, false +} + +func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { + if p.debug { + defer 
p.out(p.in("parseLabeledExpr")) + } + + p.pushV() + val, ok := p.parseExprWrap(lab.expr) + p.popV() + if ok && lab.label != "" { + m := p.vstack[len(p.vstack)-1] + m[lab.label] = val + } + return val, ok +} + +func (p *parser) parseLitMatcher(lit *litMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseLitMatcher")) + } + + start := p.pt + for _, want := range lit.val { + cur := p.pt.rn + if lit.ignoreCase { + cur = unicode.ToLower(cur) + } + if cur != want { + p.failAt(false, start.position, lit.want) + p.restore(start) + return nil, false + } + p.read() + } + p.failAt(true, start.position, lit.want) + return p.sliceFrom(start), true +} + +func (p *parser) parseNotCodeExpr(not *notCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseNotCodeExpr")) + } + + state := p.cloneState() + + ok, err := not.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, !ok +} + +func (p *parser) parseNotExpr(not *notExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseNotExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + p.maxFailInvertExpected = !p.maxFailInvertExpected + _, ok := p.parseExprWrap(not.expr) + p.maxFailInvertExpected = !p.maxFailInvertExpected + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, !ok +} + +func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseOneOrMoreExpr")) + } + + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + if len(vals) == 0 { + // did not match once, no match + return nil, false + } + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseRecoveryExpr (" + strings.Join(recover.failureLabel, ",") + ")")) + } + + p.pushRecovery(recover.failureLabel, recover.recoverExpr) + val, ok := p.parseExprWrap(recover.expr) + p.popRecovery() + + 
return val, ok +} + +func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseRuleRefExpr " + ref.name)) + } + + if ref.name == "" { + panic(fmt.Sprintf("%s: invalid rule: missing name", ref.pos)) + } + + rule := p.rules[ref.name] + if rule == nil { + p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) + return nil, false + } + return p.parseRuleWrap(rule) +} + +func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseSeqExpr")) + } + + vals := make([]any, 0, len(seq.exprs)) + + pt := p.pt + state := p.cloneState() + for _, expr := range seq.exprs { + val, ok := p.parseExprWrap(expr) + if !ok { + p.restoreState(state) + p.restore(pt) + return nil, false + } + vals = append(vals, val) + } + return vals, true +} + +func (p *parser) parseStateCodeExpr(state *stateCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseStateCodeExpr")) + } + + err := state.run(p) + if err != nil { + p.addErr(err) + } + return nil, true +} + +func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseThrowExpr")) + } + + for i := len(p.recoveryStack) - 1; i >= 0; i-- { + if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { + return val, ok + } + } + } + + return nil, false +} + +func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrMoreExpr")) + } + + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrOneExpr")) + } + + p.pushV() + val, _ := p.parseExprWrap(expr.expr) + p.popV() + // whether it matched or not, consider it a match + return val, true +} diff --git a/parsers/nosql/nosql.peg 
b/parsers/nosql/nosql.peg
new file mode 100644
index 0000000..d9b188d
--- /dev/null
+++ b/parsers/nosql/nosql.peg
@@ -0,0 +1,264 @@
+{
+package nosql
+
+// LogicalExpressionType identifies the boolean operator that joins the
+// operands of a LogicalExpression.
+type LogicalExpressionType int
+
+const (
+	LogicalExpressionTypeOr LogicalExpressionType = iota
+	LogicalExpressionTypeAnd
+)
+
+// ConstantType identifies which kind of literal a Constant holds.
+type ConstantType int
+
+const (
+	ConstantTypeString ConstantType = iota
+	ConstantTypeInteger
+	ConstantTypeFloat
+	ConstantTypeBoolean
+)
+
+// SelectStmt is the value produced for a parsed SELECT query.
+type SelectStmt struct{
+	Columns []FieldPath
+	Table Table
+	// Filters stays nil without a WHERE clause; otherwise makeSelectStmt stores
+	// a ComparisonExpression or a LogicalExpression here.
+	Filters interface{}
+}
+
+// Table names the source that the query selects from.
+type Table struct{
+	Value string
+}
+
+// FieldPath is a dotted reference such as c.id, split into its segments, with
+// an optional alias taken from an AS clause.
+type FieldPath struct {
+	Alias string
+	Path []string
+}
+
+// LogicalExpression joins two or more expressions with a single operator.
+type LogicalExpression struct {
+	Expressions []interface{}
+	Operation LogicalExpressionType
+}
+
+// ComparisonExpression compares Left with Right using the operator text held
+// in Operation; each side is a Constant or a FieldPath.
+type ComparisonExpression struct {
+	Left interface{}
+	Right interface{}
+	Operation string
+}
+
+// Constant is a literal value together with its ConstantType.
+type Constant struct {
+	Type ConstantType
+	Value interface{}
+}
+
+// makeSelectStmt assembles a SelectStmt from the values matched by the
+// SelectStmt rule. columns must be a []FieldPath and table a Table; any other
+// dynamic type makes the type assertions panic.
+func makeSelectStmt(columns, table, whereClause interface{}) (SelectStmt, error) {
+	selectStmt := SelectStmt{
+		Columns: columns.([]FieldPath),
+		Table: table.(Table),
+	}
+
+	// When whereClause holds neither expression type (for example nil),
+	// Filters is left unset.
+	if filters, ok := whereClause.(ComparisonExpression); ok {
+		selectStmt.Filters = filters
+	} else if filters, ok := whereClause.(LogicalExpression); ok {
+		selectStmt.Filters = filters
+	}
+
+	return selectStmt, nil
+}
+
+// makeFieldPath builds a FieldPath from the leading identifier, the matched
+// ("." Identifier) groups and the optional alias.
+func makeFieldPath(name interface{}, path interface{}, alias interface{}) (FieldPath, error) {
+	ps := path.([]interface{})
+
+	paths := make([]string, 1)
+	paths[0] = name.(string)
+	for _, p := range ps {
+		// Each group is a slice whose first element is the "." match; skip it
+		// and keep the identifiers that follow.
+		pa := p.([]interface{})
+		px := pa[1:]
+		for _, pi := range px {
+			paths = append(paths, pi.(string))
+		}
+	}
+
+	fieldPath := FieldPath{Path: paths}
+	// alias is only used when it holds a string; otherwise Alias stays empty.
+	if aliasValue, ok := alias.(string); ok {
+		fieldPath.Alias = aliasValue
+	}
+
+	return fieldPath, nil
+}
+
+// makeColumnList prepends column to the FieldPath values in other_columns.
+// Entries of other_columns that are not a FieldPath leave a zero FieldPath in
+// their slot.
+func makeColumnList(column interface{}, other_columns interface{}) ([]FieldPath, error) {
+	collsAsArray := other_columns.([]interface{})
+	columnList := make([]FieldPath, len(collsAsArray) + 1)
+	columnList[0] = column.(FieldPath)
+
+	for i, v := range collsAsArray {
+		if col, ok := v.(FieldPath); ok {
+			columnList[i+1] = col
+		}
+	}
+
+	return columnList, nil
+}
+
+// joinStrings concatenates the string elements of array in order. Elements of
+// any other type are skipped silently.
+func joinStrings(array []interface{}) string {
+	var stringsArray []string
+	for _, elem := range array {
+		str, ok := elem.(string)
+		if !ok {
+			continue
+		}
+		stringsArray = append(stringsArray, str)
+	}
+
+	return strings.Join(stringsArray, "")
+}
+
+// combineExpressions returns ex1 unchanged when exs is nil or empty; otherwise
+// it wraps ex1 followed by the elements of exs in a LogicalExpression with the
+// given operation.
+func combineExpressions(ex1 interface{}, exs interface{}, operation LogicalExpressionType) (interface{}, error) {
+	if exs == nil || len(exs.([]interface{})) < 1 {
+		return ex1, nil
+	}
+
+	return LogicalExpression{
+		Expressions: append([]interface{}{ex1}, exs.([]interface{})...),
+		Operation: operation,
+	}, nil
+}
+
+}
+
+// NOTE(review): Input does not require EOF, so text after a valid statement
+// appears to be accepted; the EOF rule below is never referenced.
+Input <- selectStmt:SelectStmt {
+    return selectStmt, nil
+}
+
+SelectStmt <- Select ws columns:ColumnList ws
+    From ws table:TableName ws
+    whereClause:(ws Where ws condition:Condition { return condition, nil })? {
+    return makeSelectStmt(columns, table, whereClause)
+}
+
+ColumnList <- column:FieldPath other_columns:(ws "," ws coll:FieldPath {return coll, nil })* {
+    return makeColumnList(column, other_columns)
+}
+
+TableName <- key:Identifier {
+    return Table{Value: key.(string)}, nil
+}
+
+// NOTE(review): "AS" here, and "OR" / "AND" below, match upper case only,
+// while Select / From / Where also accept lower case.
+FieldPath <- name:Identifier path:("." Identifier)*
+    asClause:(ws "AS" ws alias:Identifier { return alias, nil })? {
+    return makeFieldPath(name, path, asClause)
+}
+
+Identifier <- [a-zA-Z_][a-zA-Z0-9_]* {
+    return string(c.text), nil
+}
+
+Condition <- expression:OrExpression {
+    return expression, nil
+}
+
+OrExpression <- ex1:AndExpression ex2:(ws "OR" ws ex:AndExpression { return ex, nil })* {
+    return combineExpressions(ex1, ex2, LogicalExpressionTypeOr)
+}
+
+AndExpression <- ex1:ComparisonExpression ex2:(ws "AND" ws ex:ComparisonExpression { return ex, nil })* {
+    return combineExpressions(ex1, ex2, LogicalExpressionTypeAnd)
+}
+
+ComparisonExpression <- left:(Literal / FieldPath) ws op:ComparisonOperator ws right:(Literal / FieldPath) {
+    return ComparisonExpression{Left:left,Right:right,Operation:string(op.([]uint8))}, nil
+}
+
+Select <- ("select" / "SELECT")
+
+From <- ("from" / "FROM")
+
+Where <- ("where" / "WHERE")
+
+// NOTE(review): choices are tried in order, so "<" and ">" win before "<="
+// and ">=" are attempted and those two operators look unreachable. The action
+// also seems to bind to the last alternative only, which would hand a string
+// to the op.([]uint8) assertion in ComparisonExpression. Confirm against
+// pigeon, then reorder the alternatives and regenerate nosql.go.
+ComparisonOperator <- "=" / "!=" / "<" / "<=" / ">" / ">=" {
+    return string(c.text), nil
+}
+
+Literal <- FloatLiteral / IntegerLiteral / StringLiteral / BooleanLiteral
+
+// NOTE(review): the Atoi error is discarded, so digits that overflow int do
+// not surface as a parse error.
+IntegerLiteral <- [0-9]+ {
+    intValue, _ := strconv.Atoi(string(c.text))
+    return Constant{Type: ConstantTypeInteger, Value: intValue}, nil
+}
+StringLiteral <- "\"" chars:StringCharacter* "\"" {
+    return Constant{Type: ConstantTypeString,Value: joinStrings(chars.([]interface{}))}, nil
+}
+FloatLiteral <- [0-9]+"."[0-9]+ {
+    floatValue, _ := strconv.ParseFloat(string(c.text), 64)
+    return Constant{Type: ConstantTypeFloat, Value: floatValue}, nil
+}
+BooleanLiteral <- ("true" / "false") {
+    boolValue, _ := strconv.ParseBool(string(c.text))
+    return Constant{Type: ConstantTypeBoolean, Value: boolValue}, nil
+}
+
+StringCharacter <- !('"' / "\\") . { return string(c.text), nil }
+    / "\\" seq:EscapeSequenceCharacter { return seq, nil }
+
+EscapeSequenceCharacter <- char:EscapeCharacter
+
+// NOTE(review): the first three alternatives have no action, so they
+// presumably yield []byte rather than string; joinStrings skips non-string
+// elements, which would drop escaped quotes and backslashes. Verify.
+EscapeCharacter <- "'"
+    / '"'
+    / "\\"
+    / "b" { return "\b", nil }
+    / "f" { return "\f", nil }
+    / "n" { return "\n", nil }
+    / "r" { return "\r", nil }
+    / "t" { return "\t", nil }
+
+// NOTE(review): no rule in this grammar refers to non_escape_character, and
+// escape_character is not defined anywhere in it.
+non_escape_character <- !(escape_character) char:.
+    { return string(c.text), nil }
+
+ws <- [ \t\n\r]*
+
+EOF <- !.
diff --git a/parsers/nosql/nosql_test.go b/parsers/nosql/nosql_test.go
new file mode 100644
index 0000000..d288df9
--- /dev/null
+++ b/parsers/nosql/nosql_test.go
@@ -0,0 +1,153 @@
+package nosql_test
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/pikami/cosmium/parsers/nosql"
+)
+
+// For Parser Debugging
+// func Test_ParseTest(t *testing.T) {
+// 	// select c.id, c._self, c._rid, c._ts, [c[\"pk\"]] as _partitionKeyValue from c
+// 	res, err := nosql.Parse("", []byte("select c.id, c._self AS self, c._rid, c._ts FROM c where c.id=\"12345\" AND c.pk=123"))
+// 	if err != nil {
+// 		log.Fatal(err)
+// 	}
+
+// 	result, err := json.MarshalIndent(res, "", " ")
+// 	if err != nil {
+// 		fmt.Println(err)
+// 		return
+// 	}
+
+// 	fmt.Printf("output:\n%v\n", string(result))
+// }
+
+// testQueryParse parses query and reports a test error when the result is not
+// deeply equal to expectedQuery. A parse failure stops the calling test.
+func testQueryParse(t *testing.T, query string, expectedQuery nosql.SelectStmt) {
+	t.Helper()
+
+	parsedQuery, err := nosql.Parse("", []byte(query))
+	if err != nil {
+		// Fail through t rather than log.Fatal, which would exit the whole
+		// test binary and skip the remaining subtests.
+		t.Fatalf("parsing %q: %v", query, err)
+	}
+
+	if !reflect.DeepEqual(parsedQuery, expectedQuery) {
+		t.Errorf("parsed query does not match expected structure.\nExpected: %+v\nGot: %+v", expectedQuery, parsedQuery)
+	}
+}
+
+// Test_Parse checks the statement built for a set of representative queries.
+func Test_Parse(t *testing.T) {
+	t.Run("Should parse simple SELECT", func(t *testing.T) {
+		testQueryParse(
+			t,
+			`SELECT c.id, c.pk FROM c`,
+			nosql.SelectStmt{
+				Columns: []nosql.FieldPath{
+					{Path: []string{"c", "id"}},
+					{Path: []string{"c", "pk"}},
+				},
+				Table: nosql.Table{Value: "c"},
+			},
+		)
+	})
+
+	t.Run("Should parse SELECT with single WHERE condition", func(t *testing.T) {
+		testQueryParse(
+			t,
+			`select c.id
+			FROM c
+			WHERE c.pk=true`,
+			nosql.SelectStmt{
+				Columns: []nosql.FieldPath{
+					{Path: []string{"c", "id"}},
+				},
+				Table: nosql.Table{Value: "c"},
+				Filters: nosql.ComparisonExpression{
+					Operation: "=",
+					Left: nosql.FieldPath{Path: []string{"c", "pk"}},
+					Right: nosql.Constant{Type: nosql.ConstantTypeBoolean, Value: true},
+				},
+			},
+		)
+	})
+
+	t.Run("Should parse SELECT with multiple WHERE conditions", func(t *testing.T) {
+		testQueryParse(
+			t,
+			`select c.id, c._self AS self, c._rid, c._ts
+			FROM c
+			WHERE c.id="12345" OR c.pk=123`,
+			nosql.SelectStmt{
+				Columns: []nosql.FieldPath{
+					{Path: []string{"c", "id"}},
+					{Path: []string{"c", "_self"}, Alias: "self"},
+					{Path: []string{"c", "_rid"}},
+					{Path: []string{"c", "_ts"}},
+				},
+				Table: nosql.Table{Value: "c"},
+				Filters: nosql.LogicalExpression{
+					Operation: nosql.LogicalExpressionTypeOr,
+					Expressions: []interface{}{
+						nosql.ComparisonExpression{
+							Operation: "=",
+							Left: nosql.FieldPath{Path: []string{"c", "id"}},
+							Right: nosql.Constant{Type: nosql.ConstantTypeString, Value: "12345"},
+						},
+						nosql.ComparisonExpression{
+							Operation: "=",
+							Left: nosql.FieldPath{Path: []string{"c", "pk"}},
+							Right: nosql.Constant{Type: nosql.ConstantTypeInteger, Value: 123},
+						},
+					},
+				},
+			},
+		)
+	})
+
+	t.Run("Should correctly parse literals in conditions", func(t *testing.T) {
+		testQueryParse(
+			t,
+			`select c.id
+			FROM c
+			WHERE c.boolean=true
+			AND c.integer=1
+			AND c.float=6.9
+			AND c.string="hello"`,
+			nosql.SelectStmt{
+				Columns: []nosql.FieldPath{{Path: []string{"c", "id"}, Alias: ""}},
+				Table: nosql.Table{Value: "c"},
+				Filters: nosql.LogicalExpression{
+					Expressions: []interface{}{
+						nosql.ComparisonExpression{
+							Left: nosql.FieldPath{Path: []string{"c", "boolean"}},
+							Right: nosql.Constant{Type: nosql.ConstantTypeBoolean, Value: true},
+							Operation: "=",
+						},
+						nosql.ComparisonExpression{
+							Left: nosql.FieldPath{Path: []string{"c", "integer"}},
+							Right: nosql.Constant{Type: nosql.ConstantTypeInteger, Value: 1},
+							Operation: "=",
+						},
+						nosql.ComparisonExpression{
+							Left: nosql.FieldPath{Path: []string{"c", "float"}},
+							Right: nosql.Constant{Type: nosql.ConstantTypeFloat, Value: 6.9},
+							Operation: "=",
+						},
+						nosql.ComparisonExpression{
+							Left: nosql.FieldPath{Path: []string{"c", "string"}},
+							Right: nosql.Constant{Type: nosql.ConstantTypeString, Value: "hello"},
+							Operation: "=",
+						},
+					},
+					Operation: nosql.LogicalExpressionTypeAnd,
+				},
+			},
+		)
+	})
+}