Implement AVG, COUNT, MAX, MIN, SUM functions

This commit is contained in:
Pijus Kamandulis 2024-03-11 19:10:41 +02:00
parent b72bba86c8
commit 6ed74688ca
7 changed files with 1429 additions and 560 deletions

View File

@ -119,9 +119,23 @@ const (
FunctionCallSetIntersect FunctionCallType = "SetIntersect"
FunctionCallSetUnion FunctionCallType = "SetUnion"
FunctionCallAggregateAvg FunctionCallType = "AggregateAvg"
FunctionCallAggregateCount FunctionCallType = "AggregateCount"
FunctionCallAggregateMax FunctionCallType = "AggregateMax"
FunctionCallAggregateMin FunctionCallType = "AggregateMin"
FunctionCallAggregateSum FunctionCallType = "AggregateSum"
FunctionCallIn FunctionCallType = "In"
)
// AggregateFunctions lists the FunctionCallType values of the aggregate
// functions: AVG, COUNT, MAX, MIN and SUM.
var AggregateFunctions = []FunctionCallType{
FunctionCallAggregateAvg,
FunctionCallAggregateCount,
FunctionCallAggregateMax,
FunctionCallAggregateMin,
FunctionCallAggregateSum,
}
type FunctionCall struct {
Arguments []interface{}
Type FunctionCallType

View File

@ -0,0 +1,130 @@
package nosql_test
import (
"testing"
"github.com/pikami/cosmium/parsers"
)
func Test_Parse_AggregateFunctions(t *testing.T) {
t.Run("Should parse function AVG()", func(t *testing.T) {
testQueryParse(
t,
`SELECT AVG(c.a1) FROM c`,
parsers.SelectStmt{
SelectItems: []parsers.SelectItem{
{
Type: parsers.SelectItemTypeFunctionCall,
Value: parsers.FunctionCall{
Type: parsers.FunctionCallAggregateAvg,
Arguments: []interface{}{
parsers.SelectItem{
Path: []string{"c", "a1"},
Type: parsers.SelectItemTypeField,
},
},
},
},
},
Table: parsers.Table{Value: "c"},
},
)
})
t.Run("Should parse function COUNT()", func(t *testing.T) {
testQueryParse(
t,
`SELECT COUNT(c.a1) FROM c`,
parsers.SelectStmt{
SelectItems: []parsers.SelectItem{
{
Type: parsers.SelectItemTypeFunctionCall,
Value: parsers.FunctionCall{
Type: parsers.FunctionCallAggregateCount,
Arguments: []interface{}{
parsers.SelectItem{
Path: []string{"c", "a1"},
Type: parsers.SelectItemTypeField,
},
},
},
},
},
Table: parsers.Table{Value: "c"},
},
)
})
t.Run("Should parse function MAX()", func(t *testing.T) {
testQueryParse(
t,
`SELECT MAX(c.a1) FROM c`,
parsers.SelectStmt{
SelectItems: []parsers.SelectItem{
{
Type: parsers.SelectItemTypeFunctionCall,
Value: parsers.FunctionCall{
Type: parsers.FunctionCallAggregateMax,
Arguments: []interface{}{
parsers.SelectItem{
Path: []string{"c", "a1"},
Type: parsers.SelectItemTypeField,
},
},
},
},
},
Table: parsers.Table{Value: "c"},
},
)
})
t.Run("Should parse function MIN()", func(t *testing.T) {
testQueryParse(
t,
`SELECT MIN(c.a1) FROM c`,
parsers.SelectStmt{
SelectItems: []parsers.SelectItem{
{
Type: parsers.SelectItemTypeFunctionCall,
Value: parsers.FunctionCall{
Type: parsers.FunctionCallAggregateMin,
Arguments: []interface{}{
parsers.SelectItem{
Path: []string{"c", "a1"},
Type: parsers.SelectItemTypeField,
},
},
},
},
},
Table: parsers.Table{Value: "c"},
},
)
})
t.Run("Should parse function SUM()", func(t *testing.T) {
testQueryParse(
t,
`SELECT SUM(c.a1) FROM c`,
parsers.SelectStmt{
SelectItems: []parsers.SelectItem{
{
Type: parsers.SelectItemTypeFunctionCall,
Value: parsers.FunctionCall{
Type: parsers.FunctionCallAggregateSum,
Arguments: []interface{}{
parsers.SelectItem{
Path: []string{"c", "a1"},
Type: parsers.SelectItemTypeField,
},
},
},
},
},
Table: parsers.Table{Value: "c"},
},
)
})
}

File diff suppressed because it is too large Load Diff

View File

@ -327,6 +327,7 @@ FunctionCall <- StringFunctions
/ TypeCheckingFunctions
/ ArrayFunctions
/ InFunction
/ AggregateFunctions
StringFunctions <- StringEqualsExpression
/ ToStringExpression
@ -356,6 +357,12 @@ TypeCheckingFunctions <- IsDefined
/ IsPrimitive
/ IsString
// AggregateFunctions is an ordered choice over the aggregate call rules:
// AVG, COUNT, MAX, MIN and SUM, tried in that order.
AggregateFunctions <- AvgAggregateExpression
/ CountAggregateExpression
/ MaxAggregateExpression
/ MinAggregateExpression
/ SumAggregateExpression
ArrayFunctions <- ArrayConcatExpression
/ ArrayLengthExpression
/ ArraySliceExpression
@ -509,6 +516,26 @@ InFunction <- ex1:SelectProperty ws "IN"i ws "(" ws ex2:SelectItem others:(ws ",
return createFunctionCall(parsers.FunctionCallIn, append([]interface{}{ex1, ex2}, others.([]interface{})...))
}
// AvgAggregateExpression matches AVG(<SelectItem>) with a case-insensitive
// keyword and builds a FunctionCallAggregateAvg call whose single argument is
// the matched item. The "(" must directly follow the keyword; ws (presumably
// optional whitespace - confirm against its rule) is only allowed inside the
// parentheses.
AvgAggregateExpression <- "AVG"i "(" ws ex:SelectItem ws ")" {
return createFunctionCall(parsers.FunctionCallAggregateAvg, []interface{}{ex})
}
// CountAggregateExpression matches COUNT(<SelectItem>) with a case-insensitive
// keyword and builds a FunctionCallAggregateCount call whose single argument
// is the matched item. The "(" must directly follow the keyword.
CountAggregateExpression <- "COUNT"i "(" ws ex:SelectItem ws ")" {
return createFunctionCall(parsers.FunctionCallAggregateCount, []interface{}{ex})
}
// MaxAggregateExpression matches MAX(<SelectItem>) with a case-insensitive
// keyword and builds a FunctionCallAggregateMax call whose single argument is
// the matched item. The "(" must directly follow the keyword.
MaxAggregateExpression <- "MAX"i "(" ws ex:SelectItem ws ")" {
return createFunctionCall(parsers.FunctionCallAggregateMax, []interface{}{ex})
}
// MinAggregateExpression matches MIN(<SelectItem>) with a case-insensitive
// keyword and builds a FunctionCallAggregateMin call whose single argument is
// the matched item. The "(" must directly follow the keyword.
MinAggregateExpression <- "MIN"i "(" ws ex:SelectItem ws ")" {
return createFunctionCall(parsers.FunctionCallAggregateMin, []interface{}{ex})
}
// SumAggregateExpression matches SUM(<SelectItem>) with a case-insensitive
// keyword and builds a FunctionCallAggregateSum call whose single argument is
// the matched item. The "(" must directly follow the keyword.
SumAggregateExpression <- "SUM"i "(" ws ex:SelectItem ws ")" {
return createFunctionCall(parsers.FunctionCallAggregateSum, []interface{}{ex})
}
// Integer matches one or more decimal digits and converts the matched text
// with strconv.Atoi, returning its (int, error) result.
Integer <- [0-9]+ {
return strconv.Atoi(string(c.text))
}

View File

@ -0,0 +1,131 @@
package memoryexecutor
import (
"math"
"github.com/pikami/cosmium/parsers"
)
// aggregate_Avg returns the arithmetic mean of the values selected by
// arguments[0] (a parsers.SelectItem) across the rows of a group.
//
// row is expected to hold a []RowType; any other value is treated as an empty
// group. Only values that getFieldValue yields as float64 or int take part in
// the mean. The result is a float64, or nil when no numeric value was found.
func (c memoryExecutorContext) aggregate_Avg(arguments []interface{}, row RowType) interface{} {
	selectExpression := arguments[0].(parsers.SelectItem)

	// A failed assertion leaves rows nil, so the loop below simply does not run.
	rows, _ := row.([]RowType)

	var total float64
	var seen int
	for _, item := range rows {
		switch number := c.getFieldValue(selectExpression, item).(type) {
		case float64:
			total += number
			seen++
		case int:
			total += float64(number)
			seen++
		}
	}

	if seen == 0 {
		return nil
	}
	return total / float64(seen)
}
// aggregate_Count returns, as an int, how many rows of a group yield a
// non-nil value for the expression in arguments[0] (a parsers.SelectItem).
//
// row is expected to hold a []RowType; any other value counts as zero rows.
func (c memoryExecutorContext) aggregate_Count(arguments []interface{}, row RowType) interface{} {
	selectExpression := arguments[0].(parsers.SelectItem)

	rows, isArray := row.([]RowType)
	if !isArray {
		return 0
	}

	matched := 0
	for _, item := range rows {
		if c.getFieldValue(selectExpression, item) == nil {
			continue
		}
		matched++
	}
	return matched
}
// aggregate_Max returns the largest of the values selected by arguments[0]
// (a parsers.SelectItem) across the rows of a group.
//
// row is expected to hold a []RowType; any other value is treated as an empty
// group. Only values that getFieldValue yields as float64 or int are
// considered. The result is a float64, or nil when no numeric value was found.
func (c memoryExecutorContext) aggregate_Max(arguments []interface{}, row RowType) interface{} {
	selectExpression := arguments[0].(parsers.SelectItem)

	rows, isArray := row.([]RowType)
	if !isArray {
		return nil
	}

	var largest float64
	found := false
	for _, item := range rows {
		var candidate float64
		switch value := c.getFieldValue(selectExpression, item).(type) {
		case float64:
			candidate = value
		case int:
			candidate = float64(value)
		default:
			// Missing or non-numeric values do not take part in the aggregate.
			continue
		}

		// Seed the maximum from the first numeric value instead of 0, so a
		// group containing only negative numbers reports its real maximum.
		if !found || candidate > largest {
			largest = candidate
			found = true
		}
	}

	if !found {
		return nil
	}
	return largest
}
// aggregate_Min returns the smallest of the values selected by arguments[0]
// (a parsers.SelectItem) across the rows of a group.
//
// row is expected to hold a []RowType; any other value is treated as an empty
// group. Only values that getFieldValue yields as float64 or int are
// considered. The result is a float64, or nil when no numeric value was found.
func (c memoryExecutorContext) aggregate_Min(arguments []interface{}, row RowType) interface{} {
	selectExpression := arguments[0].(parsers.SelectItem)

	// A failed assertion leaves rows nil, so the loop below simply does not run.
	rows, _ := row.([]RowType)

	// Start from the largest finite float64 so any smaller value replaces it.
	smallest := math.MaxFloat64
	seen := 0
	for _, item := range rows {
		var candidate float64
		switch value := c.getFieldValue(selectExpression, item).(type) {
		case float64:
			candidate = value
		case int:
			candidate = float64(value)
		default:
			continue
		}

		seen++
		if candidate < smallest {
			smallest = candidate
		}
	}

	if seen == 0 {
		return nil
	}
	return smallest
}
// aggregate_Sum returns the total of the values selected by arguments[0]
// (a parsers.SelectItem) across the rows of a group.
//
// row is expected to hold a []RowType; any other value is treated as an empty
// group. Only values that getFieldValue yields as float64 or int are added.
// The result is a float64, or nil when no numeric value was found.
func (c memoryExecutorContext) aggregate_Sum(arguments []interface{}, row RowType) interface{} {
	selectExpression := arguments[0].(parsers.SelectItem)

	// A failed assertion leaves rows nil, so the loop below simply does not run.
	rows, _ := row.([]RowType)

	var total float64
	var seen int
	for _, item := range rows {
		switch number := c.getFieldValue(selectExpression, item).(type) {
		case float64:
			total += number
			seen++
		case int:
			total += float64(number)
			seen++
		}
	}

	if seen == 0 {
		return nil
	}
	return total
}

View File

@ -0,0 +1,210 @@
package memoryexecutor_test
import (
"testing"
"github.com/pikami/cosmium/parsers"
memoryexecutor "github.com/pikami/cosmium/query_executors/memory_executor"
)
// Test_Execute_AggregateFunctions runs AVG, COUNT, MAX, MIN and SUM over
// c.number against a small fixture, grouped by c.key, plus one AVG query
// without a GROUP BY clause.
func Test_Execute_AggregateFunctions(t *testing.T) {
	// The last row has no "number" field.
	mockData := []memoryexecutor.RowType{
		map[string]interface{}{"id": "123", "number": 123, "key": "a"},
		map[string]interface{}{"id": "456", "number": 456, "key": "a"},
		map[string]interface{}{"id": "789", "number": 789, "key": "b"},
		map[string]interface{}{"id": "no-number", "key": "b"},
	}

	// aggregateOverNumber builds the select item `<fn>(c.number) AS <alias>`.
	aggregateOverNumber := func(alias string, fn parsers.FunctionCallType) parsers.SelectItem {
		return parsers.SelectItem{
			Alias: alias,
			Type:  parsers.SelectItemTypeFunctionCall,
			Value: parsers.FunctionCall{
				Type: fn,
				Arguments: []interface{}{
					parsers.SelectItem{
						Path: []string{"c", "number"},
						Type: parsers.SelectItemTypeField,
					},
				},
			},
		}
	}

	// groupedByKey builds `SELECT c.key, <fn>(c.number) AS <alias> FROM c GROUP BY c.key`.
	groupedByKey := func(alias string, fn parsers.FunctionCallType) parsers.SelectStmt {
		return parsers.SelectStmt{
			SelectItems: []parsers.SelectItem{
				{Path: []string{"c", "key"}},
				aggregateOverNumber(alias, fn),
			},
			GroupBy: []parsers.SelectItem{
				{Path: []string{"c", "key"}},
			},
			Table: parsers.Table{Value: "c"},
		}
	}

	cases := []struct {
		name     string
		query    parsers.SelectStmt
		expected []memoryexecutor.RowType
	}{
		{
			name:  "Should execute function AVG()",
			query: groupedByKey("avg", parsers.FunctionCallAggregateAvg),
			expected: []memoryexecutor.RowType{
				map[string]interface{}{"key": "a", "avg": 289.5},
				map[string]interface{}{"key": "b", "avg": 789.0},
			},
		},
		{
			name: "Should execute function AVG() without GROUP BY clause",
			query: parsers.SelectStmt{
				SelectItems: []parsers.SelectItem{
					aggregateOverNumber("avg", parsers.FunctionCallAggregateAvg),
				},
				Table: parsers.Table{Value: "c"},
			},
			expected: []memoryexecutor.RowType{
				map[string]interface{}{"avg": 456.0},
			},
		},
		{
			name:  "Should execute function COUNT()",
			query: groupedByKey("cnt", parsers.FunctionCallAggregateCount),
			expected: []memoryexecutor.RowType{
				map[string]interface{}{"key": "a", "cnt": 2},
				map[string]interface{}{"key": "b", "cnt": 1},
			},
		},
		{
			name:  "Should execute function MAX()",
			query: groupedByKey("max", parsers.FunctionCallAggregateMax),
			expected: []memoryexecutor.RowType{
				map[string]interface{}{"key": "a", "max": 456.0},
				map[string]interface{}{"key": "b", "max": 789.0},
			},
		},
		{
			name:  "Should execute function MIN()",
			query: groupedByKey("min", parsers.FunctionCallAggregateMin),
			expected: []memoryexecutor.RowType{
				map[string]interface{}{"key": "a", "min": 123.0},
				map[string]interface{}{"key": "b", "min": 789.0},
			},
		},
		{
			name:  "Should execute function SUM()",
			query: groupedByKey("sum", parsers.FunctionCallAggregateSum),
			expected: []memoryexecutor.RowType{
				map[string]interface{}{"key": "a", "sum": 579.0},
				map[string]interface{}{"key": "b", "sum": 789.0},
			},
		},
	}

	for _, tc := range cases {
		tc := tc // keep a per-iteration copy for the closure below
		t.Run(tc.name, func(t *testing.T) {
			testQueryExecute(t, tc.query, mockData, tc.expected)
		})
	}
}

View File

@ -8,6 +8,7 @@ import (
"github.com/pikami/cosmium/internal/logger"
"github.com/pikami/cosmium/parsers"
"golang.org/x/exp/slices"
)
type RowType interface{}
@ -45,9 +46,16 @@ func Execute(query parsers.SelectStmt, data []RowType) []RowType {
// Apply select
if !isGroupSelect {
selectedData := make([]RowType, 0)
for _, row := range result {
selectedData = append(selectedData, ctx.selectRow(query.SelectItems, row))
if hasAggregateFunctions(query.SelectItems) {
// We can have aggregate functions without a GROUP BY clause;
// in that case we should aggregate all rows
selectedData = append(selectedData, ctx.selectRow(query.SelectItems, result))
} else {
for _, row := range result {
selectedData = append(selectedData, ctx.selectRow(query.SelectItems, row))
}
}
result = selectedData
}
@ -275,6 +283,17 @@ func (c memoryExecutorContext) getFieldValue(field parsers.SelectItem, row RowTy
case parsers.FunctionCallSetUnion:
return c.set_Union(typedValue.Arguments, rowValue)
case parsers.FunctionCallAggregateAvg:
return c.aggregate_Avg(typedValue.Arguments, row)
case parsers.FunctionCallAggregateCount:
return c.aggregate_Count(typedValue.Arguments, row)
case parsers.FunctionCallAggregateMax:
return c.aggregate_Max(typedValue.Arguments, row)
case parsers.FunctionCallAggregateMin:
return c.aggregate_Min(typedValue.Arguments, row)
case parsers.FunctionCallAggregateSum:
return c.aggregate_Sum(typedValue.Arguments, row)
case parsers.FunctionCallIn:
return c.misc_In(typedValue.Arguments, rowValue)
}
@ -431,3 +450,23 @@ func deduplicate(slice []RowType) []RowType {
return result
}
// hasAggregateFunctions reports whether any of selectItems, or any item
// nested under their SelectItems field, is a function call whose type appears
// in parsers.AggregateFunctions. A nil or empty slice yields false.
func hasAggregateFunctions(selectItems []parsers.SelectItem) bool {
	for _, item := range selectItems {
		if item.Type == parsers.SelectItemTypeFunctionCall {
			call, isCall := item.Value.(parsers.FunctionCall)
			if isCall && slices.Contains(parsers.AggregateFunctions, call.Type) {
				return true
			}
		}

		// Descend into nested select items.
		if hasAggregateFunctions(item.SelectItems) {
			return true
		}
	}

	return false
}