From 83eeb46f996f310ec11b531bfba9a0daa3a9625f Mon Sep 17 00:00:00 2001 From: aniketio-ctrl Date: Tue, 4 Nov 2025 08:35:23 -0800 Subject: [PATCH] feat(sqlstore): added sql formatter for json (#9420) * chore: added sql formatter for json * chore: updated json extract columns * chore: added apend ident * chore: resolved pr comments * chore: resolved pr comments * chore: resolved pr comments * chore: resolved pr comments * chore: minor changes * chore: minor changes * chore: minor changes * chore: minor changes * chore: resolve comments * chore: added append value * chore: added append value * chore: added append value * chore: added append value * chore: added append value * chore: added append value * chore: added append value * chore: added append value --------- Co-authored-by: Vikrant Gupta --- ee/sqlstore/postgressqlstore/formatter.go | 153 ++++++ .../postgressqlstore/formatter_test.go | 500 ++++++++++++++++++ ee/sqlstore/postgressqlstore/provider.go | 24 +- pkg/sqlstore/sqlitesqlstore/formatter.go | 107 ++++ pkg/sqlstore/sqlitesqlstore/formatter_test.go | 422 +++++++++++++++ pkg/sqlstore/sqlitesqlstore/provider.go | 24 +- pkg/sqlstore/sqlstore.go | 34 ++ pkg/sqlstore/sqlstoretest/formatter.go | 107 ++++ pkg/sqlstore/sqlstoretest/provider.go | 20 +- 9 files changed, 1367 insertions(+), 24 deletions(-) create mode 100644 ee/sqlstore/postgressqlstore/formatter.go create mode 100644 ee/sqlstore/postgressqlstore/formatter_test.go create mode 100644 pkg/sqlstore/sqlitesqlstore/formatter.go create mode 100644 pkg/sqlstore/sqlitesqlstore/formatter_test.go create mode 100644 pkg/sqlstore/sqlstoretest/formatter.go diff --git a/ee/sqlstore/postgressqlstore/formatter.go b/ee/sqlstore/postgressqlstore/formatter.go new file mode 100644 index 0000000000..028944bda8 --- /dev/null +++ b/ee/sqlstore/postgressqlstore/formatter.go @@ -0,0 +1,153 @@ +package postgressqlstore + +import ( + "strings" + + "github.com/SigNoz/signoz/pkg/sqlstore" + "github.com/uptrace/bun/schema" +) + +type formatter struct { + bunf schema.Formatter +} + +func newFormatter(dialect schema.Dialect) sqlstore.SQLFormatter { + return &formatter{bunf: schema.NewFormatter(dialect)} +} + +func (f *formatter) JSONExtractString(column, path string) []byte { + var sql []byte + sql = f.bunf.AppendIdent(sql, column) + sql = append(sql, f.convertJSONPathToPostgres(path)...) + return sql +} + +func (f *formatter) JSONType(column, path string) []byte { + var sql []byte + sql = append(sql, "jsonb_typeof("...) + sql = f.bunf.AppendIdent(sql, column) + sql = append(sql, f.convertJSONPathToPostgresWithMode(path, false)...) + sql = append(sql, ')') + return sql +} + +func (f *formatter) JSONIsArray(column, path string) []byte { + var sql []byte + sql = append(sql, f.JSONType(column, path)...) + sql = append(sql, " = "...) + sql = schema.Append(f.bunf, sql, "array") + return sql +} + +func (f *formatter) JSONArrayElements(column, path, alias string) ([]byte, []byte) { + var sql []byte + sql = append(sql, "jsonb_array_elements("...) + sql = f.bunf.AppendIdent(sql, column) + sql = append(sql, f.convertJSONPathToPostgresWithMode(path, false)...) + sql = append(sql, ") AS "...) + sql = f.bunf.AppendIdent(sql, alias) + + return sql, []byte(alias) +} + +func (f *formatter) JSONArrayOfStrings(column, path, alias string) ([]byte, []byte) { + var sql []byte + sql = append(sql, "jsonb_array_elements_text("...) + sql = f.bunf.AppendIdent(sql, column) + sql = append(sql, f.convertJSONPathToPostgresWithMode(path, false)...) + sql = append(sql, ") AS "...) + sql = f.bunf.AppendIdent(sql, alias) + + return sql, append([]byte(alias), "::text"...) +} + +func (f *formatter) JSONKeys(column, path, alias string) ([]byte, []byte) { + var sql []byte + sql = append(sql, "jsonb_each("...) + sql = f.bunf.AppendIdent(sql, column) + sql = append(sql, f.convertJSONPathToPostgresWithMode(path, false)...) + sql = append(sql, ") AS "...) + sql = f.bunf.AppendIdent(sql, alias) + + return sql, append([]byte(alias), ".key"...) +} + +func (f *formatter) JSONArrayAgg(expression string) []byte { + var sql []byte + sql = append(sql, "jsonb_agg("...) + sql = append(sql, expression...) + sql = append(sql, ')') + return sql +} + +func (f *formatter) JSONArrayLiteral(values ...string) []byte { + var sql []byte + sql = append(sql, "jsonb_build_array("...) + for idx, value := range values { + if idx > 0 { + sql = append(sql, ", "...) + } + sql = schema.Append(f.bunf, sql, value) + } + sql = append(sql, ')') + return sql +} + +func (f *formatter) TextToJsonColumn(column string) []byte { + var sql []byte + sql = f.bunf.AppendIdent(sql, column) + sql = append(sql, "::jsonb"...) + return sql +} + +func (f *formatter) convertJSONPathToPostgres(jsonPath string) []byte { + return f.convertJSONPathToPostgresWithMode(jsonPath, true) +} + +func (f *formatter) convertJSONPathToPostgresWithMode(jsonPath string, asText bool) []byte { + path := strings.TrimPrefix(strings.TrimPrefix(jsonPath, "$"), ".") + + if path == "" { + return nil + } + + parts := strings.Split(path, ".") + + var validParts []string + for _, part := range parts { + if part != "" { + validParts = append(validParts, part) + } + } + + if len(validParts) == 0 { + return nil + } + + var result []byte + + for idx, part := range validParts { + if idx == len(validParts)-1 { + if asText { + result = append(result, "->>"...) + } else { + result = append(result, "->"...) + } + result = schema.Append(f.bunf, result, part) + return result + } + + result = append(result, "->"...) + result = schema.Append(f.bunf, result, part) + } + + return result +} + +func (f *formatter) LowerExpression(expression string) []byte { + var sql []byte + sql = append(sql, "lower("...) + sql = append(sql, expression...) + sql = append(sql, ')') + return sql +} diff --git a/ee/sqlstore/postgressqlstore/formatter_test.go b/ee/sqlstore/postgressqlstore/formatter_test.go new file mode 100644 index 0000000000..84b47227d6 --- /dev/null +++ b/ee/sqlstore/postgressqlstore/formatter_test.go @@ -0,0 +1,500 @@ +package postgressqlstore + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/uptrace/bun/dialect/pgdialect" +) + +func TestJSONExtractString(t *testing.T) { + tests := []struct { + name string + column string + path string + expected string + }{ + { + name: "simple path", + column: "data", + path: "$.field", + expected: `"data"->>'field'`, + }, + { + name: "nested path", + column: "metadata", + path: "$.user.name", + expected: `"metadata"->'user'->>'name'`, + }, + { + name: "deeply nested path", + column: "json_col", + path: "$.level1.level2.level3", + expected: `"json_col"->'level1'->'level2'->>'level3'`, + }, + { + name: "root path", + column: "json_col", + path: "$", + expected: `"json_col"`, + }, + { + name: "empty path", + column: "data", + path: "", + expected: `"data"`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(pgdialect.New()) + got := string(f.JSONExtractString(tt.column, tt.path)) + assert.Equal(t, tt.expected, got) + }) + } +} + +func TestJSONType(t *testing.T) { + tests := []struct { + name string + column string + path string + expected string + }{ + { + name: "simple path", + column: "data", + path: "$.field", + expected: `jsonb_typeof("data"->'field')`, + }, + { + name: "nested path", + column: "metadata", + path: "$.user.age", + expected: `jsonb_typeof("metadata"->'user'->'age')`, + }, + { + name: "root path", + column: "json_col", + path: "$", + expected: `jsonb_typeof("json_col")`, + }, + { + name: "empty path", + column: "data", + path: "", + expected: `jsonb_typeof("data")`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(pgdialect.New()) + got := string(f.JSONType(tt.column, tt.path)) + assert.Equal(t, tt.expected, got) + }) + } +} + +func TestJSONIsArray(t *testing.T) { + tests := []struct { + name string + column string + path string + expected string + }{ + { + name: "simple path", + column: "data", + path: "$.items", + expected: `jsonb_typeof("data"->'items') = 'array'`, + }, + { + name: "nested path", + column: "metadata", + path: "$.user.tags", + expected: `jsonb_typeof("metadata"->'user'->'tags') = 'array'`, + }, + { + name: "root path", + column: "json_col", + path: "$", + expected: `jsonb_typeof("json_col") = 'array'`, + }, + { + name: "empty path", + column: "data", + path: "", + expected: `jsonb_typeof("data") = 'array'`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(pgdialect.New()) + got := string(f.JSONIsArray(tt.column, tt.path)) + assert.Equal(t, tt.expected, got) + }) + } +} + +func TestJSONArrayElements(t *testing.T) { + tests := []struct { + name string + column string + path string + alias string + expected string + }{ + { + name: "root path with dollar sign", + column: "data", + path: "$", + alias: "elem", + expected: `jsonb_array_elements("data") AS "elem"`, + }, + { + name: "root path empty", + column: "data", + path: "", + alias: "elem", + expected: `jsonb_array_elements("data") AS "elem"`, + }, + { + name: "nested path", + column: "metadata", + path: "$.items", + alias: "item", + expected: `jsonb_array_elements("metadata"->'items') AS "item"`, + }, + { + name: "deeply nested path", + column: "json_col", + path: "$.user.tags", + alias: "tag", + expected: `jsonb_array_elements("json_col"->'user'->'tags') AS "tag"`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(pgdialect.New()) + got, _ := f.JSONArrayElements(tt.column, tt.path, tt.alias) + assert.Equal(t, tt.expected, string(got)) + }) + } +} + +func TestJSONArrayOfStrings(t *testing.T) { + tests := []struct { + name string + column string + path string + alias string + expected string + }{ + { + name: "root path with dollar sign", + column: "data", + path: "$", + alias: "str", + expected: `jsonb_array_elements_text("data") AS "str"`, + }, + { + name: "root path empty", + column: "data", + path: "", + alias: "str", + expected: `jsonb_array_elements_text("data") AS "str"`, + }, + { + name: "nested path", + column: "metadata", + path: "$.strings", + alias: "s", + expected: `jsonb_array_elements_text("metadata"->'strings') AS "s"`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(pgdialect.New()) + got, _ := f.JSONArrayOfStrings(tt.column, tt.path, tt.alias) + assert.Equal(t, tt.expected, string(got)) + }) + } +} + +func TestJSONKeys(t *testing.T) { + tests := []struct { + name string + column string + path string + alias string + expected string + }{ + { + name: "root path with dollar sign", + column: "data", + path: "$", + alias: "k", + expected: `jsonb_each("data") AS "k"`, + }, + { + name: "root path empty", + column: "data", + path: "", + alias: "k", + expected: `jsonb_each("data") AS "k"`, + }, + { + name: "nested path", + column: "metadata", + path: "$.object", + alias: "key", + expected: `jsonb_each("metadata"->'object') AS "key"`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(pgdialect.New()) + got, _ := f.JSONKeys(tt.column, tt.path, tt.alias) + assert.Equal(t, tt.expected, string(got)) + }) + } +} + +func TestJSONArrayAgg(t *testing.T) { + tests := []struct { + name string + expression string + expected string + }{ + { + name: "simple column", + expression: "id", + expected: "jsonb_agg(id)", + }, + { + name: "expression with function", + expression: "DISTINCT name", + expected: "jsonb_agg(DISTINCT name)", + }, + { + name: "complex expression", + expression: "data->>'field'", + expected: "jsonb_agg(data->>'field')", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(pgdialect.New()) + got := string(f.JSONArrayAgg(tt.expression)) + assert.Equal(t, tt.expected, got) + }) + } +} + +func TestJSONArrayLiteral(t *testing.T) { + tests := []struct { + name string + values []string + expected string + }{ + { + name: "empty array", + values: []string{}, + expected: "jsonb_build_array()", + }, + { + name: "single value", + values: []string{"value1"}, + expected: "jsonb_build_array('value1')", + }, + { + name: "multiple values", + values: []string{"value1", "value2", "value3"}, + expected: "jsonb_build_array('value1', 'value2', 'value3')", + }, + { + name: "values with special characters", + values: []string{"test", "with space", "with-dash"}, + expected: "jsonb_build_array('test', 'with space', 'with-dash')", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(pgdialect.New()) + got := string(f.JSONArrayLiteral(tt.values...)) + assert.Equal(t, tt.expected, got) + }) + } +} + +func TestConvertJSONPathToPostgresWithMode(t *testing.T) { + tests := []struct { + name string + jsonPath string + asText bool + expected string + }{ + { + name: "simple path as text", + jsonPath: "$.field", + asText: true, + expected: "->>'field'", + }, + { + name: "simple path as json", + jsonPath: "$.field", + asText: false, + expected: "->'field'", + }, + { + name: "nested path as text", + jsonPath: "$.user.name", + asText: true, + expected: "->'user'->>'name'", + }, + { + name: "nested path as json", + jsonPath: "$.user.name", + asText: false, + expected: "->'user'->'name'", + }, + { + name: "deeply nested as text", + jsonPath: "$.a.b.c.d", + asText: true, + expected: "->'a'->'b'->'c'->>'d'", + }, + { + name: "root path", + jsonPath: "$", + asText: true, + expected: "", + }, + { + name: "empty path", + jsonPath: "", + asText: true, + expected: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(pgdialect.New()).(*formatter) + got := string(f.convertJSONPathToPostgresWithMode(tt.jsonPath, tt.asText)) + assert.Equal(t, tt.expected, got) + }) + } +} + +func TestTextToJsonColumn(t *testing.T) { + tests := []struct { + name string + column string + expected string + }{ + { + name: "simple column name", + column: "data", + expected: `"data"::jsonb`, + }, + { + name: "column with underscore", + column: "user_data", + expected: `"user_data"::jsonb`, + }, + { + name: "column with special characters", + column: "json-col", + expected: `"json-col"::jsonb`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(pgdialect.New()) + got := string(f.TextToJsonColumn(tt.column)) + assert.Equal(t, tt.expected, got) + }) + } +} + +func TestLowerExpression(t *testing.T) { + tests := []struct { + name string + expr string + expected string + }{ + { + name: "simple column name", + expr: "name", + expected: "lower(name)", + }, + { + name: "quoted column identifier", + expr: `"column_name"`, + expected: `lower("column_name")`, + }, + { + name: "jsonb text extraction", + expr: "data->>'field'", + expected: "lower(data->>'field')", + }, + { + name: "nested jsonb extraction", + expr: "metadata->'user'->>'name'", + expected: "lower(metadata->'user'->>'name')", + }, + { + name: "jsonb_typeof expression", + expr: "jsonb_typeof(data->'field')", + expected: "lower(jsonb_typeof(data->'field'))", + }, + { + name: "string concatenation", + expr: "first_name || ' ' || last_name", + expected: "lower(first_name || ' ' || last_name)", + }, + { + name: "CAST expression", + expr: "CAST(value AS TEXT)", + expected: "lower(CAST(value AS TEXT))", + }, + { + name: "COALESCE expression", + expr: "COALESCE(name, 'default')", + expected: "lower(COALESCE(name, 'default'))", + }, + { + name: "subquery column", + expr: "users.email", + expected: "lower(users.email)", + }, + { + name: "quoted identifier with special chars", + expr: `"user-name"`, + expected: `lower("user-name")`, + }, + { + name: "jsonb to text cast", + expr: "data::text", + expected: "lower(data::text)", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(pgdialect.New()) + got := string(f.LowerExpression(tt.expr)) + assert.Equal(t, tt.expected, got) + }) + } +} diff --git a/ee/sqlstore/postgressqlstore/provider.go b/ee/sqlstore/postgressqlstore/provider.go index 7e3bd22f34..a969ac7fd1 100644 --- a/ee/sqlstore/postgressqlstore/provider.go +++ b/ee/sqlstore/postgressqlstore/provider.go @@ -15,10 +15,11 @@ import ( ) type provider struct { - settings factory.ScopedProviderSettings - sqldb *sql.DB - bundb *sqlstore.BunDB - dialect *dialect + settings factory.ScopedProviderSettings + sqldb *sql.DB + bundb *sqlstore.BunDB + dialect *dialect + formatter sqlstore.SQLFormatter } func NewFactory(hookFactories ...factory.ProviderFactory[sqlstore.SQLStoreHook, sqlstore.Config]) factory.ProviderFactory[sqlstore.SQLStore, sqlstore.Config] { @@ -55,11 +56,14 @@ func New(ctx context.Context, providerSettings factory.ProviderSettings, config sqldb := stdlib.OpenDBFromPool(pool) + pgDialect := pgdialect.New() + bunDB := sqlstore.NewBunDB(settings, sqldb, pgDialect, hooks) return &provider{ - settings: settings, - sqldb: sqldb, - bundb: sqlstore.NewBunDB(settings, sqldb, pgdialect.New(), hooks), - dialect: new(dialect), + settings: settings, + sqldb: sqldb, + bundb: bunDB, + dialect: new(dialect), + formatter: newFormatter(bunDB.Dialect()), }, nil } @@ -75,6 +79,10 @@ func (provider *provider) Dialect() sqlstore.SQLDialect { return provider.dialect } +func (provider *provider) Formatter() sqlstore.SQLFormatter { + return provider.formatter +} + func (provider *provider) BunDBCtx(ctx context.Context) bun.IDB { return provider.bundb.BunDBCtx(ctx) } diff --git a/pkg/sqlstore/sqlitesqlstore/formatter.go b/pkg/sqlstore/sqlitesqlstore/formatter.go new file mode 100644 index 0000000000..7448708df0 --- /dev/null +++ b/pkg/sqlstore/sqlitesqlstore/formatter.go @@ -0,0 +1,107 @@ +package sqlitesqlstore + +import ( + "github.com/SigNoz/signoz/pkg/sqlstore" + "github.com/uptrace/bun/schema" +) + +type formatter struct { + bunf schema.Formatter +} + +func newFormatter(dialect schema.Dialect) sqlstore.SQLFormatter { + return &formatter{bunf: schema.NewFormatter(dialect)} +} + +func (f *formatter) JSONExtractString(column, path string) []byte { + var sql []byte + sql = append(sql, "json_extract("...) + sql = f.bunf.AppendIdent(sql, column) + sql = append(sql, ", "...) + sql = schema.Append(f.bunf, sql, path) + sql = append(sql, ")"...) + return sql +} + +func (f *formatter) JSONType(column, path string) []byte { + var sql []byte + sql = append(sql, "json_type("...) + sql = f.bunf.AppendIdent(sql, column) + sql = append(sql, ", "...) + sql = schema.Append(f.bunf, sql, path) + sql = append(sql, ")"...) + return sql +} + +func (f *formatter) JSONIsArray(column, path string) []byte { + var sql []byte + sql = append(sql, f.JSONType(column, path)...) + sql = append(sql, " = "...) + sql = schema.Append(f.bunf, sql, "array") + return sql +} + +func (f *formatter) JSONArrayElements(column, path, alias string) ([]byte, []byte) { + var sql []byte + sql = append(sql, "json_each("...) + sql = f.bunf.AppendIdent(sql, column) + if path != "$" && path != "" { + sql = append(sql, ", "...) + sql = schema.Append(f.bunf, sql, path) + } + sql = append(sql, ") AS "...) + sql = f.bunf.AppendIdent(sql, alias) + + return sql, append([]byte(alias), ".value"...) +} + +func (f *formatter) JSONArrayOfStrings(column, path, alias string) ([]byte, []byte) { + return f.JSONArrayElements(column, path, alias) +} + +func (f *formatter) JSONKeys(column, path, alias string) ([]byte, []byte) { + var sql []byte + sql = append(sql, "json_each("...) + sql = f.bunf.AppendIdent(sql, column) + if path != "$" && path != "" { + sql = append(sql, ", "...) + sql = schema.Append(f.bunf, sql, path) + } + sql = append(sql, ") AS "...) + sql = f.bunf.AppendIdent(sql, alias) + + return sql, append([]byte(alias), ".key"...) +} + +func (f *formatter) JSONArrayAgg(expression string) []byte { + var sql []byte + sql = append(sql, "json_group_array("...) + sql = append(sql, expression...) + sql = append(sql, ')') + return sql +} + +func (f *formatter) JSONArrayLiteral(values ...string) []byte { + var sql []byte + sql = append(sql, "json_array("...) + for idx, value := range values { + if idx > 0 { + sql = append(sql, ", "...) + } + sql = schema.Append(f.bunf, sql, value) + } + sql = append(sql, ')') + return sql +} + +func (f *formatter) TextToJsonColumn(column string) []byte { + return f.bunf.AppendIdent([]byte{}, column) +} + +func (f *formatter) LowerExpression(expression string) []byte { + var sql []byte + sql = append(sql, "lower("...) + sql = append(sql, expression...) + sql = append(sql, ')') + return sql +} diff --git a/pkg/sqlstore/sqlitesqlstore/formatter_test.go b/pkg/sqlstore/sqlitesqlstore/formatter_test.go new file mode 100644 index 0000000000..2f24072129 --- /dev/null +++ b/pkg/sqlstore/sqlitesqlstore/formatter_test.go @@ -0,0 +1,422 @@ +package sqlitesqlstore + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/uptrace/bun/dialect/sqlitedialect" +) + +func TestJSONExtractString(t *testing.T) { + tests := []struct { + name string + column string + path string + expected string + }{ + { + name: "simple path", + column: "data", + path: "$.field", + expected: `json_extract("data", '$.field')`, + }, + { + name: "nested path", + column: "metadata", + path: "$.user.name", + expected: `json_extract("metadata", '$.user.name')`, + }, + { + name: "root path", + column: "json_col", + path: "$", + expected: `json_extract("json_col", '$')`, + }, + { + name: "root path", + column: "json_col", + path: "", + expected: `json_extract("json_col", '')`, + }, + { + name: "array index path", + column: "items", + path: "$.list[0]", + expected: `json_extract("items", '$.list[0]')`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(sqlitedialect.New()) + got := string(f.JSONExtractString(tt.column, tt.path)) + assert.Equal(t, tt.expected, got) + }) + } +} + +func TestJSONType(t *testing.T) { + tests := []struct { + name string + column string + path string + expected string + }{ + { + name: "simple path", + column: "data", + path: "$.field", + expected: `json_type("data", '$.field')`, + }, + { + name: "nested path", + column: "metadata", + path: "$.user.age", + expected: `json_type("metadata", '$.user.age')`, + }, + { + name: "root path", + column: "json_col", + path: "$", + expected: `json_type("json_col", '$')`, + }, + { + name: "root path", + column: "json_col", + path: "", + expected: `json_type("json_col", '')`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(sqlitedialect.New()) + got := string(f.JSONType(tt.column, tt.path)) + assert.Equal(t, tt.expected, got) + }) + } +} + +func TestJSONIsArray(t *testing.T) { + tests := []struct { + name string + column string + path string + expected string + }{ + { + name: "simple path", + column: "data", + path: "$.items", + expected: `json_type("data", '$.items') = 'array'`, + }, + { + name: "nested path", + column: "metadata", + path: "$.user.tags", + expected: `json_type("metadata", '$.user.tags') = 'array'`, + }, + { + name: "root path", + column: "json_col", + path: "$", + expected: `json_type("json_col", '$') = 'array'`, + }, + { + name: "root path", + column: "json_col", + path: "", + expected: `json_type("json_col", '') = 'array'`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(sqlitedialect.New()) + got := string(f.JSONIsArray(tt.column, tt.path)) + assert.Equal(t, tt.expected, got) + }) + } +} + +func TestJSONArrayElements(t *testing.T) { + tests := []struct { + name string + column string + path string + alias string + expected string + }{ + { + name: "root path with dollar sign", + column: "data", + path: "$", + alias: "elem", + expected: `json_each("data") AS "elem"`, + }, + { + name: "root path empty", + column: "data", + path: "", + alias: "elem", + expected: `json_each("data") AS "elem"`, + }, + { + name: "nested path", + column: "metadata", + path: "$.items", + alias: "item", + expected: `json_each("metadata", '$.items') AS "item"`, + }, + { + name: "deeply nested path", + column: "json_col", + path: "$.user.tags", + alias: "tag", + expected: `json_each("json_col", '$.user.tags') AS "tag"`, + }, + { + name: "nested path", + column: "metadata", + path: "", + alias: "item", + expected: `json_each("metadata") AS "item"`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(sqlitedialect.New()) + got, _ := f.JSONArrayElements(tt.column, tt.path, tt.alias) + assert.Equal(t, tt.expected, string(got)) + }) + } +} + +func TestJSONArrayOfStrings(t *testing.T) { + tests := []struct { + name string + column string + path string + alias string + expected string + }{ + { + name: "root path with dollar sign", + column: "data", + path: "$", + alias: "str", + expected: `json_each("data") AS "str"`, + }, + { + name: "root path empty", + column: "data", + path: "", + alias: "str", + expected: `json_each("data") AS "str"`, + }, + { + name: "nested path", + column: "metadata", + path: "$.strings", + alias: "s", + expected: `json_each("metadata", '$.strings') AS "s"`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(sqlitedialect.New()) + got, _ := f.JSONArrayOfStrings(tt.column, tt.path, tt.alias) + assert.Equal(t, tt.expected, string(got)) + }) + } +} + +func TestJSONKeys(t *testing.T) { + tests := []struct { + name string + column string + path string + alias string + expected string + }{ + { + name: "root path with dollar sign", + column: "data", + path: "$", + alias: "k", + expected: `json_each("data") AS "k"`, + }, + { + name: "root path empty", + column: "data", + path: "", + alias: "k", + expected: `json_each("data") AS "k"`, + }, + { + name: "nested path", + column: "metadata", + path: "$.object", + alias: "key", + expected: `json_each("metadata", '$.object') AS "key"`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(sqlitedialect.New()) + got, _ := f.JSONKeys(tt.column, tt.path, tt.alias) + assert.Equal(t, tt.expected, string(got)) + }) + } +} + +func TestJSONArrayAgg(t *testing.T) { + tests := []struct { + name string + expression string + expected string + }{ + { + name: "simple column", + expression: "id", + expected: "json_group_array(id)", + }, + { + name: "expression with function", + expression: "DISTINCT name", + expected: "json_group_array(DISTINCT name)", + }, + { + name: "complex expression", + expression: "json_extract(data, '$.field')", + expected: "json_group_array(json_extract(data, '$.field'))", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(sqlitedialect.New()) + got := string(f.JSONArrayAgg(tt.expression)) + assert.Equal(t, tt.expected, got) + }) + } +} + +func TestJSONArrayLiteral(t *testing.T) { + tests := []struct { + name string + values []string + expected string + }{ + { + name: "empty array", + values: []string{}, + expected: "json_array()", + }, + { + name: "single value", + values: []string{"value1"}, + expected: "json_array('value1')", + }, + { + name: "multiple values", + values: []string{"value1", "value2", "value3"}, + expected: "json_array('value1', 'value2', 'value3')", + }, + { + name: "values with special characters", + values: []string{"test", "with space", "with-dash"}, + expected: "json_array('test', 'with space', 'with-dash')", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(sqlitedialect.New()) + got := string(f.JSONArrayLiteral(tt.values...)) + assert.Equal(t, tt.expected, got) + }) + } +} + +func TestTextToJsonColumn(t *testing.T) { + tests := []struct { + name string + column string + expected string + }{ + { + name: "simple column name", + column: "data", + expected: `"data"`, + }, + { + name: "column with underscore", + column: "user_data", + expected: `"user_data"`, + }, + { + name: "column with special characters", + column: "json-col", + expected: `"json-col"`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(sqlitedialect.New()) + got := string(f.TextToJsonColumn(tt.column)) + assert.Equal(t, tt.expected, got) + }) + } +} + +func TestLowerExpression(t *testing.T) { + tests := []struct { + name string + expr string + expected string + }{ + { + name: "json_extract expression", + expr: "json_extract(data, '$.field')", + expected: "lower(json_extract(data, '$.field'))", + }, + { + name: "nested json_extract", + expr: "json_extract(metadata, '$.user.name')", + expected: "lower(json_extract(metadata, '$.user.name'))", + }, + { + name: "json_type expression", + expr: "json_type(data, '$.field')", + expected: "lower(json_type(data, '$.field'))", + }, + { + name: "string concatenation", + expr: "first_name || ' ' || last_name", + expected: "lower(first_name || ' ' || last_name)", + }, + { + name: "CAST expression", + expr: "CAST(value AS TEXT)", + expected: "lower(CAST(value AS TEXT))", + }, + { + name: "COALESCE expression", + expr: "COALESCE(name, 'default')", + expected: "lower(COALESCE(name, 'default'))", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := newFormatter(sqlitedialect.New()) + got := string(f.LowerExpression(tt.expr)) + assert.Equal(t, tt.expected, got) + }) + } +} diff --git a/pkg/sqlstore/sqlitesqlstore/provider.go b/pkg/sqlstore/sqlitesqlstore/provider.go index e778f3cf0f..ced82d2e83 100644 --- a/pkg/sqlstore/sqlitesqlstore/provider.go +++ b/pkg/sqlstore/sqlitesqlstore/provider.go @@ -17,10 +17,11 @@ import ( ) type provider struct { - settings factory.ScopedProviderSettings - sqldb *sql.DB - bundb *sqlstore.BunDB - dialect *dialect + settings factory.ScopedProviderSettings + sqldb *sql.DB + bundb *sqlstore.BunDB + dialect *dialect + formatter sqlstore.SQLFormatter } func NewFactory(hookFactories ...factory.ProviderFactory[sqlstore.SQLStoreHook, sqlstore.Config]) factory.ProviderFactory[sqlstore.SQLStore, sqlstore.Config] { @@ -54,11 +55,14 @@ func New(ctx context.Context, providerSettings factory.ProviderSettings, config settings.Logger().InfoContext(ctx, "connected to sqlite", "path", config.Sqlite.Path) sqldb.SetMaxOpenConns(config.Connection.MaxOpenConns) + sqliteDialect := sqlitedialect.New() + bunDB := sqlstore.NewBunDB(settings, sqldb, sqliteDialect, hooks) return &provider{ - settings: settings, - sqldb: sqldb, - bundb: sqlstore.NewBunDB(settings, sqldb, sqlitedialect.New(), hooks), - dialect: new(dialect), + settings: settings, + sqldb: sqldb, + bundb: bunDB, + dialect: new(dialect), + formatter: newFormatter(bunDB.Dialect()), }, nil } @@ -74,6 +78,10 @@ func (provider *provider) Dialect() sqlstore.SQLDialect { return provider.dialect } +func (provider *provider) Formatter() sqlstore.SQLFormatter { + return provider.formatter +} + func (provider *provider) BunDBCtx(ctx context.Context) bun.IDB { return provider.bundb.BunDBCtx(ctx) } diff --git a/pkg/sqlstore/sqlstore.go b/pkg/sqlstore/sqlstore.go index 185a92467f..07594874b2 100644 --- a/pkg/sqlstore/sqlstore.go +++ b/pkg/sqlstore/sqlstore.go @@ -20,6 +20,8 @@ type SQLStore interface { // Returns the dialect of the database. Dialect() SQLDialect + Formatter() SQLFormatter + // RunInTxCtx runs the given callback in a transaction. It creates and injects a new context with the transaction. // If a transaction is present in the context, it will be used. RunInTxCtx(ctx context.Context, opts *SQLStoreTxOptions, cb func(ctx context.Context) error) error @@ -86,3 +88,35 @@ type SQLDialect interface { // as an argument. ToggleForeignKeyConstraint(ctx context.Context, bun *bun.DB, enable bool) error } + +type SQLFormatter interface { + // JSONExtractString takes a JSON path (e.g., "$.labels.severity") + JSONExtractString(column, path string) []byte + + // JSONType used to determine the type of the value extracted from the path + JSONType(column, path string) []byte + + // JSONIsArray used to check whether the value is array or not + JSONIsArray(column, path string) []byte + + // JSONArrayElements returns query as well as columns alias to be used for select and where clause + JSONArrayElements(column, path, alias string) ([]byte, []byte) + + // JSONArrayOfStrings returns query as well as columns alias to be used for select and where clause + JSONArrayOfStrings(column, path, alias string) ([]byte, []byte) + + // JSONArrayAgg aggregates values into a JSON array + JSONArrayAgg(expression string) []byte + + // JSONArrayLiteral creates a literal JSON array from the given string values + JSONArrayLiteral(values ...string) []byte + + // JSONKeys return extracted key from json as well as alias to be used for select and where clause + JSONKeys(column, path, alias string) ([]byte, []byte) + + // TextToJsonColumn converts a text column to JSON type + TextToJsonColumn(column string) []byte + + // LowerExpression wraps any SQL expression with lower() function for case-insensitive operations + LowerExpression(expression string) []byte +} diff --git a/pkg/sqlstore/sqlstoretest/formatter.go b/pkg/sqlstore/sqlstoretest/formatter.go new file mode 100644 index 0000000000..e40f640dad --- /dev/null +++ b/pkg/sqlstore/sqlstoretest/formatter.go @@ -0,0 +1,107 @@ +package sqlstoretest + +import ( + "github.com/SigNoz/signoz/pkg/sqlstore" + "github.com/uptrace/bun/schema" +) + +type formatter struct { + bunf schema.Formatter +} + +func newFormatter(dialect schema.Dialect) sqlstore.SQLFormatter { + return &formatter{bunf: schema.NewFormatter(dialect)} +} + +func (f *formatter) JSONExtractString(column, path string) []byte { + var sql []byte + sql = append(sql, "json_extract("...) + sql = f.bunf.AppendIdent(sql, column) + sql = append(sql, ", "...) + sql = schema.Append(f.bunf, sql, path) + sql = append(sql, ")"...) + return sql +} + +func (f *formatter) JSONType(column, path string) []byte { + var sql []byte + sql = append(sql, "json_type("...) + sql = f.bunf.AppendIdent(sql, column) + sql = append(sql, ", "...) + sql = schema.Append(f.bunf, sql, path) + sql = append(sql, ")"...) + return sql +} + +func (f *formatter) JSONIsArray(column, path string) []byte { + var sql []byte + sql = append(sql, f.JSONType(column, path)...) + sql = append(sql, " = "...) + sql = schema.Append(f.bunf, sql, "array") + return sql +} + +func (f *formatter) JSONArrayElements(column, path, alias string) ([]byte, []byte) { + var sql []byte + sql = append(sql, "json_each("...) + sql = f.bunf.AppendIdent(sql, column) + if path != "$" && path != "" { + sql = append(sql, ", "...) + sql = schema.Append(f.bunf, sql, path) + } + sql = append(sql, ") AS "...) + sql = f.bunf.AppendIdent(sql, alias) + + return sql, append([]byte(alias), ".value"...) +} + +func (f *formatter) JSONArrayOfStrings(column, path, alias string) ([]byte, []byte) { + return f.JSONArrayElements(column, path, alias) +} + +func (f *formatter) JSONKeys(column, path, alias string) ([]byte, []byte) { + var sql []byte + sql = append(sql, "json_each("...) + sql = f.bunf.AppendIdent(sql, column) + if path != "$" && path != "" { + sql = append(sql, ", "...) + sql = schema.Append(f.bunf, sql, path) + } + sql = append(sql, ") AS "...) + sql = f.bunf.AppendIdent(sql, alias) + + return sql, append([]byte(alias), ".key"...) +} + +func (f *formatter) JSONArrayAgg(expression string) []byte { + var sql []byte + sql = append(sql, "json_group_array("...) + sql = append(sql, expression...) + sql = append(sql, ')') + return sql +} + +func (f *formatter) JSONArrayLiteral(values ...string) []byte { + var sql []byte + sql = append(sql, "json_array("...) + for idx, value := range values { + if idx > 0 { + sql = append(sql, ", "...) + } + sql = schema.Append(f.bunf, sql, value) + } + sql = append(sql, ')') + return sql +} + +func (f *formatter) TextToJsonColumn(column string) []byte { + return f.bunf.AppendIdent([]byte{}, column) +} + +func (f *formatter) LowerExpression(expression string) []byte { + var sql []byte + sql = append(sql, "lower("...) + sql = append(sql, expression...) + sql = append(sql, ')') + return sql +} diff --git a/pkg/sqlstore/sqlstoretest/provider.go b/pkg/sqlstore/sqlstoretest/provider.go index b008471506..97adf272c1 100644 --- a/pkg/sqlstore/sqlstoretest/provider.go +++ b/pkg/sqlstore/sqlstoretest/provider.go @@ -15,10 +15,11 @@ import ( var _ sqlstore.SQLStore = (*Provider)(nil) type Provider struct { - db *sql.DB - mock sqlmock.Sqlmock - bunDB *bun.DB - dialect *dialect + db *sql.DB + mock sqlmock.Sqlmock + bunDB *bun.DB + dialect *dialect + formatter sqlstore.SQLFormatter } func New(config sqlstore.Config, matcher sqlmock.QueryMatcher) *Provider { @@ -38,10 +39,11 @@ func New(config sqlstore.Config, matcher sqlmock.QueryMatcher) *Provider { } return &Provider{ - db: db, - mock: mock, - bunDB: bunDB, - dialect: new(dialect), + db: db, + mock: mock, + bunDB: bunDB, + dialect: new(dialect), + formatter: newFormatter(bunDB.Dialect()), } } @@ -61,6 +63,8 @@ func (provider *Provider) Dialect() sqlstore.SQLDialect { return provider.dialect } +func (provider *Provider) Formatter() sqlstore.SQLFormatter { return provider.formatter } + func (provider *Provider) BunDBCtx(ctx context.Context) bun.IDB { return provider.bunDB }