feat: scan a string with the SQL tokenizer

This commit is contained in:
Yehoshua Sandler 2025-03-29 20:17:44 -05:00
parent 6fe01c4821
commit 518316c4f7
3 changed files with 98 additions and 0 deletions

5
go.mod Normal file
View File

@ -0,0 +1,5 @@
module query-inter
go 1.23.7
require github.com/DataDog/go-sqllexer v0.1.3

10
go.sum Normal file
View File

@ -0,0 +1,10 @@
github.com/DataDog/go-sqllexer v0.1.3 h1:Kl2T6QVndMEZqQSY8rkoltYP+LVNaA54N+EwAMc9N5w=
github.com/DataDog/go-sqllexer v0.1.3/go.mod h1:KwkYhpFEVIq+BfobkTC1vfqm4gTi65skV/DpDBXtexc=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

83
main.go Normal file
View File

@ -0,0 +1,83 @@
package main
import (
"fmt"
"github.com/DataDog/go-sqllexer"
)
// TokenGroupType classifies fine-grained sqllexer token types into coarse
// semantic groups describing their role within a SQL statement.
type TokenGroupType int

// Token group categories. TOKEN_OTHER is the zero value and serves as the
// fallback for token types with no explicit mapping in tokenGroupMap.
const (
	TOKEN_OTHER   TokenGroupType = iota // whitespace, comments, punctuation, errors
	TOKEN_ACT                           // SELECT, CREATE, etc
	TOKEN_FILTER                        // WHERE, AND, etc
	TOKEN_RELATE                        // joins
	TOKEN_DEFINE                        // column data types, etc
	TOKEN_CONTROL                       // sorting, etc
)
// tokenGroupMap assigns each sqllexer token type to its TokenGroupType.
// Token types missing from this map are treated as TOKEN_OTHER by
// GetTokenGroupType. Several assignments look provisional (e.g. IDENT as
// TOKEN_ACT, QUOTED_IDENT as TOKEN_RELATE) — NOTE(review): confirm these
// groupings are intentional rather than placeholders.
var tokenGroupMap = map[sqllexer.TokenType]TokenGroupType{
	sqllexer.ERROR:                  TOKEN_OTHER,
	sqllexer.EOF:                    TOKEN_OTHER,
	sqllexer.SPACE:                  TOKEN_OTHER,
	sqllexer.STRING:                 TOKEN_DEFINE,
	sqllexer.INCOMPLETE_STRING:      TOKEN_DEFINE,
	sqllexer.NUMBER:                 TOKEN_DEFINE,
	sqllexer.IDENT:                  TOKEN_ACT, // for table/column names
	sqllexer.QUOTED_IDENT:           TOKEN_RELATE, // for joins
	sqllexer.OPERATOR:               TOKEN_FILTER,
	sqllexer.WILDCARD:               TOKEN_ACT,
	sqllexer.COMMENT:                TOKEN_OTHER,
	sqllexer.MULTILINE_COMMENT:      TOKEN_OTHER,
	sqllexer.PUNCTUATION:            TOKEN_OTHER,
	sqllexer.DOLLAR_QUOTED_FUNCTION: TOKEN_DEFINE,
	sqllexer.DOLLAR_QUOTED_STRING:   TOKEN_DEFINE,
	sqllexer.POSITIONAL_PARAMETER:   TOKEN_FILTER,
	sqllexer.BIND_PARAMETER:         TOKEN_FILTER,
	sqllexer.FUNCTION:               TOKEN_DEFINE,
	sqllexer.SYSTEM_VARIABLE:        TOKEN_DEFINE,
	sqllexer.UNKNOWN:                TOKEN_OTHER,
	sqllexer.COMMAND:                TOKEN_ACT,
	sqllexer.KEYWORD:                TOKEN_ACT,
	sqllexer.JSON_OP:                TOKEN_DEFINE,
	sqllexer.BOOLEAN:                TOKEN_DEFINE,
	sqllexer.NULL:                   TOKEN_DEFINE,
	sqllexer.PROC_INDICATOR:         TOKEN_OTHER,
	sqllexer.CTE_INDICATOR:          TOKEN_OTHER,
	sqllexer.ALIAS_INDICATOR:        TOKEN_OTHER,
}
// GetTokenGroupType maps a sqllexer token type onto its coarse
// TokenGroupType and reports how it relates to statement grouping.
//
// Returns:
//   - the TokenGroupType for tokenType (TOKEN_OTHER when unmapped);
//   - isStart: true when the token opens a group (TOKEN_ACT tokens such as
//     commands and keywords);
//   - isEnd: true when the token closes a group (EOF or punctuation such
//     as ';').
func GetTokenGroupType(tokenType sqllexer.TokenType) (TokenGroupType, bool, bool) {
	group, ok := tokenGroupMap[tokenType]
	if !ok {
		// Unknown token types fall back to the catch-all group.
		group = TOKEN_OTHER
	}
	// Action tokens (SELECT, CREATE, ...) begin a new token group.
	isStart := group == TOKEN_ACT
	// PUNCTUATION always maps to TOKEN_OTHER in tokenGroupMap, so the
	// original extra `group == TOKEN_OTHER` check was redundant.
	isEnd := tokenType == sqllexer.EOF || tokenType == sqllexer.PUNCTUATION
	return group, isStart, isEnd
}
// main runs a demonstration scan: it tokenizes a sample SQL query and
// prints each token together with its group classification and
// start/end-of-group flags, stopping once the lexer reports EOF.
func main() {
	const query = "SELECT * FROM users \n WHERE id = something AND SELECT;"
	lexer := sqllexer.New(query)
	for tok := lexer.Scan(); ; tok = lexer.Scan() {
		group, starts, ends := GetTokenGroupType(tok.Type)
		fmt.Println(tok.Value, tok.Type, group, starts, ends)
		if tok.Type == sqllexer.EOF {
			return
		}
	}
}