thinking through data structure, please hold

This commit is contained in:
Yehoshua Sandler 2025-03-30 12:17:39 -05:00
parent 518316c4f7
commit d3f9600d58
2 changed files with 115 additions and 62 deletions

View File

@ -1,3 +1,48 @@
# query-interpreter
Core program to interpret query language strings into structured data, and back again.
## SQL Tokens
We are currently using DataDog's SQL Tokenizer `sqllexer` to scan through SQL strings.
Here are the general token types it defines:
```go
type TokenType int
const (
ERROR TokenType = iota
EOF
SPACE // space or newline
STRING // string literal
INCOMPLETE_STRING // incomplete string literal so that we can obfuscate it, e.g. 'abc
NUMBER // number literal
IDENT // identifier
QUOTED_IDENT // quoted identifier
OPERATOR // operator
WILDCARD // wildcard *
COMMENT // comment
MULTILINE_COMMENT // multiline comment
PUNCTUATION // punctuation
DOLLAR_QUOTED_FUNCTION // dollar quoted function
DOLLAR_QUOTED_STRING // dollar quoted string
POSITIONAL_PARAMETER // numbered parameter
BIND_PARAMETER // bind parameter
FUNCTION // function
SYSTEM_VARIABLE // system variable
UNKNOWN // unknown token
COMMAND // SQL commands like SELECT, INSERT
KEYWORD // Other SQL keywords
JSON_OP // JSON operators
BOOLEAN // boolean literal
NULL // null literal
PROC_INDICATOR // procedure indicator
CTE_INDICATOR // CTE indicator
ALIAS_INDICATOR // alias indicator
)
```
Based on these token types, we will be able to parse the details of a query into structs that
represent it. From those structs we will be able to modify the details in any way we choose and
reconstruct them into valid SQL statements.

130
main.go
View File

@ -2,82 +2,90 @@ package main
import (
"fmt"
"strings"
"github.com/DataDog/go-sqllexer"
)
// TokenGroupType is a coarse grouping that sqllexer token types are mapped
// onto.
type TokenGroupType int

// QueryType identifies which kind of SQL statement a query is.
type QueryType int

// Token groups.
const (
	TOKEN_OTHER   TokenGroupType = iota
	TOKEN_ACT                    // SELECT, CREATE, etc
	TOKEN_FILTER                 // WHERE, AND, etc
	TOKEN_RELATE                 // joins
	TOKEN_DEFINE                 // column data types, etc
	TOKEN_CONTROL                // sorting, etc
)

// Query types.
//
// These live in their own const block on purpose: iota only restarts at the
// beginning of a const block, and callers rely on NONE being the zero value
// of QueryType with the CRUD statement types numbered 1 through 4.
const (
	NONE QueryType = iota
	SELECT
	UPDATE
	INSERT
	DELETE
)
var tokenGroupMap = map[sqllexer.TokenType]TokenGroupType{
sqllexer.ERROR: TOKEN_OTHER,
sqllexer.EOF: TOKEN_OTHER,
sqllexer.SPACE: TOKEN_OTHER,
sqllexer.STRING: TOKEN_DEFINE,
sqllexer.INCOMPLETE_STRING: TOKEN_DEFINE,
sqllexer.NUMBER: TOKEN_DEFINE,
sqllexer.IDENT: TOKEN_ACT, // for table/column names
sqllexer.QUOTED_IDENT: TOKEN_RELATE, // for joins
sqllexer.OPERATOR: TOKEN_FILTER,
sqllexer.WILDCARD: TOKEN_ACT,
sqllexer.COMMENT: TOKEN_OTHER,
sqllexer.MULTILINE_COMMENT: TOKEN_OTHER,
sqllexer.PUNCTUATION: TOKEN_OTHER,
sqllexer.DOLLAR_QUOTED_FUNCTION: TOKEN_DEFINE,
sqllexer.DOLLAR_QUOTED_STRING: TOKEN_DEFINE,
sqllexer.POSITIONAL_PARAMETER: TOKEN_FILTER,
sqllexer.BIND_PARAMETER: TOKEN_FILTER,
sqllexer.FUNCTION: TOKEN_DEFINE,
sqllexer.SYSTEM_VARIABLE: TOKEN_DEFINE,
sqllexer.UNKNOWN: TOKEN_OTHER,
sqllexer.COMMAND: TOKEN_ACT,
sqllexer.KEYWORD: TOKEN_ACT,
sqllexer.JSON_OP: TOKEN_DEFINE,
sqllexer.BOOLEAN: TOKEN_DEFINE,
sqllexer.NULL: TOKEN_DEFINE,
sqllexer.PROC_INDICATOR: TOKEN_OTHER,
sqllexer.CTE_INDICATOR: TOKEN_OTHER,
sqllexer.ALIAS_INDICATOR: TOKEN_OTHER,
// Query is a struct representation of a SQL query.
// NOTE(review): nothing in the visible code populates or reads this struct
// yet, so the field notes below are assumptions to confirm.
type Query struct {
// Type is the kind of statement (one of the QueryType constants).
Type QueryType
// FullSql is presumably the complete SQL text of the query - TODO confirm.
FullSql string
// IsValid presumably reports whether the query was parsed successfully -
// TODO confirm.
IsValid bool
}
func GetTokenGroupType(tokenType sqllexer.TokenType) (TokenGroupType, bool, bool) {
group, exists := tokenGroupMap[tokenType]
isStart := false
isEnd := false
if !exists {
group = TOKEN_OTHER
}
if group == TOKEN_ACT {
isStart = true
}
if tokenType == sqllexer.EOF ||
(group == TOKEN_OTHER && tokenType == sqllexer.PUNCTUATION) {
isEnd = true
}
return group, isStart, isEnd
// IsTokenEndOfStatement reports whether token terminates a SQL statement:
// either the lexer's EOF token or a token whose value is a semicolon.
func IsTokenEndOfStatement(token *sqllexer.Token) bool {
	if token.Type == sqllexer.EOF {
		return true
	}
	return token.Value == ";"
}
func main() {
query := "SELECT * FROM users \n WHERE id = something AND SELECT;"
lexer := sqllexer.New(query)
// GetQueryTypeFromToken maps a single lexer token to a QueryType.
//
// Only COMMAND tokens are considered; any other token type yields NONE. The
// token value is upper-cased before matching, so the comparison is
// case-insensitive. Commands other than SELECT, UPDATE, INSERT and DELETE
// also yield NONE.
func GetQueryTypeFromToken(token *sqllexer.Token) QueryType {
	if token.Type != sqllexer.COMMAND {
		return NONE
	}

	switch command := strings.ToUpper(token.Value); command {
	case "SELECT":
		return SELECT
	case "UPDATE":
		return UPDATE
	case "INSERT":
		return INSERT
	case "DELETE":
		return DELETE
	}
	return NONE
}
// IsCrudSqlStatement reports whether token is the command of a CRUD
// statement, i.e. GetQueryTypeFromToken resolves it to SELECT, UPDATE,
// INSERT or DELETE.
//
// The check matches the named constants rather than a numeric range, so it
// stays correct if QueryType values are added or renumbered.
func IsCrudSqlStatement(token *sqllexer.Token) bool {
	switch GetQueryTypeFromToken(token) {
	case SELECT, UPDATE, INSERT, DELETE:
		return true
	default:
		return false
	}
}
func GetQueryTypeFromSql(sql string) QueryType {
var queryType QueryType
lexer := sqllexer.New(sql)
for {
token := lexer.Scan()
tokenGroup, isStart, isEnd := GetTokenGroupType(token.Type)
fmt.Println(token.Value, token.Type, tokenGroup, isStart, isEnd)
if IsTokenEndOfStatement(token) {
break
}
if token.Type == sqllexer.EOF {
queryType = GetQueryTypeFromToken(token)
if queryType > 0 {
break
}
}
return queryType
}
// main runs the query-type detection on a hard-coded sample query and prints
// the resulting QueryType.
func main() {
	const query = "DELETE * FROM users \n WHERE id = something AND SELECT;"

	fmt.Println(GetQueryTypeFromSql(query))

	// Disabled debugging aid: dump every token the lexer produces.
	//
	//lexer := sqllexer.New(query)
	//for {
	//	token := lexer.Scan()
	//	fmt.Println(token.Value, token.Type)
	//	if token.Type == sqllexer.EOF {
	//		break
	//	}
	//}
}