thinking through data structure, please hold

This commit is contained in:
Yehoshua Sandler 2025-03-30 12:17:39 -05:00
parent 518316c4f7
commit d3f9600d58
2 changed files with 115 additions and 62 deletions

View File

@ -1,3 +1,48 @@
# query-interpreter
Core program to interpret query language strings into structured data, and back again.
## SQL Tokens
We are currently using DataDog's SQL Tokenizer `sqllexer` to scan through SQL strings.
Here are the general token types it defines:
```go
type TokenType int
const (
ERROR TokenType = iota
EOF
SPACE // space or newline
STRING // string literal
INCOMPLETE_STRING // incomplete string literal so that we can obfuscate it, e.g. 'abc
NUMBER // number literal
IDENT // identifier
QUOTED_IDENT // quoted identifier
OPERATOR // operator
WILDCARD // wildcard *
COMMENT // comment
MULTILINE_COMMENT // multiline comment
PUNCTUATION // punctuation
DOLLAR_QUOTED_FUNCTION // dollar quoted function
DOLLAR_QUOTED_STRING // dollar quoted string
POSITIONAL_PARAMETER // numbered parameter
BIND_PARAMETER // bind parameter
FUNCTION // function
SYSTEM_VARIABLE // system variable
UNKNOWN // unknown token
COMMAND // SQL commands like SELECT, INSERT
KEYWORD // Other SQL keywords
JSON_OP // JSON operators
BOOLEAN // boolean literal
NULL // null literal
PROC_INDICATOR // procedure indicator
CTE_INDICATOR // CTE indicator
ALIAS_INDICATOR // alias indicator
)
```
Based on these token types, we will be able to parse the details of a query into structs that
represent it. From those structs, we will be able to modify the details in any way we choose
and reconstruct them into valid SQL statements.

130
main.go
View File

@ -2,82 +2,90 @@ package main
import ( import (
"fmt" "fmt"
"strings"
"github.com/DataDog/go-sqllexer" "github.com/DataDog/go-sqllexer"
) )
type TokenGroupType int type QueryType int
const ( const (
TOKEN_OTHER TokenGroupType = iota NONE QueryType = iota
TOKEN_ACT // SELECT, CREATE, etc SELECT
TOKEN_FILTER // WHERE, AND, etc UPDATE
TOKEN_RELATE // joins INSERT
TOKEN_DEFINE // column data types, etc DELETE
TOKEN_CONTROL // sorting, etc
) )
var tokenGroupMap = map[sqllexer.TokenType]TokenGroupType{ type Query struct {
sqllexer.ERROR: TOKEN_OTHER, Type QueryType
sqllexer.EOF: TOKEN_OTHER, FullSql string
sqllexer.SPACE: TOKEN_OTHER, IsValid bool
sqllexer.STRING: TOKEN_DEFINE,
sqllexer.INCOMPLETE_STRING: TOKEN_DEFINE,
sqllexer.NUMBER: TOKEN_DEFINE,
sqllexer.IDENT: TOKEN_ACT, // for table/column names
sqllexer.QUOTED_IDENT: TOKEN_RELATE, // for joins
sqllexer.OPERATOR: TOKEN_FILTER,
sqllexer.WILDCARD: TOKEN_ACT,
sqllexer.COMMENT: TOKEN_OTHER,
sqllexer.MULTILINE_COMMENT: TOKEN_OTHER,
sqllexer.PUNCTUATION: TOKEN_OTHER,
sqllexer.DOLLAR_QUOTED_FUNCTION: TOKEN_DEFINE,
sqllexer.DOLLAR_QUOTED_STRING: TOKEN_DEFINE,
sqllexer.POSITIONAL_PARAMETER: TOKEN_FILTER,
sqllexer.BIND_PARAMETER: TOKEN_FILTER,
sqllexer.FUNCTION: TOKEN_DEFINE,
sqllexer.SYSTEM_VARIABLE: TOKEN_DEFINE,
sqllexer.UNKNOWN: TOKEN_OTHER,
sqllexer.COMMAND: TOKEN_ACT,
sqllexer.KEYWORD: TOKEN_ACT,
sqllexer.JSON_OP: TOKEN_DEFINE,
sqllexer.BOOLEAN: TOKEN_DEFINE,
sqllexer.NULL: TOKEN_DEFINE,
sqllexer.PROC_INDICATOR: TOKEN_OTHER,
sqllexer.CTE_INDICATOR: TOKEN_OTHER,
sqllexer.ALIAS_INDICATOR: TOKEN_OTHER,
} }
func GetTokenGroupType(tokenType sqllexer.TokenType) (TokenGroupType, bool, bool) { func IsTokenEndOfStatement(token *sqllexer.Token) bool {
group, exists := tokenGroupMap[tokenType] return (token.Type == sqllexer.EOF || token.Value == ";")
isStart := false
isEnd := false
if !exists {
group = TOKEN_OTHER
}
if group == TOKEN_ACT {
isStart = true
}
if tokenType == sqllexer.EOF ||
(group == TOKEN_OTHER && tokenType == sqllexer.PUNCTUATION) {
isEnd = true
}
return group, isStart, isEnd
} }
func main() { func GetQueryTypeFromToken(token *sqllexer.Token) QueryType {
query := "SELECT * FROM users \n WHERE id = something AND SELECT;" if token.Type != sqllexer.COMMAND {
lexer := sqllexer.New(query) return NONE
}
var foundType QueryType
switch strings.ToUpper(token.Value) {
case "SELECT":
foundType = SELECT
case "UPDATE":
foundType = UPDATE
case "INSERT":
foundType = INSERT
case "DELETE":
foundType = DELETE
default:
foundType = NONE
}
return foundType
}
// IsCrudSqlStatement reports whether token resolves, via
// GetQueryTypeFromToken, to one of the CRUD query types.
func IsCrudSqlStatement(token *sqllexer.Token) bool {
	kind := GetQueryTypeFromToken(token)
	// TODO: Update if QueryTypes Change. The bounds below rely on the
	// QueryType values 1 through 4 being exactly the CRUD commands.
	if kind <= 0 {
		return false
	}
	return kind <= 4
}
func GetQueryTypeFromSql(sql string) QueryType {
var queryType QueryType
lexer := sqllexer.New(sql)
for { for {
token := lexer.Scan() token := lexer.Scan()
tokenGroup, isStart, isEnd := GetTokenGroupType(token.Type) if IsTokenEndOfStatement(token) {
fmt.Println(token.Value, token.Type, tokenGroup, isStart, isEnd) break
}
if token.Type == sqllexer.EOF { queryType = GetQueryTypeFromToken(token)
if queryType > 0 {
break break
} }
} }
return queryType
}
func main() {
query := "DELETE * FROM users \n WHERE id = something AND SELECT;"
newQuery := GetQueryTypeFromSql(query)
fmt.Println(newQuery)
//lexer := sqllexer.New(query)
//for {
// token := lexer.Scan()
// fmt.Println(token.Value, token.Type)
// if token.Type == sqllexer.EOF {
// break
// }
//}
} }