thinking through data structure, please hold
parent 518316c4f7
commit d3f9600d58

README.md (45 lines changed)
@@ -1,3 +1,48 @@

# query-interpreter

Core program to interpret query language strings into structured data, and back again.

## SQL Tokens

We are currently using DataDog's SQL Tokenizer `sqllexer` to scan through SQL strings.
Here are the general token types it defines:

```go
type TokenType int

const (
    ERROR TokenType = iota
    EOF
    SPACE                  // space or newline
    STRING                 // string literal
    INCOMPLETE_STRING      // incomplete string literal so that we can obfuscate it, e.g. 'abc
    NUMBER                 // number literal
    IDENT                  // identifier
    QUOTED_IDENT           // quoted identifier
    OPERATOR               // operator
    WILDCARD               // wildcard *
    COMMENT                // comment
    MULTILINE_COMMENT      // multiline comment
    PUNCTUATION            // punctuation
    DOLLAR_QUOTED_FUNCTION // dollar quoted function
    DOLLAR_QUOTED_STRING   // dollar quoted string
    POSITIONAL_PARAMETER   // numbered parameter
    BIND_PARAMETER         // bind parameter
    FUNCTION               // function
    SYSTEM_VARIABLE        // system variable
    UNKNOWN                // unknown token
    COMMAND                // SQL commands like SELECT, INSERT
    KEYWORD                // Other SQL keywords
    JSON_OP                // JSON operators
    BOOLEAN                // boolean literal
    NULL                   // null literal
    PROC_INDICATOR         // procedure indicator
    CTE_INDICATOR          // CTE indicator
    ALIAS_INDICATOR        // alias indicator
)
```

Based on these different token types, we will be able to parse the details of a given query into structs that represent it. From those structs we will be able to modify the details in any way we choose and reconstruct them into valid SQL statements.
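
To make that round trip concrete, here is a minimal sketch (not part of this repository) of the scan-and-rebuild step using `sqllexer`. The example query and variable names are invented for illustration; the struct-based representation described above would sit between the scan and the rebuild.

```go
package main

import (
    "fmt"
    "strings"

    "github.com/DataDog/go-sqllexer"
)

func main() {
    // Hypothetical input query, used only for illustration.
    query := "SELECT id, name FROM users WHERE id = 1;"

    // Scan the query into tokens, keeping each token's raw value.
    lexer := sqllexer.New(query)
    var parts []string
    for {
        token := lexer.Scan()
        if token.Type == sqllexer.EOF {
            break
        }
        fmt.Printf("%d: %q\n", token.Type, token.Value)
        parts = append(parts, token.Value)
    }

    // Rejoining the raw values should reproduce the original SQL, assuming
    // the lexer emits whitespace and punctuation tokens losslessly.
    fmt.Println(strings.Join(parts, ""))
}
```

The flat `parts` slice is just a stand-in: the planned structs would hold the same token information in typed fields that can be edited before the SQL is reassembled.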

main.go (130 lines changed)
@@ -2,82 +2,90 @@ package main

import (
    "fmt"
    "strings"

    "github.com/DataDog/go-sqllexer"
)

type TokenGroupType int
type QueryType int

const (
    TOKEN_OTHER TokenGroupType = iota
    TOKEN_ACT     // SELECT, CREATE, etc
    TOKEN_FILTER  // WHERE, AND, etc
    TOKEN_RELATE  // joins
    TOKEN_DEFINE  // column data types, etc
    TOKEN_CONTROL // sorting, etc
)

// QueryType values get their own const block so iota restarts at zero;
// NONE must be 0 for the `queryType > 0` checks below to hold.
const (
    NONE QueryType = iota
    SELECT
    UPDATE
    INSERT
    DELETE
)

var tokenGroupMap = map[sqllexer.TokenType]TokenGroupType{
    sqllexer.ERROR:                  TOKEN_OTHER,
    sqllexer.EOF:                    TOKEN_OTHER,
    sqllexer.SPACE:                  TOKEN_OTHER,
    sqllexer.STRING:                 TOKEN_DEFINE,
    sqllexer.INCOMPLETE_STRING:      TOKEN_DEFINE,
    sqllexer.NUMBER:                 TOKEN_DEFINE,
    sqllexer.IDENT:                  TOKEN_ACT,    // for table/column names
    sqllexer.QUOTED_IDENT:           TOKEN_RELATE, // for joins
    sqllexer.OPERATOR:               TOKEN_FILTER,
    sqllexer.WILDCARD:               TOKEN_ACT,
    sqllexer.COMMENT:                TOKEN_OTHER,
    sqllexer.MULTILINE_COMMENT:      TOKEN_OTHER,
    sqllexer.PUNCTUATION:            TOKEN_OTHER,
    sqllexer.DOLLAR_QUOTED_FUNCTION: TOKEN_DEFINE,
    sqllexer.DOLLAR_QUOTED_STRING:   TOKEN_DEFINE,
    sqllexer.POSITIONAL_PARAMETER:   TOKEN_FILTER,
    sqllexer.BIND_PARAMETER:         TOKEN_FILTER,
    sqllexer.FUNCTION:               TOKEN_DEFINE,
    sqllexer.SYSTEM_VARIABLE:        TOKEN_DEFINE,
    sqllexer.UNKNOWN:                TOKEN_OTHER,
    sqllexer.COMMAND:                TOKEN_ACT,
    sqllexer.KEYWORD:                TOKEN_ACT,
    sqllexer.JSON_OP:                TOKEN_DEFINE,
    sqllexer.BOOLEAN:                TOKEN_DEFINE,
    sqllexer.NULL:                   TOKEN_DEFINE,
    sqllexer.PROC_INDICATOR:         TOKEN_OTHER,
    sqllexer.CTE_INDICATOR:          TOKEN_OTHER,
    sqllexer.ALIAS_INDICATOR:        TOKEN_OTHER,
}

// Query is the structured representation a SQL statement is interpreted into.
type Query struct {
    Type    QueryType
    FullSql string
    IsValid bool
}

// GetTokenGroupType maps a lexer token type onto our coarser token groups and
// reports whether the token can start or end a statement section.
func GetTokenGroupType(tokenType sqllexer.TokenType) (TokenGroupType, bool, bool) {
    group, exists := tokenGroupMap[tokenType]
    isStart := false
    isEnd := false

    if !exists {
        group = TOKEN_OTHER
    }

    if group == TOKEN_ACT {
        isStart = true
    }

    if tokenType == sqllexer.EOF ||
        (group == TOKEN_OTHER && tokenType == sqllexer.PUNCTUATION) {
        isEnd = true
    }

    return group, isStart, isEnd
}

// IsTokenEndOfStatement reports whether a token terminates a SQL statement.
func IsTokenEndOfStatement(token *sqllexer.Token) bool {
    return (token.Type == sqllexer.EOF || token.Value == ";")
}

// GetQueryTypeFromToken returns the QueryType for a COMMAND token
// (SELECT, UPDATE, INSERT, DELETE), or NONE for anything else.
func GetQueryTypeFromToken(token *sqllexer.Token) QueryType {
    if token.Type != sqllexer.COMMAND {
        return NONE
    }

    var foundType QueryType
    switch strings.ToUpper(token.Value) {
    case "SELECT":
        foundType = SELECT
    case "UPDATE":
        foundType = UPDATE
    case "INSERT":
        foundType = INSERT
    case "DELETE":
        foundType = DELETE
    default:
        foundType = NONE
    }

    return foundType
}

// IsCrudSqlStatement reports whether a token starts one of the four CRUD statements.
func IsCrudSqlStatement(token *sqllexer.Token) bool {
    queryType := GetQueryTypeFromToken(token)
    return (queryType > 0 && queryType <= 4) // TODO: Update if QueryTypes Change
}

// GetQueryTypeFromSql scans a raw SQL string and returns the QueryType of the
// first command token found, or NONE if the statement ends first.
func GetQueryTypeFromSql(sql string) QueryType {
    var queryType QueryType

    lexer := sqllexer.New(sql)
    for {
        token := lexer.Scan()
        tokenGroup, isStart, isEnd := GetTokenGroupType(token.Type)
        fmt.Println(token.Value, token.Type, tokenGroup, isStart, isEnd) // debug output
        if IsTokenEndOfStatement(token) {
            break
        }

        // Only COMMAND tokens can identify the query type.
        if token.Type == sqllexer.COMMAND {
            queryType = GetQueryTypeFromToken(token)
            if queryType > 0 {
                break
            }
        }
    }

    return queryType
}

func main() {
    query := "DELETE * FROM users \n WHERE id = something AND SELECT;"
    newQuery := GetQueryTypeFromSql(query)

    fmt.Println(newQuery)

    //lexer := sqllexer.New(query)
    //for {
    //	token := lexer.Scan()
    //	fmt.Println(token.Value, token.Type)

    //	if token.Type == sqllexer.EOF {
    //		break
    //	}
    //}
}