thinking through data structure, please hold
This commit is contained in:
parent
518316c4f7
commit
d3f9600d58
47
README.md
47
README.md
@ -1,3 +1,48 @@
|
|||||||
# query-interpreter
|
# query-interpreter
|
||||||
|
|
||||||
Core program to interpret query language strings into structured data, and back again.
|
Core program to interpret query language strings into structured data, and back again.
|
||||||
|
|
||||||
|
## SQL Tokens
|
||||||
|
|
||||||
|
We currently use DataDog's SQL tokenizer, `go-sqllexer` (Go package `sqllexer`), to scan through SQL strings.
|
||||||
|
Here are the general token types it defines:
|
||||||
|
|
||||||
|
```go
|
||||||
|
type TokenType int
|
||||||
|
|
||||||
|
const (
|
||||||
|
ERROR TokenType = iota
|
||||||
|
EOF
|
||||||
|
SPACE // space or newline
|
||||||
|
STRING // string literal
|
||||||
|
INCOMPLETE_STRING // incomplete string literal so that we can obfuscate it, e.g. 'abc
|
||||||
|
NUMBER // number literal
|
||||||
|
IDENT // identifier
|
||||||
|
QUOTED_IDENT // quoted identifier
|
||||||
|
OPERATOR // operator
|
||||||
|
WILDCARD // wildcard *
|
||||||
|
COMMENT // comment
|
||||||
|
MULTILINE_COMMENT // multiline comment
|
||||||
|
PUNCTUATION // punctuation
|
||||||
|
DOLLAR_QUOTED_FUNCTION // dollar quoted function
|
||||||
|
DOLLAR_QUOTED_STRING // dollar quoted string
|
||||||
|
POSITIONAL_PARAMETER // numbered parameter
|
||||||
|
BIND_PARAMETER // bind parameter
|
||||||
|
FUNCTION // function
|
||||||
|
SYSTEM_VARIABLE // system variable
|
||||||
|
UNKNOWN // unknown token
|
||||||
|
COMMAND // SQL commands like SELECT, INSERT
|
||||||
|
KEYWORD // Other SQL keywords
|
||||||
|
JSON_OP // JSON operators
|
||||||
|
BOOLEAN // boolean literal
|
||||||
|
NULL // null literal
|
||||||
|
PROC_INDICATOR // procedure indicator
|
||||||
|
CTE_INDICATOR // CTE indicator
|
||||||
|
ALIAS_INDICATOR // alias indicator
|
||||||
|
)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Based on these token types, we will be able to parse the details of each query into
|
||||||
|
structs to represent them. From those structs we will be able to modify their details any way that
|
||||||
|
we choose and reconstruct them into valid SQL statements.
|
||||||
|
130
main.go
130
main.go
@ -2,82 +2,90 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/DataDog/go-sqllexer"
|
"github.com/DataDog/go-sqllexer"
|
||||||
)
|
)
|
||||||
|
|
||||||
type TokenGroupType int
|
type QueryType int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
TOKEN_OTHER TokenGroupType = iota
|
NONE QueryType = iota
|
||||||
TOKEN_ACT // SELECT, CREATE, etc
|
SELECT
|
||||||
TOKEN_FILTER // WHERE, AND, etc
|
UPDATE
|
||||||
TOKEN_RELATE // joins
|
INSERT
|
||||||
TOKEN_DEFINE // column data types, etc
|
DELETE
|
||||||
TOKEN_CONTROL // sorting, etc
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var tokenGroupMap = map[sqllexer.TokenType]TokenGroupType{
|
type Query struct {
|
||||||
sqllexer.ERROR: TOKEN_OTHER,
|
Type QueryType
|
||||||
sqllexer.EOF: TOKEN_OTHER,
|
FullSql string
|
||||||
sqllexer.SPACE: TOKEN_OTHER,
|
IsValid bool
|
||||||
sqllexer.STRING: TOKEN_DEFINE,
|
|
||||||
sqllexer.INCOMPLETE_STRING: TOKEN_DEFINE,
|
|
||||||
sqllexer.NUMBER: TOKEN_DEFINE,
|
|
||||||
sqllexer.IDENT: TOKEN_ACT, // for table/column names
|
|
||||||
sqllexer.QUOTED_IDENT: TOKEN_RELATE, // for joins
|
|
||||||
sqllexer.OPERATOR: TOKEN_FILTER,
|
|
||||||
sqllexer.WILDCARD: TOKEN_ACT,
|
|
||||||
sqllexer.COMMENT: TOKEN_OTHER,
|
|
||||||
sqllexer.MULTILINE_COMMENT: TOKEN_OTHER,
|
|
||||||
sqllexer.PUNCTUATION: TOKEN_OTHER,
|
|
||||||
sqllexer.DOLLAR_QUOTED_FUNCTION: TOKEN_DEFINE,
|
|
||||||
sqllexer.DOLLAR_QUOTED_STRING: TOKEN_DEFINE,
|
|
||||||
sqllexer.POSITIONAL_PARAMETER: TOKEN_FILTER,
|
|
||||||
sqllexer.BIND_PARAMETER: TOKEN_FILTER,
|
|
||||||
sqllexer.FUNCTION: TOKEN_DEFINE,
|
|
||||||
sqllexer.SYSTEM_VARIABLE: TOKEN_DEFINE,
|
|
||||||
sqllexer.UNKNOWN: TOKEN_OTHER,
|
|
||||||
sqllexer.COMMAND: TOKEN_ACT,
|
|
||||||
sqllexer.KEYWORD: TOKEN_ACT,
|
|
||||||
sqllexer.JSON_OP: TOKEN_DEFINE,
|
|
||||||
sqllexer.BOOLEAN: TOKEN_DEFINE,
|
|
||||||
sqllexer.NULL: TOKEN_DEFINE,
|
|
||||||
sqllexer.PROC_INDICATOR: TOKEN_OTHER,
|
|
||||||
sqllexer.CTE_INDICATOR: TOKEN_OTHER,
|
|
||||||
sqllexer.ALIAS_INDICATOR: TOKEN_OTHER,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetTokenGroupType(tokenType sqllexer.TokenType) (TokenGroupType, bool, bool) {
|
func IsTokenEndOfStatement(token *sqllexer.Token) bool {
|
||||||
group, exists := tokenGroupMap[tokenType]
|
return (token.Type == sqllexer.EOF || token.Value == ";")
|
||||||
isStart := false
|
|
||||||
isEnd := false
|
|
||||||
|
|
||||||
if !exists {
|
|
||||||
group = TOKEN_OTHER
|
|
||||||
}
|
|
||||||
|
|
||||||
if group == TOKEN_ACT {
|
|
||||||
isStart = true
|
|
||||||
}
|
|
||||||
|
|
||||||
if tokenType == sqllexer.EOF ||
|
|
||||||
(group == TOKEN_OTHER && tokenType == sqllexer.PUNCTUATION) {
|
|
||||||
isEnd = true
|
|
||||||
}
|
|
||||||
|
|
||||||
return group, isStart, isEnd
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func GetQueryTypeFromToken(token *sqllexer.Token) QueryType {
|
||||||
query := "SELECT * FROM users \n WHERE id = something AND SELECT;"
|
if token.Type != sqllexer.COMMAND {
|
||||||
lexer := sqllexer.New(query)
|
return NONE
|
||||||
|
}
|
||||||
|
|
||||||
|
var foundType QueryType
|
||||||
|
switch strings.ToUpper(token.Value) {
|
||||||
|
case "SELECT":
|
||||||
|
foundType = SELECT
|
||||||
|
case "UPDATE":
|
||||||
|
foundType = UPDATE
|
||||||
|
case "INSERT":
|
||||||
|
foundType = INSERT
|
||||||
|
case "DELETE":
|
||||||
|
foundType = DELETE
|
||||||
|
default:
|
||||||
|
foundType = NONE
|
||||||
|
}
|
||||||
|
|
||||||
|
return foundType
|
||||||
|
}
|
||||||
|
|
||||||
|
func IsCrudSqlStatement(token *sqllexer.Token) bool {
|
||||||
|
queryType := GetQueryTypeFromToken(token)
|
||||||
|
return (queryType > 0 && queryType <= 4) // TODO: Update if QueryTypes Change
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetQueryTypeFromSql(sql string) QueryType {
|
||||||
|
var queryType QueryType
|
||||||
|
|
||||||
|
lexer := sqllexer.New(sql)
|
||||||
for {
|
for {
|
||||||
token := lexer.Scan()
|
token := lexer.Scan()
|
||||||
tokenGroup, isStart, isEnd := GetTokenGroupType(token.Type)
|
if IsTokenEndOfStatement(token) {
|
||||||
fmt.Println(token.Value, token.Type, tokenGroup, isStart, isEnd)
|
break
|
||||||
|
}
|
||||||
|
|
||||||
if token.Type == sqllexer.EOF {
|
queryType = GetQueryTypeFromToken(token)
|
||||||
|
if queryType > 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return queryType
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
query := "DELETE * FROM users \n WHERE id = something AND SELECT;"
|
||||||
|
newQuery := GetQueryTypeFromSql(query)
|
||||||
|
|
||||||
|
fmt.Println(newQuery)
|
||||||
|
|
||||||
|
//lexer := sqllexer.New(query)
|
||||||
|
//for {
|
||||||
|
// token := lexer.Scan()
|
||||||
|
// fmt.Println(token.Value, token.Type)
|
||||||
|
|
||||||
|
// if token.Type == sqllexer.EOF {
|
||||||
|
// break
|
||||||
|
// }
|
||||||
|
//}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user