diff --git a/README.md b/README.md
index d3f51b3..76c70ba 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,76 @@
 # query-interpreter
 
-Core program to interpret query language strings into structured data, and back again.
\ No newline at end of file
+Core program to interpret query language strings into structured data, and back again.
+
+## SQL Tokens
+
+We currently use DataDog's SQL tokenizer, `sqllexer` (the `github.com/DataDog/go-sqllexer` package),
+to scan through SQL strings. Here are the general token types it defines:
+
+```go
+type TokenType int
+
+const (
+	ERROR TokenType = iota
+	EOF
+	SPACE                  // space or newline
+	STRING                 // string literal
+	INCOMPLETE_STRING      // incomplete string literal so that we can obfuscate it, e.g. 'abc
+	NUMBER                 // number literal
+	IDENT                  // identifier
+	QUOTED_IDENT           // quoted identifier
+	OPERATOR               // operator
+	WILDCARD               // wildcard *
+	COMMENT                // comment
+	MULTILINE_COMMENT      // multiline comment
+	PUNCTUATION            // punctuation
+	DOLLAR_QUOTED_FUNCTION // dollar quoted function
+	DOLLAR_QUOTED_STRING   // dollar quoted string
+	POSITIONAL_PARAMETER   // numbered parameter
+	BIND_PARAMETER         // bind parameter
+	FUNCTION               // function
+	SYSTEM_VARIABLE        // system variable
+	UNKNOWN                // unknown token
+	COMMAND                // SQL commands like SELECT, INSERT
+	KEYWORD                // Other SQL keywords
+	JSON_OP                // JSON operators
+	BOOLEAN                // boolean literal
+	NULL                   // null literal
+	PROC_INDICATOR         // procedure indicator
+	CTE_INDICATOR          // CTE indicator
+	ALIAS_INDICATOR        // alias indicator
+)
+
+```
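+
+For illustration, scanning a statement and printing its non-whitespace tokens looks like this (this
+reuses the same `go-sqllexer` calls that `main.go` makes; the real parsing code will differ):
+
+```go
+package main
+
+import (
+	"fmt"
+
+	"github.com/DataDog/go-sqllexer"
+)
+
+func main() {
+	// Print each non-whitespace token with its TokenType value.
+	lexer := sqllexer.New("SELECT id, name FROM users WHERE id = 1;")
+	for {
+		token := lexer.Scan()
+		if token.Type == sqllexer.EOF {
+			break
+		}
+		if token.Type == sqllexer.SPACE {
+			continue
+		}
+		fmt.Printf("%d: %q\n", token.Type, token.Value)
+	}
+}
+```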
+
+Based on these token types, we will parse the details of a query into structs that represent it.
+From those structs, we will be able to modify any details we choose and then reconstruct the result
+into a valid SQL statement.
diff --git a/main.go b/main.go
index 3b0129b..ac1db8f 100644
--- a/main.go
+++ b/main.go
@@ -2,82 +2,90 @@ package main
 
 import (
 	"fmt"
+	"strings"
+
 	"github.com/DataDog/go-sqllexer"
 )
 
-type TokenGroupType int
+type QueryType int
 
 const (
-	TOKEN_OTHER TokenGroupType = iota
-	TOKEN_ACT     // SELECT, CREATE, etc
-	TOKEN_FILTER  // WHERE, AND, etc
-	TOKEN_RELATE  // joins
-	TOKEN_DEFINE  // column data types, etc
-	TOKEN_CONTROL // sorting, etc
+	NONE QueryType = iota
+	SELECT
+	UPDATE
+	INSERT
+	DELETE
 )
 
-var tokenGroupMap = map[sqllexer.TokenType]TokenGroupType{
-	sqllexer.ERROR: TOKEN_OTHER,
-	sqllexer.EOF: TOKEN_OTHER,
-	sqllexer.SPACE: TOKEN_OTHER,
-	sqllexer.STRING: TOKEN_DEFINE,
-	sqllexer.INCOMPLETE_STRING: TOKEN_DEFINE,
-	sqllexer.NUMBER: TOKEN_DEFINE,
-	sqllexer.IDENT: TOKEN_ACT, // for table/column names
-	sqllexer.QUOTED_IDENT: TOKEN_RELATE, // for joins
-	sqllexer.OPERATOR: TOKEN_FILTER,
-	sqllexer.WILDCARD: TOKEN_ACT,
-	sqllexer.COMMENT: TOKEN_OTHER,
-	sqllexer.MULTILINE_COMMENT: TOKEN_OTHER,
-	sqllexer.PUNCTUATION: TOKEN_OTHER,
-	sqllexer.DOLLAR_QUOTED_FUNCTION: TOKEN_DEFINE,
-	sqllexer.DOLLAR_QUOTED_STRING: TOKEN_DEFINE,
-	sqllexer.POSITIONAL_PARAMETER: TOKEN_FILTER,
-	sqllexer.BIND_PARAMETER: TOKEN_FILTER,
-	sqllexer.FUNCTION: TOKEN_DEFINE,
-	sqllexer.SYSTEM_VARIABLE: TOKEN_DEFINE,
-	sqllexer.UNKNOWN: TOKEN_OTHER,
-	sqllexer.COMMAND: TOKEN_ACT,
-	sqllexer.KEYWORD: TOKEN_ACT,
-	sqllexer.JSON_OP: TOKEN_DEFINE,
-	sqllexer.BOOLEAN: TOKEN_DEFINE,
-	sqllexer.NULL: TOKEN_DEFINE,
-	sqllexer.PROC_INDICATOR: TOKEN_OTHER,
-	sqllexer.CTE_INDICATOR: TOKEN_OTHER,
-	sqllexer.ALIAS_INDICATOR: TOKEN_OTHER,
+type Query struct {
+	Type    QueryType
+	FullSql string
+	IsValid bool
 }
 
-func GetTokenGroupType(tokenType sqllexer.TokenType) (TokenGroupType, bool, bool) {
-	group, exists := tokenGroupMap[tokenType]
-	isStart := false
-	isEnd := false
-
-	if !exists {
-		group = TOKEN_OTHER
-	}
-
-	if group == TOKEN_ACT {
-		isStart = true
-	}
-
-	if tokenType == sqllexer.EOF ||
-		(group == TOKEN_OTHER && tokenType == sqllexer.PUNCTUATION) {
-		isEnd = true
-	}
-
-	return group, isStart, isEnd
+func IsTokenEndOfStatement(token *sqllexer.Token) bool {
+	return (token.Type == sqllexer.EOF || token.Value == ";")
 }
 
-func main() {
-	query := "SELECT * FROM users \n WHERE id = something AND SELECT;"
-	lexer := sqllexer.New(query)
+func GetQueryTypeFromToken(token *sqllexer.Token) QueryType {
+	if token.Type != sqllexer.COMMAND {
+		return NONE
+	}
+
+	var foundType QueryType
+	switch strings.ToUpper(token.Value) {
+	case "SELECT":
+		foundType = SELECT
+	case "UPDATE":
+		foundType = UPDATE
+	case "INSERT":
+		foundType = INSERT
+	case "DELETE":
+		foundType = DELETE
+	default:
+		foundType = NONE
+	}
+
+	return foundType
+}
+
+func IsCrudSqlStatement(token *sqllexer.Token) bool {
+	queryType := GetQueryTypeFromToken(token)
+	return queryType >= SELECT && queryType <= DELETE // relies on the CRUD types staying contiguous
+}
+
+func GetQueryTypeFromSql(sql string) QueryType {
+	var queryType QueryType
+
+	lexer := sqllexer.New(sql)
 	for {
 		token := lexer.Scan()
-		tokenGroup, isStart, isEnd := GetTokenGroupType(token.Type)
-		fmt.Println(token.Value, token.Type, tokenGroup, isStart, isEnd)
+		if IsTokenEndOfStatement(token) {
+			break
+		}
 
-		if token.Type == sqllexer.EOF {
+		queryType = GetQueryTypeFromToken(token)
+		if queryType != NONE {
 			break
 		}
 	}
+
+	return queryType
+}
+
+func main() {
+	query := "DELETE * FROM users \n WHERE id = something AND SELECT;"
+	queryType := GetQueryTypeFromSql(query)
+
+	fmt.Println(queryType)
+
+	//lexer := sqllexer.New(query)
+	//for {
+	//	token := lexer.Scan()
+	//	fmt.Println(token.Value, token.Type)
+
+	//	if token.Type == sqllexer.EOF {
+	//		break
+	//	}
+	//}
 }
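
Looking ahead to the struct-based parsing described in the README above, here is a minimal sketch of
how the helpers in this change could populate the `Query` struct. `ParseQuery` is a hypothetical name
used for illustration only; it is not part of this patch and relies on the definitions in `main.go`.

```go
// Hypothetical helper (not part of this patch): build a Query value from raw SQL
// using GetQueryTypeFromSql and the Query struct defined in main.go.
func ParseQuery(sql string) Query {
	queryType := GetQueryTypeFromSql(sql)
	return Query{
		Type:    queryType,
		FullSql: sql,
		// Placeholder: "valid" here only means a recognized CRUD command was found;
		// proper validation would come with the full parser.
		IsValid: queryType != NONE,
	}
}
```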