From c79fb6f1653b8a3d7b474e1a5b795b91f45b2c1c Mon Sep 17 00:00:00 2001
From: ysandler
Date: Sun, 30 Mar 2025 20:56:24 -0500
Subject: [PATCH] feat: successfully parse a simple select statement

---
 .aiContexts/SQL_Token_Types.md |  37 ++++++++++
 main.go                        |  89 ++++------------------
 q/query.go                     | 142 +++++++++++++++++++++++++++++++++
 q/select.go                    | 135 +++++++++++++++++++++++++++++++
 4 files changed, 329 insertions(+), 74 deletions(-)
 create mode 100644 .aiContexts/SQL_Token_Types.md
 create mode 100644 q/query.go
 create mode 100644 q/select.go

diff --git a/.aiContexts/SQL_Token_Types.md b/.aiContexts/SQL_Token_Types.md
new file mode 100644
index 0000000..b9d2a73
--- /dev/null
+++ b/.aiContexts/SQL_Token_Types.md
@@ -0,0 +1,37 @@
+These are the SQL Token Types that will serve as the foundation on how we interpret SQL strings
+and create our `Query` structs
+
+```go
+type TokenType int
+
+const (
+	ERROR TokenType = iota // 0
+	EOF // 1
+	SPACE // 2 space or newline
+	STRING // 3 string literal
+	INCOMPLETE_STRING // 4 incomplete string literal so that we can obfuscate it, e.g. 'abc
+	NUMBER // 5 number literal
+	IDENT // 6 identifier column name
+	QUOTED_IDENT // 7 quoted identifier
+	OPERATOR // 8 operator like = > < >= <=
+	WILDCARD // 9 wildcard *
+	COMMENT // 10 comment
+	MULTILINE_COMMENT // 11 multiline comment
+	PUNCTUATION // 12 punctuation such as a comma
+	DOLLAR_QUOTED_FUNCTION // 13 dollar quoted function
+	DOLLAR_QUOTED_STRING // 14 dollar quoted string
+	POSITIONAL_PARAMETER // 15 numbered parameter
+	BIND_PARAMETER // 16 bind parameter
+	FUNCTION // 17 function
+	SYSTEM_VARIABLE // 18 system variable
+	UNKNOWN // 19 unknown token
+	COMMAND // 20 SQL commands like SELECT INSERT UPDATE DELETE
+	KEYWORD // 21 Other SQL keywords like FROM, WHERE, NOT, IS, LIKE
+	JSON_OP // 22 JSON operators
+	BOOLEAN // 23 boolean literal
+	NULL // 24 null literal
+	PROC_INDICATOR // 25 procedure indicator
+	CTE_INDICATOR // 26 CTE indicator
+	ALIAS_INDICATOR // 27 alias indicator
+)
+```
diff --git a/main.go b/main.go
index ac1db8f..cd0c312 100644
--- a/main.go
+++ b/main.go
@@ -2,90 +2,31 @@ package main
 
 import (
 	"fmt"
-	"strings"
-
-	"github.com/DataDog/go-sqllexer"
+	"query-inter/q"
+	// "github.com/DataDog/go-sqllexer"
 )
 
-type QueryType int
-
-const (
-	NONE QueryType = iota
-	SELECT
-	UPDATE
-	INSERT
-	DELETE
-)
-
-type Query struct {
-	Type    QueryType
-	FullSql string
-	IsValid bool
-}
-
-func IsTokenEndOfStatement(token *sqllexer.Token) bool {
-	return (token.Type == sqllexer.EOF || token.Value == ";")
-}
-
-func GetQueryTypeFromToken(token *sqllexer.Token) QueryType {
-	if token.Type != sqllexer.COMMAND {
-		return NONE
-	}
-
-	var foundType QueryType
-	switch strings.ToUpper(token.Value) {
-	case "SELECT":
-		foundType = SELECT
-	case "UPDATE":
-		foundType = UPDATE
-	case "INSERT":
-		foundType = INSERT
-	case "DELETE":
-		foundType = DELETE
-	default:
-		foundType = NONE
-	}
-
-	return foundType
-}
-
-func IsCrudSqlStatement(token *sqllexer.Token) bool {
-	queryType := GetQueryTypeFromToken(token)
-	return (queryType > 0 && queryType <= 4) // TODO: Update if QueryTypes Change
-}
-
-func GetQueryTypeFromSql(sql string) QueryType {
-	var queryType QueryType
-
-	lexer := sqllexer.New(sql)
-	for {
-		token := lexer.Scan()
-		if IsTokenEndOfStatement(token) {
-			break
-		}
-
-		queryType = GetQueryTypeFromToken(token)
-		if queryType > 0 {
-			break
-		}
-	}
-
-	return queryType
-}
-
 func main() {
-	query := "DELETE * FROM users \n WHERE id = something AND SELECT;"
-	newQuery := GetQueryTypeFromSql(query)
+	selectQuery := "SELECT id, name, createDate FROM users WHERE name=1;"
 
-	fmt.Println(newQuery)
+	allStatements := q.ExtractSqlStatmentsFromString(selectQuery)
+	fmt.Println(allStatements)
 
-	//lexer := sqllexer.New(query)
+	//lexer := sqllexer.New(selectQuery)
 	//for {
 	//	token := lexer.Scan()
 	//	fmt.Println(token.Value, token.Type)
-
+	//
 	//	if token.Type == sqllexer.EOF {
 	//		break
 	//	}
 	//}
+
+	for _, sql := range allStatements {
+		query := q.ParseSelectStatement(sql)
+		//fmt.Print(i)
+		//fmt.Println(query)
+		fmt.Println(query.GetFullSql())
+	}
+
 }
diff --git a/q/query.go b/q/query.go
new file mode 100644
index 0000000..38040f2
--- /dev/null
+++ b/q/query.go
@@ -0,0 +1,142 @@
+package q
+
+import (
+	"strings"
+
+	"github.com/DataDog/go-sqllexer"
+)
+
+// Query is any statement representation that can render itself as a SQL
+// string.
+type Query interface {
+	GetFullSql() string
+}
+
+// QueryType enumerates the CRUD commands this package recognizes.
+type QueryType int
+
+// NONE is the zero value, returned when no CRUD command is recognized.
+const (
+	NONE QueryType = iota
+	SELECT
+	UPDATE
+	INSERT
+	DELETE
+)
+
+// Conditional holds the parts of a single `key operator value` comparison.
+type Conditional struct {
+	Key       string
+	Operator  string
+	Value     string
+	DataType  string
+	Extension string // AND, OR, etc
+}
+
+// GetQueryTypeFromToken maps a COMMAND token to its QueryType. Any other
+// token type, or a command other than SELECT/UPDATE/INSERT/DELETE, yields
+// NONE. The command is matched case-insensitively.
+func GetQueryTypeFromToken(token *sqllexer.Token) QueryType {
+	if token.Type != sqllexer.COMMAND {
+		return NONE
+	}
+
+	switch strings.ToUpper(token.Value) {
+	case "SELECT":
+		return SELECT
+	case "UPDATE":
+		return UPDATE
+	case "INSERT":
+		return INSERT
+	case "DELETE":
+		return DELETE
+	default:
+		return NONE
+	}
+}
+
+// IsCrudSqlStatement reports whether token is one of the CRUD commands
+// recognized by GetQueryTypeFromToken.
+func IsCrudSqlStatement(token *sqllexer.Token) bool {
+	queryType := GetQueryTypeFromToken(token)
+	// Bounds are the named constants rather than the literals 0 and 4.
+	return queryType >= SELECT && queryType <= DELETE
+}
+
+// IsTokenBeginingOfStatement reports whether currentToken starts a new
+// statement. previousToken is accepted but not consulted yet.
+func IsTokenBeginingOfStatement(currentToken *sqllexer.Token, previousToken *sqllexer.Token) bool {
+	return IsCrudSqlStatement(currentToken)
+}
+
+// IsTokenEndOfStatement reports whether token terminates a statement: either
+// EOF or a token whose value is ";".
+func IsTokenEndOfStatement(token *sqllexer.Token) bool {
+	return token.Type == sqllexer.EOF || token.Value == ";"
+}
+
+// GetQueryTypeFromSql scans sql and returns the QueryType of the first CRUD
+// command found before the statement ends, or NONE when there is none.
+func GetQueryTypeFromSql(sql string) QueryType {
+	lexer := sqllexer.New(sql)
+	for {
+		token := lexer.Scan()
+		if IsTokenEndOfStatement(token) {
+			return NONE
+		}
+
+		if queryType := GetQueryTypeFromToken(token); queryType != NONE {
+			return queryType
+		}
+	}
+}
+
+// ExtractSqlStatmentsFromString splits sqlString into individual statements.
+// A statement starts at a CRUD command token and runs up to, but does not
+// include, the next ";" or EOF. Tokens seen before a statement has started
+// are dropped.
+func ExtractSqlStatmentsFromString(sqlString string) []string {
+	var foundStatments []string
+
+	isBeginingFound := false
+
+	// lastToken is a copy of the token from the previous iteration. It is read
+	// before being overwritten so the begin-of-statement check receives the
+	// real predecessor instead of a copy of the current token.
+	var lastToken sqllexer.Token
+	var currentWorkingStatment strings.Builder
+
+	lexer := sqllexer.New(sqlString)
+	for {
+		token := lexer.Scan()
+		previousScannedToken := lastToken
+		lastToken = *token
+
+		if IsTokenEndOfStatement(token) {
+			if statement := currentWorkingStatment.String(); strings.TrimSpace(statement) != "" {
+				foundStatments = append(foundStatments, statement)
+			}
+
+			isBeginingFound = false
+			currentWorkingStatment.Reset()
+
+			if token.Type == sqllexer.EOF {
+				break
+			}
+			continue
+		}
+
+		if !isBeginingFound {
+			// TODO: add logic that checks if begining is already found, if so
+			// an error should happen before here
+			if !IsTokenBeginingOfStatement(token, &previousScannedToken) {
+				continue
+			}
+			isBeginingFound = true
+		}
+
+		currentWorkingStatment.WriteString(token.Value)
+	}
+
+	return foundStatments
+}
diff --git a/q/select.go b/q/select.go
new file mode 100644
index 0000000..00334cc
--- /dev/null
+++ b/q/select.go
@@ -0,0 +1,135 @@
+package q
+
+import (
+	"strings"
+
+	"github.com/DataDog/go-sqllexer"
+)
+
+// Select is the parsed form of a simple `SELECT ... FROM ... WHERE ...`
+// statement.
+type Select struct {
+	Table        string
+	Columns      []string
+	Conditionals []Conditional
+	IsWildcard   bool
+}
+
+// GetFullSql renders the Select back into a SQL string. Columns are joined
+// with ", ", or replaced by "*" when IsWildcard is set, and the WHERE keyword
+// is written only when there is at least one conditional.
+func (q *Select) GetFullSql() string {
+	parts := []string{"SELECT"}
+
+	if q.IsWildcard {
+		parts = append(parts, "*")
+	} else if len(q.Columns) > 0 {
+		parts = append(parts, strings.Join(q.Columns, ", "))
+	}
+
+	parts = append(parts, "FROM "+q.Table)
+
+	if len(q.Conditionals) > 0 {
+		// Without the keyword the conditions would directly follow the table
+		// name and the result would not be valid SQL.
+		parts = append(parts, "WHERE")
+	}
+	for _, condition := range q.Conditionals {
+		// TODO: need to account for `AND` and `OR`s and stuff
+		parts = append(parts, condition.Key, condition.Operator, condition.Value)
+	}
+
+	return strings.Join(parts, " ")
+}
+
+// ParseSelectStatement extracts the table, columns and WHERE conditionals
+// from a single SELECT statement. Scanning stops at the first ";" or EOF. If
+// the first non-space token is not SELECT, an empty Select is returned.
+func ParseSelectStatement(sql string) Select {
+	query := Select{}
+
+	passedSELECT := false
+	passedColumns := false
+	passedFROM := false
+	passedTable := false
+	passedWHERE := false
+
+	workingConditional := Conditional{}
+
+	lexer := sqllexer.New(sql)
+	for {
+		token := lexer.Scan()
+		if IsTokenEndOfStatement(token) {
+			break
+		}
+
+		if !passedSELECT {
+			// Leading whitespace must not be mistaken for a statement that is
+			// not a SELECT.
+			if token.Type == sqllexer.SPACE {
+				continue
+			}
+			if strings.ToUpper(token.Value) != "SELECT" {
+				break
+			}
+			passedSELECT = true
+			continue
+		}
+
+		if !passedColumns {
+			switch token.Type {
+			case sqllexer.WILDCARD:
+				passedColumns = true
+				query.Columns = nil
+				query.IsWildcard = true
+			case sqllexer.IDENT:
+				// Stored on query right away so the columns are kept even
+				// when the statement ends directly after the column list.
+				query.Columns = append(query.Columns, token.Value)
+				continue
+			case sqllexer.PUNCTUATION, sqllexer.SPACE:
+				continue
+			default:
+				passedColumns = true // TODO: make sure that I should be doing this
+			}
+		}
+
+		if !passedFROM {
+			if strings.ToUpper(token.Value) != "FROM" {
+				continue // TODO: make sure to check for other keywords that are allowed
+			}
+			passedFROM = true
+		}
+
+		if !passedTable {
+			if token.Type != sqllexer.IDENT {
+				continue
+			}
+			passedTable = true
+			query.Table = token.Value
+		}
+
+		if !passedWHERE {
+			if strings.ToUpper(token.Value) != "WHERE" {
+				continue
+			}
+			passedWHERE = true
+		}
+
+		switch token.Type {
+		case sqllexer.IDENT:
+			workingConditional.Key = token.Value
+		case sqllexer.OPERATOR:
+			workingConditional.Operator = token.Value
+		case sqllexer.BOOLEAN, sqllexer.NULL, sqllexer.STRING, sqllexer.NUMBER:
+			workingConditional.Value = token.Value
+		} // TODO: add capture for data type
+
+		if workingConditional.Key != "" && workingConditional.Operator != "" && workingConditional.Value != "" {
+			query.Conditionals = append(query.Conditionals, workingConditional)
+			workingConditional = Conditional{}
+		}
+	}
+
+	return query
+}