feat: successfully parse a simple select statement

This commit is contained in:
Yehoshua Sandler 2025-03-30 20:56:24 -05:00
parent d3f9600d58
commit c79fb6f165
4 changed files with 305 additions and 74 deletions

View File

@ -0,0 +1,37 @@
These are the SQL token types that serve as the foundation for how we interpret SQL strings
and create our `Query` structs:
```go
// TokenType identifies the lexical category assigned to a token of a SQL string.
type TokenType int
// Token categories, numbered consecutively from 0 by iota. The number at the
// start of each trailing comment is the resulting value of that constant, so
// inserting or reordering entries renumbers everything after the change.
const (
ERROR TokenType = iota // 0
EOF // 1
SPACE // 2 space or newline
STRING // 3 string literal
INCOMPLETE_STRING // 4 incomplete string literal so that we can obfuscate it, e.g. 'abc
NUMBER // 5 number literal
IDENT // 6 identifier column name
QUOTED_IDENT // 7 quoted identifier
OPERATOR // 8 operator like = > < >= <=
WILDCARD // 9 wildcard *
COMMENT // 10 comment
MULTILINE_COMMENT // 11 multiline comment
PUNCTUATION // 12 punctuation such as a comma
DOLLAR_QUOTED_FUNCTION // 13 dollar quoted function
DOLLAR_QUOTED_STRING // 14 dollar quoted string
POSITIONAL_PARAMETER // 15 numbered parameter
BIND_PARAMETER // 16 bind parameter
FUNCTION // 17 function
SYSTEM_VARIABLE // 18 system variable
UNKNOWN // 19 unknown token
COMMAND // 20 SQL commands like SELECT INSERT UPDATE DELETE
KEYWORD // 21 Other SQL keywords like FROM, WHERE, NOT, IS, LIKE
JSON_OP // 22 JSON operators
BOOLEAN // 23 boolean literal
NULL // 24 null literal
PROC_INDICATOR // 25 procedure indicator
CTE_INDICATOR // 26 CTE indicator
ALIAS_INDICATOR // 27 alias indicator
)
```

89
main.go
View File

@ -2,90 +2,31 @@ package main
import (
"fmt"
"strings"
"github.com/DataDog/go-sqllexer"
"query-inter/q"
// "github.com/DataDog/go-sqllexer"
)
// QueryType enumerates the kinds of SQL statement this program distinguishes.
type QueryType int
// NONE is the zero value and is what GetQueryTypeFromToken returns for any
// token that is not one of the four commands below. IsCrudSqlStatement relies
// on SELECT through DELETE occupying the values 1 through 4, so keep that
// range check in sync when adding or reordering entries.
const (
NONE QueryType = iota
SELECT
UPDATE
INSERT
DELETE
)
// Query groups a statement's QueryType with its SQL text and a validity flag.
// NOTE(review): nothing in the visible code constructs or reads this struct —
// confirm it is still needed.
type Query struct {
Type QueryType // kind of statement
FullSql string // presumably the complete SQL text of the statement — confirm
IsValid bool // presumably whether the statement parsed successfully — confirm
}
// IsTokenEndOfStatement reports whether token terminates the current SQL
// statement: either the lexer's EOF token or a token whose text is ";".
func IsTokenEndOfStatement(token *sqllexer.Token) bool {
	if token.Type == sqllexer.EOF {
		return true
	}
	return token.Value == ";"
}
// GetQueryTypeFromToken maps a COMMAND token to its QueryType. The token text
// is matched case-insensitively against SELECT, UPDATE, INSERT and DELETE.
// Any other command, and any token that is not a COMMAND, yields NONE.
func GetQueryTypeFromToken(token *sqllexer.Token) QueryType {
	if token.Type == sqllexer.COMMAND {
		switch strings.ToUpper(token.Value) {
		case "SELECT":
			return SELECT
		case "UPDATE":
			return UPDATE
		case "INSERT":
			return INSERT
		case "DELETE":
			return DELETE
		}
	}
	return NONE
}
// IsCrudSqlStatement reports whether token is one of the recognised CRUD
// commands, i.e. whether GetQueryTypeFromToken maps it to a value in 1..4.
func IsCrudSqlStatement(token *sqllexer.Token) bool {
	kind := GetQueryTypeFromToken(token)
	if kind <= 0 {
		return false
	}
	// TODO: Update if QueryTypes Change
	return kind <= 4
}
// GetQueryTypeFromSql scans sql token by token and returns the QueryType of
// the first recognised CRUD command. Scanning stops at the first statement
// terminator (";" or end of input); if no command was seen by then the
// result is the zero QueryType.
func GetQueryTypeFromSql(sql string) QueryType {
	lexer := sqllexer.New(sql)
	for token := lexer.Scan(); !IsTokenEndOfStatement(token); token = lexer.Scan() {
		if found := GetQueryTypeFromToken(token); found > 0 {
			return found
		}
	}
	var none QueryType
	return none
}
// main is a small demonstration driver. It prints the QueryType detected for
// a sample DELETE string, then splits a sample SELECT string into statements,
// prints that slice, and finally prints each statement after a round trip
// through ParseSelectStatement and GetFullSql.
func main() {
	deleteSql := "DELETE * FROM users \n WHERE id = something AND SELECT;"
	selectSql := "SELECT id, name, createDate FROM users WHERE name=1;"

	fmt.Println(GetQueryTypeFromSql(deleteSql))

	statements := q.ExtractSqlStatmentsFromString(selectSql)
	fmt.Println(statements)

	for i := range statements {
		parsed := q.ParseSelectStatement(statements[i])
		fmt.Println(parsed.GetFullSql())
	}
}

130
q/query.go Normal file
View File

@ -0,0 +1,130 @@
package q
import (
"strings"
"github.com/DataDog/go-sqllexer"
)
// Query is the behaviour shared by parsed statement types: each can render
// itself as a SQL string. *Select satisfies it.
type Query interface {
// GetFullSql returns the statement as SQL text.
GetFullSql() string
}
// QueryType enumerates the kinds of SQL statement this package distinguishes.
type QueryType int
// NONE is the zero value and is what GetQueryTypeFromToken returns for any
// token that is not one of the four commands below. IsCrudSqlStatement relies
// on SELECT through DELETE occupying the values 1 through 4, so keep that
// range check in sync when adding or reordering entries.
const (
NONE QueryType = iota
SELECT
UPDATE
INSERT
DELETE
)
// Conditional holds one comparison collected from a WHERE clause.
// ParseSelectStatement fills Key, Operator and Value from the text of the
// matching tokens.
type Conditional struct {
Key string // text of the identifier token being compared
Operator string // text of the operator token
Value string // text of the boolean, null, string or number literal token
DataType string // NOTE(review): presumably the type of Value; confirm how it is meant to be filled
Extension string // AND, OR, etc
}
// GetQueryTypeFromToken maps a COMMAND token to its QueryType. The token text
// is matched case-insensitively against SELECT, UPDATE, INSERT and DELETE.
// Any other command, and any token that is not a COMMAND, yields NONE.
func GetQueryTypeFromToken(token *sqllexer.Token) QueryType {
	if token.Type == sqllexer.COMMAND {
		switch strings.ToUpper(token.Value) {
		case "SELECT":
			return SELECT
		case "UPDATE":
			return UPDATE
		case "INSERT":
			return INSERT
		case "DELETE":
			return DELETE
		}
	}
	return NONE
}
// IsCrudSqlStatement reports whether token is one of the recognised CRUD
// commands, i.e. whether GetQueryTypeFromToken maps it to a value in 1..4.
func IsCrudSqlStatement(token *sqllexer.Token) bool {
	kind := GetQueryTypeFromToken(token)
	if kind <= 0 {
		return false
	}
	// TODO: Update if QueryTypes Change
	return kind <= 4
}
// IsTokenBeginingOfStatement reports whether currentToken can open a new SQL
// statement, which today means it is a CRUD command. previousToken is accepted
// but not consulted.
func IsTokenBeginingOfStatement(currentToken *sqllexer.Token, previousToken *sqllexer.Token) bool {
	_ = previousToken
	startsStatement := IsCrudSqlStatement(currentToken)
	return startsStatement
}
// IsTokenEndOfStatement reports whether token terminates the current SQL
// statement: either the lexer's EOF token or a token whose text is ";".
func IsTokenEndOfStatement(token *sqllexer.Token) bool {
	if token.Type == sqllexer.EOF {
		return true
	}
	return token.Value == ";"
}
// GetQueryTypeFromSql scans sql token by token and returns the QueryType of
// the first recognised CRUD command. Scanning stops at the first statement
// terminator (";" or end of input); if no command was seen by then the
// result is the zero QueryType.
func GetQueryTypeFromSql(sql string) QueryType {
	lexer := sqllexer.New(sql)
	for token := lexer.Scan(); !IsTokenEndOfStatement(token); token = lexer.Scan() {
		if found := GetQueryTypeFromToken(token); found > 0 {
			return found
		}
	}
	var none QueryType
	return none
}
// ExtractSqlStatmentsFromString splits sqlString into individual statements.
//
// A statement begins at the first token accepted by IsTokenBeginingOfStatement
// and runs up to, but not including, the next ";" or the end of input. Tokens
// that appear before a statement has begun are discarded. Each returned string
// is the concatenation of the statement's token values, so interior whitespace
// is preserved exactly as lexed. The result is nil when no statement is found.
func ExtractSqlStatmentsFromString(sqlString string) []string {
	var foundStatments []string
	var currentStatement strings.Builder
	// previousToken is a copy, not the pointer returned by Scan, so it stays
	// valid after the next Scan call. It is the zero Token before the first scan.
	var previousToken sqllexer.Token
	inStatement := false

	lexer := sqllexer.New(sqlString)
	for {
		token := lexer.Scan()

		if IsTokenEndOfStatement(token) {
			if statement := currentStatement.String(); strings.TrimSpace(statement) != "" {
				foundStatments = append(foundStatments, statement)
			}
			currentStatement.Reset()
			inStatement = false

			if token.Type == sqllexer.EOF {
				break
			}
			previousToken = *token
			continue
		}

		// TODO: a second statement-opening token inside an open statement is
		// currently appended as ordinary text; decide whether it should be an error.
		if !inStatement && IsTokenBeginingOfStatement(token, &previousToken) {
			inStatement = true
		}
		if inStatement {
			currentStatement.WriteString(token.Value)
		}

		// Record the token only after it has been handled, so the next
		// iteration really sees its predecessor.
		previousToken = *token
	}

	return foundStatments
}

123
q/select.go Normal file
View File

@ -0,0 +1,123 @@
package q
import (
"strings"
"github.com/DataDog/go-sqllexer"
)
// Select is the parsed form of a SELECT statement, as produced by
// ParseSelectStatement. *Select renders back to SQL through GetFullSql.
type Select struct {
Table string // text of the identifier recorded as the table after FROM
Columns []string // selected column identifiers in source order; GetFullSql ignores it when IsWildcard is true
Conditionals []Conditional // comparisons collected after WHERE
IsWildcard bool // true when a `*` token appeared in the column list
}
// GetFullSql renders the Select as a single-line SQL string of the form
//
//	SELECT <columns | *> FROM <table> [WHERE <key> <op> <value> [<ext> <key> <op> <value>]...]
//
// Columns are joined with ", " and are ignored when IsWildcard is set. The
// WHERE keyword is emitted before the first conditional. For every later
// conditional its Extension (AND, OR, ...) is emitted in front of it when
// non-empty. No connector is invented when Extension is empty, because
// guessing AND or OR could silently change the meaning of the query.
func (q *Select) GetFullSql() string {
	// SELECT, column list, FROM clause, then up to four parts per conditional.
	parts := make([]string, 0, 3+4*len(q.Conditionals))

	parts = append(parts, "SELECT")
	switch {
	case q.IsWildcard:
		parts = append(parts, "*")
	case len(q.Columns) > 0:
		parts = append(parts, strings.Join(q.Columns, ", "))
	}

	parts = append(parts, "FROM "+q.Table)

	for i, condition := range q.Conditionals {
		switch {
		case i == 0:
			parts = append(parts, "WHERE")
		case condition.Extension != "":
			parts = append(parts, condition.Extension)
		}
		parts = append(parts, condition.Key, condition.Operator, condition.Value)
	}

	return strings.Join(parts, " ")
}
// ParseSelectStatement lexes sql and extracts a simple SELECT statement into
// a Select: the column list (or wildcard), the first identifier after FROM as
// the table, and the `key operator value` comparisons that follow WHERE.
//
// Parsing is best-effort and stops at the first ";" or at end of input:
//   - Whitespace and comments before SELECT are skipped. If the first
//     significant token is not SELECT, an empty Select is returned.
//   - A comparison whose right-hand side is an identifier (a = b) is kept,
//     with the identifier text stored in Value.
//   - AND / OR between comparisons is stored in the Extension of the
//     comparison that follows it. Seeing a connector also discards any
//     half-built comparison, so fragments of an unsupported predicate
//     (IS NULL, LIKE, IN, ...) cannot leak into the next one.
//
// TODO: capture Conditional.DataType.
func ParseSelectStatement(sql string) Select {
	query := Select{}

	passedSELECT := false
	passedColumns := false
	passedFROM := false
	passedTable := false
	passedWHERE := false

	var workingConditional Conditional
	var columns []string

	lexer := sqllexer.New(sql)
	for {
		token := lexer.Scan()
		if IsTokenEndOfStatement(token) {
			break
		}

		if !passedSELECT {
			// Tolerate leading whitespace and comments instead of aborting.
			if token.Type == sqllexer.SPACE || token.Type == sqllexer.COMMENT || token.Type == sqllexer.MULTILINE_COMMENT {
				continue
			}
			if !strings.EqualFold(token.Value, "SELECT") {
				break
			}
			passedSELECT = true
			continue
		}

		if !passedColumns {
			switch token.Type {
			case sqllexer.WILDCARD:
				passedColumns = true
				query.IsWildcard = true
			case sqllexer.IDENT:
				columns = append(columns, token.Value)
				continue
			case sqllexer.PUNCTUATION, sqllexer.SPACE:
				continue
			default:
				// Any other token ends the column list. It falls through so
				// that a FROM keyword is still recognised below.
				passedColumns = true
				query.Columns = columns
			}
		}

		if !passedFROM {
			if !strings.EqualFold(token.Value, "FROM") {
				continue // TODO: check for other keywords that are allowed here
			}
			passedFROM = true
		}

		if !passedTable {
			if token.Type != sqllexer.IDENT {
				continue
			}
			passedTable = true
			query.Table = token.Value
		}

		if !passedWHERE {
			if !strings.EqualFold(token.Value, "WHERE") {
				continue
			}
			passedWHERE = true
		}

		// Matched on the token text rather than its type, in the same way FROM
		// and WHERE are matched above. String literals keep their quotes in
		// Value, so a literal 'and' cannot match.
		if strings.EqualFold(token.Value, "AND") || strings.EqualFold(token.Value, "OR") {
			workingConditional = Conditional{Extension: token.Value}
			continue
		}

		switch token.Type {
		case sqllexer.IDENT:
			if workingConditional.Key != "" && workingConditional.Operator != "" {
				// An identifier after `key operator` is the right-hand side.
				workingConditional.Value = token.Value
			} else {
				workingConditional.Key = token.Value
			}
		case sqllexer.OPERATOR:
			workingConditional.Operator = token.Value
		case sqllexer.BOOLEAN, sqllexer.NULL, sqllexer.STRING, sqllexer.NUMBER:
			workingConditional.Value = token.Value
		}

		if workingConditional.Key != "" && workingConditional.Operator != "" && workingConditional.Value != "" {
			query.Conditionals = append(query.Conditionals, workingConditional)
			workingConditional = Conditional{}
		}
	}

	// The statement ended while still inside the column list (e.g. "SELECT id;"),
	// so the collected columns have not been stored yet.
	if !passedColumns {
		query.Columns = columns
	}

	return query
}