Compare commits

2 Commits

8e07b63877 ... 87bc13631a
| Author | SHA1 | Date |
|---|---|---|
| | 87bc13631a | |
| | e08c84de3a | |

README.md (101 changed lines)
@@ -1,5 +1,9 @@
# query-interpreter

**README LAST UPDATED: 04-15-25**

    This project is under active development and is subject to change often and drastically as I am likely an idiot.

Core program to interpret query language strings into structured data, and back again.

## Data Structure Philosophy
@@ -27,54 +31,69 @@ new statements altogether.
## SQL Tokens

We are currently using DataDog's SQL Tokenizer `sqllexer` to scan through SQL strings.
Here are the general token types it defines:
The general token types it defines can be found [here](/docs/SQL_Token_Types.md)
```go
type TokenType int

const (
	ERROR TokenType = iota
	EOF
	SPACE                  // space or newline
	STRING                 // string literal
	INCOMPLETE_STRING      // incomplete string literal so that we can obfuscate it, e.g. 'abc
	NUMBER                 // number literal
	IDENT                  // identifier
	QUOTED_IDENT           // quoted identifier
	OPERATOR               // operator
	WILDCARD               // wildcard *
	COMMENT                // comment
	MULTILINE_COMMENT      // multiline comment
	PUNCTUATION            // punctuation
	DOLLAR_QUOTED_FUNCTION // dollar quoted function
	DOLLAR_QUOTED_STRING   // dollar quoted string
	POSITIONAL_PARAMETER   // numbered parameter
	BIND_PARAMETER         // bind parameter
	FUNCTION               // function
	SYSTEM_VARIABLE        // system variable
	UNKNOWN                // unknown token
	COMMAND                // SQL commands like SELECT, INSERT
	KEYWORD                // Other SQL keywords
	JSON_OP                // JSON operators
	BOOLEAN                // boolean literal
	NULL                   // null literal
	PROC_INDICATOR         // procedure indicator
	CTE_INDICATOR          // CTE indicator
	ALIAS_INDICATOR        // alias indicator
)
```

These are an OK generalizer to start with when trying to parse out SQL, but can not be used 
without conditional logic that checks what the actual keywords are.
without some extra conditional logic that checks what the actual values are.
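To make "checking what the actual values are" concrete, here is a minimal, self-contained sketch (my illustration, not code from this repo) that classifies a statement by the value of its COMMAND token; it assumes the lexer's `New`/`Scan` loop as used elsewhere in this codebase.

```go
package main

import (
	"fmt"
	"strings"

	"github.com/DataDog/go-sqllexer"
)

// classify shows why token types alone are not enough: COMMAND only says
// "this is a command keyword", so we still inspect token.Value to learn which one.
func classify(sql string) string {
	lexer := sqllexer.New(sql)
	for {
		token := lexer.Scan()
		if token.Type == sqllexer.EOF {
			return "UNKNOWN"
		}
		if token.Type != sqllexer.COMMAND {
			continue
		}
		switch keyword := strings.ToUpper(token.Value); keyword {
		case "SELECT", "INSERT", "UPDATE", "DELETE":
			return keyword
		}
	}
}

func main() {
	fmt.Println(classify("SELECT id, name FROM users WHERE id = 1;")) // SELECT
	fmt.Println(classify("DROP TABLE users;"))                        // UNKNOWN (not a CRUD statement)
}
```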
Currently we scan through the strings to tokenize it. When steping through the tokens we try
to determine the type of query we are working with. At that point we assume the over structure
Currently we scan through the strings to tokenize it. When stepping through the tokens we try
to determine the type of query we are working with. At that point we assume the over all structure
of the rest of the of the statement to fit a particular format, then parse out the details of
the statement into the struct correlating to its data type.

Checkout the function `ParseSelectStatement` from `q/select.go` as an example.
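As a usage sketch, calling it could look like the following; the import path is hypothetical and the printed values are what the parser is expected to produce, not captured output.

```go
package main

import (
	"fmt"

	// Hypothetical import path; replace with this repo's actual module path from go.mod.
	"example.com/query-interpreter/q"
)

func main() {
	sel := q.ParseSelectStatement("SELECT name, email FROM users WHERE id = 1 ORDER BY name DESC")

	// Roughly what we expect the parser to fill in (exact contents depend on the implementation):
	fmt.Println(sel.Table)        // users
	fmt.Println(sel.Columns)      // [name email]
	fmt.Println(sel.Conditionals) // a Conditional with Key "id", Operator "=", Value "1"
	fmt.Println(sel.OrderBys)     // an OrderBy with Key "name", IsDescend true
}
```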
## Scan State

As stated, we scan through the strings, processing each chunk, delineated by spaces and
punctuation, as a token. To properly interpret the tokens from their broad `token.Type`s, we
have to keep state of what else we have processed so far.

This state is determined by a set of flags depending on query type.

For example, a Select query will have:
```go
	passedSELECT := false
	passedColumns := false
	passedFROM := false
	passedTable := false
	passedWHERE := false
	passedConditionals := false
	passedOrderByKeywords := false
	passesOrderByColumns := false
```

The general philosophy for these flags is to name, and use, them in the context of what has
already been processed through the scan, making naming and reading new flags trivial.
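A tiny, self-contained illustration of that naming philosophy (simplified compared to `q/select.go`, and again assuming the lexer's `New`/`Scan` API): once `passedFROM` is set, later branches read as plain statements about where the scan currently is.

```go
package main

import (
	"fmt"
	"strings"

	"github.com/DataDog/go-sqllexer"
)

// tableOf pulls out the table name: the flag records that FROM has already
// been processed, so the next identifier must be the table.
func tableOf(sql string) string {
	passedFROM := false
	lexer := sqllexer.New(sql)
	for {
		token := lexer.Scan()
		if token.Type == sqllexer.EOF {
			return ""
		}
		switch {
		case !passedFROM && strings.ToUpper(token.Value) == "FROM":
			passedFROM = true
		case passedFROM && token.Type == sqllexer.IDENT:
			return token.Value
		}
	}
}

func main() {
	fmt.Println(tableOf("SELECT id, name FROM users WHERE id = 1")) // users
}
```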
A `Select` object is shaped as follows:
```go
type Select struct {
	Table        string
	Columns      []string
	Conditionals []Conditional
	OrderBys     []OrderBy
	IsWildcard   bool
	IsDistinct   bool
}

// dependency in query.go
type Conditional struct {
	Key       string
	Operator  string
	Value     string
	DataType  string
	Extension string // AND, OR, etc
}

type OrderBy struct {
	Key       string
	IsDescend bool // SQL queries with no ASC|DESC on their ORDER BY are ASC by default, hence why this bool for the opposite
}
```
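To make the mapping concrete, a query such as `SELECT DISTINCT name FROM users WHERE age > 21 ORDER BY name DESC` should reduce to roughly the following value; this literal is hand-written for illustration (hypothetical import path, assumed field contents), not output captured from the parser.

```go
package main

import (
	"fmt"

	// Hypothetical import path; replace with this repo's actual module path.
	"example.com/query-interpreter/q"
)

func main() {
	// Hand-built equivalent of:
	//   SELECT DISTINCT name FROM users WHERE age > 21 ORDER BY name DESC
	want := q.Select{
		Table:      "users",
		Columns:    []string{"name"},
		IsDistinct: true,
		Conditionals: []q.Conditional{
			{Key: "age", Operator: ">", Value: "21"},
		},
		OrderBys: []q.OrderBy{
			{Key: "name", IsDescend: true},
		},
	}
	fmt.Printf("%+v\n", want)
}
```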
## Improvement Possibilities

- Want to cut down as many `scan`s as possible by injecting functional dependencies
- Maybe utilize the `lookBehindBuffer` more to cut down the number of state flags in the scans? (see the sketch below)
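As a rough sketch of that second idea (my illustration, not repo code): push each significant token into a small look-behind window and ask "was the previous token FROM?" instead of threading a flag through the loop. It reuses the shift pattern of `unshiftBuffer` from `q/select.go` and assumes the lexer's `New`/`Scan` API.

```go
package main

import (
	"fmt"
	"strings"

	"github.com/DataDog/go-sqllexer"
)

// push shifts the newest token into index 0, the same way unshiftBuffer
// in q/select.go keeps the most recent tokens at the front.
func push(buf *[10]sqllexer.Token, tok sqllexer.Token) {
	for i := 9; i >= 1; i-- {
		buf[i] = buf[i-1]
	}
	buf[0] = tok
}

// tableAfterFROM finds the table by looking one token behind instead of
// carrying a passedFROM flag.
func tableAfterFROM(sql string) string {
	var lookBehind [10]sqllexer.Token
	lexer := sqllexer.New(sql)
	for {
		token := lexer.Scan()
		if token.Type == sqllexer.EOF {
			return ""
		}
		if token.Type == sqllexer.SPACE {
			continue // keep only significant tokens in the window
		}
		if token.Type == sqllexer.IDENT && strings.ToUpper(lookBehind[0].Value) == "FROM" {
			return token.Value
		}
		push(&lookBehind, *token)
	}
}

func main() {
	fmt.Println(tableAfterFROM("SELECT id, name FROM users WHERE id = 1")) // users
}
```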
@@ -24,7 +24,7 @@ type Conditional struct {
	Key       string
	Operator  string
	Value     string
	DataType  string
	DataType  string // TODO: not something we can parse from string, but find a way to determine this later
	Extension string // AND, OR, etc
}
@@ -52,7 +52,7 @@ func GetQueryTypeFromToken(token *sqllexer.Token) QueryType {

func IsCrudSqlStatement(token *sqllexer.Token) bool {
	queryType := GetQueryTypeFromToken(token)
	return (queryType > 0 && queryType <= 4) // TODO:  Update if QueryTypes Change
	return (queryType > 0 && queryType <= 4)
}

func IsTokenBeginingOfStatement(currentToken *sqllexer.Token, previousToken *sqllexer.Token) bool {
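A possible usage sketch for the helpers above, assuming they are exported from package `q` (the import path is hypothetical): lex a statement, take its leading command token, and ask whether it is a CRUD statement.

```go
package main

import (
	"fmt"

	"github.com/DataDog/go-sqllexer"
	// Hypothetical import path; replace with this repo's actual module path.
	"example.com/query-interpreter/q"
)

func main() {
	lexer := sqllexer.New("DELETE FROM users WHERE id = 1")
	token := lexer.Scan() // first token should be the DELETE command keyword

	fmt.Println(q.GetQueryTypeFromToken(token)) // some CRUD QueryType value
	fmt.Println(q.IsCrudSqlStatement(token))    // true
}
```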
@@ -114,10 +114,9 @@ func ExtractSqlStatmentsFromString(sqlString string) []string {
			} else {
				continue
			}

		}

		if !isBeginingFound && IsTokenBeginingOfStatement(token, &previousScannedToken) { // TODO: add logic that checks if begining is already found, if so an error should happen before here
		if !isBeginingFound && IsTokenBeginingOfStatement(token, &previousScannedToken) {
			isBeginingFound = true
		} else if !isBeginingFound {
			continue
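And a usage sketch for `ExtractSqlStatmentsFromString` (same hypothetical import path): split a string holding several statements before handing each one to its per-type parser.

```go
package main

import (
	"fmt"

	// Hypothetical import path; replace with this repo's actual module path.
	"example.com/query-interpreter/q"
)

func main() {
	sql := "SELECT id FROM users; DELETE FROM users WHERE id = 1;"

	// Expected to yield one string per statement, which can then be fed to
	// the per-type parsers such as ParseSelectStatement.
	for _, statement := range q.ExtractSqlStatmentsFromString(sql) {
		fmt.Println(statement)
	}
}
```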
q/select.go (11 changed lines)

@@ -1,13 +1,11 @@
package q

import (
	//	"fmt"
	"strings"

	"github.com/DataDog/go-sqllexer"
)

// 126 rich mar drive
type Select struct {
	Table        string
	Columns      []string
@@ -60,7 +58,6 @@ func mutateSelectFromKeyword(query *Select, keyword string) {
	}
}

// TODO: make this an array of tokens instead
func unshiftBuffer(buf *[10]sqllexer.Token, value sqllexer.Token) {
	for i := 9; i >= 1; i-- {
		buf[i] = buf[i-1]
@@ -80,9 +77,8 @@ func ParseSelectStatement(sql string) Select {
	passedConditionals := false
	passedOrderByKeywords := false
	passesOrderByColumns := false
	//checkForOrderDirection := false

	lookBehindBuffer := [10]sqllexer.Token{} // TODO: make this an array of tokens instead
	lookBehindBuffer := [10]sqllexer.Token{}
	var workingConditional = Conditional{}

	var columns []string
@@ -124,7 +120,8 @@ func ParseSelectStatement(sql string) Select {
			}
		}

		if !passedFROM && strings.ToUpper(token.Value) == "FROM" { // TODO: make sure to check for other keywords that are allowed
		// TODO: make sure to check for other keywords that are allowed
		if !passedFROM && strings.ToUpper(token.Value) == "FROM" {
			passedFROM = true
			continue
		}
@@ -149,7 +146,7 @@ func ParseSelectStatement(sql string) Select {
				workingConditional.Operator = token.Value
			} else if token.Type == sqllexer.BOOLEAN || token.Type == sqllexer.NULL || token.Type == sqllexer.STRING || token.Type == sqllexer.NUMBER {
				workingConditional.Value = token.Value
			} // TODO: add captire for data type
			}

			if workingConditional.Key != "" && workingConditional.Operator != "" && workingConditional.Value != "" {
				query.Conditionals = append(query.Conditionals, workingConditional)