thinking through data structure, please hold

parent 518316c4f7
commit d3f9600d58

README.md: 45 changed lines
@@ -1,3 +1,48 @@
 # query-interpreter
 
 Core program to interpret query language strings into structured data, and back again.
+
+## SQL Tokens
+
+We are currently using DataDog's SQL Tokenizer `sqllexer` to scan through SQL strings.
+Here are the general token types it defines:
+
+```go
+type TokenType int
+
+const (
+	ERROR TokenType = iota
+	EOF
+	SPACE                  // space or newline
+	STRING                 // string literal
+	INCOMPLETE_STRING      // incomplete string literal so that we can obfuscate it, e.g. 'abc
+	NUMBER                 // number literal
+	IDENT                  // identifier
+	QUOTED_IDENT           // quoted identifier
+	OPERATOR               // operator
+	WILDCARD               // wildcard *
+	COMMENT                // comment
+	MULTILINE_COMMENT      // multiline comment
+	PUNCTUATION            // punctuation
+	DOLLAR_QUOTED_FUNCTION // dollar quoted function
+	DOLLAR_QUOTED_STRING   // dollar quoted string
+	POSITIONAL_PARAMETER   // numbered parameter
+	BIND_PARAMETER         // bind parameter
+	FUNCTION               // function
+	SYSTEM_VARIABLE        // system variable
+	UNKNOWN                // unknown token
+	COMMAND                // SQL commands like SELECT, INSERT
+	KEYWORD                // Other SQL keywords
+	JSON_OP                // JSON operators
+	BOOLEAN                // boolean literal
+	NULL                   // null literal
+	PROC_INDICATOR         // procedure indicator
+	CTE_INDICATOR          // CTE indicator
+	ALIAS_INDICATOR        // alias indicator
+)
+
+```
+
+Based on these different token types we will be able to parse the details of a given query into
+structs that represent it. From those structs we will be able to modify the query any way
+we choose and reconstruct it into a valid SQL statement.
main.go: 118 changed lines
@@ -2,82 +2,90 @@ package main
 
 import (
 	"fmt"
+	"strings"
 
 	"github.com/DataDog/go-sqllexer"
 )
 
-type TokenGroupType int
+type QueryType int
 
 const (
-	TOKEN_OTHER   TokenGroupType = iota
-	TOKEN_ACT                    // SELECT, CREATE, etc
-	TOKEN_FILTER                 // WHERE, AND, etc
-	TOKEN_RELATE                 // joins
-	TOKEN_DEFINE                 // column data types, etc
-	TOKEN_CONTROL                // sorting, etc
+	NONE QueryType = iota
+	SELECT
+	UPDATE
+	INSERT
+	DELETE
 )
 
-var tokenGroupMap = map[sqllexer.TokenType]TokenGroupType{
-	sqllexer.ERROR:                  TOKEN_OTHER,
-	sqllexer.EOF:                    TOKEN_OTHER,
-	sqllexer.SPACE:                  TOKEN_OTHER,
-	sqllexer.STRING:                 TOKEN_DEFINE,
-	sqllexer.INCOMPLETE_STRING:      TOKEN_DEFINE,
-	sqllexer.NUMBER:                 TOKEN_DEFINE,
-	sqllexer.IDENT:                  TOKEN_ACT,    // for table/column names
-	sqllexer.QUOTED_IDENT:           TOKEN_RELATE, // for joins
-	sqllexer.OPERATOR:               TOKEN_FILTER,
-	sqllexer.WILDCARD:               TOKEN_ACT,
-	sqllexer.COMMENT:                TOKEN_OTHER,
-	sqllexer.MULTILINE_COMMENT:      TOKEN_OTHER,
-	sqllexer.PUNCTUATION:            TOKEN_OTHER,
-	sqllexer.DOLLAR_QUOTED_FUNCTION: TOKEN_DEFINE,
-	sqllexer.DOLLAR_QUOTED_STRING:   TOKEN_DEFINE,
-	sqllexer.POSITIONAL_PARAMETER:   TOKEN_FILTER,
-	sqllexer.BIND_PARAMETER:         TOKEN_FILTER,
-	sqllexer.FUNCTION:               TOKEN_DEFINE,
-	sqllexer.SYSTEM_VARIABLE:        TOKEN_DEFINE,
-	sqllexer.UNKNOWN:                TOKEN_OTHER,
-	sqllexer.COMMAND:                TOKEN_ACT,
-	sqllexer.KEYWORD:                TOKEN_ACT,
-	sqllexer.JSON_OP:                TOKEN_DEFINE,
-	sqllexer.BOOLEAN:                TOKEN_DEFINE,
-	sqllexer.NULL:                   TOKEN_DEFINE,
-	sqllexer.PROC_INDICATOR:         TOKEN_OTHER,
-	sqllexer.CTE_INDICATOR:          TOKEN_OTHER,
-	sqllexer.ALIAS_INDICATOR:        TOKEN_OTHER,
+type Query struct {
+	Type    QueryType
+	FullSql string
+	IsValid bool
 }
 
-func GetTokenGroupType(tokenType sqllexer.TokenType) (TokenGroupType, bool, bool) {
-	group, exists := tokenGroupMap[tokenType]
-	isStart := false
-	isEnd := false
-
-	if !exists {
-		group = TOKEN_OTHER
-	}
-
-	if group == TOKEN_ACT {
-		isStart = true
-	}
-
-	if tokenType == sqllexer.EOF ||
-		(group == TOKEN_OTHER && tokenType == sqllexer.PUNCTUATION) {
-		isEnd = true
-	}
-
-	return group, isStart, isEnd
+func IsTokenEndOfStatement(token *sqllexer.Token) bool {
+	return (token.Type == sqllexer.EOF || token.Value == ";")
 }
 
-func main() {
-	query := "SELECT * FROM users \n WHERE id = something AND SELECT;"
-	lexer := sqllexer.New(query)
+func GetQueryTypeFromToken(token *sqllexer.Token) QueryType {
+	if token.Type != sqllexer.COMMAND {
+		return NONE
+	}
+
+	var foundType QueryType
+	switch strings.ToUpper(token.Value) {
+	case "SELECT":
+		foundType = SELECT
+	case "UPDATE":
+		foundType = UPDATE
+	case "INSERT":
+		foundType = INSERT
+	case "DELETE":
+		foundType = DELETE
+	default:
+		foundType = NONE
+	}
+
+	return foundType
+}
+
+func IsCrudSqlStatement(token *sqllexer.Token) bool {
+	queryType := GetQueryTypeFromToken(token)
+	return (queryType > 0 && queryType <= 4) // TODO: Update if QueryTypes change
+}
+
+func GetQueryTypeFromSql(sql string) QueryType {
+	var queryType QueryType
+
+	lexer := sqllexer.New(sql)
 	for {
 		token := lexer.Scan()
-		tokenGroup, isStart, isEnd := GetTokenGroupType(token.Type)
-		fmt.Println(token.Value, token.Type, tokenGroup, isStart, isEnd)
-
-		if token.Type == sqllexer.EOF {
+		if IsTokenEndOfStatement(token) {
 			break
 		}
+
+		queryType = GetQueryTypeFromToken(token)
+		if queryType > 0 {
+			break
+		}
 	}
+
+	return queryType
+}
+
+func main() {
+	query := "DELETE * FROM users \n WHERE id = something AND SELECT;"
+	newQuery := GetQueryTypeFromSql(query)
+
+	fmt.Println(newQuery)
+
+	//lexer := sqllexer.New(query)
+	//for {
+	//	token := lexer.Scan()
+	//	fmt.Println(token.Value, token.Type)
+
+	//	if token.Type == sqllexer.EOF {
+	//		break
+	//	}
+	//}
 }
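The new `GetQueryTypeFromSql` stops at the first COMMAND token (or at EOF / a `;`), so the query type comes from the leading keyword alone. A table-driven test along the lines below could pin that behavior down; this is a hypothetical sketch of a `main_test.go`, not a file in this commit.

```go
package main

import "testing"

// Hypothetical test exercising the helpers added in this commit.
func TestGetQueryTypeFromSql(t *testing.T) {
	cases := []struct {
		sql  string
		want QueryType
	}{
		{"SELECT id FROM users;", SELECT},
		{"UPDATE users SET name = 'a' WHERE id = 1;", UPDATE},
		{"INSERT INTO users (id) VALUES (1);", INSERT},
		{"DELETE FROM users WHERE id = 1;", DELETE},
		{"-- only a comment, no command", NONE},
	}

	for _, c := range cases {
		if got := GetQueryTypeFromSql(c.sql); got != c.want {
			t.Errorf("GetQueryTypeFromSql(%q) = %v, want %v", c.sql, got, c.want)
		}
	}
}
```

The range check in `IsCrudSqlStatement` is what the TODO in the diff flags: it only stays correct while SELECT through DELETE remain the values 1 through 4, so any change to the `QueryType` constants would need to be mirrored there.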