thinking through data structure, please hold
commit d3f9600d58
parent 518316c4f7

README.md | 47
@@ -1,3 +1,48 @@
 # query-interpreter
 
-Core program to interpret query language strings into structured data, and back again.
+Core program to interpret query language strings into structured data, and back again.
+
+## SQL Tokens
+
+We are currently using DataDog's SQL tokenizer, `sqllexer`, to scan through SQL strings.
+Here are the general token types it defines:
+
+```go
+type TokenType int
+
+const (
+	ERROR TokenType = iota
+	EOF
+	SPACE                  // space or newline
+	STRING                 // string literal
+	INCOMPLETE_STRING      // incomplete string literal so that we can obfuscate it, e.g. 'abc
+	NUMBER                 // number literal
+	IDENT                  // identifier
+	QUOTED_IDENT           // quoted identifier
+	OPERATOR               // operator
+	WILDCARD               // wildcard *
+	COMMENT                // comment
+	MULTILINE_COMMENT      // multiline comment
+	PUNCTUATION            // punctuation
+	DOLLAR_QUOTED_FUNCTION // dollar quoted function
+	DOLLAR_QUOTED_STRING   // dollar quoted string
+	POSITIONAL_PARAMETER   // numbered parameter
+	BIND_PARAMETER         // bind parameter
+	FUNCTION               // function
+	SYSTEM_VARIABLE        // system variable
+	UNKNOWN                // unknown token
+	COMMAND                // SQL commands like SELECT, INSERT
+	KEYWORD                // Other SQL keywords
+	JSON_OP                // JSON operators
+	BOOLEAN                // boolean literal
+	NULL                   // null literal
+	PROC_INDICATOR         // procedure indicator
+	CTE_INDICATOR          // CTE indicator
+	ALIAS_INDICATOR        // alias indicator
+)
+```
+
+Based on these different token types we will be able to parse the details of a given query into
+structs that represent it. From those structs we will be able to modify the query however we
+choose and reconstruct it into a valid SQL statement.
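
A minimal sketch of that idea, using only the `sqllexer` calls that already appear in this commit (`New`, `Scan`, `Token.Type`, `Token.Value`); the `ParsedQuery` type and `parseQuery` helper are hypothetical names for illustration, not code in this repo:

```go
// Hypothetical sketch: collect the pieces of a statement into a struct.
// ParsedQuery and parseQuery are illustrative names only.
package main

import (
	"fmt"

	"github.com/DataDog/go-sqllexer"
)

type ParsedQuery struct {
	Command string   // first COMMAND token, e.g. SELECT or INSERT
	Idents  []string // identifiers such as table and column names
	Tokens  []string // every token value in order, for reconstruction
}

func parseQuery(sql string) ParsedQuery {
	var q ParsedQuery
	lexer := sqllexer.New(sql)
	for {
		token := lexer.Scan()
		if token.Type == sqllexer.EOF {
			break
		}
		q.Tokens = append(q.Tokens, token.Value)
		switch token.Type {
		case sqllexer.COMMAND:
			if q.Command == "" {
				q.Command = token.Value
			}
		case sqllexer.IDENT:
			q.Idents = append(q.Idents, token.Value)
		}
	}
	return q
}

func main() {
	fmt.Printf("%+v\n", parseQuery("SELECT id, name FROM users WHERE id = 1;"))
}
```

Since `SPACE` tokens are part of the stream, joining `Tokens` back together is one plausible route to reconstructing the original statement.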

main.go | 130
@@ -2,82 +2,90 @@ package main
 
 import (
 	"fmt"
 	"strings"
 
 	"github.com/DataDog/go-sqllexer"
 )
 
-type TokenGroupType int
+type QueryType int
 
 const (
-	TOKEN_OTHER   TokenGroupType = iota
-	TOKEN_ACT                    // SELECT, CREATE, etc
-	TOKEN_FILTER                 // WHERE, AND, etc
-	TOKEN_RELATE                 // joins
-	TOKEN_DEFINE                 // column data types, etc
-	TOKEN_CONTROL                // sorting, etc
+	NONE QueryType = iota
+	SELECT
+	UPDATE
+	INSERT
+	DELETE
 )
 
-var tokenGroupMap = map[sqllexer.TokenType]TokenGroupType{
-	sqllexer.ERROR:                  TOKEN_OTHER,
-	sqllexer.EOF:                    TOKEN_OTHER,
-	sqllexer.SPACE:                  TOKEN_OTHER,
-	sqllexer.STRING:                 TOKEN_DEFINE,
-	sqllexer.INCOMPLETE_STRING:      TOKEN_DEFINE,
-	sqllexer.NUMBER:                 TOKEN_DEFINE,
-	sqllexer.IDENT:                  TOKEN_ACT,    // for table/column names
-	sqllexer.QUOTED_IDENT:           TOKEN_RELATE, // for joins
-	sqllexer.OPERATOR:               TOKEN_FILTER,
-	sqllexer.WILDCARD:               TOKEN_ACT,
-	sqllexer.COMMENT:                TOKEN_OTHER,
-	sqllexer.MULTILINE_COMMENT:      TOKEN_OTHER,
-	sqllexer.PUNCTUATION:            TOKEN_OTHER,
-	sqllexer.DOLLAR_QUOTED_FUNCTION: TOKEN_DEFINE,
-	sqllexer.DOLLAR_QUOTED_STRING:   TOKEN_DEFINE,
-	sqllexer.POSITIONAL_PARAMETER:   TOKEN_FILTER,
-	sqllexer.BIND_PARAMETER:         TOKEN_FILTER,
-	sqllexer.FUNCTION:               TOKEN_DEFINE,
-	sqllexer.SYSTEM_VARIABLE:        TOKEN_DEFINE,
-	sqllexer.UNKNOWN:                TOKEN_OTHER,
-	sqllexer.COMMAND:                TOKEN_ACT,
-	sqllexer.KEYWORD:                TOKEN_ACT,
-	sqllexer.JSON_OP:                TOKEN_DEFINE,
-	sqllexer.BOOLEAN:                TOKEN_DEFINE,
-	sqllexer.NULL:                   TOKEN_DEFINE,
-	sqllexer.PROC_INDICATOR:         TOKEN_OTHER,
-	sqllexer.CTE_INDICATOR:          TOKEN_OTHER,
-	sqllexer.ALIAS_INDICATOR:        TOKEN_OTHER,
+type Query struct {
+	Type    QueryType
+	FullSql string
+	IsValid bool
 }
 
-func GetTokenGroupType(tokenType sqllexer.TokenType) (TokenGroupType, bool, bool) {
-	group, exists := tokenGroupMap[tokenType]
-	isStart := false
-	isEnd := false
-
-	if !exists {
-		group = TOKEN_OTHER
-	}
-
-	if group == TOKEN_ACT {
-		isStart = true
-	}
-
-	if tokenType == sqllexer.EOF ||
-		(group == TOKEN_OTHER && tokenType == sqllexer.PUNCTUATION) {
-		isEnd = true
-	}
-
-	return group, isStart, isEnd
+func IsTokenEndOfStatement(token *sqllexer.Token) bool {
+	return (token.Type == sqllexer.EOF || token.Value == ";")
 }
 
-func main() {
-	query := "SELECT * FROM users \n WHERE id = something AND SELECT;"
-	lexer := sqllexer.New(query)
+func GetQueryTypeFromToken(token *sqllexer.Token) QueryType {
+	if token.Type != sqllexer.COMMAND {
+		return NONE
+	}
+
+	var foundType QueryType
+	switch strings.ToUpper(token.Value) {
+	case "SELECT":
+		foundType = SELECT
+	case "UPDATE":
+		foundType = UPDATE
+	case "INSERT":
+		foundType = INSERT
+	case "DELETE":
+		foundType = DELETE
+	default:
+		foundType = NONE
+	}
+
+	return foundType
+}
+
+func IsCrudSqlStatement(token *sqllexer.Token) bool {
+	queryType := GetQueryTypeFromToken(token)
+	return (queryType > 0 && queryType <= 4) // TODO: Update if QueryTypes change
+}
+
+func GetQueryTypeFromSql(sql string) QueryType {
+	var queryType QueryType
+
+	lexer := sqllexer.New(sql)
 	for {
 		token := lexer.Scan()
-		tokenGroup, isStart, isEnd := GetTokenGroupType(token.Type)
-		fmt.Println(token.Value, token.Type, tokenGroup, isStart, isEnd)
+		if IsTokenEndOfStatement(token) {
+			break
+		}
 
-		if token.Type == sqllexer.EOF {
+		queryType = GetQueryTypeFromToken(token)
+		if queryType > 0 {
 			break
 		}
 	}
+
+	return queryType
 }
+
+func main() {
+	query := "DELETE * FROM users \n WHERE id = something AND SELECT;"
+	newQuery := GetQueryTypeFromSql(query)
+
+	fmt.Println(newQuery)
+
+	//lexer := sqllexer.New(query)
+	//for {
+	//	token := lexer.Scan()
+	//	fmt.Println(token.Value, token.Type)
+
+	//	if token.Type == sqllexer.EOF {
+	//		break
+	//	}
+	//}
+}
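
The new `Query` struct is declared but not yet populated anywhere in this commit. A minimal sketch of how it could be wired to the helpers above, assuming `IsValid` simply means the command was recognized; the `NewQuery` constructor is hypothetical and not part of the commit:

```go
// Hypothetical constructor: builds a Query value from a raw SQL string
// using the commit's GetQueryTypeFromSql helper.
func NewQuery(sql string) Query {
	queryType := GetQueryTypeFromSql(sql)
	return Query{
		Type:    queryType,
		FullSql: sql,
		IsValid: queryType != NONE, // assumption: valid == recognized CRUD command
	}
}
```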