package boolean

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"slices"
)

// TokenType identifies the lexical class of a Token.
type TokenType int

// ExprType identifies the kind of an expression value (numeric or boolean).
type ExprType int

const (
	EXPR_NUMERIC_CONSTANT ExprType = iota
	EXPR_BOOLEAN_CONSTANT
)

const (
	TOKEN_ERR TokenType = iota
	TOKEN_UNARY_OPERATOR
	TOKEN_BINARY_OPERATOR
	TOKEN_AMBIGUOUS_OPERATOR
	TOKEN_LEFT_PAREN
	TOKEN_RIGHT_PAREN
	TOKEN_SYMBOL
	TOKEN_NUMERIC_LITERAL
	TOKEN_BOOLEAN_LITERAL
)

// Token is a single lexical unit together with its byte offset in the input.
type Token struct {
	Text  string
	Type  TokenType
	Index uint
}

// TokenStream is a cursor over the slice of tokens produced by Tokenize.
type TokenStream struct {
	Tokens   []Token
	Position int
}

// Peek returns up to n tokens starting at the current position without
// advancing the stream. It returns io.EOF when the stream is exhausted.
func (stream *TokenStream) Peek(n int) ([]Token, error) {
	if stream.Position >= len(stream.Tokens) {
		return nil, io.EOF
	}
	end := min(stream.Position+n, len(stream.Tokens))
	return stream.Tokens[stream.Position:end], nil
}

// Read returns up to n tokens starting at the current position and advances
// the stream past them. It returns io.EOF when the stream is exhausted.
func (stream *TokenStream) Read(n int) ([]Token, error) {
	if stream.Position >= len(stream.Tokens) {
		return nil, io.EOF
	}
	end := min(stream.Position+n, len(stream.Tokens))
	ret := stream.Tokens[stream.Position:end]
	stream.Position += len(ret)
	return ret, nil
}

// NewTokenStream wraps a token slice in a TokenStream positioned at the start.
func NewTokenStream(tokens []Token) *TokenStream {
	return &TokenStream{
		Tokens:   tokens,
		Position: 0,
	}
}

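// A brief sketch of how a consumer (for example, a parser) might drive the
// stream; this is an editorial illustration, not part of the original file:
//
//	stream := NewTokenStream(tokens)
//	next, err := stream.Peek(1) // look at the next token without consuming it
//	if err == nil && next[0].Type == TOKEN_LEFT_PAREN {
//		consumed, _ := stream.Read(1) // now actually consume the "("
//		_ = consumed
//	}
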
// OperatorTree is a byte-indexed trie over operator spellings. Each node
// records the TokenType of the operator spelled so far (TOKEN_ERR if that
// prefix is not an operator on its own) and the possible continuations.
type OperatorTree struct {
	TokenType
	Next map[byte]OperatorTree
}

// SymbolValidStart reports whether char may begin a symbol: an ASCII letter
// or an underscore.
func SymbolValidStart(char byte) bool {
	if char >= 'a' && char <= 'z' {
		return true
	} else if char >= 'A' && char <= 'Z' {
		return true
	} else if char == '_' {
		return true
	}
	return false
}

// SymbolValidCont reports whether char may continue a symbol: any valid start
// character, an ASCII digit, or a hyphen.
func SymbolValidCont(char byte) bool {
	if SymbolValidStart(char) {
		return true
	} else if char >= '0' && char <= '9' {
		return true
	} else if char == '-' {
		return true
	}
	return false
}

// NumericValid reports whether char is an ASCII digit.
func NumericValid(char byte) bool {
	return char >= '0' && char <= '9'
}

var (
	// Whitespace lists the byte values the tokenizer skips between tokens.
	Whitespace = []byte{' ', '\t', '\n', '\r'}

	// OperatorTokens is the root of the operator trie, keyed by the first
	// byte of each operator. Multi-character operators such as "**", "!=",
	// "==", ">=" and "<=" hang off their first byte's Next map.
	OperatorTokens = map[byte]OperatorTree{
		'.': {
			TokenType: TOKEN_BINARY_OPERATOR,
			Next:      nil,
		},
		'~': {
			TokenType: TOKEN_UNARY_OPERATOR,
			Next:      nil,
		},
		'&': {
			TokenType: TOKEN_BINARY_OPERATOR,
			Next:      nil,
		},
		'|': {
			TokenType: TOKEN_BINARY_OPERATOR,
			Next:      nil,
		},
		'^': {
			TokenType: TOKEN_BINARY_OPERATOR,
			Next:      nil,
		},
		'+': {
			TokenType: TOKEN_BINARY_OPERATOR,
			Next:      nil,
		},
		'*': {
			TokenType: TOKEN_BINARY_OPERATOR,
			Next: map[byte]OperatorTree{
				'*': {
					TokenType: TOKEN_BINARY_OPERATOR,
					Next:      nil,
				},
			},
		},
		'/': {
			TokenType: TOKEN_BINARY_OPERATOR,
			Next:      nil,
		},
		'-': {
			TokenType: TOKEN_AMBIGUOUS_OPERATOR,
			Next:      nil,
		},
		'!': {
			// "!" on its own is not an operator; only "!=" is.
			TokenType: TOKEN_ERR,
			Next: map[byte]OperatorTree{
				'=': {
					TokenType: TOKEN_BINARY_OPERATOR,
					Next:      nil,
				},
			},
		},
		'=': {
			// "=" on its own is not an operator; only "==" is.
			TokenType: TOKEN_ERR,
			Next: map[byte]OperatorTree{
				'=': {
					TokenType: TOKEN_BINARY_OPERATOR,
					Next:      nil,
				},
			},
		},
		'>': {
			TokenType: TOKEN_BINARY_OPERATOR,
			Next: map[byte]OperatorTree{
				'=': {
					TokenType: TOKEN_BINARY_OPERATOR,
					Next:      nil,
				},
			},
		},
		'<': {
			TokenType: TOKEN_BINARY_OPERATOR,
			Next: map[byte]OperatorTree{
				'=': {
					TokenType: TOKEN_BINARY_OPERATOR,
					Next:      nil,
				},
			},
		},
	}
)

// TokenizeOperator greedily extends the operator text in current by walking
// the operator trie rooted at node, consuming bytes from stream for as long
// as they continue a known operator. It returns the token type of the
// longest match.
func TokenizeOperator(stream *bufio.Reader, node *OperatorTree, current string) (TokenType, string, error) {
	next_chars, err := stream.Peek(1)
	if errors.Is(err, io.EOF) {
		return node.TokenType, current, nil
	} else if err != nil {
		return TOKEN_ERR, current, fmt.Errorf("TokenizeOperator peek error: %w", err)
	}

	next_node, continues := node.Next[next_chars[0]]
	if continues {
		if _, err := stream.ReadByte(); err != nil {
			return TOKEN_ERR, current, fmt.Errorf("TokenizeOperator consume error: %w", err)
		}
		return TokenizeOperator(stream, &next_node, current+string(next_chars))
	}
	return node.TokenType, current, nil
}

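// A small sketch of the trie walk (editorial illustration, not part of the
// original file; the reader over ">= 3" and the "strings" import are assumed):
//
//	r := bufio.NewReader(strings.NewReader(">= 3"))
//	first, _ := r.ReadByte() // '>' has already been consumed by Tokenize
//	node := OperatorTokens[first]
//	typ, text, err := TokenizeOperator(r, &node, string(first))
//	// typ == TOKEN_BINARY_OPERATOR, text == ">=", err == nil
//
// For a lone "!", the '!' node carries TOKEN_ERR and nothing continues it,
// so the bare "!" is reported with TOKEN_ERR.
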
// Tokenize lexes the entire stream into a TokenStream. It skips whitespace,
// recognizes operators via the operator trie, and collects numeric literals,
// boolean literals (TRUE/FALSE), symbols and parentheses. Each token's Index
// is the byte offset of its first character in the input.
func Tokenize(stream *bufio.Reader) (*TokenStream, error) {
	tokens := []Token{}
	var position uint = 0

	for {
		char, err := stream.ReadByte()
		if errors.Is(err, io.EOF) {
			break
		} else if err != nil {
			return nil, fmt.Errorf("tokenize read error: %w", err)
		}

		if slices.Contains(Whitespace, char) {
			slog.Debug("tokenizer", "whitespace", char)
			position += 1
			continue
		} else if node, is_operator := OperatorTokens[char]; is_operator {
			// Extend the operator as far as the trie allows.
			token_type, text, err := TokenizeOperator(stream, &node, string(char))
			if err != nil {
				return nil, err
			}
			slog.Debug("tokenizer", "operator", text)
			tokens = append(tokens, Token{
				Type:  token_type,
				Text:  text,
				Index: position,
			})
			position += uint(len(text))
		} else if NumericValid(char) {
			// Numeric literal: digits with at most one decimal point.
			literal := string(char)
			decimal := false

			for {
				next_chars, err := stream.Peek(1)
				if errors.Is(err, io.EOF) {
					break
				} else if err != nil {
					return nil, fmt.Errorf("numeric peek error: %w", err)
				} else if NumericValid(next_chars[0]) {
					if _, err := stream.ReadByte(); err != nil {
						return nil, fmt.Errorf("numeric read error: %w", err)
					}
					literal += string(next_chars)
				} else if next_chars[0] == '.' {
					if decimal {
						break
					}
					decimal = true
					if _, err := stream.ReadByte(); err != nil {
						return nil, fmt.Errorf("numeric read error: %w", err)
					}
					literal += string(next_chars)
				} else {
					break
				}
			}
			slog.Debug("tokenizer", "numeric", literal)
			tokens = append(tokens, Token{
				Type:  TOKEN_NUMERIC_LITERAL,
				Text:  literal,
				Index: position,
			})
			position += uint(len(literal))
		} else if SymbolValidStart(char) {
			// Symbol: letters, digits, underscores and hyphens after a
			// valid start character. TRUE and FALSE are reclassified as
			// boolean literals.
			symbol := string(char)
			for {
				next_chars, err := stream.Peek(1)
				if errors.Is(err, io.EOF) {
					break
				} else if err != nil {
					return nil, fmt.Errorf("symbol peek error: %w", err)
				} else if SymbolValidCont(next_chars[0]) {
					if _, err := stream.ReadByte(); err != nil {
						return nil, fmt.Errorf("symbol read error: %w", err)
					}
					symbol += string(next_chars)
				} else {
					break
				}
			}
			slog.Debug("tokenizer", "symbol", symbol)
			token_type := TOKEN_SYMBOL
			switch symbol {
			case "TRUE", "FALSE":
				token_type = TOKEN_BOOLEAN_LITERAL
			}
			tokens = append(tokens, Token{
				Type:  token_type,
				Text:  symbol,
				Index: position,
			})
			position += uint(len(symbol))
		} else {
			switch char {
			case '(':
				tokens = append(tokens, Token{
					Type:  TOKEN_LEFT_PAREN,
					Text:  "(",
					Index: position,
				})
				position += 1
			case ')':
				tokens = append(tokens, Token{
					Type:  TOKEN_RIGHT_PAREN,
					Text:  ")",
					Index: position,
				})
				position += 1
			default:
				return nil, fmt.Errorf("tokenize unexpected character: %c", char)
			}
		}
	}

	return NewTokenStream(tokens), nil
}
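
// A minimal usage sketch (editorial illustration; the input string and the
// use of "strings" are assumptions, not part of the original file):
//
//	r := bufio.NewReader(strings.NewReader("x & ~y | count >= 10"))
//	stream, err := Tokenize(r)
//	if err != nil {
//		// handle the error
//	}
//	// stream.Tokens now holds, in order: "x" (TOKEN_SYMBOL), "&"
//	// (TOKEN_BINARY_OPERATOR), "~" (TOKEN_UNARY_OPERATOR), "y", "|",
//	// "count", ">=", and "10" (TOKEN_NUMERIC_LITERAL).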