package boolean

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"slices"
)

type TokenType int
type ExprType int

const (
	EXPR_NUMERIC_CONSTANT ExprType = iota
	EXPR_BOOLEAN_CONSTANT
)

// The token constants live in their own block so iota restarts at zero and
// TOKEN_ERR is the zero value of TokenType.
const (
	TOKEN_ERR TokenType = iota
	TOKEN_UNARY_OPERATOR
	TOKEN_BINARY_OPERATOR
	TOKEN_AMBIGUOUS_OPERATOR
	TOKEN_LEFT_PAREN
	TOKEN_RIGHT_PAREN
	TOKEN_SYMBOL
	TOKEN_NUMERIC_LITERAL
	TOKEN_BOOLEAN_LITERAL
)

type Token struct {
	Text  string
	Type  TokenType
	Index uint
}

type TokenStream struct {
	Tokens   []Token
	Position int
}

// Peek returns up to n tokens without advancing the stream. It returns io.EOF
// once the stream is exhausted; the slice is clamped so asking for more
// tokens than remain does not panic.
func (stream *TokenStream) Peek(n int) ([]Token, error) {
	if stream.Position >= len(stream.Tokens) {
		return nil, io.EOF
	}
	end := min(stream.Position+n, len(stream.Tokens))
	return stream.Tokens[stream.Position:end], nil
}

// Read returns up to n tokens and advances the stream past them.
func (stream *TokenStream) Read(n int) ([]Token, error) {
	if stream.Position >= len(stream.Tokens) {
		return nil, io.EOF
	}
	end := min(stream.Position+n, len(stream.Tokens))
	ret := stream.Tokens[stream.Position:end]
	stream.Position += len(ret)
	return ret, nil
}

func NewTokenStream(tokens []Token) *TokenStream {
	return &TokenStream{
		Tokens:   tokens,
		Position: 0,
	}
}

// OperatorTree is a node in a prefix tree of operator spellings. The embedded
// TokenType is the type of the operator ending at this node; TOKEN_ERR marks
// a prefix that is not a valid operator on its own.
type OperatorTree struct {
	TokenType
	Next map[byte]OperatorTree
}

func SymbolValidStart(char byte) bool {
	return (char >= 'a' && char <= 'z') ||
		(char >= 'A' && char <= 'Z') ||
		char == '_'
}

// SymbolValidCont permits '-' inside symbols, so a name like a-b lexes as a
// single symbol; a leading '-' is still treated as an operator.
func SymbolValidCont(char byte) bool {
	return SymbolValidStart(char) || NumericValid(char) || char == '-'
}

func NumericValid(char byte) bool {
	return char >= '0' && char <= '9'
}

var (
	// Whitespace lists the byte values skipped between tokens.
	Whitespace = []byte{' ', '\t', '\n', '\r'}

	// OperatorTokens is the root level of the operator prefix tree. Entries
	// whose TokenType is TOKEN_ERR (e.g. '=' and '!') are only valid when
	// extended by a following byte.
	OperatorTokens = map[byte]OperatorTree{
		'.': {TokenType: TOKEN_BINARY_OPERATOR},
		'~': {TokenType: TOKEN_UNARY_OPERATOR},
		'&': {TokenType: TOKEN_BINARY_OPERATOR},
		'|': {TokenType: TOKEN_BINARY_OPERATOR},
		'^': {TokenType: TOKEN_BINARY_OPERATOR},
		'+': {TokenType: TOKEN_BINARY_OPERATOR},
		'*': {
			TokenType: TOKEN_BINARY_OPERATOR,
			Next: map[byte]OperatorTree{
				'*': {TokenType: TOKEN_BINARY_OPERATOR},
			},
		},
		'/': {TokenType: TOKEN_BINARY_OPERATOR},
		// '-' may be unary negation or binary subtraction; the parser must
		// disambiguate.
		'-': {TokenType: TOKEN_AMBIGUOUS_OPERATOR},
		'!': {
			TokenType: TOKEN_ERR,
			Next: map[byte]OperatorTree{
				'=': {TokenType: TOKEN_BINARY_OPERATOR},
			},
		},
		'=': {
			TokenType: TOKEN_ERR,
			Next: map[byte]OperatorTree{
				'=': {TokenType: TOKEN_BINARY_OPERATOR},
			},
		},
		'>': {
			TokenType: TOKEN_BINARY_OPERATOR,
			Next: map[byte]OperatorTree{
				'=': {TokenType: TOKEN_BINARY_OPERATOR},
			},
		},
		'<': {
			TokenType: TOKEN_BINARY_OPERATOR,
			Next: map[byte]OperatorTree{
				'=': {TokenType: TOKEN_BINARY_OPERATOR},
			},
		},
	}
)

// TokenizeOperator walks the operator prefix tree, consuming bytes for as
// long as they extend the current operator, and returns the longest match.
func TokenizeOperator(stream *bufio.Reader, node *OperatorTree, current string) (TokenType, string, error) {
	next_chars, err := stream.Peek(1)
	if errors.Is(err, io.EOF) {
		return node.TokenType, current, nil
	} else if err != nil {
		return TOKEN_ERR, current, fmt.Errorf("TokenizeOperator peek error: %w", err)
	}
	next_node, continues := node.Next[next_chars[0]]
	if !continues {
		return node.TokenType, current, nil
	}
	if _, err := stream.ReadByte(); err != nil {
		return TOKEN_ERR, current, fmt.Errorf("TokenizeOperator consume error: %w", err)
	}
	return TokenizeOperator(stream, &next_node, current+string(next_chars))
}

// Tokenize reads the whole input and produces a TokenStream, recording each
// token's byte offset in Index.
func Tokenize(stream *bufio.Reader) (*TokenStream, error) {
	tokens := []Token{}
	var position uint
	for {
		char, err := stream.ReadByte()
		if errors.Is(err, io.EOF) {
			break
		} else if err != nil {
			return nil, fmt.Errorf("tokenize read error: %w", err)
		}
		if slices.Contains(Whitespace, char) {
			slog.Debug("tokenizer", "whitespace", char)
			position += 1
			continue
		} else if node, is_operator := OperatorTokens[char]; is_operator {
			// Named `text` rather than `string` to avoid shadowing the builtin.
			token_type, text, err := TokenizeOperator(stream, &node, string(char))
			if err != nil {
				return nil, err
			}
			slog.Debug("tokenizer", "operator", text)
			tokens = append(tokens, Token{
				Type:  token_type,
				Text:  text,
				Index: position,
			})
			position += uint(len(text))
		} else if NumericValid(char) {
			literal := string(char)
			decimal := false
			for {
				next_chars, err := stream.Peek(1)
				if errors.Is(err, io.EOF) {
					break
				} else if err != nil {
					return nil, fmt.Errorf("numeric peek error: %w", err)
				} else if NumericValid(next_chars[0]) {
					if _, err := stream.ReadByte(); err != nil {
						return nil, fmt.Errorf("numeric read error: %w", err)
					}
					literal += string(next_chars)
				} else if next_chars[0] == '.' {
					// Only one decimal point per literal; a second '.' ends it.
					if decimal {
						break
					}
					decimal = true
					if _, err := stream.ReadByte(); err != nil {
						return nil, fmt.Errorf("numeric read error: %w", err)
					}
					literal += string(next_chars)
				} else {
					break
				}
			}
			slog.Debug("tokenizer", "numeric", literal)
			tokens = append(tokens, Token{
				Type:  TOKEN_NUMERIC_LITERAL,
				Text:  literal,
				Index: position,
			})
			position += uint(len(literal))
		} else if SymbolValidStart(char) {
			symbol := string(char)
			for {
				next_chars, err := stream.Peek(1)
				if errors.Is(err, io.EOF) {
					break
				} else if err != nil {
					return nil, fmt.Errorf("symbol peek error: %w", err)
				} else if SymbolValidCont(next_chars[0]) {
					if _, err := stream.ReadByte(); err != nil {
						return nil, fmt.Errorf("symbol read error: %w", err)
					}
					symbol += string(next_chars)
				} else {
					break
				}
			}
			slog.Debug("tokenizer", "symbol", symbol)
			token_type := TOKEN_SYMBOL
			switch symbol {
			case "TRUE", "FALSE":
				token_type = TOKEN_BOOLEAN_LITERAL
			}
			tokens = append(tokens, Token{
				Type:  token_type,
				Text:  symbol,
				Index: position,
			})
			position += uint(len(symbol))
		} else {
			switch char {
			case '(':
				tokens = append(tokens, Token{Type: TOKEN_LEFT_PAREN, Text: "(", Index: position})
				position += 1
			case ')':
				tokens = append(tokens, Token{Type: TOKEN_RIGHT_PAREN, Text: ")", Index: position})
				position += 1
			default:
				return nil, fmt.Errorf("tokenize unexpected character: %c", char)
			}
		}
	}
	return NewTokenStream(tokens), nil
}
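
// DebugDump is an illustrative sketch, not part of the original file: it
// shows one way to drive Tokenize together with the TokenStream Read API,
// printing one token per line. The name DebugDump and the tab-separated
// output format are assumptions made for this example; callers would wrap
// their input themselves, e.g.
//
//	DebugDump(bufio.NewReader(strings.NewReader("foo & ~bar | (x >= 10.5)")))
func DebugDump(reader *bufio.Reader) error {
	stream, err := Tokenize(reader)
	if err != nil {
		return err
	}
	for {
		// Read(1) returns io.EOF once the stream is exhausted.
		toks, err := stream.Read(1)
		if errors.Is(err, io.EOF) {
			return nil
		} else if err != nil {
			return err
		}
		fmt.Printf("%d\t%d\t%q\n", toks[0].Index, toks[0].Type, toks[0].Text)
	}
}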