package main import ( "slices" "strconv" "strings" "unicode" ) var Whitespace []rune = []rune{' ', '\t', '\r', '\n'} type LexType uint32 const ( Type_Identifier LexType = iota Type_Keyword Type_Separator Type_Literal Type_Operator ) type Keyword uint32 var Keywords []string = []string{"import", "void", "return", "true", "false", "if", "else"} const ( Keyword_Import Keyword = iota Keyword_Void Keyword_Return Keyword_True KeyWord_False Keyword_If Keyword_Else ) type Separator uint32 var Separators []rune = []rune{'(', ')', '{', '}', ';', ','} const ( Separator_OpenParen Separator = iota Separator_CloseParen Separator_OpenCurly Separator_CloseCurly Separator_Semicolon Separator_Comma ) type Operator uint32 var Operators []string = []string{"=", ">", "<", "!", "+", "-", "*", "/", "%", "==", ">=", "<=", "!=", "+=", "-=", "*=", "/=", "%="} const ( Operator_Equals Operator = iota Operator_Greater Operator_Less Operator_Not Operator_Plus Operator_Minus Operator_Multiply Operator_Divide Operator_Modulo Operator_EqualsEquals Operator_GreaterEquals Operator_LessEquals Operator_NotEquals Operator_PlusEquals Operator_MinusEquals Operator_MultiplyEquals Operator_DivideEquals Operator_ModuloEquals ) type LiteralType uint32 const ( Literal_String LiteralType = iota Literal_Number Literal_Boolean ) type LexToken struct { Type LexType Position uint64 Value any } type Literal struct { Type LiteralType Primitive PrimitiveType Value any } type Lexer struct { Runes []rune LastTokenPosition uint64 Position uint64 } func (l *Lexer) error(message string) error { return CompilerError{Position: l.LastTokenPosition, Message: message} } func (l *Lexer) peekRune() *rune { if len(l.Runes) == 0 { return nil } return &l.Runes[0] } func (l *Lexer) tryOperator() Operator { var foundOp Operator = InvalidValue var foundOpLen int = 0 str := string(l.Runes) for i, operator := range Operators { operatorLen := len([]rune(operator)) if operatorLen <= foundOpLen { continue } if strings.HasPrefix(str, operator) { foundOp = Operator(i) foundOpLen = len([]rune(operator)) } } for i := 0; i < foundOpLen; i++ { l.nextRune() } return foundOp } func (l *Lexer) nextRune() *rune { if len(l.Runes) == 0 { return nil } r := l.Runes[0] l.Runes = l.Runes[1:] l.Position++ return &r } func (l *Lexer) stringLiteral() (string, error) { l.LastTokenPosition = l.Position openQuote := l.nextRune() if openQuote == nil || *openQuote != '"' { return "", l.error("expected \"") } literal := "" for { r := l.nextRune() if r == nil { return "", l.error("unexpected end of file") } if *r == '"' { break } if *r == '\\' { escaped := l.nextRune() if escaped == nil { return "", l.error("unmatched escape sequence") } literal += string(*escaped) continue } literal += string(*r) } return literal, nil } // TODO: maybe this method should directly return LexToken func (l *Lexer) nextToken() (*LexToken, error) { // Skip whitespace for { r := l.peekRune() if r == nil { return nil, nil } if !slices.Contains(Whitespace, *r) { break } l.nextRune() } l.LastTokenPosition = l.Position r := l.peekRune() if r == nil { return nil, nil } if *r == '"' { literal, err := l.stringLiteral() if err != nil { return nil, err } return &LexToken{Type: Type_Literal, Position: l.LastTokenPosition, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: literal}}, nil } op := l.tryOperator() if op != InvalidValue { return &LexToken{Type: Type_Operator, Position: l.LastTokenPosition, Value: op}, nil } token := "" for { r := l.peekRune() if r == nil || slices.Contains(Whitespace, *r) || slices.Contains(Separators, *r) { break } token += string(*l.nextRune()) } if len(token) == 0 { if len(l.Runes) == 0 { return nil, nil } token = string(*l.nextRune()) } runes := []rune(token) if unicode.IsDigit([]rune(token)[0]) { // TODO: hexadecimal/binary/octal constants var numberType PrimitiveType = InvalidValue var rawNumber string = token for i, name := range PRIMITIVE_TYPE_NAMES { if strings.HasSuffix(token, name) { numberType = PrimitiveType(i) rawNumber = token[:len(token)-len(name)] } } containsDot := slices.Contains(runes, '.') if numberType == InvalidValue { if containsDot { numberType = Primitive_F64 } else { numberType = Primitive_I64 } } if containsDot && !isFloatingPoint(numberType) { return nil, l.error("dot in non floating-point constant") } number, err := parseNumber(rawNumber, numberType) if err != nil { return nil, err } return &LexToken{Type: Type_Literal, Position: l.LastTokenPosition, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil } if len(runes) == 1 { if idx := slices.Index(Separators, runes[0]); idx != -1 { return &LexToken{Type: Type_Separator, Position: l.LastTokenPosition, Value: Separator(idx)}, nil } } if idx := slices.Index(Keywords, token); idx != -1 { return &LexToken{Type: Type_Keyword, Position: l.LastTokenPosition, Value: Keyword(idx)}, nil } return &LexToken{Type: Type_Identifier, Position: l.LastTokenPosition, Value: token}, nil } func parseNumber(raw string, numberType PrimitiveType) (any, error) { // TODO: return compiler errors if isSignedInt(numberType) { return strconv.ParseInt(raw, 10, getBits(numberType)) } if isUnsignedInt(numberType) { return strconv.ParseUint(raw, 10, getBits(numberType)) } if isFloatingPoint(numberType) { return strconv.ParseFloat(raw, getBits(numberType)) } panic("Unhandled type (" + strconv.FormatUint(uint64(numberType), 10) + ") in parseNumber()") } func lexer(program string) ([]LexToken, error) { var tokens []LexToken lexer := Lexer{Runes: []rune(program)} for { token, err := lexer.nextToken() if err != nil { return nil, err } if token == nil { break } tokens = append(tokens, *token) } return tokens, nil }