package main import ( "fmt" "slices" "strconv" "strings" "unicode" ) var Whitespace []rune = []rune{' ', '\t', '\r', '\n'} type LexType uint32 const ( Type_Identifier LexType = iota Type_Keyword Type_Separator Type_Literal Type_Operator ) type Keyword uint32 var Keywords []string = []string{"import", "module", "void", "return", "true", "false", "if", "else", "raw", "while"} const ( Keyword_Import Keyword = iota Keyword_Module Keyword_Void Keyword_Return Keyword_True KeyWord_False Keyword_If Keyword_Else Keyword_Raw Keyword_While ) type Separator uint32 var Separators []string = []string{"(", ")", "{", "}", "[", "]", ";", ",", "."} const ( Separator_OpenParen Separator = iota Separator_CloseParen Separator_OpenCurly Separator_CloseCurly Separator_OpenSquare Separator_CloseSquare Separator_Semicolon Separator_Comma Separator_Dot ) type Operator uint32 var Operators []string = []string{"=", ">", "<", "!", "+", "-", "*", "/", "%", "==", ">=", "<=", "!=", "+=", "-=", "*=", "/=", "%=", "++", "--", "~"} const ( Operator_Equals Operator = iota Operator_Greater Operator_Less Operator_Not Operator_Plus Operator_Minus Operator_Multiply Operator_Divide Operator_Modulo Operator_EqualsEquals Operator_GreaterEquals Operator_LessEquals Operator_NotEquals Operator_PlusEquals Operator_MinusEquals Operator_MultiplyEquals Operator_DivideEquals Operator_ModuloEquals Operator_PlusPlus Operator_MinusMinus Operator_BitwiseNot ) type LiteralType uint32 const ( Literal_String LiteralType = iota Literal_Number Literal_Boolean ) type LexToken struct { Type LexType Position TokenPosition Value any } type TokenPosition struct { SourceFile string Position uint64 } type Literal struct { Type LiteralType Primitive PrimitiveType Value any } type Lexer struct { Runes []rune LastTokenPosition uint64 SourceFile string Position uint64 } func unknownPosition() TokenPosition { return TokenPosition{SourceFile: "", Position: 0} } func (l *Lexer) error(message string) error { return CompilerError{Position: TokenPosition{SourceFile: l.SourceFile, Position: l.LastTokenPosition}, Message: message} } func (l *Lexer) peekRune() *rune { if len(l.Runes) == 0 { return nil } return &l.Runes[0] } func (l *Lexer) tryOperator() Operator { var foundOp Operator = InvalidValue var foundOpLen int = 0 str := string(l.Runes) for i, operator := range Operators { operatorLen := len([]rune(operator)) if operatorLen <= foundOpLen { continue } if strings.HasPrefix(str, operator) { foundOp = Operator(i) foundOpLen = len([]rune(operator)) } } for i := 0; i < foundOpLen; i++ { l.nextRune() } return foundOp } func (l *Lexer) trySeparator() Separator { var foundSep Separator = InvalidValue var foundSepLen int = 0 str := string(l.Runes) for i, separator := range Separators { separatorLen := len([]rune(separator)) if separatorLen <= foundSepLen { continue } if strings.HasPrefix(str, separator) { foundSep = Separator(i) foundSepLen = len([]rune(separator)) } } for i := 0; i < foundSepLen; i++ { l.nextRune() } return foundSep } func (l *Lexer) hasNext(choices ...string) bool { str := string(l.Runes) for _, c := range choices { if strings.HasPrefix(str, c) { return true } } return false } func (l *Lexer) nextRune() *rune { if len(l.Runes) == 0 { return nil } r := l.Runes[0] l.Runes = l.Runes[1:] l.Position++ return &r } func (l *Lexer) stringLiteral() (string, error) { l.LastTokenPosition = l.Position openQuote := l.nextRune() if openQuote == nil || *openQuote != '"' { return "", l.error("expected \"") } literal := "" for { r := l.nextRune() if r == nil { return "", l.error("unexpected end of file") } if *r == '"' { break } if *r == '\\' { escaped := l.nextRune() if escaped == nil { return "", l.error("unmatched escape sequence") } literal += string(*escaped) continue } literal += string(*r) } return literal, nil } func (l *Lexer) nextToken() (*LexToken, error) { // Skip whitespace for { r := l.peekRune() if r == nil { return nil, nil } if !slices.Contains(Whitespace, *r) { break } l.nextRune() } l.LastTokenPosition = l.Position r := l.peekRune() if r == nil { return nil, nil } if *r == '"' { literal, err := l.stringLiteral() if err != nil { return nil, err } return &LexToken{Type: Type_Literal, Position: TokenPosition{SourceFile: l.SourceFile, Position: l.LastTokenPosition}, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: literal}}, nil } op := l.tryOperator() if op != InvalidValue { return &LexToken{Type: Type_Operator, Position: TokenPosition{SourceFile: l.SourceFile, Position: l.LastTokenPosition}, Value: op}, nil } sep := l.trySeparator() if sep != InvalidValue { return &LexToken{Type: Type_Separator, Position: TokenPosition{SourceFile: l.SourceFile, Position: l.LastTokenPosition}, Value: sep}, nil } token := "" for { r := l.peekRune() if r == nil || slices.Contains(Whitespace, *r) || l.hasNext(Separators...) || l.hasNext(Operators...) { break } token += string(*l.nextRune()) } if len(token) == 0 { if len(l.Runes) == 0 { return nil, nil } token = string(*l.nextRune()) } runes := []rune(token) if unicode.IsDigit([]rune(token)[0]) { var numberType PrimitiveType = InvalidValue var rawNumber string = token for i, name := range PRIMITIVE_TYPE_NAMES { if strings.HasSuffix(token, name) { numberType = PrimitiveType(i) rawNumber = token[:len(token)-len(name)] } } containsDot := slices.Contains(runes, '.') if numberType == InvalidValue { if containsDot { numberType = Primitive_F64 } else { numberType = Primitive_I64 } } if containsDot && !isFloatingPoint(numberType) { return nil, l.error("dot in non floating-point constant") } number, err := l.parseNumber(rawNumber, numberType) if err != nil { return nil, err } return &LexToken{Type: Type_Literal, Position: TokenPosition{SourceFile: l.SourceFile, Position: l.LastTokenPosition}, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil } if idx := slices.Index(Keywords, token); idx != -1 { return &LexToken{Type: Type_Keyword, Position: TokenPosition{SourceFile: l.SourceFile, Position: l.LastTokenPosition}, Value: Keyword(idx)}, nil } return &LexToken{Type: Type_Identifier, Position: TokenPosition{SourceFile: l.SourceFile, Position: l.LastTokenPosition}, Value: token}, nil } func (l *Lexer) parseNumber(raw string, numberType PrimitiveType) (any, error) { if numberType == Primitive_Bool { return nil, l.error("bool not allowed as number suffix") } base := 10 if strings.HasPrefix(raw, "0x") { raw = raw[2:] base = 16 } else if strings.HasPrefix(raw, "0o") { raw = raw[2:] base = 8 } else if strings.HasPrefix(raw, "0b") { raw = raw[2:] base = 2 } if isSignedInt(numberType) { num, err := strconv.ParseInt(raw, base, getBits(numberType)) if err != nil { return nil, l.error("failed to parse literal: " + err.Error()) } return num, nil } if isUnsignedInt(numberType) { num, err := strconv.ParseUint(raw, base, getBits(numberType)) if err != nil { return nil, l.error("failed to parse literal: " + err.Error()) } return num, nil } if isFloatingPoint(numberType) { if base != 10 { return nil, l.error("non base 10 float literals are not supported") } num, err := strconv.ParseFloat(raw, getBits(numberType)) if err != nil { return nil, l.error("failed to parse literal: " + err.Error()) } return num, nil } panic(fmt.Sprintf("Unhandled type %s in parseNumber()", numberType)) } func lexer(sourceFile string, source string) ([]LexToken, error) { var tokens []LexToken lexer := Lexer{SourceFile: sourceFile, Runes: []rune(source)} for { token, err := lexer.nextToken() if err != nil { return nil, err } if token == nil { break } tokens = append(tokens, *token) } return tokens, nil }