package main import ( "slices" "strconv" "strings" "unicode" ) var Whitespace []rune = []rune{' ', '\t', '\r', '\n'} type LexType uint32 const ( Type_Identifier LexType = iota Type_Keyword Type_Separator Type_Literal Type_Operator ) type Keyword uint32 var Keywords []string = []string{"import", "void", "return", "true", "false"} const ( Keyword_Import Keyword = iota Keyword_Void Keyword_Return Keyword_True KeyWord_False ) type Separator uint32 var Separators []rune = []rune{'(', ')', '{', '}', ';', ','} const ( Separator_OpenParen Separator = iota Separator_CloseParen Separator_OpenCurly Separator_CloseCurly Separator_Semicolon Separator_Comma ) type Operator uint32 var Operators []rune = []rune{'=', '>', '<', '!', '+', '-', '*', '/', '%'} const ( Operator_Equals Operator = iota Operator_Greater Operator_Less Operator_Not Operator_Plus Operator_Minus Operator_Multiply Operator_Divide Operator_Modulo ) type LiteralType uint32 const ( Literal_String LiteralType = iota Literal_Number Literal_Boolean ) type LexToken struct { Type LexType Position uint64 Value any } type Literal struct { Type LiteralType Primitive PrimitiveType Value any } type Lexer struct { Runes []rune LastTokenPosition uint64 Position uint64 } func (l *Lexer) error(message string) error { return CompilerError{Position: l.LastTokenPosition, Message: message} } func (l *Lexer) peekRune() *rune { if len(l.Runes) == 0 { return nil } return &l.Runes[0] } func (l *Lexer) nextRune() *rune { if len(l.Runes) == 0 { return nil } r := l.Runes[0] l.Runes = l.Runes[1:] l.Position++ return &r } func (l *Lexer) stringLiteral() (string, error) { l.LastTokenPosition = l.Position openQuote := l.nextRune() if openQuote == nil || *openQuote != '"' { return "", l.error("expected \"") } literal := "" for { r := l.nextRune() if r == nil { return "", l.error("unexpected end of file") } if *r == '"' { break } if *r == '\\' { escaped := l.nextRune() if escaped == nil { return "", l.error("unmatched escape sequence") } literal += string(*escaped) continue } literal += string(*r) } return literal, nil } // TODO: maybe this method should directly return LexToken func (l *Lexer) nextToken() (string, error) { // Skip whitespace for { r := l.peekRune() if r == nil { return "", nil } if !slices.Contains(Whitespace, *r) { break } l.nextRune() } l.LastTokenPosition = l.Position r := l.peekRune() if r == nil { return "", nil } if *r == '"' { literal, err := l.stringLiteral() if err != nil { return "", err } return "\"" + literal + "\"", nil } token := "" for { r := l.peekRune() if r == nil || slices.Contains(Whitespace, *r) || slices.Contains(Separators, *r) || slices.Contains(Operators, *r) { break } token += string(*l.nextRune()) } if len(token) == 0 && len(l.Runes) != 0 { return string(*l.nextRune()), nil } return token, nil } func parseNumber(raw string, numberType PrimitiveType) (any, error) { // TODO: return compiler errors if isSignedInt(numberType) { return strconv.ParseInt(raw, 10, getBits(numberType)) } if isUnsignedInt(numberType) { return strconv.ParseUint(raw, 10, getBits(numberType)) } if isFloatingPoint(numberType) { return strconv.ParseFloat(raw, getBits(numberType)) } panic("Unhandled type (" + strconv.FormatUint(uint64(numberType), 10) + ") in parseNumber()") } func (l *Lexer) parseToken(token string) (*LexToken, error) { if strings.HasPrefix(token, "\"") { return &LexToken{Type: Type_Literal, Position: l.LastTokenPosition, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: token[1 : len(token)-1]}}, nil } runes := []rune(token) if unicode.IsDigit([]rune(token)[0]) { // TODO: hexadecimal/binary/octal constants var numberType PrimitiveType = InvalidValue var rawNumber string = token for i, name := range PRIMITIVE_TYPE_NAMES { if strings.HasSuffix(token, name) { numberType = PrimitiveType(i) rawNumber = token[:len(token)-len(name)] } } containsDot := slices.Contains(runes, '.') if numberType == InvalidValue { if containsDot { numberType = Primitive_F64 } else { numberType = Primitive_I64 } } if containsDot && !isFloatingPoint(numberType) { return nil, l.error("dot in non floating-point constant") } number, err := parseNumber(rawNumber, numberType) if err != nil { return nil, err } return &LexToken{Type: Type_Literal, Position: l.LastTokenPosition, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil } if len(runes) == 1 { if idx := slices.Index(Separators, runes[0]); idx != -1 { return &LexToken{Type: Type_Separator, Position: l.LastTokenPosition, Value: Separator(idx)}, nil } if idx := slices.Index(Operators, runes[0]); idx != -1 { return &LexToken{Type: Type_Operator, Position: l.LastTokenPosition, Value: Operator(idx)}, nil } } if idx := slices.Index(Keywords, token); idx != -1 { return &LexToken{Type: Type_Keyword, Position: l.LastTokenPosition, Value: Keyword(idx)}, nil } return &LexToken{Type: Type_Identifier, Position: l.LastTokenPosition, Value: token}, nil } func lexer(program string) ([]LexToken, error) { var tokens []LexToken lexer := Lexer{Runes: []rune(program)} for { token, err := lexer.nextToken() if err != nil { return nil, err } if len(token) == 0 { break } lexToken, err := lexer.parseToken(token) if err != nil { return nil, err } tokens = append(tokens, *lexToken) } return tokens, nil }