package main

import (
	"errors"
	"slices"
	"strconv"
	"strings"
	"unicode"
)

// Character classes recognized by the lexer. Operators is declared for later
// use; nextToken currently only splits on whitespace and separators.
var Whitespace []rune = []rune{' ', '\t', '\r', '\n'}
var Separators []rune = []rune{'(', ')', '{', '}', ';', ','}
var Operators []rune = []rune{'=', '>', '<', '!', '+', '-', '*', '/', '%'}

type LexType uint32

const (
	Type_Identifier LexType = iota
	Type_Keyword
	Type_Separator
	Type_Literal
)

type Keyword uint32

const (
	Keyword_Import Keyword = iota
	Keyword_Void
)

type Separator uint32

const (
	Separator_OpenParen Separator = iota
	Separator_CloseParen
	Separator_OpenCurly
	Separator_CloseCurly
	Separator_Semicolon
	Separator_Comma
)

type LiteralType uint32

const (
	Literal_String LiteralType = iota
	Literal_Number
)

// LexToken is a single lexed token. Position is currently never filled in by
// the lexer and stays at its zero value.
type LexToken struct {
	Type     LexType
	Position uint64
	Value    any
}

// Literal is the Value payload of a Type_Literal token. PrimitiveType,
// InvalidValue, NumberTypeNames and the is*/getBits helpers are defined
// elsewhere in this package.
type Literal struct {
	Type      LiteralType
	Primitive PrimitiveType
	Value     any
}

// stringLiteral consumes a double-quoted string from the start of runes and
// returns its contents, the remaining runes after the closing quote, and an
// error for unterminated literals or trailing escape characters.
func stringLiteral(runes []rune) (string, []rune, error) {
	idx := 1 // Always starts with "
	literal := ""
	for idx < len(runes) && runes[idx] != '"' {
		if runes[idx] == '\\' {
			if idx == len(runes)-1 {
				return "", nil, errors.New("unmatched escape sequence")
			}
			// TODO \n, \r, \uXXXX, ... escape sequences
			idx++
		}
		literal += string(runes[idx])
		idx++
	}
	if idx == len(runes) {
		return "", nil, errors.New("unclosed string literal")
	}
	idx++ // Skip the closing quote
	return literal, runes[idx:], nil
}

// nextToken returns the next raw token, the remaining source, and an error.
// An empty token signals the end of the input.
func nextToken(program string) (string, string, error) {
	// Skip whitespace
	start := 0
	runes := []rune(program)
	for start < len(runes) && slices.Contains(Whitespace, runes[start]) {
		start++
	}
	if start == len(runes) {
		return "", "", nil
	}

	if runes[start] == '"' {
		// String literal
		literal, remaining, err := stringLiteral(runes[start:])
		if err != nil {
			return "", "", err
		}
		return "\"" + literal + "\"", string(remaining), nil
	}

	// Read until the next whitespace or separator character
	end := start
	for end < len(runes) && !slices.Contains(Whitespace, runes[end]) && !slices.Contains(Separators, runes[end]) {
		end++
	}
	if start == end {
		// The current character is itself a separator; emit it as a single-rune token
		end++
	}
	return string(runes[start:end]), string(runes[end:]), nil
}

// parseNumber parses raw as the given numeric primitive type and returns the
// value as an int64, uint64 or float64.
func parseNumber(raw string, numberType PrimitiveType) (any, error) {
	if isSignedInt(numberType) {
		return strconv.ParseInt(raw, 10, getBits(numberType))
	}
	if isUnsignedInt(numberType) {
		return strconv.ParseUint(raw, 10, getBits(numberType))
	}
	if isFloatingPoint(numberType) {
		return strconv.ParseFloat(raw, getBits(numberType))
	}
	panic("Unhandled type (" + strconv.FormatUint(uint64(numberType), 10) + ") in parseNumber()")
}

// parseToken classifies a raw token as a literal, keyword, separator or identifier.
func parseToken(token string) (*LexToken, error) {
	if strings.HasPrefix(token, "\"") {
		return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: token[1 : len(token)-1]}}, nil
	}

	runes := []rune(token)
	startsWithMinus := runes[0] == '-'
	if startsWithMinus || unicode.IsDigit(runes[0]) {
		// Number literal
		// TODO: hexadecimal/binary/octal constants
		var numberType PrimitiveType = InvalidValue
		var rawNumber string = token
		// An explicit type-name suffix on the constant selects its primitive type
		for i, name := range NumberTypeNames {
			if strings.HasSuffix(token, name) {
				numberType = PrimitiveType(i)
				rawNumber = token[:len(token)-len(name)]
			}
		}

		containsDot := slices.Contains(runes, '.')
		if numberType == InvalidValue {
			if containsDot {
				numberType = Primitive_F64
			} else if startsWithMinus {
				numberType = Primitive_I64
			} else {
				numberType = Primitive_U64
			}
		}
		if containsDot && !isFloatingPoint(numberType) {
			return nil, errors.New("dot in non floating-point constant")
		}

		number, err := parseNumber(rawNumber, numberType)
		if err != nil {
			return nil, err
		}
		return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil
	}
	switch token {
	case "void":
		return &LexToken{Type: Type_Keyword, Value: Keyword_Void}, nil
	case "import":
		return &LexToken{Type: Type_Keyword, Value: Keyword_Import}, nil
	case "(":
		return &LexToken{Type: Type_Separator, Value: Separator_OpenParen}, nil
	case ")":
		return &LexToken{Type: Type_Separator, Value: Separator_CloseParen}, nil
	case "{":
		return &LexToken{Type: Type_Separator, Value: Separator_OpenCurly}, nil
	case "}":
		return &LexToken{Type: Type_Separator, Value: Separator_CloseCurly}, nil
	case ";":
		return &LexToken{Type: Type_Separator, Value: Separator_Semicolon}, nil
	case ",":
		return &LexToken{Type: Type_Separator, Value: Separator_Comma}, nil
	default:
		return &LexToken{Type: Type_Identifier, Value: token}, nil
	}
}

// lexer turns a whole program into a flat token stream, stopping at the first
// lexing error.
func lexer(program string) ([]LexToken, error) {
	var tokens []LexToken
	for len(program) > 0 {
		token, rest, err := nextToken(program)
		if err != nil {
			return nil, err
		}
		if len(token) == 0 {
			// Only trailing whitespace was left
			break
		}

		lexToken, err := parseToken(token)
		if err != nil {
			return nil, err
		}

		program = rest
		tokens = append(tokens, *lexToken)
	}
	return tokens, nil
}
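
// lexerUsageSketch is an illustrative usage sketch, not part of the original
// file: assuming the PrimitiveType helpers referenced above are defined
// elsewhere in this package, it runs the lexer over a small hypothetical
// program and returns the resulting token stream (keywords, separators,
// identifiers, a string literal and number literals).
func lexerUsageSketch() ([]LexToken, error) {
	return lexer(`import console;

void main() {
	console.log("Hello", 42, 3.14);
}`)
}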