package main

import (
	"errors"
	"fmt"
	"slices"
	"strings"
)

// Whitespace lists the runes skipped between tokens.
var Whitespace = []rune{' ', '\t', '\r', '\n'}

// Separators lists the runes that form single-character tokens.
var Separators = []rune{'(', ')', '{', '}', ';'}

// LexType classifies a lexed token.
type LexType uint32

const (
	Type_Identifier LexType = iota
	Type_Keyword
	Type_Separator
	Type_LiteralString
	Type_LiteralNumber
)

// Keyword enumerates the reserved words recognized by parseToken.
type Keyword uint32

const (
	Keyword_Import Keyword = iota
	Keyword_Void
)

// Separator enumerates the punctuation tokens.
type Separator uint32

const (
	Separator_OpenParen Separator = iota
	Separator_CloseParen
	Separator_OpenCurly
	Separator_CloseCurly
	Separator_Semicolon
)

// LiteralType distinguishes the kinds of literal values.
type LiteralType uint32

const (
	Literal_String LiteralType = iota
	Literal_Number
)

// LexToken is a single token produced by the lexer. Value holds a Keyword,
// Separator, identifier string, or literal value depending on Type.
type LexToken struct {
	Type     LexType
	Position uint64 // TODO: not yet populated by the lexer
	Value    any
}

// Literal pairs a literal value with its kind.
type Literal struct {
	Type  LiteralType
	Value any
}

// stringLiteral consumes a double-quoted string literal from the start of
// runes and returns its contents (without the quotes), the runes remaining
// after the closing quote, and an error if the literal is unterminated.
func stringLiteral(runes []rune) (string, []rune, error) {
	idx := 1 // Always starts with "
	literal := ""
	for idx < len(runes) && runes[idx] != '"' {
		if runes[idx] == '\\' {
			if idx == len(runes)-1 {
				return "", nil, errors.New("unmatched escape sequence")
			}
			// TODO: \n, \r, \uXXXX, ... escape sequences; for now the
			// escaped rune is copied through verbatim.
			idx++
		}
		literal += string(runes[idx])
		idx++
	}
	if idx == len(runes) {
		return "", nil, errors.New("unclosed string literal")
	}
	idx++ // Skip the closing quote
	return literal, runes[idx:], nil
}

// nextToken extracts the next token from program and returns the token, the
// remaining source, and an error. An empty token signals end of input.
func nextToken(program string) (string, string, error) {
	// Skip whitespace
	start := 0
	runes := []rune(program)
	for start < len(runes) && slices.Contains(Whitespace, runes[start]) {
		start++
	}
	if start == len(runes) {
		return "", "", nil
	}
	if runes[start] == '"' {
		// String literal: re-wrap it in quotes so parseToken can
		// recognize it by its prefix.
		literal, remaining, err := stringLiteral(runes[start:])
		if err != nil {
			return "", "", err
		}
		return "\"" + literal + "\"", string(remaining), nil
	}
	// Scan until whitespace or a separator ends the token
	end := start
	for end < len(runes) && !slices.Contains(Whitespace, runes[end]) && !slices.Contains(Separators, runes[end]) {
		end++
	}
	if start == end {
		// The current rune is itself a separator; consume just that rune
		end++
	}
	return string(runes[start:end]), string(runes[end:]), nil
}

// parseToken classifies a raw token as a keyword, separator, string literal,
// or identifier.
func parseToken(token string) (LexToken, error) {
	if strings.HasPrefix(token, "\"") {
		// Strip the quotes re-added by nextToken
		return LexToken{Type: Type_LiteralString, Value: token[1 : len(token)-1]}, nil
	}
	switch token {
	case "void":
		return LexToken{Type: Type_Keyword, Value: Keyword_Void}, nil
	case "import":
		return LexToken{Type: Type_Keyword, Value: Keyword_Import}, nil
	case "(":
		return LexToken{Type: Type_Separator, Value: Separator_OpenParen}, nil
	case ")":
		return LexToken{Type: Type_Separator, Value: Separator_CloseParen}, nil
	case "{":
		return LexToken{Type: Type_Separator, Value: Separator_OpenCurly}, nil
	case "}":
		return LexToken{Type: Type_Separator, Value: Separator_CloseCurly}, nil
	case ";":
		return LexToken{Type: Type_Separator, Value: Separator_Semicolon}, nil
	default:
		return LexToken{Type: Type_Identifier, Value: token}, nil
	}
}

// lexer tokenizes program into a sequence of LexTokens.
func lexer(program string) ([]LexToken, error) {
	var tokens []LexToken
	for len(program) > 0 {
		token, rest, err := nextToken(program)
		if err != nil {
			return nil, err
		}
		if len(token) == 0 {
			// Only whitespace remained
			break
		}
		lexToken, err := parseToken(token)
		if err != nil {
			return nil, err
		}
		program = rest
		tokens = append(tokens, lexToken)
	}
	return tokens, nil
}
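
// dumpTokens is a minimal usage sketch added for illustration; the function
// name and the sample source in the example below are hypothetical, not part
// of the original lexer. It shows how lexer is driven and that a LexToken's
// Value carries a different dynamic type depending on its LexType.
//
// Example:
//
//	dumpTokens(`import "std"; void main() { print("hello"); }`)
func dumpTokens(source string) error {
	tokens, err := lexer(source)
	if err != nil {
		return err
	}
	for _, t := range tokens {
		// Value is a Keyword, Separator, or string depending on Type
		fmt.Printf("type=%d value=%T(%v)\n", t.Type, t.Value, t.Value)
	}
	return nil
}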