From 0318a830992b3f87599570446e354d3d6d4e1f9c Mon Sep 17 00:00:00 2001 From: MrLetsplay Date: Wed, 13 Mar 2024 17:17:09 +0100 Subject: [PATCH] Rework lexer structure --- lexer.go | 170 ++++++++++++++++++++++++++++++++++-------------------- main.go | 2 +- parser.go | 76 +++++++++++++++--------- 3 files changed, 159 insertions(+), 89 deletions(-) diff --git a/lexer.go b/lexer.go index 4fc10c5..d857c88 100644 --- a/lexer.go +++ b/lexer.go @@ -58,65 +58,112 @@ type Literal struct { Value any } -func stringLiteral(runes []rune) (string, []rune, error) { - idx := 1 // Always starts with " - literal := "" - for idx < len(runes) && runes[idx] != '"' { - if runes[idx] == '\\' { - if idx == len(runes)-1 { - return "", nil, errors.New("unmatched escape sequence") - } - - // TODO \n, \r, \uXXXX, ... escape sequences - - idx++ - } - - literal += string(runes[idx]) - idx++ - } - - if idx == len(runes) { - return "", nil, errors.New("unclosed string literal") - } - - idx++ - return literal, runes[idx:], nil +type Lexer struct { + Runes []rune + Position uint64 } -// source -> token, remaining source, error -func nextToken(program string) (string, string, error) { - // Skip whitespace - start := 0 - runes := []rune(program) - for start < len(runes) && slices.Contains(Whitespace, runes[start]) { - start++ +func (l *Lexer) error(message string) error { + return errors.New(message + " (at " + strconv.FormatUint(l.Position, 10) + ")") +} + +func (l *Lexer) peekRune() *rune { + if len(l.Runes) == 0 { + return nil } - if start == len(runes) { - return "", "", nil + return &l.Runes[0] +} + +func (l *Lexer) nextRune() *rune { + if len(l.Runes) == 0 { + return nil } - if runes[start] == '"' { - // String literal - literal, remaining, err := stringLiteral(runes[start:]) - if err != nil { - return "", "", err + r := l.Runes[0] + l.Runes = l.Runes[1:] + l.Position++ + return &r +} + +func (l *Lexer) stringLiteral() (string, error) { + openQuote := l.nextRune() + if openQuote == nil || *openQuote != '"' { + return "", l.error("expected \"") + } + + literal := "" + for { + r := l.nextRune() + if r == nil { + return "", l.error("unexpected end of file") } - return "\"" + literal + "\"", string(remaining), nil + if *r == '"' { + break + } + + if *r == '\\' { + escaped := l.nextRune() + if escaped == nil { + return "", l.error("unmatched escape sequence") + } + + literal += string(*escaped) + continue + } + + literal += string(*r) } - end := start - for end < len(runes) && !slices.Contains(Whitespace, runes[end]) && !slices.Contains(Separators, runes[end]) { - end++ + return literal, nil +} + +// TODO: maybe this method should directly return LexToken +func (l *Lexer) nextToken() (string, error) { + // Skip whitespace + for { + r := l.peekRune() + if r == nil { + return "", nil + } + + if !slices.Contains(Whitespace, *r) { + break + } + + l.nextRune() } - if start == end { - end++ + r := l.peekRune() + if r == nil { + return "", nil } - return string(runes[start:end]), string(runes[end:]), nil + if *r == '"' { + literal, err := l.stringLiteral() + if err != nil { + return "", err + } + + return "\"" + literal + "\"", nil + } + + token := "" + for { + r := l.peekRune() + if r == nil || slices.Contains(Whitespace, *r) || slices.Contains(Separators, *r) { + break + } + + token += string(*l.nextRune()) + } + + if len(token) == 0 && len(l.Runes) > 0 { + return string(*l.nextRune()), nil + } + + return token, nil } func parseNumber(raw string, numberType PrimitiveType) (any, error) { @@ -135,9 +182,9 @@ func parseNumber(raw string, numberType PrimitiveType) (any, error) { panic("Unhandled type (" + strconv.FormatUint(uint64(numberType), 10) + ") in parseNumber()") } -func parseToken(token string) (*LexToken, error) { +func (l *Lexer) parseToken(token string) (*LexToken, error) { if strings.HasPrefix(token, "\"") { - return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: token[1 : len(token)-1]}}, nil + return &LexToken{Type: Type_Literal, Position: l.Position, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: token[1 : len(token)-1]}}, nil } runes := []rune(token) @@ -175,36 +222,38 @@ func parseToken(token string) (*LexToken, error) { return nil, err } - return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil + return &LexToken{Type: Type_Literal, Position: l.Position, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil } switch token { case "void": - return &LexToken{Type: Type_Keyword, Value: Keyword_Void}, nil + return &LexToken{Type: Type_Keyword, Position: l.Position, Value: Keyword_Void}, nil case "import": - return &LexToken{Type: Type_Keyword, Value: Keyword_Import}, nil + return &LexToken{Type: Type_Keyword, Position: l.Position, Value: Keyword_Import}, nil case "(": - return &LexToken{Type: Type_Separator, Value: Separator_OpenParen}, nil + return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_OpenParen}, nil case ")": - return &LexToken{Type: Type_Separator, Value: Separator_CloseParen}, nil + return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_CloseParen}, nil case "{": - return &LexToken{Type: Type_Separator, Value: Separator_OpenCurly}, nil + return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_OpenCurly}, nil case "}": - return &LexToken{Type: Type_Separator, Value: Separator_CloseCurly}, nil + return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_CloseCurly}, nil case ";": - return &LexToken{Type: Type_Separator, Value: Separator_Semicolon}, nil + return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_Semicolon}, nil case ",": - return &LexToken{Type: Type_Separator, Value: Separator_Comma}, nil + return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_Comma}, nil default: - return &LexToken{Type: Type_Identifier, Value: token}, nil + return &LexToken{Type: Type_Identifier, Position: l.Position, Value: token}, nil } } func lexer(program string) ([]LexToken, error) { var tokens []LexToken - for len(program) > 0 { - token, rest, err := nextToken(program) + lexer := Lexer{Runes: []rune(program)} + + for { + token, err := lexer.nextToken() if err != nil { return nil, err } @@ -213,12 +262,11 @@ func lexer(program string) ([]LexToken, error) { break } - lexToken, err := parseToken(token) + lexToken, err := lexer.parseToken(token) if err != nil { return nil, err } - program = rest tokens = append(tokens, *lexToken) } diff --git a/main.go b/main.go index c80b813..958aa35 100644 --- a/main.go +++ b/main.go @@ -23,7 +23,7 @@ func main() { log.Printf("Tokens:\n%+#v\n\n", tokens) - parser := Parser{tokens} + parser := Parser{Tokens: tokens} parsed, err := parser.parseFile() if err != nil { log.Fatalln(err) diff --git a/parser.go b/parser.go index 2479aaa..dbe3b3e 100644 --- a/parser.go +++ b/parser.go @@ -2,8 +2,8 @@ package main import ( "errors" - "log" "slices" + "strconv" "strings" ) @@ -131,24 +131,30 @@ type ParsedFile struct { } type Parser struct { - tokens []LexToken + Tokens []LexToken + Position uint64 +} + +func (p *Parser) error(message string) error { + return errors.New(message + " (at " + strconv.FormatUint(p.Position, 10) + ")") } func (p *Parser) peekToken() *LexToken { - if len(p.tokens) == 0 { + if len(p.Tokens) == 0 { return nil } - return &p.tokens[0] + return &p.Tokens[0] } func (p *Parser) nextToken() *LexToken { - if len(p.tokens) == 0 { + if len(p.Tokens) == 0 { return nil } - token := p.tokens[0] - p.tokens = p.tokens[1:] + token := p.Tokens[0] + p.Tokens = p.Tokens[1:] + p.Position = token.Position return &token } @@ -160,7 +166,7 @@ func (p *Parser) expectSeparator(separators ...Separator) (Separator, error) { separator := p.nextToken() if separator == nil || separator.Type != Type_Separator || !slices.Contains(separators, separator.Value.(Separator)) { - return InvalidValue, errors.New("expected one of " + strings.Join(separatorNames, " ")) + return InvalidValue, p.error("expected one of " + strings.Join(separatorNames, " ")) } return separator.Value.(Separator), nil @@ -169,7 +175,7 @@ func (p *Parser) expectSeparator(separators ...Separator) (Separator, error) { func (p *Parser) expectIdentifier() (string, error) { identifier := p.nextToken() if identifier == nil || identifier.Type != Type_Separator && identifier.Type != Type_Identifier { - return "", errors.New("expected identifier") + return "", p.error("expected identifier") } return identifier.Value.(string), nil @@ -180,12 +186,12 @@ func (p *Parser) expectImport() (*Import, error) { importToken := p.nextToken() if importToken == nil || importToken.Type != Type_Keyword || importToken.Value.(Keyword) != Keyword_Import { - return nil, errors.New("expected import") + return nil, p.error("expected import") } identifier := p.nextToken() if identifier == nil || identifier.Type != Type_Identifier { - return nil, errors.New("expected identifier") + return nil, p.error("expected identifier") } _, err = p.expectSeparator(Separator_Semicolon) @@ -201,7 +207,7 @@ func (p *Parser) expectType() (*Type, error) { tok := p.nextToken() if tok == nil { - return nil, errors.New("expected type") + return nil, p.error("expected type") } if tok.Type == Type_Keyword && tok.Value.(Keyword) == Keyword_Void { @@ -232,7 +238,7 @@ func (p *Parser) expectType() (*Type, error) { } if len(types) == 0 { - return nil, errors.New("empty tuple") + return nil, p.error("empty tuple") } return &Type{Type: Type_Tuple, Value: TupleType{Types: types}}, nil @@ -242,7 +248,32 @@ func (p *Parser) expectType() (*Type, error) { return &Type{Type: Type_Named, Value: tok.Value}, nil } - return nil, errors.New("expected type") + return nil, p.error("expected type") +} + +func (p *Parser) expectBlock() (*Block, error) { + _, err := p.expectSeparator(Separator_OpenCurly) + if err != nil { + return nil, err + } + + var statements []Statement + for { + token := p.peekToken() + if token == nil { + return nil, p.error("expected statement or }") + } + + if token.Type == Type_Separator && token.Value.(Separator) == Separator_CloseCurly { + p.nextToken() + break + } + + // TODO: parse statement + p.nextToken() + } + + return &Block{Statements: statements}, nil } func (p *Parser) expectFunction() (*ParsedFunction, error) { @@ -251,7 +282,7 @@ func (p *Parser) expectFunction() (*ParsedFunction, error) { var name string var parameters []ParsedParameter var returnType *Type - var body Block + var body *Block returnType, err = p.expectType() if err != nil { @@ -271,7 +302,7 @@ func (p *Parser) expectFunction() (*ParsedFunction, error) { for { token := p.peekToken() if token == nil { - return nil, errors.New("incomplete function declaration") + return nil, p.error("incomplete function declaration") } if token.Type == Type_Separator && token.Value.(Separator) == Separator_CloseParen { @@ -301,19 +332,12 @@ func (p *Parser) expectFunction() (*ParsedFunction, error) { parameters = append(parameters, ParsedParameter{Name: paramName, Type: *paramType}) } - _, err = p.expectSeparator(Separator_OpenCurly) + body, err = p.expectBlock() if err != nil { return nil, err } - // TODO: body, closing curly - - _, err = p.expectSeparator(Separator_CloseCurly) - if err != nil { - return nil, err - } - - return &ParsedFunction{Name: name, Parameters: parameters, ReturnType: *returnType, Body: body}, nil + return &ParsedFunction{Name: name, Parameters: parameters, ReturnType: *returnType, Body: *body}, nil } func (p *Parser) parseFile() (*ParsedFile, error) { @@ -328,8 +352,6 @@ func (p *Parser) parseFile() (*ParsedFile, error) { break } - log.Printf("%+#v\n", token) - if token.Type == Type_Keyword && token.Value.(Keyword) == Keyword_Import { var parsedImport *Import parsedImport, err = p.expectImport()