Rework lexer structure

MrLetsplay 2024-03-13 17:17:09 +01:00
parent f07754d79a
commit 0318a83099
Signed by: mr
SSH Key Fingerprint: SHA256:92jBH80vpXyaZHjaIl47pjRq+Yt7XGTArqQg1V7hSqg
3 changed files with 159 additions and 89 deletions
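
In short: the free functions stringLiteral and nextToken, which threaded the remaining source through their return values, are replaced by a Lexer struct that owns the rune slice and tracks a Position, so lexing and parsing errors can report where they occurred. A minimal sketch of how the reworked lexer is driven, mirroring the new lexer() function below (the helper name and sample input are made up, and "log" is assumed to be imported):

// dumpTokens is a hypothetical helper built on the Lexer type from this commit.
func dumpTokens(program string) error {
	l := Lexer{Runes: []rune(program)}
	for {
		token, err := l.nextToken()
		if err != nil {
			return err // errors now carry a rune offset, e.g. "unexpected end of file (at 12)"
		}
		if len(token) == 0 {
			return nil // nextToken returns "" once the input is exhausted
		}
		lexToken, err := l.parseToken(token)
		if err != nil {
			return err
		}
		log.Printf("%+#v\n", *lexToken)
	}
}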

lexer.go
View File

@@ -58,65 +58,112 @@ type Literal struct {
Value any
}
func stringLiteral(runes []rune) (string, []rune, error) {
idx := 1 // Always starts with "
literal := ""
for idx < len(runes) && runes[idx] != '"' {
if runes[idx] == '\\' {
if idx == len(runes)-1 {
return "", nil, errors.New("unmatched escape sequence")
}
// TODO \n, \r, \uXXXX, ... escape sequences
idx++
}
literal += string(runes[idx])
idx++
}
if idx == len(runes) {
return "", nil, errors.New("unclosed string literal")
}
idx++
return literal, runes[idx:], nil
type Lexer struct {
Runes []rune
Position uint64
}
// source -> token, remaining source, error
func nextToken(program string) (string, string, error) {
// Skip whitespace
start := 0
runes := []rune(program)
for start < len(runes) && slices.Contains(Whitespace, runes[start]) {
start++
func (l *Lexer) error(message string) error {
return errors.New(message + " (at " + strconv.FormatUint(l.Position, 10) + ")")
}
func (l *Lexer) peekRune() *rune {
if len(l.Runes) == 0 {
return nil
}
if start == len(runes) {
return "", "", nil
return &l.Runes[0]
}
func (l *Lexer) nextRune() *rune {
if len(l.Runes) == 0 {
return nil
}
if runes[start] == '"' {
// String literal
literal, remaining, err := stringLiteral(runes[start:])
if err != nil {
return "", "", err
r := l.Runes[0]
l.Runes = l.Runes[1:]
l.Position++
return &r
}
func (l *Lexer) stringLiteral() (string, error) {
openQuote := l.nextRune()
if openQuote == nil || *openQuote != '"' {
return "", l.error("expected \"")
}
literal := ""
for {
r := l.nextRune()
if r == nil {
return "", l.error("unexpected end of file")
}
return "\"" + literal + "\"", string(remaining), nil
if *r == '"' {
break
}
if *r == '\\' {
escaped := l.nextRune()
if escaped == nil {
return "", l.error("unmatched escape sequence")
}
literal += string(*escaped)
continue
}
literal += string(*r)
}
end := start
for end < len(runes) && !slices.Contains(Whitespace, runes[end]) && !slices.Contains(Separators, runes[end]) {
end++
return literal, nil
}
// TODO: maybe this method should directly return LexToken
func (l *Lexer) nextToken() (string, error) {
// Skip whitespace
for {
r := l.peekRune()
if r == nil {
return "", nil
}
if !slices.Contains(Whitespace, *r) {
break
}
l.nextRune()
}
if start == end {
end++
r := l.peekRune()
if r == nil {
return "", nil
}
return string(runes[start:end]), string(runes[end:]), nil
if *r == '"' {
literal, err := l.stringLiteral()
if err != nil {
return "", err
}
return "\"" + literal + "\"", nil
}
token := ""
for {
r := l.peekRune()
if r == nil || slices.Contains(Whitespace, *r) || slices.Contains(Separators, *r) {
break
}
token += string(*l.nextRune())
}
if len(token) == 0 && len(l.Runes) > 0 {
return string(*l.nextRune()), nil
}
return token, nil
}
func parseNumber(raw string, numberType PrimitiveType) (any, error) {
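
With the reworked nextToken above, whitespace is skipped, string literals come back with their surrounding quotes, identifiers run until the next whitespace or separator, and a lone separator is returned as a single-rune token. A hedged illustration (assumes Whitespace and Separators are defined as elsewhere in lexer.go and that "fmt" is imported):

// Hypothetical snippet, not part of the commit. For the input below the token
// stream should be roughly: "void", "main", "(", ")", "{", "print", "(",
// "\"hi\"", ")", ";", "}".
func printTokens() {
	l := Lexer{Runes: []rune(`void main() { print("hi"); }`)}
	for {
		tok, err := l.nextToken()
		if err != nil || len(tok) == 0 {
			break
		}
		fmt.Printf("%q ", tok)
	}
}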
@@ -135,9 +182,9 @@ func parseNumber(raw string, numberType PrimitiveType) (any, error) {
panic("Unhandled type (" + strconv.FormatUint(uint64(numberType), 10) + ") in parseNumber()")
}
func parseToken(token string) (*LexToken, error) {
func (l *Lexer) parseToken(token string) (*LexToken, error) {
if strings.HasPrefix(token, "\"") {
return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: token[1 : len(token)-1]}}, nil
return &LexToken{Type: Type_Literal, Position: l.Position, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: token[1 : len(token)-1]}}, nil
}
runes := []rune(token)
@@ -175,36 +222,38 @@ func parseToken(token string) (*LexToken, error) {
return nil, err
}
return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil
return &LexToken{Type: Type_Literal, Position: l.Position, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil
}
switch token {
case "void":
return &LexToken{Type: Type_Keyword, Value: Keyword_Void}, nil
return &LexToken{Type: Type_Keyword, Position: l.Position, Value: Keyword_Void}, nil
case "import":
return &LexToken{Type: Type_Keyword, Value: Keyword_Import}, nil
return &LexToken{Type: Type_Keyword, Position: l.Position, Value: Keyword_Import}, nil
case "(":
return &LexToken{Type: Type_Separator, Value: Separator_OpenParen}, nil
return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_OpenParen}, nil
case ")":
return &LexToken{Type: Type_Separator, Value: Separator_CloseParen}, nil
return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_CloseParen}, nil
case "{":
return &LexToken{Type: Type_Separator, Value: Separator_OpenCurly}, nil
return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_OpenCurly}, nil
case "}":
return &LexToken{Type: Type_Separator, Value: Separator_CloseCurly}, nil
return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_CloseCurly}, nil
case ";":
return &LexToken{Type: Type_Separator, Value: Separator_Semicolon}, nil
return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_Semicolon}, nil
case ",":
return &LexToken{Type: Type_Separator, Value: Separator_Comma}, nil
return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_Comma}, nil
default:
return &LexToken{Type: Type_Identifier, Value: token}, nil
return &LexToken{Type: Type_Identifier, Position: l.Position, Value: token}, nil
}
}
func lexer(program string) ([]LexToken, error) {
var tokens []LexToken
for len(program) > 0 {
token, rest, err := nextToken(program)
lexer := Lexer{Runes: []rune(program)}
for {
token, err := lexer.nextToken()
if err != nil {
return nil, err
}
@@ -213,12 +262,11 @@ func lexer(program string) ([]LexToken, error) {
break
}
lexToken, err := parseToken(token)
lexToken, err := lexer.parseToken(token)
if err != nil {
return nil, err
}
program = rest
tokens = append(tokens, *lexToken)
}

View File

@@ -23,7 +23,7 @@ func main() {
log.Printf("Tokens:\n%+#v\n\n", tokens)
parser := Parser{tokens}
parser := Parser{Tokens: tokens}
parsed, err := parser.parseFile()
if err != nil {
log.Fatalln(err)
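
The positional literal Parser{tokens} becomes Parser{Tokens: tokens} because Parser gains a second field, Position, in this commit. The payoff is that parse errors now carry a rune offset; a hypothetical sketch (helper name, input and exact offset are illustrative, "log" is assumed to be imported, and it assumes a top-level token that is not an import is parsed as a function):

// Sketch only, not part of the commit.
func demoParseError() {
	tokens, err := lexer("void ") // a return type with nothing after it
	if err != nil {
		log.Fatalln(err)
	}
	p := Parser{Tokens: tokens}
	if _, err := p.parseFile(); err != nil {
		log.Println(err) // e.g. "expected identifier (at 4)"
	}
}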

View File

@@ -2,8 +2,8 @@ package main
import (
"errors"
"log"
"slices"
"strconv"
"strings"
)
@@ -131,24 +131,30 @@ type ParsedFile struct {
}
type Parser struct {
tokens []LexToken
Tokens []LexToken
Position uint64
}
func (p *Parser) error(message string) error {
return errors.New(message + " (at " + strconv.FormatUint(p.Position, 10) + ")")
}
func (p *Parser) peekToken() *LexToken {
if len(p.tokens) == 0 {
if len(p.Tokens) == 0 {
return nil
}
return &p.tokens[0]
return &p.Tokens[0]
}
func (p *Parser) nextToken() *LexToken {
if len(p.tokens) == 0 {
if len(p.Tokens) == 0 {
return nil
}
token := p.tokens[0]
p.tokens = p.tokens[1:]
token := p.Tokens[0]
p.Tokens = p.Tokens[1:]
p.Position = token.Position
return &token
}
@@ -160,7 +166,7 @@ func (p *Parser) expectSeparator(separators ...Separator) (Separator, error) {
separator := p.nextToken()
if separator == nil || separator.Type != Type_Separator || !slices.Contains(separators, separator.Value.(Separator)) {
return InvalidValue, errors.New("expected one of " + strings.Join(separatorNames, " "))
return InvalidValue, p.error("expected one of " + strings.Join(separatorNames, " "))
}
return separator.Value.(Separator), nil
@@ -169,7 +175,7 @@ func (p *Parser) expectSeparator(separators ...Separator) (Separator, error) {
func (p *Parser) expectIdentifier() (string, error) {
identifier := p.nextToken()
if identifier == nil || identifier.Type != Type_Separator && identifier.Type != Type_Identifier {
return "", errors.New("expected identifier")
return "", p.error("expected identifier")
}
return identifier.Value.(string), nil
@@ -180,12 +186,12 @@ func (p *Parser) expectImport() (*Import, error) {
importToken := p.nextToken()
if importToken == nil || importToken.Type != Type_Keyword || importToken.Value.(Keyword) != Keyword_Import {
return nil, errors.New("expected import")
return nil, p.error("expected import")
}
identifier := p.nextToken()
if identifier == nil || identifier.Type != Type_Identifier {
return nil, errors.New("expected identifier")
return nil, p.error("expected identifier")
}
_, err = p.expectSeparator(Separator_Semicolon)
@@ -201,7 +207,7 @@ func (p *Parser) expectType() (*Type, error) {
tok := p.nextToken()
if tok == nil {
return nil, errors.New("expected type")
return nil, p.error("expected type")
}
if tok.Type == Type_Keyword && tok.Value.(Keyword) == Keyword_Void {
@@ -232,7 +238,7 @@ func (p *Parser) expectType() (*Type, error) {
}
if len(types) == 0 {
return nil, errors.New("empty tuple")
return nil, p.error("empty tuple")
}
return &Type{Type: Type_Tuple, Value: TupleType{Types: types}}, nil
@@ -242,7 +248,32 @@ func (p *Parser) expectType() (*Type, error) {
return &Type{Type: Type_Named, Value: tok.Value}, nil
}
return nil, errors.New("expected type")
return nil, p.error("expected type")
}
func (p *Parser) expectBlock() (*Block, error) {
_, err := p.expectSeparator(Separator_OpenCurly)
if err != nil {
return nil, err
}
var statements []Statement
for {
token := p.peekToken()
if token == nil {
return nil, p.error("expected statement or }")
}
if token.Type == Type_Separator && token.Value.(Separator) == Separator_CloseCurly {
p.nextToken()
break
}
// TODO: parse statement
p.nextToken()
}
return &Block{Statements: statements}, nil
}
func (p *Parser) expectFunction() (*ParsedFunction, error) {
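
The new expectBlock added above consumes an opening curly brace and then skips tokens until the next closing one (statement parsing is still a TODO), so for now every body parses as an empty Block. A hypothetical illustration (assumes "log" is imported):

// Sketch only, not part of the commit.
func demoBlock() {
	tokens, err := lexer("{ print; }")
	if err != nil {
		log.Fatalln(err)
	}
	p := Parser{Tokens: tokens}
	block, err := p.expectBlock()
	if err != nil {
		log.Fatalln(err)
	}
	log.Printf("%d statements\n", len(block.Statements)) // prints "0 statements"
}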
@@ -251,7 +282,7 @@ func (p *Parser) expectFunction() (*ParsedFunction, error) {
var name string
var parameters []ParsedParameter
var returnType *Type
var body Block
var body *Block
returnType, err = p.expectType()
if err != nil {
@@ -271,7 +302,7 @@ func (p *Parser) expectFunction() (*ParsedFunction, error) {
for {
token := p.peekToken()
if token == nil {
return nil, errors.New("incomplete function declaration")
return nil, p.error("incomplete function declaration")
}
if token.Type == Type_Separator && token.Value.(Separator) == Separator_CloseParen {
@@ -301,19 +332,12 @@ func (p *Parser) expectFunction() (*ParsedFunction, error) {
parameters = append(parameters, ParsedParameter{Name: paramName, Type: *paramType})
}
_, err = p.expectSeparator(Separator_OpenCurly)
body, err = p.expectBlock()
if err != nil {
return nil, err
}
// TODO: body, closing curly
_, err = p.expectSeparator(Separator_CloseCurly)
if err != nil {
return nil, err
}
return &ParsedFunction{Name: name, Parameters: parameters, ReturnType: *returnType, Body: body}, nil
return &ParsedFunction{Name: name, Parameters: parameters, ReturnType: *returnType, Body: *body}, nil
}
func (p *Parser) parseFile() (*ParsedFile, error) {
@@ -328,8 +352,6 @@ func (p *Parser) parseFile() (*ParsedFile, error) {
break
}
log.Printf("%+#v\n", token)
if token.Type == Type_Keyword && token.Value.(Keyword) == Keyword_Import {
var parsedImport *Import
parsedImport, err = p.expectImport()