Rework lexer structure
This commit is contained in:
parent
f07754d79a
commit
0318a83099
170
lexer.go
170
lexer.go
@ -58,65 +58,112 @@ type Literal struct {
|
||||
Value any
|
||||
}
|
||||
|
||||
func stringLiteral(runes []rune) (string, []rune, error) {
|
||||
idx := 1 // Always starts with "
|
||||
literal := ""
|
||||
for idx < len(runes) && runes[idx] != '"' {
|
||||
if runes[idx] == '\\' {
|
||||
if idx == len(runes)-1 {
|
||||
return "", nil, errors.New("unmatched escape sequence")
|
||||
}
|
||||
|
||||
// TODO \n, \r, \uXXXX, ... escape sequences
|
||||
|
||||
idx++
|
||||
}
|
||||
|
||||
literal += string(runes[idx])
|
||||
idx++
|
||||
}
|
||||
|
||||
if idx == len(runes) {
|
||||
return "", nil, errors.New("unclosed string literal")
|
||||
}
|
||||
|
||||
idx++
|
||||
return literal, runes[idx:], nil
|
||||
// Lexer is a cursor over the part of the program text that has not been
// tokenized yet.
type Lexer struct {
	// Runes is the unread input; consumed runes are sliced off the front.
	Runes []rune
	// Position counts the runes consumed so far. It is appended to lexer
	// error messages and copied onto the tokens built by parseToken.
	Position uint64
}
|
||||
|
||||
// source -> token, remaining source, error
|
||||
func nextToken(program string) (string, string, error) {
|
||||
// Skip whitespace
|
||||
start := 0
|
||||
runes := []rune(program)
|
||||
for start < len(runes) && slices.Contains(Whitespace, runes[start]) {
|
||||
start++
|
||||
func (l *Lexer) error(message string) error {
|
||||
return errors.New(message + " (at " + strconv.FormatUint(l.Position, 10) + ")")
|
||||
}
|
||||
|
||||
func (l *Lexer) peekRune() *rune {
|
||||
if len(l.Runes) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if start == len(runes) {
|
||||
return "", "", nil
|
||||
return &l.Runes[0]
|
||||
}
|
||||
|
||||
func (l *Lexer) nextRune() *rune {
|
||||
if len(l.Runes) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if runes[start] == '"' {
|
||||
// String literal
|
||||
literal, remaining, err := stringLiteral(runes[start:])
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
r := l.Runes[0]
|
||||
l.Runes = l.Runes[1:]
|
||||
l.Position++
|
||||
return &r
|
||||
}
|
||||
|
||||
func (l *Lexer) stringLiteral() (string, error) {
|
||||
openQuote := l.nextRune()
|
||||
if openQuote == nil || *openQuote != '"' {
|
||||
return "", l.error("expected \"")
|
||||
}
|
||||
|
||||
literal := ""
|
||||
for {
|
||||
r := l.nextRune()
|
||||
if r == nil {
|
||||
return "", l.error("unexpected end of file")
|
||||
}
|
||||
|
||||
return "\"" + literal + "\"", string(remaining), nil
|
||||
if *r == '"' {
|
||||
break
|
||||
}
|
||||
|
||||
if *r == '\\' {
|
||||
escaped := l.nextRune()
|
||||
if escaped == nil {
|
||||
return "", l.error("unmatched escape sequence")
|
||||
}
|
||||
|
||||
literal += string(*escaped)
|
||||
continue
|
||||
}
|
||||
|
||||
literal += string(*r)
|
||||
}
|
||||
|
||||
end := start
|
||||
for end < len(runes) && !slices.Contains(Whitespace, runes[end]) && !slices.Contains(Separators, runes[end]) {
|
||||
end++
|
||||
return literal, nil
|
||||
}
|
||||
|
||||
// TODO: maybe this method should directly return LexToken
|
||||
func (l *Lexer) nextToken() (string, error) {
|
||||
// Skip whitespace
|
||||
for {
|
||||
r := l.peekRune()
|
||||
if r == nil {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
if !slices.Contains(Whitespace, *r) {
|
||||
break
|
||||
}
|
||||
|
||||
l.nextRune()
|
||||
}
|
||||
|
||||
if start == end {
|
||||
end++
|
||||
r := l.peekRune()
|
||||
if r == nil {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
return string(runes[start:end]), string(runes[end:]), nil
|
||||
if *r == '"' {
|
||||
literal, err := l.stringLiteral()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return "\"" + literal + "\"", nil
|
||||
}
|
||||
|
||||
token := ""
|
||||
for {
|
||||
r := l.peekRune()
|
||||
if r == nil || slices.Contains(Whitespace, *r) || slices.Contains(Separators, *r) {
|
||||
break
|
||||
}
|
||||
|
||||
token += string(*l.nextRune())
|
||||
}
|
||||
|
||||
if len(token) == 0 && len(l.Runes) > 0 {
|
||||
return string(*l.nextRune()), nil
|
||||
}
|
||||
|
||||
return token, nil
|
||||
}
|
||||
|
||||
func parseNumber(raw string, numberType PrimitiveType) (any, error) {
|
||||
@ -135,9 +182,9 @@ func parseNumber(raw string, numberType PrimitiveType) (any, error) {
|
||||
panic("Unhandled type (" + strconv.FormatUint(uint64(numberType), 10) + ") in parseNumber()")
|
||||
}
|
||||
|
||||
func parseToken(token string) (*LexToken, error) {
|
||||
func (l *Lexer) parseToken(token string) (*LexToken, error) {
|
||||
if strings.HasPrefix(token, "\"") {
|
||||
return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: token[1 : len(token)-1]}}, nil
|
||||
return &LexToken{Type: Type_Literal, Position: l.Position, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: token[1 : len(token)-1]}}, nil
|
||||
}
|
||||
|
||||
runes := []rune(token)
|
||||
@ -175,36 +222,38 @@ func parseToken(token string) (*LexToken, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil
|
||||
return &LexToken{Type: Type_Literal, Position: l.Position, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil
|
||||
}
|
||||
|
||||
switch token {
|
||||
case "void":
|
||||
return &LexToken{Type: Type_Keyword, Value: Keyword_Void}, nil
|
||||
return &LexToken{Type: Type_Keyword, Position: l.Position, Value: Keyword_Void}, nil
|
||||
case "import":
|
||||
return &LexToken{Type: Type_Keyword, Value: Keyword_Import}, nil
|
||||
return &LexToken{Type: Type_Keyword, Position: l.Position, Value: Keyword_Import}, nil
|
||||
case "(":
|
||||
return &LexToken{Type: Type_Separator, Value: Separator_OpenParen}, nil
|
||||
return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_OpenParen}, nil
|
||||
case ")":
|
||||
return &LexToken{Type: Type_Separator, Value: Separator_CloseParen}, nil
|
||||
return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_CloseParen}, nil
|
||||
case "{":
|
||||
return &LexToken{Type: Type_Separator, Value: Separator_OpenCurly}, nil
|
||||
return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_OpenCurly}, nil
|
||||
case "}":
|
||||
return &LexToken{Type: Type_Separator, Value: Separator_CloseCurly}, nil
|
||||
return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_CloseCurly}, nil
|
||||
case ";":
|
||||
return &LexToken{Type: Type_Separator, Value: Separator_Semicolon}, nil
|
||||
return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_Semicolon}, nil
|
||||
case ",":
|
||||
return &LexToken{Type: Type_Separator, Value: Separator_Comma}, nil
|
||||
return &LexToken{Type: Type_Separator, Position: l.Position, Value: Separator_Comma}, nil
|
||||
default:
|
||||
return &LexToken{Type: Type_Identifier, Value: token}, nil
|
||||
return &LexToken{Type: Type_Identifier, Position: l.Position, Value: token}, nil
|
||||
}
|
||||
}
|
||||
|
||||
func lexer(program string) ([]LexToken, error) {
|
||||
var tokens []LexToken
|
||||
|
||||
for len(program) > 0 {
|
||||
token, rest, err := nextToken(program)
|
||||
lexer := Lexer{Runes: []rune(program)}
|
||||
|
||||
for {
|
||||
token, err := lexer.nextToken()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -213,12 +262,11 @@ func lexer(program string) ([]LexToken, error) {
|
||||
break
|
||||
}
|
||||
|
||||
lexToken, err := parseToken(token)
|
||||
lexToken, err := lexer.parseToken(token)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
program = rest
|
||||
tokens = append(tokens, *lexToken)
|
||||
}
|
||||
|
||||
|
2
main.go
2
main.go
@ -23,7 +23,7 @@ func main() {
|
||||
|
||||
log.Printf("Tokens:\n%+#v\n\n", tokens)
|
||||
|
||||
parser := Parser{tokens}
|
||||
parser := Parser{Tokens: tokens}
|
||||
parsed, err := parser.parseFile()
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
|
76
parser.go
76
parser.go
@ -2,8 +2,8 @@ package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@ -131,24 +131,30 @@ type ParsedFile struct {
|
||||
}
|
||||
|
||||
type Parser struct {
|
||||
tokens []LexToken
|
||||
Tokens []LexToken
|
||||
Position uint64
|
||||
}
|
||||
|
||||
func (p *Parser) error(message string) error {
|
||||
return errors.New(message + " (at " + strconv.FormatUint(p.Position, 10) + ")")
|
||||
}
|
||||
|
||||
func (p *Parser) peekToken() *LexToken {
|
||||
if len(p.tokens) == 0 {
|
||||
if len(p.Tokens) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &p.tokens[0]
|
||||
return &p.Tokens[0]
|
||||
}
|
||||
|
||||
func (p *Parser) nextToken() *LexToken {
|
||||
if len(p.tokens) == 0 {
|
||||
if len(p.Tokens) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
token := p.tokens[0]
|
||||
p.tokens = p.tokens[1:]
|
||||
token := p.Tokens[0]
|
||||
p.Tokens = p.Tokens[1:]
|
||||
p.Position = token.Position
|
||||
return &token
|
||||
}
|
||||
|
||||
@ -160,7 +166,7 @@ func (p *Parser) expectSeparator(separators ...Separator) (Separator, error) {
|
||||
|
||||
separator := p.nextToken()
|
||||
if separator == nil || separator.Type != Type_Separator || !slices.Contains(separators, separator.Value.(Separator)) {
|
||||
return InvalidValue, errors.New("expected one of " + strings.Join(separatorNames, " "))
|
||||
return InvalidValue, p.error("expected one of " + strings.Join(separatorNames, " "))
|
||||
}
|
||||
|
||||
return separator.Value.(Separator), nil
|
||||
@ -169,7 +175,7 @@ func (p *Parser) expectSeparator(separators ...Separator) (Separator, error) {
|
||||
func (p *Parser) expectIdentifier() (string, error) {
|
||||
identifier := p.nextToken()
|
||||
if identifier == nil || identifier.Type != Type_Separator && identifier.Type != Type_Identifier {
|
||||
return "", errors.New("expected identifier")
|
||||
return "", p.error("expected identifier")
|
||||
}
|
||||
|
||||
return identifier.Value.(string), nil
|
||||
@ -180,12 +186,12 @@ func (p *Parser) expectImport() (*Import, error) {
|
||||
|
||||
importToken := p.nextToken()
|
||||
if importToken == nil || importToken.Type != Type_Keyword || importToken.Value.(Keyword) != Keyword_Import {
|
||||
return nil, errors.New("expected import")
|
||||
return nil, p.error("expected import")
|
||||
}
|
||||
|
||||
identifier := p.nextToken()
|
||||
if identifier == nil || identifier.Type != Type_Identifier {
|
||||
return nil, errors.New("expected identifier")
|
||||
return nil, p.error("expected identifier")
|
||||
}
|
||||
|
||||
_, err = p.expectSeparator(Separator_Semicolon)
|
||||
@ -201,7 +207,7 @@ func (p *Parser) expectType() (*Type, error) {
|
||||
|
||||
tok := p.nextToken()
|
||||
if tok == nil {
|
||||
return nil, errors.New("expected type")
|
||||
return nil, p.error("expected type")
|
||||
}
|
||||
|
||||
if tok.Type == Type_Keyword && tok.Value.(Keyword) == Keyword_Void {
|
||||
@ -232,7 +238,7 @@ func (p *Parser) expectType() (*Type, error) {
|
||||
}
|
||||
|
||||
if len(types) == 0 {
|
||||
return nil, errors.New("empty tuple")
|
||||
return nil, p.error("empty tuple")
|
||||
}
|
||||
|
||||
return &Type{Type: Type_Tuple, Value: TupleType{Types: types}}, nil
|
||||
@ -242,7 +248,32 @@ func (p *Parser) expectType() (*Type, error) {
|
||||
return &Type{Type: Type_Named, Value: tok.Value}, nil
|
||||
}
|
||||
|
||||
return nil, errors.New("expected type")
|
||||
return nil, p.error("expected type")
|
||||
}
|
||||
|
||||
func (p *Parser) expectBlock() (*Block, error) {
|
||||
_, err := p.expectSeparator(Separator_OpenCurly)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var statements []Statement
|
||||
for {
|
||||
token := p.peekToken()
|
||||
if token == nil {
|
||||
return nil, p.error("expected statement or }")
|
||||
}
|
||||
|
||||
if token.Type == Type_Separator && token.Value.(Separator) == Separator_CloseCurly {
|
||||
p.nextToken()
|
||||
break
|
||||
}
|
||||
|
||||
// TODO: parse statement
|
||||
p.nextToken()
|
||||
}
|
||||
|
||||
return &Block{Statements: statements}, nil
|
||||
}
|
||||
|
||||
func (p *Parser) expectFunction() (*ParsedFunction, error) {
|
||||
@ -251,7 +282,7 @@ func (p *Parser) expectFunction() (*ParsedFunction, error) {
|
||||
var name string
|
||||
var parameters []ParsedParameter
|
||||
var returnType *Type
|
||||
var body Block
|
||||
var body *Block
|
||||
|
||||
returnType, err = p.expectType()
|
||||
if err != nil {
|
||||
@ -271,7 +302,7 @@ func (p *Parser) expectFunction() (*ParsedFunction, error) {
|
||||
for {
|
||||
token := p.peekToken()
|
||||
if token == nil {
|
||||
return nil, errors.New("incomplete function declaration")
|
||||
return nil, p.error("incomplete function declaration")
|
||||
}
|
||||
|
||||
if token.Type == Type_Separator && token.Value.(Separator) == Separator_CloseParen {
|
||||
@ -301,19 +332,12 @@ func (p *Parser) expectFunction() (*ParsedFunction, error) {
|
||||
parameters = append(parameters, ParsedParameter{Name: paramName, Type: *paramType})
|
||||
}
|
||||
|
||||
_, err = p.expectSeparator(Separator_OpenCurly)
|
||||
body, err = p.expectBlock()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// TODO: body, closing curly
|
||||
|
||||
_, err = p.expectSeparator(Separator_CloseCurly)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &ParsedFunction{Name: name, Parameters: parameters, ReturnType: *returnType, Body: body}, nil
|
||||
return &ParsedFunction{Name: name, Parameters: parameters, ReturnType: *returnType, Body: *body}, nil
|
||||
}
|
||||
|
||||
func (p *Parser) parseFile() (*ParsedFile, error) {
|
||||
@ -328,8 +352,6 @@ func (p *Parser) parseFile() (*ParsedFile, error) {
|
||||
break
|
||||
}
|
||||
|
||||
log.Printf("%+#v\n", token)
|
||||
|
||||
if token.Type == Type_Keyword && token.Value.(Keyword) == Keyword_Import {
|
||||
var parsedImport *Import
|
||||
parsedImport, err = p.expectImport()
|
||||
|
Loading…
Reference in New Issue
Block a user