2024-03-10 22:48:57 +01:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"slices"
|
2024-03-11 22:05:36 +01:00
|
|
|
"strconv"
|
2024-03-10 22:48:57 +01:00
|
|
|
"strings"
|
2024-03-11 22:05:36 +01:00
|
|
|
"unicode"
|
2024-03-10 22:48:57 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
// Whitespace lists the runes the lexer skips between tokens and treats as
// the end of a word.
var Whitespace = []rune{' ', '\t', '\r', '\n'}
|
|
|
|
|
|
|
|
// LexType identifies the category of a LexToken.
type LexType uint32

// Token categories emitted by the lexer.
const (
	// Type_Identifier marks a word that is neither a keyword, a separator
	// nor a number.
	Type_Identifier LexType = iota
	// Type_Keyword marks a word found in Keywords.
	Type_Keyword
	// Type_Separator marks a single rune found in Separators.
	Type_Separator
	// Type_Literal marks a string or number literal.
	Type_Literal
	// Type_Operator marks an entry of Operators.
	Type_Operator
)
|
|
|
|
|
|
|
|
// Keyword identifies a reserved word. Its numeric value is the index of the
// word's spelling in Keywords.
type Keyword uint32

// Keywords holds the spelling of every reserved word. The order must match
// the Keyword constants below, because the lexer converts an index into this
// slice directly into a Keyword.
var Keywords = []string{
	"import",
	"void",
	"return",
	"true",
	"false",
	"if",
	"else",
}

// Reserved words, in the same order as Keywords.
const (
	Keyword_Import Keyword = iota
	Keyword_Void
	Keyword_Return
	Keyword_True
	Keyword_False
	Keyword_If
	Keyword_Else
)

// KeyWord_False is the original, inconsistently capitalised spelling of
// Keyword_False. It is kept so existing references keep compiling.
//
// Deprecated: use Keyword_False.
const KeyWord_False = Keyword_False
|
|
|
|
|
|
|
|
// Separator identifies a punctuation rune. Its numeric value is the index of
// that rune in Separators.
type Separator uint32

// Separators holds every punctuation rune that forms a token on its own and
// ends the word before it. The order must match the Separator constants
// below.
var Separators = []rune{
	'(',
	')',
	'{',
	'}',
	';',
	',',
}

// Punctuation runes, in the same order as Separators.
const (
	Separator_OpenParen Separator = iota
	Separator_CloseParen
	Separator_OpenCurly
	Separator_CloseCurly
	Separator_Semicolon
	Separator_Comma
)
|
|
|
|
|
2024-03-13 23:26:20 +01:00
|
|
|
// Operator identifies an operator. Its numeric value is the index of the
// operator's spelling in Operators.
type Operator uint32

// Operators holds the spelling of every operator. The order must match the
// Operator constants below, because the lexer converts an index into this
// slice directly into an Operator.
var Operators = []string{
	// Single-character operators.
	"=", ">", "<", "!", "+", "-", "*", "/", "%",
	// Each of the above followed by '='.
	"==", ">=", "<=", "!=", "+=", "-=", "*=", "/=", "%=",
}

// Operators, in the same order as Operators.
const (
	Operator_Equals Operator = iota
	Operator_Greater
	Operator_Less
	Operator_Not
	Operator_Plus
	Operator_Minus
	Operator_Multiply
	Operator_Divide
	Operator_Modulo

	Operator_EqualsEquals
	Operator_GreaterEquals
	Operator_LessEquals
	Operator_NotEquals
	Operator_PlusEquals
	Operator_MinusEquals
	Operator_MultiplyEquals
	Operator_DivideEquals
	Operator_ModuloEquals
)
|
|
|
|
|
2024-03-10 22:48:57 +01:00
|
|
|
// LiteralType identifies the kind of value a Literal holds.
type LiteralType uint32

// Kinds of literal.
const (
	// Literal_String is a double-quoted string literal.
	Literal_String LiteralType = iota
	// Literal_Number is a numeric literal.
	Literal_Number
	// Literal_Boolean is a boolean literal.
	Literal_Boolean
)
|
|
|
|
|
|
|
|
type LexToken struct {
|
|
|
|
Type LexType
|
|
|
|
Position uint64
|
|
|
|
Value any
|
|
|
|
}
|
|
|
|
|
|
|
|
type Literal struct {
|
2024-03-11 22:05:36 +01:00
|
|
|
Type LiteralType
|
|
|
|
Primitive PrimitiveType
|
|
|
|
Value any
|
2024-03-10 22:48:57 +01:00
|
|
|
}
|
|
|
|
|
2024-03-13 17:17:09 +01:00
|
|
|
// Lexer holds the scanning state for one program.
type Lexer struct {
	// Runes is the input that has not been consumed yet; consuming a rune
	// drops it from the front.
	Runes []rune
	// LastTokenPosition is the value Position had when the current (most
	// recently started) token began; it is what errors and tokens report.
	// Position is the number of runes consumed so far.
	LastTokenPosition, Position uint64
}
|
2024-03-10 22:48:57 +01:00
|
|
|
|
2024-03-13 17:17:09 +01:00
|
|
|
func (l *Lexer) error(message string) error {
|
2024-03-21 19:55:05 +01:00
|
|
|
return CompilerError{Position: l.LastTokenPosition, Message: message}
|
2024-03-13 17:17:09 +01:00
|
|
|
}
|
2024-03-10 22:48:57 +01:00
|
|
|
|
2024-03-13 17:17:09 +01:00
|
|
|
func (l *Lexer) peekRune() *rune {
|
|
|
|
if len(l.Runes) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return &l.Runes[0]
|
|
|
|
}
|
|
|
|
|
2024-03-23 14:03:20 +01:00
|
|
|
func (l *Lexer) tryOperator() Operator {
|
|
|
|
var foundOp Operator = InvalidValue
|
|
|
|
var foundOpLen int = 0
|
|
|
|
|
|
|
|
str := string(l.Runes)
|
|
|
|
for i, operator := range Operators {
|
|
|
|
operatorLen := len([]rune(operator))
|
|
|
|
if operatorLen <= foundOpLen {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if strings.HasPrefix(str, operator) {
|
|
|
|
foundOp = Operator(i)
|
|
|
|
foundOpLen = len([]rune(operator))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := 0; i < foundOpLen; i++ {
|
|
|
|
l.nextRune()
|
|
|
|
}
|
|
|
|
|
|
|
|
return foundOp
|
|
|
|
}
|
|
|
|
|
2024-03-13 17:17:09 +01:00
|
|
|
func (l *Lexer) nextRune() *rune {
|
|
|
|
if len(l.Runes) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
r := l.Runes[0]
|
|
|
|
l.Runes = l.Runes[1:]
|
|
|
|
l.Position++
|
|
|
|
return &r
|
|
|
|
}
|
2024-03-10 22:48:57 +01:00
|
|
|
|
2024-03-13 17:17:09 +01:00
|
|
|
func (l *Lexer) stringLiteral() (string, error) {
|
2024-03-21 19:55:05 +01:00
|
|
|
l.LastTokenPosition = l.Position
|
|
|
|
|
2024-03-13 17:17:09 +01:00
|
|
|
openQuote := l.nextRune()
|
|
|
|
if openQuote == nil || *openQuote != '"' {
|
|
|
|
return "", l.error("expected \"")
|
2024-03-10 22:48:57 +01:00
|
|
|
}
|
|
|
|
|
2024-03-13 17:17:09 +01:00
|
|
|
literal := ""
|
|
|
|
for {
|
|
|
|
r := l.nextRune()
|
|
|
|
if r == nil {
|
|
|
|
return "", l.error("unexpected end of file")
|
|
|
|
}
|
|
|
|
|
|
|
|
if *r == '"' {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
if *r == '\\' {
|
|
|
|
escaped := l.nextRune()
|
|
|
|
if escaped == nil {
|
|
|
|
return "", l.error("unmatched escape sequence")
|
|
|
|
}
|
|
|
|
|
|
|
|
literal += string(*escaped)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
literal += string(*r)
|
2024-03-10 22:48:57 +01:00
|
|
|
}
|
|
|
|
|
2024-03-13 17:17:09 +01:00
|
|
|
return literal, nil
|
2024-03-10 22:48:57 +01:00
|
|
|
}
|
|
|
|
|
2024-03-13 17:17:09 +01:00
|
|
|
// TODO: maybe this method should directly return LexToken
|
2024-03-23 14:03:20 +01:00
|
|
|
func (l *Lexer) nextToken() (*LexToken, error) {
|
2024-03-10 22:48:57 +01:00
|
|
|
// Skip whitespace
|
2024-03-13 17:17:09 +01:00
|
|
|
for {
|
|
|
|
r := l.peekRune()
|
|
|
|
if r == nil {
|
2024-03-23 14:03:20 +01:00
|
|
|
return nil, nil
|
2024-03-13 17:17:09 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if !slices.Contains(Whitespace, *r) {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
l.nextRune()
|
2024-03-10 22:48:57 +01:00
|
|
|
}
|
|
|
|
|
2024-03-21 19:55:05 +01:00
|
|
|
l.LastTokenPosition = l.Position
|
|
|
|
|
2024-03-13 17:17:09 +01:00
|
|
|
r := l.peekRune()
|
|
|
|
if r == nil {
|
2024-03-23 14:03:20 +01:00
|
|
|
return nil, nil
|
2024-03-10 22:48:57 +01:00
|
|
|
}
|
|
|
|
|
2024-03-13 17:17:09 +01:00
|
|
|
if *r == '"' {
|
|
|
|
literal, err := l.stringLiteral()
|
2024-03-10 22:48:57 +01:00
|
|
|
if err != nil {
|
2024-03-23 14:03:20 +01:00
|
|
|
return nil, err
|
2024-03-10 22:48:57 +01:00
|
|
|
}
|
|
|
|
|
2024-03-23 14:03:20 +01:00
|
|
|
return &LexToken{Type: Type_Literal, Position: l.LastTokenPosition, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: literal}}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
op := l.tryOperator()
|
|
|
|
if op != InvalidValue {
|
|
|
|
return &LexToken{Type: Type_Operator, Position: l.LastTokenPosition, Value: op}, nil
|
2024-03-10 22:48:57 +01:00
|
|
|
}
|
|
|
|
|
2024-03-13 17:17:09 +01:00
|
|
|
token := ""
|
|
|
|
for {
|
|
|
|
r := l.peekRune()
|
2024-03-23 14:03:20 +01:00
|
|
|
if r == nil || slices.Contains(Whitespace, *r) || slices.Contains(Separators, *r) {
|
2024-03-13 17:17:09 +01:00
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
token += string(*l.nextRune())
|
2024-03-10 22:48:57 +01:00
|
|
|
}
|
|
|
|
|
2024-03-23 14:03:20 +01:00
|
|
|
if len(token) == 0 {
|
|
|
|
if len(l.Runes) == 0 {
|
|
|
|
return nil, nil
|
|
|
|
}
|
2024-03-11 22:05:36 +01:00
|
|
|
|
2024-03-23 14:03:20 +01:00
|
|
|
token = string(*l.nextRune())
|
2024-03-11 22:05:36 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
runes := []rune(token)
|
2024-03-20 19:26:48 +01:00
|
|
|
if unicode.IsDigit([]rune(token)[0]) {
|
2024-03-11 22:05:36 +01:00
|
|
|
// TODO: hexadecimal/binary/octal constants
|
|
|
|
|
|
|
|
var numberType PrimitiveType = InvalidValue
|
|
|
|
var rawNumber string = token
|
2024-03-17 19:55:28 +01:00
|
|
|
for i, name := range PRIMITIVE_TYPE_NAMES {
|
2024-03-11 22:05:36 +01:00
|
|
|
if strings.HasSuffix(token, name) {
|
|
|
|
numberType = PrimitiveType(i)
|
|
|
|
rawNumber = token[:len(token)-len(name)]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
containsDot := slices.Contains(runes, '.')
|
|
|
|
|
|
|
|
if numberType == InvalidValue {
|
|
|
|
if containsDot {
|
|
|
|
numberType = Primitive_F64
|
|
|
|
} else {
|
2024-03-20 19:26:48 +01:00
|
|
|
numberType = Primitive_I64
|
2024-03-11 22:05:36 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if containsDot && !isFloatingPoint(numberType) {
|
2024-03-21 19:55:05 +01:00
|
|
|
return nil, l.error("dot in non floating-point constant")
|
2024-03-11 22:05:36 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
number, err := parseNumber(rawNumber, numberType)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2024-03-21 19:55:05 +01:00
|
|
|
return &LexToken{Type: Type_Literal, Position: l.LastTokenPosition, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil
|
2024-03-10 22:48:57 +01:00
|
|
|
}
|
|
|
|
|
2024-03-14 16:42:22 +01:00
|
|
|
if len(runes) == 1 {
|
|
|
|
if idx := slices.Index(Separators, runes[0]); idx != -1 {
|
2024-03-21 19:55:05 +01:00
|
|
|
return &LexToken{Type: Type_Separator, Position: l.LastTokenPosition, Value: Separator(idx)}, nil
|
2024-03-14 16:42:22 +01:00
|
|
|
}
|
2024-03-10 22:48:57 +01:00
|
|
|
}
|
2024-03-14 16:42:22 +01:00
|
|
|
|
|
|
|
if idx := slices.Index(Keywords, token); idx != -1 {
|
2024-03-21 19:55:05 +01:00
|
|
|
return &LexToken{Type: Type_Keyword, Position: l.LastTokenPosition, Value: Keyword(idx)}, nil
|
2024-03-14 16:42:22 +01:00
|
|
|
}
|
|
|
|
|
2024-03-21 19:55:05 +01:00
|
|
|
return &LexToken{Type: Type_Identifier, Position: l.LastTokenPosition, Value: token}, nil
|
2024-03-10 22:48:57 +01:00
|
|
|
}
|
|
|
|
|
2024-03-23 14:03:20 +01:00
|
|
|
func parseNumber(raw string, numberType PrimitiveType) (any, error) {
|
|
|
|
// TODO: return compiler errors
|
|
|
|
if isSignedInt(numberType) {
|
|
|
|
return strconv.ParseInt(raw, 10, getBits(numberType))
|
|
|
|
}
|
|
|
|
|
|
|
|
if isUnsignedInt(numberType) {
|
|
|
|
return strconv.ParseUint(raw, 10, getBits(numberType))
|
|
|
|
}
|
|
|
|
|
|
|
|
if isFloatingPoint(numberType) {
|
|
|
|
return strconv.ParseFloat(raw, getBits(numberType))
|
|
|
|
}
|
|
|
|
|
|
|
|
panic("Unhandled type (" + strconv.FormatUint(uint64(numberType), 10) + ") in parseNumber()")
|
|
|
|
}
|
|
|
|
|
2024-03-10 22:48:57 +01:00
|
|
|
func lexer(program string) ([]LexToken, error) {
|
|
|
|
var tokens []LexToken
|
|
|
|
|
2024-03-13 17:17:09 +01:00
|
|
|
lexer := Lexer{Runes: []rune(program)}
|
|
|
|
|
|
|
|
for {
|
|
|
|
token, err := lexer.nextToken()
|
2024-03-10 22:48:57 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2024-03-23 14:03:20 +01:00
|
|
|
if token == nil {
|
2024-03-10 22:48:57 +01:00
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2024-03-23 14:03:20 +01:00
|
|
|
tokens = append(tokens, *token)
|
2024-03-10 22:48:57 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return tokens, nil
|
|
|
|
}
|