227 lines
4.8 KiB
Go
227 lines
4.8 KiB
Go
package main
|
|
|
|
import (
|
|
"errors"
|
|
"slices"
|
|
"strconv"
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
// Rune classes consulted while splitting source text into tokens.
var (
	// Whitespace holds the runes that nextToken skips before a token and
	// that end an unquoted token.
	Whitespace = []rune{' ', '\t', '\r', '\n'}

	// Separators holds the punctuation runes that form a token on their
	// own and that also end the unquoted token preceding them.
	Separators = []rune{'(', ')', '{', '}', ';', ','}

	// Operators holds the operator runes. No code visible in this file
	// reads it.
	Operators = []rune{'=', '>', '<', '!', '+', '-', '*', '/', '%'}
)
|
|
|
|
// LexType is the broad category stored in LexToken.Type.
type LexType uint32

// Token categories, numbered from zero in declaration order.
const (
	// Type_Identifier marks a token that matched no other category; its
	// value is the token text.
	Type_Identifier LexType = iota
	// Type_Keyword marks a reserved word; the token value is a Keyword.
	Type_Keyword
	// Type_Separator marks punctuation; the token value is a Separator.
	Type_Separator
	// Type_Literal marks a string or number; the token value is a Literal.
	Type_Literal
)
|
|
|
|
// Keyword identifies a reserved word recognised by parseToken.
type Keyword uint32

// Reserved words, numbered from zero in declaration order.
const (
	// Keyword_Import is produced for the token "import".
	Keyword_Import Keyword = iota
	// Keyword_Void is produced for the token "void".
	Keyword_Void
)
|
|
|
|
// Separator identifies a punctuation token recognised by parseToken.
type Separator uint32

// Punctuation tokens, numbered from zero in declaration order.
const (
	Separator_OpenParen  Separator = iota // "("
	Separator_CloseParen                  // ")"
	Separator_OpenCurly                   // "{"
	Separator_CloseCurly                  // "}"
	Separator_Semicolon                   // ";"
	Separator_Comma                       // ","
)
|
|
|
|
// LiteralType distinguishes the kinds of literal stored in Literal.Type.
type LiteralType uint32

// Literal kinds, numbered from zero in declaration order.
const (
	// Literal_String marks a double-quoted string literal.
	Literal_String LiteralType = iota
	// Literal_Number marks a numeric literal.
	Literal_Number
)
|
|
|
|
type LexToken struct {
|
|
Type LexType
|
|
Position uint64
|
|
Value any
|
|
}
|
|
|
|
type Literal struct {
|
|
Type LiteralType
|
|
Primitive PrimitiveType
|
|
Value any
|
|
}
|
|
|
|
// stringLiteral reads a double-quoted string literal from the front of runes.
//
// runes[0] is assumed to be the opening quote and is skipped without being
// inspected. A backslash makes the rune that follows it part of the literal
// verbatim, so \" yields " and \\ yields \. Other escapes are not interpreted
// yet: \n currently yields the letter n.
//
// It returns the literal's contents without the surrounding quotes, and the
// runes that follow the closing quote. An error is returned when the input
// ends directly after a backslash ("unmatched escape sequence") or before a
// closing quote is found ("unclosed string literal"); the latter also covers
// an empty input slice.
func stringLiteral(runes []rune) (string, []rune, error) {
	// A Builder avoids re-copying the whole literal for every appended rune.
	var literal strings.Builder

	idx := 1 // Always starts with "
	for idx < len(runes) && runes[idx] != '"' {
		if runes[idx] == '\\' {
			if idx == len(runes)-1 {
				return "", nil, errors.New("unmatched escape sequence")
			}

			// TODO \n, \r, \uXXXX, ... escape sequences

			// Step over the backslash so the escaped rune is copied below.
			idx++
		}

		literal.WriteRune(runes[idx])
		idx++
	}

	// ">=" rather than "==": with an empty slice idx (1) is already past
	// len(runes) (0), and slicing below would panic.
	if idx >= len(runes) {
		return "", nil, errors.New("unclosed string literal")
	}

	// idx sits on the closing quote; the remainder starts right after it.
	return literal.String(), runes[idx+1:], nil
}
|
|
|
|
// source -> token, remaining source, error
|
|
func nextToken(program string) (string, string, error) {
|
|
// Skip whitespace
|
|
start := 0
|
|
runes := []rune(program)
|
|
for start < len(runes) && slices.Contains(Whitespace, runes[start]) {
|
|
start++
|
|
}
|
|
|
|
if start == len(runes) {
|
|
return "", "", nil
|
|
}
|
|
|
|
if runes[start] == '"' {
|
|
// String literal
|
|
literal, remaining, err := stringLiteral(runes[start:])
|
|
if err != nil {
|
|
return "", "", err
|
|
}
|
|
|
|
return "\"" + literal + "\"", string(remaining), nil
|
|
}
|
|
|
|
end := start
|
|
for end < len(runes) && !slices.Contains(Whitespace, runes[end]) && !slices.Contains(Separators, runes[end]) {
|
|
end++
|
|
}
|
|
|
|
if start == end {
|
|
end++
|
|
}
|
|
|
|
return string(runes[start:end]), string(runes[end:]), nil
|
|
}
|
|
|
|
func parseNumber(raw string, numberType PrimitiveType) (any, error) {
|
|
if isSignedInt(numberType) {
|
|
return strconv.ParseInt(raw, 10, getBits(numberType))
|
|
}
|
|
|
|
if isUnsignedInt(numberType) {
|
|
return strconv.ParseUint(raw, 10, getBits(numberType))
|
|
}
|
|
|
|
if isFloatingPoint(numberType) {
|
|
return strconv.ParseFloat(raw, getBits(numberType))
|
|
}
|
|
|
|
panic("Unhandled type (" + strconv.FormatUint(uint64(numberType), 10) + ") in parseNumber()")
|
|
}
|
|
|
|
func parseToken(token string) (*LexToken, error) {
|
|
if strings.HasPrefix(token, "\"") {
|
|
return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: token[1 : len(token)-1]}}, nil
|
|
}
|
|
|
|
runes := []rune(token)
|
|
startsWithMinus := runes[0] == '-'
|
|
if startsWithMinus || unicode.IsDigit([]rune(token)[0]) {
|
|
// TODO: hexadecimal/binary/octal constants
|
|
|
|
var numberType PrimitiveType = InvalidValue
|
|
var rawNumber string = token
|
|
for i, name := range NumberTypeNames {
|
|
if strings.HasSuffix(token, name) {
|
|
numberType = PrimitiveType(i)
|
|
rawNumber = token[:len(token)-len(name)]
|
|
}
|
|
}
|
|
|
|
containsDot := slices.Contains(runes, '.')
|
|
|
|
if numberType == InvalidValue {
|
|
if containsDot {
|
|
numberType = Primitive_F64
|
|
} else if startsWithMinus {
|
|
numberType = Primitive_I64
|
|
} else {
|
|
numberType = Primitive_U64
|
|
}
|
|
}
|
|
|
|
if containsDot && !isFloatingPoint(numberType) {
|
|
return nil, errors.New("dot in non floating-point constant")
|
|
}
|
|
|
|
number, err := parseNumber(rawNumber, numberType)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil
|
|
}
|
|
|
|
switch token {
|
|
case "void":
|
|
return &LexToken{Type: Type_Keyword, Value: Keyword_Void}, nil
|
|
case "import":
|
|
return &LexToken{Type: Type_Keyword, Value: Keyword_Import}, nil
|
|
case "(":
|
|
return &LexToken{Type: Type_Separator, Value: Separator_OpenParen}, nil
|
|
case ")":
|
|
return &LexToken{Type: Type_Separator, Value: Separator_CloseParen}, nil
|
|
case "{":
|
|
return &LexToken{Type: Type_Separator, Value: Separator_OpenCurly}, nil
|
|
case "}":
|
|
return &LexToken{Type: Type_Separator, Value: Separator_CloseCurly}, nil
|
|
case ";":
|
|
return &LexToken{Type: Type_Separator, Value: Separator_Semicolon}, nil
|
|
case ",":
|
|
return &LexToken{Type: Type_Separator, Value: Separator_Comma}, nil
|
|
default:
|
|
return &LexToken{Type: Type_Identifier, Value: token}, nil
|
|
}
|
|
}
|
|
|
|
func lexer(program string) ([]LexToken, error) {
|
|
var tokens []LexToken
|
|
|
|
for len(program) > 0 {
|
|
token, rest, err := nextToken(program)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if len(token) == 0 {
|
|
break
|
|
}
|
|
|
|
lexToken, err := parseToken(token)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
program = rest
|
|
tokens = append(tokens, *lexToken)
|
|
}
|
|
|
|
return tokens, nil
|
|
}
|