Basic parsing (WIP)
This commit is contained in:
parent
1ce2505427
commit
c22fa66492
129
compiler.go
129
compiler.go
@ -1,129 +0,0 @@
|
||||
package main
|
||||
|
||||
type PrimitiveType uint32
|
||||
|
||||
const (
|
||||
Primitive_I8 PrimitiveType = iota
|
||||
Primitive_I16
|
||||
Primitive_I32
|
||||
Primitive_I64
|
||||
Primitive_U8
|
||||
Primitive_U16
|
||||
Primitive_U32
|
||||
Primitive_U64
|
||||
)
|
||||
|
||||
type TypeType uint32
|
||||
|
||||
const (
|
||||
Type_Primitive TypeType = iota
|
||||
Type_Named
|
||||
Type_Array
|
||||
Type_Tuple
|
||||
)
|
||||
|
||||
type Type struct {
|
||||
Type TypeType
|
||||
Value any
|
||||
}
|
||||
|
||||
type NamedType struct {
|
||||
TypeName string
|
||||
}
|
||||
|
||||
type ArrayType struct {
|
||||
ElementType Type
|
||||
}
|
||||
|
||||
type TupleType struct {
|
||||
Types []Type
|
||||
}
|
||||
|
||||
type StatementType uint32
|
||||
|
||||
const (
|
||||
Statement_Expression StatementType = iota
|
||||
Statement_Block
|
||||
Statement_Return
|
||||
Statement_DeclareLocalVariable
|
||||
)
|
||||
|
||||
type Statement struct {
|
||||
Type StatementType
|
||||
Value any
|
||||
}
|
||||
|
||||
type BlockStatement struct {
|
||||
Block Block
|
||||
}
|
||||
|
||||
type ReturnStatement struct {
|
||||
Value *Expression
|
||||
}
|
||||
|
||||
type DeclareLocalVariableStatement struct {
|
||||
Variable string
|
||||
Initializer Expression
|
||||
}
|
||||
|
||||
type ExpressionType uint32
|
||||
|
||||
const (
|
||||
Expression_Assignment ExpressionType = iota
|
||||
Expression_Literal
|
||||
Expression_VariableReference
|
||||
Expression_Arithmetic
|
||||
)
|
||||
|
||||
type Expression struct {
|
||||
Type ExpressionType
|
||||
Value any
|
||||
}
|
||||
|
||||
type AssignmentExpression struct {
|
||||
Variable string
|
||||
Value Expression
|
||||
}
|
||||
|
||||
type LiteralExpression struct {
|
||||
Type PrimitiveType
|
||||
Value any
|
||||
}
|
||||
|
||||
type VariableReferenceExpression struct {
|
||||
Variable string
|
||||
}
|
||||
|
||||
type ArithmeticOperation uint32
|
||||
|
||||
const (
|
||||
Arithmetic_Add ArithmeticOperation = iota
|
||||
Arithmetic_Sub
|
||||
Arithmetic_Mul
|
||||
Arithmetic_Div
|
||||
Arithmetic_Mod
|
||||
)
|
||||
|
||||
type ArithmeticExpression struct {
|
||||
Operation ArithmeticOperation
|
||||
Left Expression
|
||||
Right Expression
|
||||
}
|
||||
|
||||
type Block struct {
|
||||
Statements []Statement
|
||||
}
|
||||
|
||||
type CompiledFunction struct {
|
||||
Parameters []Type
|
||||
ReturnValue Type
|
||||
Body Block
|
||||
}
|
||||
|
||||
type CompiledFile struct {
|
||||
Functions []CompiledFunction
|
||||
}
|
||||
|
||||
func compiler() (*CompiledFile, error) {
|
||||
return nil, nil
|
||||
}
|
@ -1,6 +1,12 @@
|
||||
import lang.stdlib;
|
||||
|
||||
(u8, u8) a(u8 a, u8 b) {
|
||||
return a, b
|
||||
}
|
||||
|
||||
void main() {
|
||||
u8 a = 1u8;
|
||||
f32 b = 1.5f32;
|
||||
f32 c = 1.6;
|
||||
println("Hello World");
|
||||
}
|
||||
|
88
lexer.go
88
lexer.go
@ -3,11 +3,14 @@ package main
|
||||
import (
|
||||
"errors"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// Whitespace is the set of blank runes: space, tab, carriage return and newline.
var Whitespace = []rune{' ', '\t', '\r', '\n'}

// Separators is the set of single-rune separators. expectSeparator indexes
// this slice with a Separator value to build its error messages, so the order
// here must stay aligned with the Separator constants.
var Separators = []rune{'(', ')', '{', '}', ';', ','}

// Operators is the set of runes that can occur in an operator.
var Operators = []rune{'=', '>', '<', '!', '+', '-', '*', '/', '%'}
|
||||
|
||||
type LexType uint32
|
||||
|
||||
@ -15,8 +18,7 @@ const (
|
||||
Type_Identifier LexType = iota
|
||||
Type_Keyword
|
||||
Type_Separator
|
||||
Type_LiteralString
|
||||
Type_LiteralNumber
|
||||
Type_Literal
|
||||
)
|
||||
|
||||
type Keyword uint32
|
||||
@ -34,6 +36,7 @@ const (
|
||||
Separator_OpenCurly
|
||||
Separator_CloseCurly
|
||||
Separator_Semicolon
|
||||
Separator_Comma
|
||||
)
|
||||
|
||||
type LiteralType uint32
|
||||
@ -51,6 +54,7 @@ type LexToken struct {
|
||||
|
||||
type Literal struct {
|
||||
Type LiteralType
|
||||
Primitive PrimitiveType
|
||||
Value any
|
||||
}
|
||||
|
||||
@ -115,28 +119,84 @@ func nextToken(program string) (string, string, error) {
|
||||
return string(runes[start:end]), string(runes[end:]), nil
|
||||
}
|
||||
|
||||
func parseToken(token string) (LexToken, error) {
|
||||
func parseNumber(raw string, numberType PrimitiveType) (any, error) {
|
||||
if isSignedInt(numberType) {
|
||||
return strconv.ParseInt(raw, 10, getBits(numberType))
|
||||
}
|
||||
|
||||
if isUnsignedInt(numberType) {
|
||||
return strconv.ParseUint(raw, 10, getBits(numberType))
|
||||
}
|
||||
|
||||
if isFloatingPoint(numberType) {
|
||||
return strconv.ParseFloat(raw, getBits(numberType))
|
||||
}
|
||||
|
||||
panic("Unhandled type (" + strconv.FormatUint(uint64(numberType), 10) + ") in parseNumber()")
|
||||
}
|
||||
|
||||
func parseToken(token string) (*LexToken, error) {
|
||||
if strings.HasPrefix(token, "\"") {
|
||||
return LexToken{Type: Type_LiteralString, Value: token[1 : len(token)-1]}, nil
|
||||
return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: token[1 : len(token)-1]}}, nil
|
||||
}
|
||||
|
||||
runes := []rune(token)
|
||||
startsWithMinus := runes[0] == '-'
|
||||
if startsWithMinus || unicode.IsDigit([]rune(token)[0]) {
|
||||
// TODO: hexadecimal/binary/octal constants
|
||||
|
||||
var numberType PrimitiveType = InvalidValue
|
||||
var rawNumber string = token
|
||||
for i, name := range NumberTypeNames {
|
||||
if strings.HasSuffix(token, name) {
|
||||
numberType = PrimitiveType(i)
|
||||
rawNumber = token[:len(token)-len(name)]
|
||||
}
|
||||
}
|
||||
|
||||
containsDot := slices.Contains(runes, '.')
|
||||
|
||||
if numberType == InvalidValue {
|
||||
if containsDot {
|
||||
numberType = Primitive_F64
|
||||
} else if startsWithMinus {
|
||||
numberType = Primitive_I64
|
||||
} else {
|
||||
numberType = Primitive_U64
|
||||
}
|
||||
}
|
||||
|
||||
if containsDot && !isFloatingPoint(numberType) {
|
||||
return nil, errors.New("dot in non floating-point constant")
|
||||
}
|
||||
|
||||
number, err := parseNumber(rawNumber, numberType)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil
|
||||
}
|
||||
|
||||
switch token {
|
||||
case "void":
|
||||
return LexToken{Type: Type_Keyword, Value: Keyword_Void}, nil
|
||||
return &LexToken{Type: Type_Keyword, Value: Keyword_Void}, nil
|
||||
case "import":
|
||||
return LexToken{Type: Type_Keyword, Value: Keyword_Import}, nil
|
||||
return &LexToken{Type: Type_Keyword, Value: Keyword_Import}, nil
|
||||
case "(":
|
||||
return LexToken{Type: Type_Separator, Value: Separator_OpenParen}, nil
|
||||
return &LexToken{Type: Type_Separator, Value: Separator_OpenParen}, nil
|
||||
case ")":
|
||||
return LexToken{Type: Type_Separator, Value: Separator_CloseParen}, nil
|
||||
return &LexToken{Type: Type_Separator, Value: Separator_CloseParen}, nil
|
||||
case "{":
|
||||
return LexToken{Type: Type_Separator, Value: Separator_OpenCurly}, nil
|
||||
return &LexToken{Type: Type_Separator, Value: Separator_OpenCurly}, nil
|
||||
case "}":
|
||||
return LexToken{Type: Type_Separator, Value: Separator_CloseCurly}, nil
|
||||
return &LexToken{Type: Type_Separator, Value: Separator_CloseCurly}, nil
|
||||
case ";":
|
||||
return LexToken{Type: Type_Separator, Value: Separator_Semicolon}, nil
|
||||
return &LexToken{Type: Type_Separator, Value: Separator_Semicolon}, nil
|
||||
case ",":
|
||||
return &LexToken{Type: Type_Separator, Value: Separator_Comma}, nil
|
||||
default:
|
||||
return LexToken{Type: Type_Identifier, Value: token}, nil
|
||||
return &LexToken{Type: Type_Identifier, Value: token}, nil
|
||||
}
|
||||
}
|
||||
|
||||
@ -159,7 +219,7 @@ func lexer(program string) ([]LexToken, error) {
|
||||
}
|
||||
|
||||
program = rest
|
||||
tokens = append(tokens, lexToken)
|
||||
tokens = append(tokens, *lexToken)
|
||||
}
|
||||
|
||||
return tokens, nil
|
||||
|
11
main.go
11
main.go
@ -16,10 +16,17 @@ func main() {
|
||||
log.Fatalln("Cannot open input file.", err)
|
||||
}
|
||||
|
||||
ast, err := lexer(string(content))
|
||||
tokens, err := lexer(string(content))
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
log.Printf("%+#v\n", ast)
|
||||
log.Printf("Tokens:\n%+#v\n\n", tokens)
|
||||
|
||||
parsed, err := parser(tokens)
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
log.Printf("Parsed:\n%+#v\n\n", parsed)
|
||||
}
|
||||
|
261
parser.go
Normal file
261
parser.go
Normal file
@ -0,0 +1,261 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"slices"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// TypeType is the tag stored in Type.Type; it says which kind of type a Type
// value describes.
type TypeType uint32

const (
	// Type_Primitive is not produced by any parser code visible in this file.
	// NOTE(review): presumably reserved for built-in primitives - confirm.
	Type_Primitive TypeType = iota
	// Type_Void is what parseType returns for the `void` keyword.
	Type_Void
	// Type_Named is what parseType returns for an identifier.
	Type_Named
	// Type_Array is not produced by any parser code visible in this file.
	Type_Array
	// Type_Tuple is what parseType returns for a parenthesised type list.
	Type_Tuple
)
|
||||
|
||||
type Type struct {
|
||||
Type TypeType
|
||||
Value any
|
||||
}
|
||||
|
||||
// NamedType refers to a type by its name.
type NamedType struct {
	TypeName string // name of the referenced type
}
|
||||
|
||||
type ArrayType struct {
|
||||
ElementType Type
|
||||
}
|
||||
|
||||
type TupleType struct {
|
||||
Types []Type
|
||||
}
|
||||
|
||||
// StatementType is the tag stored in Statement.Type.
type StatementType uint32

const (
	Statement_Expression           StatementType = iota // an expression used as a statement
	Statement_Block                                     // a nested block
	Statement_Return                                    // a return statement
	Statement_DeclareLocalVariable                      // a local variable declaration
)
|
||||
|
||||
type Statement struct {
|
||||
Type StatementType
|
||||
Value any
|
||||
}
|
||||
|
||||
type BlockStatement struct {
|
||||
Block Block
|
||||
}
|
||||
|
||||
type ReturnStatement struct {
|
||||
Value *Expression
|
||||
}
|
||||
|
||||
type DeclareLocalVariableStatement struct {
|
||||
Variable string
|
||||
Initializer Expression
|
||||
}
|
||||
|
||||
// ExpressionType is the tag stored in Expression.Type.
type ExpressionType uint32

const (
	Expression_Assignment        ExpressionType = iota // assignment to a variable
	Expression_Literal                                 // literal constant
	Expression_VariableReference                       // read of a variable
	Expression_Arithmetic                              // binary arithmetic operation
)
|
||||
|
||||
type Expression struct {
|
||||
Type ExpressionType
|
||||
Value any
|
||||
}
|
||||
|
||||
type AssignmentExpression struct {
|
||||
Variable string
|
||||
Value Expression
|
||||
}
|
||||
|
||||
type LiteralExpression struct {
|
||||
Type PrimitiveType
|
||||
Value any
|
||||
}
|
||||
|
||||
// VariableReferenceExpression describes a reference to a variable by name.
type VariableReferenceExpression struct {
	Variable string // name of the referenced variable
}
|
||||
|
||||
// ArithmeticOperation selects the operator of an ArithmeticExpression.
type ArithmeticOperation uint32

const (
	Arithmetic_Add ArithmeticOperation = iota // addition
	Arithmetic_Sub                            // subtraction
	Arithmetic_Mul                            // multiplication
	Arithmetic_Div                            // division
	Arithmetic_Mod                            // modulo
)
|
||||
|
||||
type ArithmeticExpression struct {
|
||||
Operation ArithmeticOperation
|
||||
Left Expression
|
||||
Right Expression
|
||||
}
|
||||
|
||||
type Block struct {
|
||||
Statements []Statement
|
||||
}
|
||||
|
||||
type ParsedFunction struct {
|
||||
Parameters []Type
|
||||
ReturnType Type
|
||||
Body Block
|
||||
}
|
||||
|
||||
// Import is one parsed import statement.
type Import struct {
	// Import is the value of the identifier token that follows the import
	// keyword, exactly as parseImport received it.
	Import string
}
|
||||
|
||||
type ParsedFile struct {
|
||||
Imports []Import
|
||||
Functions []ParsedFunction
|
||||
}
|
||||
|
||||
func expectSeparator(tokens []LexToken, separators ...Separator) (Separator, []LexToken, error) {
|
||||
var separatorNames []string
|
||||
for _, sep := range separators {
|
||||
separatorNames = append(separatorNames, string(Separators[sep]))
|
||||
}
|
||||
|
||||
if len(tokens) == 0 {
|
||||
return InvalidValue, nil, errors.New("expected one of " + strings.Join(separatorNames, " "))
|
||||
}
|
||||
|
||||
separator := tokens[0]
|
||||
if separator.Type != Type_Separator || !slices.Contains(separators, separator.Value.(Separator)) {
|
||||
return InvalidValue, nil, errors.New("expected one of " + strings.Join(separatorNames, ", "))
|
||||
}
|
||||
|
||||
return separator.Value.(Separator), tokens[1:], nil
|
||||
}
|
||||
|
||||
func parseImport(tokens []LexToken) (*Import, []LexToken, error) {
|
||||
var err error
|
||||
|
||||
if len(tokens) < 3 {
|
||||
return nil, nil, errors.New("incomplete import")
|
||||
}
|
||||
|
||||
// tokens[0] == import keyword
|
||||
|
||||
identifier := tokens[1]
|
||||
if identifier.Type != Type_Identifier {
|
||||
return nil, nil, errors.New("expected identifier")
|
||||
}
|
||||
|
||||
_, tokens, err = expectSeparator(tokens[2:], Separator_Semicolon)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
return &Import{Import: identifier.Value.(string)}, tokens, nil
|
||||
}
|
||||
|
||||
func parseType(tokens []LexToken) (*Type, []LexToken, error) {
|
||||
var err error
|
||||
|
||||
if len(tokens) == 0 {
|
||||
return nil, nil, errors.New("expected type")
|
||||
}
|
||||
|
||||
tok := tokens[0]
|
||||
if tok.Type == Type_Keyword && tok.Value.(Keyword) == Keyword_Void {
|
||||
return &Type{Type: Type_Void, Value: nil}, tokens[1:], nil
|
||||
}
|
||||
|
||||
if tok.Type == Type_Separator && tok.Value.(Separator) == Separator_OpenParen {
|
||||
// Tuple type
|
||||
var types []Type
|
||||
tokens = tokens[1:]
|
||||
for {
|
||||
var parsedType *Type
|
||||
parsedType, tokens, err = parseType(tokens)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
types = append(types, *parsedType)
|
||||
|
||||
var sep Separator
|
||||
sep, tokens, err = expectSeparator(tokens, Separator_Comma, Separator_CloseParen)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
if sep == Separator_CloseParen {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if len(types) == 0 {
|
||||
return nil, nil, errors.New("empty tuple")
|
||||
}
|
||||
|
||||
return &Type{Type: Type_Tuple, Value: TupleType{Types: types}}, tokens, nil
|
||||
}
|
||||
|
||||
if tok.Type == Type_Identifier {
|
||||
return &Type{Type: Type_Named, Value: tok.Value}, tokens[1:], nil
|
||||
}
|
||||
|
||||
return nil, nil, errors.New("expected type")
|
||||
}
|
||||
|
||||
func parseFunction(tokens []LexToken) (*ParsedFunction, []LexToken, error) {
|
||||
var err error
|
||||
|
||||
var parameters []Type
|
||||
var returnType *Type
|
||||
var body Block
|
||||
|
||||
returnType, tokens, err = parseType(tokens)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// TODO: function name, parameters, body
|
||||
|
||||
return &ParsedFunction{Parameters: parameters, ReturnType: *returnType, Body: body}, tokens, nil
|
||||
}
|
||||
|
||||
func parser(tokens []LexToken) (*ParsedFile, error) {
|
||||
var err error
|
||||
|
||||
var functions []ParsedFunction
|
||||
var imports []Import
|
||||
|
||||
for len(tokens) > 0 {
|
||||
if tokens[0].Type == Type_Keyword && tokens[0].Value.(Keyword) == Keyword_Import {
|
||||
var parsedImport *Import
|
||||
parsedImport, tokens, err = parseImport(tokens)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
imports = append(imports, *parsedImport)
|
||||
continue
|
||||
}
|
||||
|
||||
var parsedFunction *ParsedFunction
|
||||
parsedFunction, tokens, err = parseFunction(tokens)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
functions = append(functions, *parsedFunction)
|
||||
}
|
||||
|
||||
return &ParsedFile{Imports: imports, Functions: functions}, nil
|
||||
}
|
76
types.go
Normal file
76
types.go
Normal file
@ -0,0 +1,76 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
)
|
||||
|
||||
type Lang_I8 int8
|
||||
type Lang_I16 int16
|
||||
type Lang_I32 int32
|
||||
type Lang_I64 int64
|
||||
|
||||
type Lang_U8 uint8
|
||||
type Lang_U16 uint16
|
||||
type Lang_U32 uint32
|
||||
type Lang_U64 uint64
|
||||
|
||||
// NumberTypeNames lists the type suffixes a numeric literal may carry.
// parseToken converts the index of the matching entry straight into a
// PrimitiveType, so the order must mirror the PrimitiveType constants.
var NumberTypeNames = [...]string{"i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "f32", "f64"}
|
||||
|
||||
// PrimitiveType identifies one of the built-in number types.
//
// The constants are declared in the same order as NumberTypeNames because
// parseToken converts an index into that array directly into a PrimitiveType.
type PrimitiveType uint32

const (
	Primitive_I8 PrimitiveType = iota
	Primitive_I16
	Primitive_I32
	Primitive_I64
	Primitive_U8
	Primitive_U16
	Primitive_U32
	Primitive_U64
	Primitive_F32
	Primitive_F64
)
|
||||
|
||||
// InvalidValue is a magic sentinel meaning "no valid value". It is an untyped
// constant so that it can stand in for any of the uint32-based enums: parseToken
// uses it as a PrimitiveType and expectSeparator returns it as a Separator.
const InvalidValue = 0xEEEEEE // Magic value
|
||||
|
||||
func isSignedInt(primitiveType PrimitiveType) bool {
|
||||
switch primitiveType {
|
||||
case Primitive_I8, Primitive_I16, Primitive_I32, Primitive_I64:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func isUnsignedInt(primitiveType PrimitiveType) bool {
|
||||
switch primitiveType {
|
||||
case Primitive_U8, Primitive_U16, Primitive_U32, Primitive_U64:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func isFloatingPoint(primitiveType PrimitiveType) bool {
|
||||
switch primitiveType {
|
||||
case Primitive_F32, Primitive_F64:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func getBits(primitiveType PrimitiveType) int {
|
||||
switch primitiveType {
|
||||
case Primitive_I8, Primitive_U8:
|
||||
return 8
|
||||
case Primitive_I16, Primitive_U16:
|
||||
return 16
|
||||
case Primitive_I32, Primitive_U32, Primitive_F32:
|
||||
return 32
|
||||
case Primitive_I64, Primitive_U64, Primitive_F64:
|
||||
return 64
|
||||
default:
|
||||
panic("Passed an invalid type (" + strconv.FormatUint(uint64(primitiveType), 10) + ") to getBits()")
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user