Basic parsing (WIP)

This commit is contained in:
MrLetsplay 2024-03-11 22:05:36 +01:00
parent 1ce2505427
commit c22fa66492
Signed by: mr
SSH Key Fingerprint: SHA256:92jBH80vpXyaZHjaIl47pjRq+Yt7XGTArqQg1V7hSqg
6 changed files with 428 additions and 147 deletions

View File

@ -1,129 +0,0 @@
package main
// PrimitiveType identifies one of the language's built-in primitive types.
type PrimitiveType uint32
// Primitive type identifiers, numbered consecutively from 0 by iota.
// NOTE(review): the names suggest signed (I) and unsigned (U) integers of
// 8 to 64 bits; nothing in this file interprets them yet.
const (
Primitive_I8 PrimitiveType = iota
Primitive_I16
Primitive_I32
Primitive_I64
Primitive_U8
Primitive_U16
Primitive_U32
Primitive_U64
)
// TypeType discriminates the kinds of Type.
type TypeType uint32
// Kinds of Type, numbered consecutively from 0 by iota.
const (
Type_Primitive TypeType = iota
Type_Named
Type_Array
Type_Tuple
)
// Type is a tagged value: Type names the kind and Value carries the data for
// that kind.
// NOTE(review): presumably Value holds a NamedType, ArrayType or TupleType
// matching Type; nothing in this file constructs a Type, so confirm at the
// use sites.
type Type struct {
Type TypeType
Value any
}
// NamedType refers to a type by its name.
type NamedType struct {
TypeName string
}
// ArrayType describes an array by the type of its elements.
type ArrayType struct {
ElementType Type
}
// TupleType describes a tuple by the types of its members.
type TupleType struct {
Types []Type
}
// StatementType discriminates the kinds of Statement.
type StatementType uint32
// Kinds of Statement, numbered consecutively from 0 by iota.
const (
Statement_Expression StatementType = iota
Statement_Block
Statement_Return
Statement_DeclareLocalVariable
)
// Statement is a tagged value: Type names the kind and Value carries the data
// for that kind.
// NOTE(review): presumably Value holds the *Statement struct matching Type;
// nothing in this file constructs a Statement, so confirm at the use sites.
type Statement struct {
Type StatementType
Value any
}
// BlockStatement wraps a nested Block.
type BlockStatement struct {
Block Block
}
// ReturnStatement holds the returned expression.
// NOTE(review): Value is a pointer, presumably so that it can be nil for a
// return without a value; confirm.
type ReturnStatement struct {
Value *Expression
}
// DeclareLocalVariableStatement names a new local variable and the expression
// that initializes it.
type DeclareLocalVariableStatement struct {
Variable string
Initializer Expression
}
// ExpressionType discriminates the kinds of Expression.
type ExpressionType uint32
// Kinds of Expression, numbered consecutively from 0 by iota.
const (
Expression_Assignment ExpressionType = iota
Expression_Literal
Expression_VariableReference
Expression_Arithmetic
)
// Expression is a tagged value: Type names the kind and Value carries the data
// for that kind.
// NOTE(review): presumably Value holds the *Expression struct matching Type;
// nothing in this file constructs an Expression, so confirm at the use sites.
type Expression struct {
Type ExpressionType
Value any
}
// AssignmentExpression names the assigned variable and the expression whose
// value is assigned to it.
type AssignmentExpression struct {
Variable string
Value Expression
}
// LiteralExpression is a constant: Type is its primitive type and Value the
// constant itself.
type LiteralExpression struct {
Type PrimitiveType
Value any
}
// VariableReferenceExpression refers to a variable by name.
type VariableReferenceExpression struct {
Variable string
}
// ArithmeticOperation identifies a binary arithmetic operator.
type ArithmeticOperation uint32
// Arithmetic operators, numbered consecutively from 0 by iota.
const (
Arithmetic_Add ArithmeticOperation = iota
Arithmetic_Sub
Arithmetic_Mul
Arithmetic_Div
Arithmetic_Mod
)
// ArithmeticExpression applies Operation to the Left and Right operands.
type ArithmeticExpression struct {
Operation ArithmeticOperation
Left Expression
Right Expression
}
// Block is a sequence of statements.
type Block struct {
Statements []Statement
}
// CompiledFunction describes a function by its parameter types, the type of
// its return value and its body.
type CompiledFunction struct {
Parameters []Type
ReturnValue Type
Body Block
}
// CompiledFile is the result type of compiler(): the functions of one file.
type CompiledFile struct {
Functions []CompiledFunction
}
// compiler is a placeholder: it performs no work and always returns a nil
// file together with a nil error.
func compiler() (*CompiledFile, error) {
	var compiled *CompiledFile
	return compiled, nil
}

View File

@ -1,6 +1,12 @@
import lang.stdlib; import lang.stdlib;
(u8, u8) a(u8 a, u8 b) {
return a, b
}
void main() { void main() {
u8 a = 1u8; u8 a = 1u8;
f32 b = 1.5f32;
f32 c = 1.6;
println("Hello World"); println("Hello World");
} }

View File

@ -3,11 +3,14 @@ package main
import ( import (
"errors" "errors"
"slices" "slices"
"strconv"
"strings" "strings"
"unicode"
) )
var Whitespace []rune = []rune{' ', '\t', '\r', '\n'} var Whitespace []rune = []rune{' ', '\t', '\r', '\n'}
var Separators []rune = []rune{'(', ')', '{', '}', ';'} var Separators []rune = []rune{'(', ')', '{', '}', ';', ','}
var Operators []rune = []rune{'=', '>', '<', '!', '+', '-', '*', '/', '%'}
type LexType uint32 type LexType uint32
@ -15,8 +18,7 @@ const (
Type_Identifier LexType = iota Type_Identifier LexType = iota
Type_Keyword Type_Keyword
Type_Separator Type_Separator
Type_LiteralString Type_Literal
Type_LiteralNumber
) )
type Keyword uint32 type Keyword uint32
@ -34,6 +36,7 @@ const (
Separator_OpenCurly Separator_OpenCurly
Separator_CloseCurly Separator_CloseCurly
Separator_Semicolon Separator_Semicolon
Separator_Comma
) )
type LiteralType uint32 type LiteralType uint32
@ -50,8 +53,9 @@ type LexToken struct {
} }
type Literal struct { type Literal struct {
Type LiteralType Type LiteralType
Value any Primitive PrimitiveType
Value any
} }
func stringLiteral(runes []rune) (string, []rune, error) { func stringLiteral(runes []rune) (string, []rune, error) {
@ -115,28 +119,84 @@ func nextToken(program string) (string, string, error) {
return string(runes[start:end]), string(runes[end:]), nil return string(runes[start:end]), string(runes[end:]), nil
} }
func parseToken(token string) (LexToken, error) { func parseNumber(raw string, numberType PrimitiveType) (any, error) {
if isSignedInt(numberType) {
return strconv.ParseInt(raw, 10, getBits(numberType))
}
if isUnsignedInt(numberType) {
return strconv.ParseUint(raw, 10, getBits(numberType))
}
if isFloatingPoint(numberType) {
return strconv.ParseFloat(raw, getBits(numberType))
}
panic("Unhandled type (" + strconv.FormatUint(uint64(numberType), 10) + ") in parseNumber()")
}
func parseToken(token string) (*LexToken, error) {
if strings.HasPrefix(token, "\"") { if strings.HasPrefix(token, "\"") {
return LexToken{Type: Type_LiteralString, Value: token[1 : len(token)-1]}, nil return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: token[1 : len(token)-1]}}, nil
}
runes := []rune(token)
startsWithMinus := runes[0] == '-'
if startsWithMinus || unicode.IsDigit([]rune(token)[0]) {
// TODO: hexadecimal/binary/octal constants
var numberType PrimitiveType = InvalidValue
var rawNumber string = token
for i, name := range NumberTypeNames {
if strings.HasSuffix(token, name) {
numberType = PrimitiveType(i)
rawNumber = token[:len(token)-len(name)]
}
}
containsDot := slices.Contains(runes, '.')
if numberType == InvalidValue {
if containsDot {
numberType = Primitive_F64
} else if startsWithMinus {
numberType = Primitive_I64
} else {
numberType = Primitive_U64
}
}
if containsDot && !isFloatingPoint(numberType) {
return nil, errors.New("dot in non floating-point constant")
}
number, err := parseNumber(rawNumber, numberType)
if err != nil {
return nil, err
}
return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil
} }
switch token { switch token {
case "void": case "void":
return LexToken{Type: Type_Keyword, Value: Keyword_Void}, nil return &LexToken{Type: Type_Keyword, Value: Keyword_Void}, nil
case "import": case "import":
return LexToken{Type: Type_Keyword, Value: Keyword_Import}, nil return &LexToken{Type: Type_Keyword, Value: Keyword_Import}, nil
case "(": case "(":
return LexToken{Type: Type_Separator, Value: Separator_OpenParen}, nil return &LexToken{Type: Type_Separator, Value: Separator_OpenParen}, nil
case ")": case ")":
return LexToken{Type: Type_Separator, Value: Separator_CloseParen}, nil return &LexToken{Type: Type_Separator, Value: Separator_CloseParen}, nil
case "{": case "{":
return LexToken{Type: Type_Separator, Value: Separator_OpenCurly}, nil return &LexToken{Type: Type_Separator, Value: Separator_OpenCurly}, nil
case "}": case "}":
return LexToken{Type: Type_Separator, Value: Separator_CloseCurly}, nil return &LexToken{Type: Type_Separator, Value: Separator_CloseCurly}, nil
case ";": case ";":
return LexToken{Type: Type_Separator, Value: Separator_Semicolon}, nil return &LexToken{Type: Type_Separator, Value: Separator_Semicolon}, nil
case ",":
return &LexToken{Type: Type_Separator, Value: Separator_Comma}, nil
default: default:
return LexToken{Type: Type_Identifier, Value: token}, nil return &LexToken{Type: Type_Identifier, Value: token}, nil
} }
} }
@ -159,7 +219,7 @@ func lexer(program string) ([]LexToken, error) {
} }
program = rest program = rest
tokens = append(tokens, lexToken) tokens = append(tokens, *lexToken)
} }
return tokens, nil return tokens, nil

11
main.go
View File

@ -16,10 +16,17 @@ func main() {
log.Fatalln("Cannot open input file.", err) log.Fatalln("Cannot open input file.", err)
} }
ast, err := lexer(string(content)) tokens, err := lexer(string(content))
if err != nil { if err != nil {
log.Fatalln(err) log.Fatalln(err)
} }
log.Printf("%+#v\n", ast) log.Printf("Tokens:\n%+#v\n\n", tokens)
parsed, err := parser(tokens)
if err != nil {
log.Fatalln(err)
}
log.Printf("Parsed:\n%+#v\n\n", parsed)
} }

261
parser.go Normal file
View File

@ -0,0 +1,261 @@
package main
import (
"errors"
"slices"
"strings"
)
// TypeType discriminates the kinds of Type.
type TypeType uint32
// Kinds of Type, numbered consecutively from 0 by iota.
const (
Type_Primitive TypeType = iota
Type_Void
Type_Named
Type_Array
Type_Tuple
)
// Type is a tagged value: Type names the kind and Value carries the data for
// that kind. As produced by parseType, Value is nil for Type_Void, a TupleType
// for Type_Tuple and the identifier token's raw Value for Type_Named.
// NOTE(review): Type_Named therefore does not currently carry a NamedType;
// confirm which representation consumers should expect.
type Type struct {
Type TypeType
Value any
}
// NamedType refers to a type by its name.
type NamedType struct {
TypeName string
}
// ArrayType describes an array by the type of its elements.
type ArrayType struct {
ElementType Type
}
// TupleType describes a tuple by the types of its members, in source order.
type TupleType struct {
Types []Type
}
// StatementType discriminates the kinds of Statement.
type StatementType uint32
// Kinds of Statement, numbered consecutively from 0 by iota.
const (
Statement_Expression StatementType = iota
Statement_Block
Statement_Return
Statement_DeclareLocalVariable
)
// Statement is a tagged value: Type names the kind and Value carries the data
// for that kind.
// NOTE(review): presumably Value holds the *Statement struct matching Type;
// the parser does not build statements yet, so confirm once it does.
type Statement struct {
Type StatementType
Value any
}
// BlockStatement wraps a nested Block.
type BlockStatement struct {
Block Block
}
// ReturnStatement holds the returned expression.
// NOTE(review): Value is a pointer, presumably so that it can be nil for a
// return without a value; confirm.
type ReturnStatement struct {
Value *Expression
}
// DeclareLocalVariableStatement names a new local variable and the expression
// that initializes it.
type DeclareLocalVariableStatement struct {
Variable string
Initializer Expression
}
// ExpressionType discriminates the kinds of Expression.
type ExpressionType uint32
// Kinds of Expression, numbered consecutively from 0 by iota.
const (
Expression_Assignment ExpressionType = iota
Expression_Literal
Expression_VariableReference
Expression_Arithmetic
)
// Expression is a tagged value: Type names the kind and Value carries the data
// for that kind.
// NOTE(review): presumably Value holds the *Expression struct matching Type;
// the parser does not build expressions yet, so confirm once it does.
type Expression struct {
Type ExpressionType
Value any
}
// AssignmentExpression names the assigned variable and the expression whose
// value is assigned to it.
type AssignmentExpression struct {
Variable string
Value Expression
}
// LiteralExpression is a constant: Type is its primitive type and Value the
// constant itself.
type LiteralExpression struct {
Type PrimitiveType
Value any
}
// VariableReferenceExpression refers to a variable by name.
type VariableReferenceExpression struct {
Variable string
}
// ArithmeticOperation identifies a binary arithmetic operator.
type ArithmeticOperation uint32
// Arithmetic operators, numbered consecutively from 0 by iota.
const (
Arithmetic_Add ArithmeticOperation = iota
Arithmetic_Sub
Arithmetic_Mul
Arithmetic_Div
Arithmetic_Mod
)
// ArithmeticExpression applies Operation to the Left and Right operands.
type ArithmeticExpression struct {
Operation ArithmeticOperation
Left Expression
Right Expression
}
// Block is a sequence of statements.
type Block struct {
Statements []Statement
}
// ParsedFunction is the result of parseFunction. At present only ReturnType is
// filled in; Parameters and Body are left at their zero values.
type ParsedFunction struct {
Parameters []Type
ReturnType Type
Body Block
}
// Import is a single import declaration; Import holds the identifier that
// follows the import keyword.
type Import struct {
Import string
}
// ParsedFile is the result of parser: the imports and functions of one token
// stream, each in source order.
type ParsedFile struct {
Imports []Import
Functions []ParsedFunction
}
// expectSeparator consumes the first token of tokens if it is one of the
// accepted separators.
//
// On success it returns the separator that matched and the remaining tokens.
// If tokens is empty, or its first token is not a separator from the accepted
// set, it returns InvalidValue, a nil slice and an error that lists the
// accepted separators joined by ", ".
func expectSeparator(tokens []LexToken, separators ...Separator) (Separator, []LexToken, error) {
	if len(tokens) > 0 && tokens[0].Type == Type_Separator {
		// The type assertion is only valid once the token type is confirmed.
		if found := tokens[0].Value.(Separator); slices.Contains(separators, found) {
			return found, tokens[1:], nil
		}
	}

	// Build the message only on failure, and format it the same way whether
	// the input ran out or the wrong token was found.
	names := make([]string, 0, len(separators))
	for _, sep := range separators {
		// Separators is indexed by the Separator constant's numeric value.
		names = append(names, string(Separators[sep]))
	}
	return InvalidValue, nil, errors.New("expected one of " + strings.Join(names, ", "))
}
// parseImport parses one import declaration: the import keyword, an
// identifier and a terminating semicolon.
//
// The caller has already checked that tokens[0] is the import keyword, so it
// is not inspected again here. The function returns the parsed Import and the
// tokens following the semicolon. On error both the Import and the token
// slice are nil.
func parseImport(tokens []LexToken) (*Import, []LexToken, error) {
	switch {
	case len(tokens) < 3:
		return nil, nil, errors.New("incomplete import")
	case tokens[1].Type != Type_Identifier:
		return nil, nil, errors.New("expected identifier")
	}

	_, rest, err := expectSeparator(tokens[2:], Separator_Semicolon)
	if err != nil {
		return nil, nil, err
	}

	parsed := Import{Import: tokens[1].Value.(string)}
	return &parsed, rest, nil
}
// parseType parses a type expression from the front of tokens.
//
// Supported forms:
//   - the void keyword, producing Type_Void with a nil Value;
//   - a parenthesised, comma-separated list of one or more types, producing
//     Type_Tuple with a TupleType Value;
//   - an identifier, producing Type_Named with the identifier token's Value.
//
// It returns the parsed type and the tokens that follow it. On error both the
// type and the token slice are nil. "()" is rejected with "empty tuple"; any
// other input that does not start a type is rejected with "expected type".
func parseType(tokens []LexToken) (*Type, []LexToken, error) {
	if len(tokens) == 0 {
		return nil, nil, errors.New("expected type")
	}

	first, rest := tokens[0], tokens[1:]
	switch first.Type {
	case Type_Keyword:
		if first.Value.(Keyword) == Keyword_Void {
			return &Type{Type: Type_Void, Value: nil}, rest, nil
		}
	case Type_Separator:
		if first.Value.(Separator) == Separator_OpenParen {
			return parseTupleType(rest)
		}
	case Type_Identifier:
		return &Type{Type: Type_Named, Value: first.Value}, rest, nil
	}

	return nil, nil, errors.New("expected type")
}

// parseTupleType parses the members of a tuple type. tokens starts directly
// after the opening parenthesis, which the caller has already consumed.
func parseTupleType(tokens []LexToken) (*Type, []LexToken, error) {
	// The loop below always parses one member before it looks for ')', so the
	// empty tuple has to be detected up front to get a specific error.
	if len(tokens) > 0 && tokens[0].Type == Type_Separator && tokens[0].Value.(Separator) == Separator_CloseParen {
		return nil, nil, errors.New("empty tuple")
	}

	var types []Type
	for {
		member, rest, err := parseType(tokens)
		if err != nil {
			return nil, nil, err
		}
		types = append(types, *member)

		sep, rest, err := expectSeparator(rest, Separator_Comma, Separator_CloseParen)
		if err != nil {
			return nil, nil, err
		}
		if sep == Separator_CloseParen {
			return &Type{Type: Type_Tuple, Value: TupleType{Types: types}}, rest, nil
		}

		// A comma was consumed: another member must follow.
		tokens = rest
	}
}
// parseFunction parses a function definition from the front of tokens.
//
// Only the leading return type is parsed so far. The resulting ParsedFunction
// has ReturnType set and Parameters and Body left at their zero values. The
// function returns the tokens following the return type. On error both the
// function and the token slice are nil.
func parseFunction(tokens []LexToken) (*ParsedFunction, []LexToken, error) {
	result, rest, err := parseType(tokens)
	if err != nil {
		return nil, nil, err
	}

	// TODO: function name, parameters, body

	fn := new(ParsedFunction)
	fn.ReturnType = *result
	return fn, rest, nil
}
// parser turns a token stream into a ParsedFile.
//
// It repeatedly inspects the first remaining token. An import keyword starts
// an import declaration, handled by parseImport; anything else is handed to
// parseFunction. Parsing stops at the first error, in which case the returned
// file is nil. Imports and functions are collected in source order.
func parser(tokens []LexToken) (*ParsedFile, error) {
	var file ParsedFile

	for rest := tokens; len(rest) > 0; {
		head := rest[0]

		if head.Type != Type_Keyword || head.Value.(Keyword) != Keyword_Import {
			fn, after, err := parseFunction(rest)
			if err != nil {
				return nil, err
			}
			file.Functions = append(file.Functions, *fn)
			rest = after
			continue
		}

		imp, after, err := parseImport(rest)
		if err != nil {
			return nil, err
		}
		file.Imports = append(file.Imports, *imp)
		rest = after
	}

	return &file, nil
}

76
types.go Normal file
View File

@ -0,0 +1,76 @@
package main
import (
"strconv"
)
// Fixed-width Go integer types, one per signed and unsigned integer width
// from 8 to 64 bits.
// NOTE(review): not referenced by the visible code; presumably the Go-side
// representation of the language's integer values. Confirm once they are used.
type Lang_I8 int8
type Lang_I16 int16
type Lang_I32 int32
type Lang_I64 int64
type Lang_U8 uint8
type Lang_U16 uint16
type Lang_U32 uint32
type Lang_U64 uint64
// NumberTypeNames lists the type suffixes accepted on numeric literals.
// parseToken converts the index of a matching name directly to a
// PrimitiveType, so the order here must match the constant order below.
var NumberTypeNames = [...]string{"i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "f32", "f64"}
// PrimitiveType identifies one of the built-in numeric types.
type PrimitiveType uint32
// Numeric primitive identifiers, numbered consecutively from 0 by iota:
// signed integers, then unsigned integers, then floating-point types.
const (
Primitive_I8 PrimitiveType = iota
Primitive_I16
Primitive_I32
Primitive_I64
Primitive_U8
Primitive_U16
Primitive_U32
Primitive_U64
Primitive_F32
Primitive_F64
)
// InvalidValue is an untyped sentinel used where no valid enum value applies:
// parseToken stores it as the Primitive of string literals and as the initial
// "no suffix found" number type, and expectSeparator returns it on failure.
const InvalidValue = 0xEEEEEE // Magic value
// isSignedInt reports whether primitiveType is one of the signed integer
// primitives (i8, i16, i32 or i64).
func isSignedInt(primitiveType PrimitiveType) bool {
	return primitiveType == Primitive_I8 ||
		primitiveType == Primitive_I16 ||
		primitiveType == Primitive_I32 ||
		primitiveType == Primitive_I64
}
// isUnsignedInt reports whether primitiveType is one of the unsigned integer
// primitives (u8, u16, u32 or u64).
func isUnsignedInt(primitiveType PrimitiveType) bool {
	return primitiveType == Primitive_U8 ||
		primitiveType == Primitive_U16 ||
		primitiveType == Primitive_U32 ||
		primitiveType == Primitive_U64
}
// isFloatingPoint reports whether primitiveType is one of the floating-point
// primitives (f32 or f64).
func isFloatingPoint(primitiveType PrimitiveType) bool {
	return primitiveType == Primitive_F32 || primitiveType == Primitive_F64
}
// getBits returns the width in bits (8, 16, 32 or 64) of the given numeric
// primitive. It panics if primitiveType is not one of the Primitive_*
// constants.
func getBits(primitiveType PrimitiveType) int {
	// Width table indexed by primitive. An entry left at zero marks a value
	// that has no known width.
	widths := [...]int{
		Primitive_I8:  8,
		Primitive_I16: 16,
		Primitive_I32: 32,
		Primitive_I64: 64,
		Primitive_U8:  8,
		Primitive_U16: 16,
		Primitive_U32: 32,
		Primitive_U64: 64,
		Primitive_F32: 32,
		Primitive_F64: 64,
	}

	if uint64(primitiveType) < uint64(len(widths)) {
		if bits := widths[primitiveType]; bits != 0 {
			return bits
		}
	}

	panic("Passed an invalid type (" + strconv.FormatUint(uint64(primitiveType), 10) + ") to getBits()")
}