From c22fa66492b7b6f8d81ef8b0dc4d4de6db973954 Mon Sep 17 00:00:00 2001 From: MrLetsplay Date: Mon, 11 Mar 2024 22:05:36 +0100 Subject: [PATCH] Basic parsing (WIP) --- compiler.go | 129 -------------------- example/helloworld.lang | 6 + lexer.go | 92 +++++++++++--- main.go | 11 +- parser.go | 261 ++++++++++++++++++++++++++++++++++++++++ types.go | 76 ++++++++++++ 6 files changed, 428 insertions(+), 147 deletions(-) delete mode 100644 compiler.go create mode 100644 parser.go create mode 100644 types.go diff --git a/compiler.go b/compiler.go deleted file mode 100644 index 495fd2f..0000000 --- a/compiler.go +++ /dev/null @@ -1,129 +0,0 @@ -package main - -type PrimitiveType uint32 - -const ( - Primitive_I8 PrimitiveType = iota - Primitive_I16 - Primitive_I32 - Primitive_I64 - Primitive_U8 - Primitive_U16 - Primitive_U32 - Primitive_U64 -) - -type TypeType uint32 - -const ( - Type_Primitive TypeType = iota - Type_Named - Type_Array - Type_Tuple -) - -type Type struct { - Type TypeType - Value any -} - -type NamedType struct { - TypeName string -} - -type ArrayType struct { - ElementType Type -} - -type TupleType struct { - Types []Type -} - -type StatementType uint32 - -const ( - Statement_Expression StatementType = iota - Statement_Block - Statement_Return - Statement_DeclareLocalVariable -) - -type Statement struct { - Type StatementType - Value any -} - -type BlockStatement struct { - Block Block -} - -type ReturnStatement struct { - Value *Expression -} - -type DeclareLocalVariableStatement struct { - Variable string - Initializer Expression -} - -type ExpressionType uint32 - -const ( - Expression_Assignment ExpressionType = iota - Expression_Literal - Expression_VariableReference - Expression_Arithmetic -) - -type Expression struct { - Type ExpressionType - Value any -} - -type AssignmentExpression struct { - Variable string - Value Expression -} - -type LiteralExpression struct { - Type PrimitiveType - Value any -} - -type VariableReferenceExpression struct { - Variable string -} - -type ArithmeticOperation uint32 - -const ( - Arithmetic_Add ArithmeticOperation = iota - Arithmetic_Sub - Arithmetic_Mul - Arithmetic_Div - Arithmetic_Mod -) - -type ArithmeticExpression struct { - Operation ArithmeticOperation - Left Expression - Right Expression -} - -type Block struct { - Statements []Statement -} - -type CompiledFunction struct { - Parameters []Type - ReturnValue Type - Body Block -} - -type CompiledFile struct { - Functions []CompiledFunction -} - -func compiler() (*CompiledFile, error) { - return nil, nil -} diff --git a/example/helloworld.lang b/example/helloworld.lang index fb1491b..34cbb87 100644 --- a/example/helloworld.lang +++ b/example/helloworld.lang @@ -1,6 +1,12 @@ import lang.stdlib; +(u8, u8) a(u8 a, u8 b) { + return a, b +} + void main() { u8 a = 1u8; + f32 b = 1.5f32; + f32 c = 1.6; println("Hello World"); } diff --git a/lexer.go b/lexer.go index 97a9d2f..4fc10c5 100644 --- a/lexer.go +++ b/lexer.go @@ -3,11 +3,14 @@ package main import ( "errors" "slices" + "strconv" "strings" + "unicode" ) var Whitespace []rune = []rune{' ', '\t', '\r', '\n'} -var Separators []rune = []rune{'(', ')', '{', '}', ';'} +var Separators []rune = []rune{'(', ')', '{', '}', ';', ','} +var Operators []rune = []rune{'=', '>', '<', '!', '+', '-', '*', '/', '%'} type LexType uint32 @@ -15,8 +18,7 @@ const ( Type_Identifier LexType = iota Type_Keyword Type_Separator - Type_LiteralString - Type_LiteralNumber + Type_Literal ) type Keyword uint32 @@ -34,6 +36,7 @@ const ( Separator_OpenCurly Separator_CloseCurly Separator_Semicolon + Separator_Comma ) type LiteralType uint32 @@ -50,8 +53,9 @@ type LexToken struct { } type Literal struct { - Type LiteralType - Value any + Type LiteralType + Primitive PrimitiveType + Value any } func stringLiteral(runes []rune) (string, []rune, error) { @@ -115,28 +119,84 @@ func nextToken(program string) (string, string, error) { return string(runes[start:end]), string(runes[end:]), nil } -func parseToken(token string) (LexToken, error) { +func parseNumber(raw string, numberType PrimitiveType) (any, error) { + if isSignedInt(numberType) { + return strconv.ParseInt(raw, 10, getBits(numberType)) + } + + if isUnsignedInt(numberType) { + return strconv.ParseUint(raw, 10, getBits(numberType)) + } + + if isFloatingPoint(numberType) { + return strconv.ParseFloat(raw, getBits(numberType)) + } + + panic("Unhandled type (" + strconv.FormatUint(uint64(numberType), 10) + ") in parseNumber()") +} + +func parseToken(token string) (*LexToken, error) { if strings.HasPrefix(token, "\"") { - return LexToken{Type: Type_LiteralString, Value: token[1 : len(token)-1]}, nil + return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_String, Primitive: InvalidValue, Value: token[1 : len(token)-1]}}, nil + } + + runes := []rune(token) + startsWithMinus := runes[0] == '-' + if startsWithMinus || unicode.IsDigit([]rune(token)[0]) { + // TODO: hexadecimal/binary/octal constants + + var numberType PrimitiveType = InvalidValue + var rawNumber string = token + for i, name := range NumberTypeNames { + if strings.HasSuffix(token, name) { + numberType = PrimitiveType(i) + rawNumber = token[:len(token)-len(name)] + } + } + + containsDot := slices.Contains(runes, '.') + + if numberType == InvalidValue { + if containsDot { + numberType = Primitive_F64 + } else if startsWithMinus { + numberType = Primitive_I64 + } else { + numberType = Primitive_U64 + } + } + + if containsDot && !isFloatingPoint(numberType) { + return nil, errors.New("dot in non floating-point constant") + } + + number, err := parseNumber(rawNumber, numberType) + if err != nil { + return nil, err + } + + return &LexToken{Type: Type_Literal, Value: Literal{Type: Literal_Number, Primitive: numberType, Value: number}}, nil } switch token { case "void": - return LexToken{Type: Type_Keyword, Value: Keyword_Void}, nil + return &LexToken{Type: Type_Keyword, Value: Keyword_Void}, nil case "import": - return LexToken{Type: Type_Keyword, Value: Keyword_Import}, nil + return &LexToken{Type: Type_Keyword, Value: Keyword_Import}, nil case "(": - return LexToken{Type: Type_Separator, Value: Separator_OpenParen}, nil + return &LexToken{Type: Type_Separator, Value: Separator_OpenParen}, nil case ")": - return LexToken{Type: Type_Separator, Value: Separator_CloseParen}, nil + return &LexToken{Type: Type_Separator, Value: Separator_CloseParen}, nil case "{": - return LexToken{Type: Type_Separator, Value: Separator_OpenCurly}, nil + return &LexToken{Type: Type_Separator, Value: Separator_OpenCurly}, nil case "}": - return LexToken{Type: Type_Separator, Value: Separator_CloseCurly}, nil + return &LexToken{Type: Type_Separator, Value: Separator_CloseCurly}, nil case ";": - return LexToken{Type: Type_Separator, Value: Separator_Semicolon}, nil + return &LexToken{Type: Type_Separator, Value: Separator_Semicolon}, nil + case ",": + return &LexToken{Type: Type_Separator, Value: Separator_Comma}, nil default: - return LexToken{Type: Type_Identifier, Value: token}, nil + return &LexToken{Type: Type_Identifier, Value: token}, nil } } @@ -159,7 +219,7 @@ func lexer(program string) ([]LexToken, error) { } program = rest - tokens = append(tokens, lexToken) + tokens = append(tokens, *lexToken) } return tokens, nil diff --git a/main.go b/main.go index fe0056f..bb394cf 100644 --- a/main.go +++ b/main.go @@ -16,10 +16,17 @@ func main() { log.Fatalln("Cannot open input file.", err) } - ast, err := lexer(string(content)) + tokens, err := lexer(string(content)) if err != nil { log.Fatalln(err) } - log.Printf("%+#v\n", ast) + log.Printf("Tokens:\n%+#v\n\n", tokens) + + parsed, err := parser(tokens) + if err != nil { + log.Fatalln(err) + } + + log.Printf("Parsed:\n%+#v\n\n", parsed) } diff --git a/parser.go b/parser.go new file mode 100644 index 0000000..53fc986 --- /dev/null +++ b/parser.go @@ -0,0 +1,261 @@ +package main + +import ( + "errors" + "slices" + "strings" +) + +type TypeType uint32 + +const ( + Type_Primitive TypeType = iota + Type_Void + Type_Named + Type_Array + Type_Tuple +) + +type Type struct { + Type TypeType + Value any +} + +type NamedType struct { + TypeName string +} + +type ArrayType struct { + ElementType Type +} + +type TupleType struct { + Types []Type +} + +type StatementType uint32 + +const ( + Statement_Expression StatementType = iota + Statement_Block + Statement_Return + Statement_DeclareLocalVariable +) + +type Statement struct { + Type StatementType + Value any +} + +type BlockStatement struct { + Block Block +} + +type ReturnStatement struct { + Value *Expression +} + +type DeclareLocalVariableStatement struct { + Variable string + Initializer Expression +} + +type ExpressionType uint32 + +const ( + Expression_Assignment ExpressionType = iota + Expression_Literal + Expression_VariableReference + Expression_Arithmetic +) + +type Expression struct { + Type ExpressionType + Value any +} + +type AssignmentExpression struct { + Variable string + Value Expression +} + +type LiteralExpression struct { + Type PrimitiveType + Value any +} + +type VariableReferenceExpression struct { + Variable string +} + +type ArithmeticOperation uint32 + +const ( + Arithmetic_Add ArithmeticOperation = iota + Arithmetic_Sub + Arithmetic_Mul + Arithmetic_Div + Arithmetic_Mod +) + +type ArithmeticExpression struct { + Operation ArithmeticOperation + Left Expression + Right Expression +} + +type Block struct { + Statements []Statement +} + +type ParsedFunction struct { + Parameters []Type + ReturnType Type + Body Block +} + +type Import struct { + Import string +} + +type ParsedFile struct { + Imports []Import + Functions []ParsedFunction +} + +func expectSeparator(tokens []LexToken, separators ...Separator) (Separator, []LexToken, error) { + var separatorNames []string + for _, sep := range separators { + separatorNames = append(separatorNames, string(Separators[sep])) + } + + if len(tokens) == 0 { + return InvalidValue, nil, errors.New("expected one of " + strings.Join(separatorNames, " ")) + } + + separator := tokens[0] + if separator.Type != Type_Separator || !slices.Contains(separators, separator.Value.(Separator)) { + return InvalidValue, nil, errors.New("expected one of " + strings.Join(separatorNames, ", ")) + } + + return separator.Value.(Separator), tokens[1:], nil +} + +func parseImport(tokens []LexToken) (*Import, []LexToken, error) { + var err error + + if len(tokens) < 3 { + return nil, nil, errors.New("incomplete import") + } + + // tokens[0] == import keyword + + identifier := tokens[1] + if identifier.Type != Type_Identifier { + return nil, nil, errors.New("expected identifier") + } + + _, tokens, err = expectSeparator(tokens[2:], Separator_Semicolon) + if err != nil { + return nil, nil, err + } + + return &Import{Import: identifier.Value.(string)}, tokens, nil +} + +func parseType(tokens []LexToken) (*Type, []LexToken, error) { + var err error + + if len(tokens) == 0 { + return nil, nil, errors.New("expected type") + } + + tok := tokens[0] + if tok.Type == Type_Keyword && tok.Value.(Keyword) == Keyword_Void { + return &Type{Type: Type_Void, Value: nil}, tokens[1:], nil + } + + if tok.Type == Type_Separator && tok.Value.(Separator) == Separator_OpenParen { + // Tuple type + var types []Type + tokens = tokens[1:] + for { + var parsedType *Type + parsedType, tokens, err = parseType(tokens) + if err != nil { + return nil, nil, err + } + + types = append(types, *parsedType) + + var sep Separator + sep, tokens, err = expectSeparator(tokens, Separator_Comma, Separator_CloseParen) + if err != nil { + return nil, nil, err + } + + if sep == Separator_CloseParen { + break + } + } + + if len(types) == 0 { + return nil, nil, errors.New("empty tuple") + } + + return &Type{Type: Type_Tuple, Value: TupleType{Types: types}}, tokens, nil + } + + if tok.Type == Type_Identifier { + return &Type{Type: Type_Named, Value: tok.Value}, tokens[1:], nil + } + + return nil, nil, errors.New("expected type") +} + +func parseFunction(tokens []LexToken) (*ParsedFunction, []LexToken, error) { + var err error + + var parameters []Type + var returnType *Type + var body Block + + returnType, tokens, err = parseType(tokens) + if err != nil { + return nil, nil, err + } + + // TODO: function name, parameters, body + + return &ParsedFunction{Parameters: parameters, ReturnType: *returnType, Body: body}, tokens, nil +} + +func parser(tokens []LexToken) (*ParsedFile, error) { + var err error + + var functions []ParsedFunction + var imports []Import + + for len(tokens) > 0 { + if tokens[0].Type == Type_Keyword && tokens[0].Value.(Keyword) == Keyword_Import { + var parsedImport *Import + parsedImport, tokens, err = parseImport(tokens) + if err != nil { + return nil, err + } + + imports = append(imports, *parsedImport) + continue + } + + var parsedFunction *ParsedFunction + parsedFunction, tokens, err = parseFunction(tokens) + if err != nil { + return nil, err + } + + functions = append(functions, *parsedFunction) + } + + return &ParsedFile{Imports: imports, Functions: functions}, nil +} diff --git a/types.go b/types.go new file mode 100644 index 0000000..1be8d7c --- /dev/null +++ b/types.go @@ -0,0 +1,76 @@ +package main + +import ( + "strconv" +) + +type Lang_I8 int8 +type Lang_I16 int16 +type Lang_I32 int32 +type Lang_I64 int64 + +type Lang_U8 uint8 +type Lang_U16 uint16 +type Lang_U32 uint32 +type Lang_U64 uint64 + +var NumberTypeNames = [...]string{"i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "f32", "f64"} + +type PrimitiveType uint32 + +const ( + Primitive_I8 PrimitiveType = iota + Primitive_I16 + Primitive_I32 + Primitive_I64 + Primitive_U8 + Primitive_U16 + Primitive_U32 + Primitive_U64 + Primitive_F32 + Primitive_F64 +) + +const InvalidValue = 0xEEEEEE // Magic value + +func isSignedInt(primitiveType PrimitiveType) bool { + switch primitiveType { + case Primitive_I8, Primitive_I16, Primitive_I32, Primitive_I64: + return true + default: + return false + } +} + +func isUnsignedInt(primitiveType PrimitiveType) bool { + switch primitiveType { + case Primitive_U8, Primitive_U16, Primitive_U32, Primitive_U64: + return true + default: + return false + } +} + +func isFloatingPoint(primitiveType PrimitiveType) bool { + switch primitiveType { + case Primitive_F32, Primitive_F64: + return true + default: + return false + } +} + +func getBits(primitiveType PrimitiveType) int { + switch primitiveType { + case Primitive_I8, Primitive_U8: + return 8 + case Primitive_I16, Primitive_U16: + return 16 + case Primitive_I32, Primitive_U32, Primitive_F32: + return 32 + case Primitive_I64, Primitive_U64, Primitive_F64: + return 64 + default: + panic("Passed an invalid type (" + strconv.FormatUint(uint64(primitiveType), 10) + ") to getBits()") + } +}