// ---------------------------------------------------------------------------
// compiler.go: data model for compiled programs.
// ---------------------------------------------------------------------------

// PrimitiveType enumerates the primitive types the compiler distinguishes.
type PrimitiveType uint32

// Primitive type identifiers.
const (
	Primitive_I8 PrimitiveType = iota
	Primitive_I16
	Primitive_I32
	Primitive_I64
	Primitive_U8
	Primitive_U16
	Primitive_U32
	Primitive_U64
)

// TypeType selects which kind of type a Type value describes.
type TypeType uint32

// Kinds of Type.
const (
	Type_Primitive TypeType = iota
	Type_Named
	Type_Array
	Type_Tuple
)

// Type is a tagged value: Type selects the kind and Value carries the
// kind-specific payload.
//
// NOTE(review): nothing in this file constructs a Type yet; Value is presumably
// a PrimitiveType, NamedType, ArrayType or TupleType matching the tag. Confirm
// once the compiler is implemented.
type Type struct {
	Type  TypeType
	Value any
}

// NamedType refers to a type by its name.
type NamedType struct {
	TypeName string
}

// ArrayType describes an array by the type of its elements.
type ArrayType struct {
	ElementType Type
}

// TupleType describes a tuple by the types of its members.
type TupleType struct {
	Types []Type
}

// StatementType selects which kind of statement a Statement value holds.
type StatementType uint32

// Kinds of Statement.
const (
	Statement_Expression StatementType = iota
	Statement_Block
	Statement_Return
	Statement_DeclareLocalVariable
)

// Statement is a tagged value: Type selects the kind and Value carries the
// kind-specific payload.
type Statement struct {
	Type  StatementType
	Value any
}

// BlockStatement wraps a nested Block.
type BlockStatement struct {
	Block Block
}

// ReturnStatement holds the returned expression. Value is a pointer, so it can
// be nil.
type ReturnStatement struct {
	Value *Expression
}

// DeclareLocalVariableStatement names a local variable and its initializer.
type DeclareLocalVariableStatement struct {
	Variable    string
	Initializer Expression
}

// ExpressionType selects which kind of expression an Expression value holds.
type ExpressionType uint32

// Kinds of Expression.
const (
	Expression_Assignment ExpressionType = iota
	Expression_Literal
	Expression_VariableReference
	Expression_Arithmetic
)

// Expression is a tagged value: Type selects the kind and Value carries the
// kind-specific payload.
type Expression struct {
	Type  ExpressionType
	Value any
}

// AssignmentExpression pairs a variable name with the expression assigned to it.
type AssignmentExpression struct {
	Variable string
	Value    Expression
}

// LiteralExpression holds a literal value tagged with its primitive type.
type LiteralExpression struct {
	Type  PrimitiveType
	Value any
}

// VariableReferenceExpression refers to a variable by name.
type VariableReferenceExpression struct {
	Variable string
}

// ArithmeticOperation enumerates the binary arithmetic operators.
type ArithmeticOperation uint32

// Arithmetic operators.
const (
	Arithmetic_Add ArithmeticOperation = iota
	Arithmetic_Sub
	Arithmetic_Mul
	Arithmetic_Div
	Arithmetic_Mod
)

// ArithmeticExpression applies Operation to the Left and Right operands.
type ArithmeticExpression struct {
	Operation ArithmeticOperation
	Left      Expression
	Right     Expression
}

// Block is an ordered list of statements.
type Block struct {
	Statements []Statement
}

// CompiledFunction holds a function's parameter types, return type and body.
type CompiledFunction struct {
	Parameters  []Type
	ReturnValue Type
	Body        Block
}

// CompiledFile is the list of functions produced for one source file.
type CompiledFile struct {
	Functions []CompiledFunction
}

// compiler is a placeholder: it currently always returns (nil, nil).
func compiler() (*CompiledFile, error) {
	return nil, nil
}

// ---------------------------------------------------------------------------
// lexer.go: splits source text into tokens.
// ---------------------------------------------------------------------------

var (
	// Whitespace lists the runes that separate tokens and are otherwise ignored.
	Whitespace = []rune{' ', '\t', '\r', '\n'}

	// Separators lists the runes that end the preceding token and form a
	// single-rune token of their own.
	Separators = []rune{'(', ')', '{', '}', ';'}
)

// LexType selects which kind of token a LexToken holds.
type LexType uint32

// Kinds of LexToken. parseToken does not produce Type_LiteralNumber yet;
// numeric text such as `1u8` is currently returned as Type_Identifier.
const (
	Type_Identifier LexType = iota
	Type_Keyword
	Type_Separator
	Type_LiteralString
	Type_LiteralNumber
)

// Keyword enumerates the reserved words recognised by parseToken.
type Keyword uint32

// Reserved words.
const (
	Keyword_Import Keyword = iota
	Keyword_Void
)

// Separator enumerates the punctuation tokens recognised by parseToken.
type Separator uint32

// Punctuation tokens.
const (
	Separator_OpenParen Separator = iota
	Separator_CloseParen
	Separator_OpenCurly
	Separator_CloseCurly
	Separator_Semicolon
)

// LiteralType selects which kind of value a Literal holds.
type LiteralType uint32

// Kinds of Literal.
const (
	Literal_String LiteralType = iota
	Literal_Number
)

// LexToken is one token of the source text.
//
// Type selects the kind. Value is a Keyword for Type_Keyword, a Separator for
// Type_Separator, and a string for Type_Identifier and Type_LiteralString (the
// string literal's contents without quotes). Position is the zero-based rune
// offset of the token's first character in the text passed to lexer;
// parseToken on its own leaves it at zero.
type LexToken struct {
	Type     LexType
	Position uint64
	Value    any
}

// Literal is a tagged literal value. The lexer in this file does not use it yet.
type Literal struct {
	Type  LiteralType
	Value any
}

// stringLiteral decodes a double-quoted string literal.
//
// runes must begin at the opening quote. It returns the literal's contents
// (without the quotes), the runes that follow the closing quote, and an error
// if the literal is unterminated or ends in a dangling backslash. A backslash
// makes the following rune part of the literal verbatim.
func stringLiteral(runes []rune) (string, []rune, error) {
	var literal strings.Builder

	idx := 1 // Skip the opening quote.
	for idx < len(runes) && runes[idx] != '"' {
		if runes[idx] == '\\' {
			if idx == len(runes)-1 {
				return "", nil, errors.New("unmatched escape sequence")
			}

			// TODO \n, \r, \uXXXX, ... escape sequences

			idx++
		}

		literal.WriteRune(runes[idx])
		idx++
	}

	// >= rather than == so that an empty input is reported as an error
	// instead of slicing out of range below.
	if idx >= len(runes) {
		return "", nil, errors.New("unclosed string literal")
	}

	return literal.String(), runes[idx+1:], nil
}

// scanToken locates the first token in src.
//
// It returns the token text, the index in src of the token's first rune and
// the index just past the token. An empty token means src contains nothing but
// whitespace. String literals are returned with their escapes already resolved
// and re-wrapped in double quotes.
func scanToken(src []rune) (string, int, int, error) {
	start := 0
	for start < len(src) && slices.Contains(Whitespace, src[start]) {
		start++
	}

	if start == len(src) {
		return "", start, start, nil
	}

	if src[start] == '"' {
		literal, remaining, err := stringLiteral(src[start:])
		if err != nil {
			return "", 0, 0, err
		}

		return "\"" + literal + "\"", start, len(src) - len(remaining), nil
	}

	end := start
	for end < len(src) && !slices.Contains(Whitespace, src[end]) && !slices.Contains(Separators, src[end]) {
		end++
	}

	// The token starts on a separator: the separator itself is the token.
	if start == end {
		end++
	}

	return string(src[start:end]), start, end, nil
}

// nextToken returns the first token of program and the source text that
// follows it. Both results are empty when program contains only whitespace.
func nextToken(program string) (string, string, error) {
	runes := []rune(program)

	token, _, end, err := scanToken(runes)
	if err != nil {
		return "", "", err
	}

	if token == "" {
		return "", "", nil
	}

	return token, string(runes[end:]), nil
}

// parseToken classifies the text of a single token.
//
// Text starting with a double quote must also end with one and yields a
// Type_LiteralString holding the text between the quotes; anything else that
// starts with a quote is an error. Known keywords and separators map to their
// enum values, and everything else is returned as an identifier.
func parseToken(token string) (LexToken, error) {
	if strings.HasPrefix(token, "\"") {
		// A lone `"` or a token without a closing quote would otherwise make
		// the slice expression below panic or silently drop a character.
		if len(token) < 2 || !strings.HasSuffix(token, "\"") {
			return LexToken{}, errors.New("malformed string literal token")
		}

		return LexToken{Type: Type_LiteralString, Value: token[1 : len(token)-1]}, nil
	}

	switch token {
	case "void":
		return LexToken{Type: Type_Keyword, Value: Keyword_Void}, nil
	case "import":
		return LexToken{Type: Type_Keyword, Value: Keyword_Import}, nil
	case "(":
		return LexToken{Type: Type_Separator, Value: Separator_OpenParen}, nil
	case ")":
		return LexToken{Type: Type_Separator, Value: Separator_CloseParen}, nil
	case "{":
		return LexToken{Type: Type_Separator, Value: Separator_OpenCurly}, nil
	case "}":
		return LexToken{Type: Type_Separator, Value: Separator_CloseCurly}, nil
	case ";":
		return LexToken{Type: Type_Separator, Value: Separator_Semicolon}, nil
	default:
		return LexToken{Type: Type_Identifier, Value: token}, nil
	}
}

// lexer splits program into tokens.
//
// Each token's Position is set to the rune offset at which it starts in
// program. The result is nil when program contains no tokens. The first
// scanning or classification error aborts lexing.
func lexer(program string) ([]LexToken, error) {
	var tokens []LexToken

	// Decode once; scanning then works on sub-slices instead of re-decoding
	// the remaining text for every token.
	src := []rune(program)

	offset := 0
	for offset < len(src) {
		token, start, end, err := scanToken(src[offset:])
		if err != nil {
			return nil, err
		}

		if token == "" {
			break
		}

		lexToken, err := parseToken(token)
		if err != nil {
			return nil, err
		}

		lexToken.Position = uint64(offset + start)
		tokens = append(tokens, lexToken)
		offset += end
	}

	return tokens, nil
}

// ---------------------------------------------------------------------------
// main.go: command-line entry point.
// ---------------------------------------------------------------------------

// main reads the file named by the single command-line argument, lexes it and
// logs the resulting tokens. It exits with a fatal log message on a usage
// error, an unreadable file or a lexing error.
func main() {
	if len(os.Args) != 2 {
		log.Fatalln("Usage: " + os.Args[0] + " <input file>")
	}

	content, err := os.ReadFile(os.Args[1])
	if err != nil {
		log.Fatalln("Cannot open input file.", err)
	}

	tokens, err := lexer(string(content))
	if err != nil {
		log.Fatalln(err)
	}

	log.Printf("%+#v\n", tokens)
}