// ---------------------------------------------------------------------------
// compile.go
// ---------------------------------------------------------------------------

// section is one named output section together with the bytes emitted into it
// so far.
type section struct {
	name string
	buff bytes.Buffer
}

// symtabEntry records where a variable or label was defined.
type symtabEntry struct {
	sectionName string // name of the section that was current at the definition
	kind        string // ".label", or ".var." followed by the variable's size class
	offset      int64  // byte offset inside that section's buffer
	global      bool   // true for labels introduced with `global`
}

// compiler turns a stream of Tokens into per-section byte buffers and a symbol
// table.
type compiler struct {
	symtab         map[string]symtabEntry
	sections       []section
	currentSection *section // always points into sections; nil until the first SectionToken
}

// NewCompiler returns a compiler with an empty symbol table and no sections.
func NewCompiler() *compiler {
	return &compiler{
		symtab: map[string]symtabEntry{}, // starts out empty
	}
}

// Must appends b to the current section and panics if the write fails or is
// short. It must only be called once a section has been started.
func (c *compiler) Must(b []byte) {
	n, err := c.currentSection.buff.Write(b)
	if err != nil {
		panic(err)
	}
	if n != len(b) {
		panic(io.ErrShortWrite)
	}
}

// MustUint64 appends val to the current section as 8 little-endian bytes.
func (c *compiler) MustUint64(val uint64) {
	ret := make([]byte, 8)
	binary.LittleEndian.PutUint64(ret, val)
	c.Must(ret)
}

// movImm64Opcodes maps a 64-bit register name to the two-byte prefix
// (REX byte, then 0xB8 + low three bits of the register number) of the
// `mov r64, imm64` instruction. r8-r15 need the REX.B bit, hence 0x49.
//
// TODO store in the 32-bit register with a shorter prefix if the value fits.
var movImm64Opcodes = map[string][2]byte{
	"rax": {0x48, 0xb8},
	"rcx": {0x48, 0xb9},
	"rdx": {0x48, 0xba},
	"rbx": {0x48, 0xbb},
	"rsp": {0x48, 0xbc},
	"rbp": {0x48, 0xbd},
	"rsi": {0x48, 0xbe},
	"rdi": {0x48, 0xbf},
	"r8":  {0x49, 0xb8},
	"r9":  {0x49, 0xb9},
	"r10": {0x49, 0xba},
	"r11": {0x49, 0xbb},
	"r12": {0x49, 0xbc},
	"r13": {0x49, 0xbd},
	"r14": {0x49, 0xbe},
	"r15": {0x49, 0xbf},
}

// Compile consumes a single token, updating the symbol table and appending any
// generated bytes to the current section.
//
// The first token must be a SectionToken; everything else needs a section to
// emit into. Malformed or not-yet-supported input is reported through the
// returned error rather than by panicking.
func (c *compiler) Compile(t Token) error {
	if c.currentSection == nil {
		// The only allowable token outside of a section is to start a new section
		if _, ok := t.(SectionToken); !ok {
			return fmt.Errorf("Need to start with a section token, got %T", t)
		}
	}

	switch tok := t.(type) {
	case SectionToken:
		c.enterSection(tok.SectionName)
		return nil

	case DataVariableInstrToken:
		return c.compileDataVariable(tok)

	case LabelToken:
		if _, ok := c.symtab[tok.LabelName]; ok {
			return fmt.Errorf("name %q was already defined", tok.LabelName)
		}
		c.symtab[tok.LabelName] = symtabEntry{
			sectionName: c.currentSection.name,
			kind:        ".label",
			offset:      int64(c.currentSection.buff.Len()),
			global:      tok.IsGlobal,
		}
		return nil

	case MovInstrToken:
		return c.compileMov(tok)

	case SyscallInstrToken:
		// x86-64 `syscall` is the fixed two-byte opcode 0F 05.
		c.Must([]byte{0x0f, 0x05})
		return nil

	default:
		return fmt.Errorf("can't compile token of type %T", t)
	}
}

// enterSection makes the named section current, creating it on first use.
func (c *compiler) enterSection(name string) {
	// Check if we are resuming an existing section. Index instead of ranging
	// by value so the buffer is not copied and the pointer targets the slice.
	for i := range c.sections {
		if c.sections[i].name == name {
			c.currentSection = &c.sections[i]
			return
		}
	}

	// It's a new section. append may move the backing array, so the pointer is
	// re-taken from the slice afterwards.
	c.sections = append(c.sections, section{name: name})
	c.currentSection = &c.sections[len(c.sections)-1]
}

// compileDataVariable encodes a `$name = sizeclass value` declaration into the
// current section and records it in the symbol table.
func (c *compiler) compileDataVariable(tok DataVariableInstrToken) error {
	if _, ok := c.symtab[tok.VarName]; ok {
		return fmt.Errorf("variable %q was already defined", tok.VarName)
	}

	// Encode first, so that a bad initializer does not leave a half-defined
	// symbol behind.
	var data []byte
	switch tok.Sizeclass {
	case "u8":
		// 1 byte literal
		val, err := strconv.ParseUint(tok.Value, 10, 8)
		if err != nil {
			return fmt.Errorf("variable %q: %w", tok.VarName, err)
		}
		data = []byte{byte(val)}

	case "u64":
		// 8-byte literal
		val, err := strconv.ParseUint(tok.Value, 10, 64)
		if err != nil {
			return fmt.Errorf("variable %q: %w", tok.VarName, err)
		}
		data = make([]byte, 8)
		binary.LittleEndian.PutUint64(data, val)

	case "sz":
		// String with null termination. The lexer hands over the literal as
		// written, so a double-quoted value is unquoted (resolving escapes such
		// as \n); anything else is emitted verbatim.
		text := tok.Value
		if len(text) >= 2 && text[0] == '"' {
			if unquoted, err := strconv.Unquote(text); err == nil {
				text = unquoted
			}
		}
		data = append([]byte(text), 0)

	default:
		return fmt.Errorf("variable %q has unknown size class %q", tok.VarName, tok.Sizeclass)
	}

	// Stash in symbol table for future backreferences
	c.symtab[tok.VarName] = symtabEntry{
		sectionName: c.currentSection.name,
		kind:        ".var." + tok.Sizeclass,
		offset:      int64(c.currentSection.buff.Len()),
		global:      false, // TODO allow this?
	}
	c.Must(data)
	return nil
}

// compileMov encodes a mov instruction. Only `mov r64, imm64` is implemented;
// every other operand combination yields an error.
func (c *compiler) compileMov(tok MovInstrToken) error {
	if len(tok.Args) != 2 {
		return fmt.Errorf("mov expects 2 operands, got %d", len(tok.Args))
	}
	dest, src := tok.Args[0], tok.Args[1]

	if imm, ok := parseImmediate(src); ok {
		// Store immediate in register
		opcode, known := movImm64Opcodes[strings.ToLower(dest)]
		if !known {
			// Store immediate in variable?
			return fmt.Errorf("not implemented: store immediate in %q", dest)
		}
		c.Must(opcode[:])
		c.MustUint64(imm)
		return nil
	}

	// Variables are stored in the symbol table without their `$` sigil; a
	// leading `&` asks for the address instead of the contents.
	name := strings.TrimPrefix(strings.TrimPrefix(src, "&"), "$")
	if _, ok := c.symtab[name]; ok {
		if strings.HasPrefix(src, "&") {
			return fmt.Errorf("not implemented: mov address of variable %q", name)
		}
		return fmt.Errorf("not implemented: mov contents of variable %q", name)
	}

	return fmt.Errorf("unknown mov type: %q, %q", dest, src)
}

// parseImmediate parses a base-10 literal that fits in 64 bits, accepting both
// the signed and the unsigned range. Negative values are returned in their
// two's-complement form.
func parseImmediate(s string) (uint64, bool) {
	if v, err := strconv.ParseInt(s, 10, 64); err == nil {
		return uint64(v), true
	}
	if v, err := strconv.ParseUint(s, 10, 64); err == nil {
		return v, true
	}
	return 0, false
}

// Finalize is meant to write the assembled program to dest. It is not
// implemented yet and always returns an error.
func (c *compiler) Finalize(dest io.Writer) error {

	const alignment = 4096

	// Write ELF header
	// Write section headers
	// Write binary content
	// Pad out section to page alignment
	// Done

	return errors.New("finalize: not implemented")
}

// ---------------------------------------------------------------------------
// elf.go
// ---------------------------------------------------------------------------

// Elf64_Ehdr has the field layout of the ELF64 file header.
type Elf64_Ehdr struct {
	e_ident     [16]byte
	e_type      uint16
	e_machine   uint16
	e_version   uint32
	e_entry     uint64
	e_phoff     uint64
	e_shoff     uint64
	e_flags     uint32
	e_ehsize    uint16
	e_phentsize uint16
	e_phnum     uint16
	e_shentsize uint16
	e_shnum     uint16
	e_shstrndx  uint16
}

// Elf64_Phdr has the field layout of an ELF64 program header.
type Elf64_Phdr struct {
	p_type   uint32
	p_flags  uint32
	p_offset uint64
	p_vaddr  uint64
	p_paddr  uint64
	p_filesz uint64
	p_memsz  uint64
	p_align  uint64
}

// Elf64_Shdr has the field layout of an ELF64 section header.
type Elf64_Shdr struct {
	sh_name      uint32
	sh_type      uint32
	sh_flags     uint64
	sh_addr      uint64
	sh_offset    uint64
	sh_size      uint64
	sh_link      uint32
	sh_info      uint32
	sh_addralign uint64
	sh_entsize   uint64
}

// ---------------------------------------------------------------------------
// lexer.go
// ---------------------------------------------------------------------------

// lexer reads assembly source line by line and yields one Token per
// non-empty line.
type lexer struct {
	r      *bufio.Reader
	lineno int // number of source lines read so far

	peek Token // token returned by Peek and not yet consumed by Next
}

// NewLexer returns a lexer reading from src.
func NewLexer(src io.Reader) *lexer {
	return &lexer{
		r:      bufio.NewReader(src),
		lineno: 0,
	}
}

// Peek returns the next token without consuming it; the following call to
// Next returns the same token.
func (l *lexer) Peek() (Token, error) {
	if l.peek != nil {
		return l.peek, nil
	}

	tok, err := l.Next()
	if err != nil {
		return nil, err
	}

	l.peek = tok
	return tok, nil
}

// Next returns the next token, skipping blank and comment-only lines. At end
// of input it returns the reader's io.EOF. A final line without a trailing
// newline is still tokenized.
func (l *lexer) Next() (Token, error) {
	if l.peek != nil {
		ret := l.peek
		l.peek = nil
		return ret, nil
	}

	for {
		line, err := l.r.ReadString('\n')
		// ReadString reports io.EOF together with the data of an unterminated
		// last line; only give up once there is nothing left to tokenize.
		if err != nil && (!errors.Is(err, io.EOF) || line == "") {
			return nil, err
		}
		l.lineno++

		// Strip trailing line-comments (;)
		code := stripComment(line)

		fields := strings.Fields(code)
		if len(fields) == 0 {
			// This line only contained whitespace or comments
			// Continue to the next line
			continue
		}

		return l.lexLine(code, fields)
	}
}

// lexLine converts one comment-free, non-empty source line into a token.
// fields is the whitespace-split form of code.
func (l *lexer) lexLine(code string, fields []string) (Token, error) {
	switch strings.ToLower(fields[0]) {
	case "section":
		if len(fields) < 2 {
			return nil, fmt.Errorf("invalid syntax at line %d (expected section name)", l.lineno)
		}
		return SectionToken{SectionName: fields[1]}, nil

	case "global":
		if len(fields) < 2 {
			return nil, fmt.Errorf("invalid syntax at line %d (expected label name)", l.lineno)
		}
		return LabelToken{LabelName: strings.TrimRight(fields[1], `:`), IsGlobal: true}, nil

	case "mov":
		// Operands may be separated by commas, whitespace, or both, so
		// `mov rax,60` and `mov rax, 60` lex identically.
		operands := strings.Join(fields[1:], " ")
		return MovInstrToken{Args: strings.Fields(strings.ReplaceAll(operands, ",", " "))}, nil

	case "syscall":
		return SyscallInstrToken{}, nil
	}

	// If the field ends with `:`, it's a (local) label
	if strings.HasSuffix(fields[0], `:`) {
		return LabelToken{LabelName: strings.TrimRight(fields[0], `:`), IsGlobal: false}, nil
	}

	// If the field starts with `$`, it's a "variable"
	if strings.HasPrefix(fields[0], `$`) {
		return l.lexVariable(code, fields)
	}

	return nil, fmt.Errorf("Unknown token '%s' at line %d", fields[0], l.lineno)
}

// lexVariable parses `$name = sizeclass initializer...`.
func (l *lexer) lexVariable(code string, fields []string) (Token, error) {
	// 1: =
	if len(fields) < 2 || fields[1] != `=` {
		return nil, fmt.Errorf("Invalid syntax at line %d (expected = in declaration)", l.lineno)
	}

	// 2: sizeclass
	if len(fields) < 3 {
		return nil, fmt.Errorf("invalid syntax at line %d (expected size class in declaration)", l.lineno)
	}

	// 3+++: literal initializer. Taken from the raw line, not re-joined from
	// fields, so that runs of whitespace inside a string literal survive.
	value := strings.TrimSpace(code)
	for _, f := range fields[:3] {
		value = strings.TrimSpace(value[len(f):])
	}

	return DataVariableInstrToken{
		VarName:   fields[0][1:],
		Sizeclass: fields[2],
		Value:     value,
	}, nil
}

// stripComment returns line up to, but not including, the first `;` that is
// not inside a double-quoted string. A backslash inside a string escapes the
// character after it.
func stripComment(line string) string {
	inString, escaped := false, false
	for i := 0; i < len(line); i++ {
		switch ch := line[i]; {
		case escaped:
			escaped = false
		case inString && ch == '\\':
			escaped = true
		case ch == '"':
			inString = !inString
		case ch == ';' && !inString:
			return line[:i]
		}
	}
	return line
}

// ---------------------------------------------------------------------------
// main.go
// ---------------------------------------------------------------------------

func main() {
	src := flag.String("Src", "", "Input file")
	flag.Parse()

	fh, err := os.Open(*src)
	if err != nil {
		panic(err)
	}
	defer fh.Close()

	assemble(fh, io.Discard)
}

// assemble lexes src and feeds every token to a fresh compiler, printing each
// token as it goes. It returns normally once the input is exhausted and panics
// on the first lexing or compilation error.
//
// dest is not written to yet: Finalize, which would produce the output, is not
// implemented.
func assemble(src io.Reader, dest io.Writer) {
	lx := NewLexer(src)
	cc := NewCompiler()

	for {
		tok, err := lx.Next()
		if errors.Is(err, io.EOF) {
			// Reached EOF
			// Terminate compilation
			return
		}
		if err != nil {
			// Real error
			panic(err)
		}

		fmt.Printf("[line %d] %#v\n", lx.lineno, tok)

		if err := cc.Compile(tok); err != nil {
			panic(err)
		}
	}
}

// ---------------------------------------------------------------------------
// token.go
// ---------------------------------------------------------------------------

// Token is any one of the *Token structs below.
type Token interface{}

// SectionToken starts, or switches back to, the named section.
type SectionToken struct {
	SectionName string
}

// GlobalToken is not produced by the lexer, which emits `global` lines as a
// LabelToken with IsGlobal set.
type GlobalToken struct {
	IdentifierName string
}

// LabelToken defines a label at the current position.
type LabelToken struct {
	LabelName string
	IsGlobal  bool
}

// MovInstrToken is a mov instruction; Args holds its operands in source order.
type MovInstrToken struct {
	Args []string
}

// SyscallInstrToken is a syscall instruction.
type SyscallInstrToken struct{}

// DataVariableInstrToken is a `$name = sizeclass value` declaration.
type DataVariableInstrToken struct {
	VarName   string
	Sizeclass string // sz, u8, u16, u32, u64
	Value     string // initializer text exactly as written in the source
}