initial commit

This commit is contained in:
mappu 2023-12-09 16:12:45 +13:00
commit 0d05970b5b
9 changed files with 472 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
pxasme

190
compile.go Normal file
View File

@ -0,0 +1,190 @@
package main
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"strconv"
)
type (
	// section is a named output section together with the bytes that have
	// been emitted into it so far.
	section struct {
		name string
		buff bytes.Buffer
	}

	// symtabEntry records where a named symbol (data variable or label) was
	// defined.
	symtabEntry struct {
		sectionName string // name of the section that was current at definition time
		kind        string // ".label", or ".var." followed by the variable's size class
		offset      int64  // length of the section buffer when the symbol was defined
		global      bool   // copied from the label token; always false for variables
	}

	// compiler accumulates sections and symbols as tokens are compiled.
	compiler struct {
		symtab         map[string]symtabEntry // symbol name -> definition
		sections       []section              // every section seen so far, in first-seen order
		currentSection *section               // points into sections; nil until a section token is compiled
	}
)
// NewCompiler returns a compiler with an empty symbol table, no sections and
// no current section.
func NewCompiler() *compiler {
	cc := new(compiler)
	cc.symtab = make(map[string]symtabEntry) // must be non-nil before the first insert
	return cc
}
// Must appends b to the current section's buffer. It panics if the write
// reports an error or writes fewer than len(b) bytes.
func (c *compiler) Must(b []byte) {
	written, err := c.currentSection.buff.Write(b)
	switch {
	case err != nil:
		panic(err)
	case written != len(b):
		panic(io.ErrShortWrite)
	}
}
// MustUint64 appends val to the current section as 8 little-endian bytes,
// panicking (via Must) if the write fails.
func (c *compiler) MustUint64(val uint64) {
	var encoded [8]byte
	binary.LittleEndian.PutUint64(encoded[:], val)
	c.Must(encoded[:])
}
// Compile consumes a single token and updates the compiler state: it switches
// or creates sections, records symbols, and emits bytes into the current
// section.
//
// The first token compiled must be a SectionToken. Unsupported or malformed
// tokens are reported through the returned error; nothing is emitted and no
// symbol is recorded for a token that fails.
func (c *compiler) Compile(t Token) error {
	if c.currentSection == nil {
		// The only allowable token outside of a section is to start a new section
		if _, ok := t.(SectionToken); !ok {
			return fmt.Errorf("Need to start with a section token, got %T", t)
		}
	}

	switch tok := t.(type) {
	case SectionToken:
		// Resume an existing section if this name has been seen before.
		for i := range c.sections {
			if c.sections[i].name == tok.SectionName {
				c.currentSection = &c.sections[i]
				return nil
			}
		}

		// Otherwise open a new one. The pointer is taken after the append so
		// that it refers to the slice's current backing array.
		c.sections = append(c.sections, section{name: tok.SectionName})
		c.currentSection = &c.sections[len(c.sections)-1]
		return nil

	case DataVariableInstrToken:
		if _, ok := c.symtab[tok.VarName]; ok {
			return fmt.Errorf("variable %q was already defined", tok.VarName)
		}

		// Remember where the variable starts before emitting its bytes.
		offset := int64(c.currentSection.buff.Len())

		// Emit the initializer. Every failure path returns before anything is
		// written, so a bad declaration leaves no bytes and no symbol behind.
		switch tok.Sizeclass {
		case "u8":
			// 1 byte literal
			val, err := strconv.ParseUint(tok.Value, 10, 8)
			if err != nil {
				return fmt.Errorf("variable %q: %w", tok.VarName, err)
			}
			c.Must([]byte{byte(val)})

		case "u64":
			// 8-byte literal
			val, err := strconv.ParseUint(tok.Value, 10, 64)
			if err != nil {
				return fmt.Errorf("variable %q: %w", tok.VarName, err)
			}
			c.MustUint64(val)

		case "sz":
			// String with null termination. The value is emitted exactly as
			// supplied by the token.
			// NOTE(review): surrounding quotes and backslash escapes in the
			// value are not interpreted here - confirm whether the lexer or
			// this method is meant to do that.
			c.Must(append([]byte(tok.Value), 0))

		default:
			return fmt.Errorf("variable %q has unknown size class %q", tok.VarName, tok.Sizeclass)
		}

		// Stash in symbol table for future backreferences
		c.symtab[tok.VarName] = symtabEntry{
			sectionName: c.currentSection.name,
			kind:        ".var." + tok.Sizeclass,
			offset:      offset,
			global:      false, // TODO allow this?
		}
		return nil

	case LabelToken:
		if _, ok := c.symtab[tok.LabelName]; ok {
			return fmt.Errorf("name %q was already defined", tok.LabelName)
		}
		c.symtab[tok.LabelName] = symtabEntry{
			sectionName: c.currentSection.name,
			kind:        ".label",
			offset:      int64(c.currentSection.buff.Len()),
			global:      tok.IsGlobal,
		}
		return nil

	case MovInstrToken:
		// TODO encode more cases properly
		if len(tok.Args) != 2 {
			return fmt.Errorf("mov expects 2 operands, got %d", len(tok.Args))
		}
		dst, src := tok.Args[0], tok.Args[1]

		if literal, err := strconv.ParseInt(src, 10, 64); err == nil {
			// Store immediate in register
			opcode, ok := movImm64Opcode(dst)
			if !ok {
				// Store immediate in variable?
				return fmt.Errorf("mov: storing an immediate in %q is not implemented", dst)
			}
			c.Must([]byte{0x48, opcode}) // TODO use a shorter 32-bit form when the value fits
			c.MustUint64(uint64(literal))
			return nil
		}

		// NOTE(review): these lookups use the operand text verbatim; confirm
		// how the `$` and `&` sigils are supposed to map onto symbol names.
		if _, ok := c.symtab[src]; ok {
			// Store variable's contents in register
			return fmt.Errorf("mov: loading the contents of %q is not implemented", src)
		}
		if _, ok := c.symtab["&"+src]; ok {
			// With &; store address of variable in register
			return fmt.Errorf("mov: loading the address of %q is not implemented", src)
		}
		return fmt.Errorf("unknown mov type (%q, %q)", dst, src)

	default:
		return fmt.Errorf("can't compile token of type %T", t)
	}
}

// movImm64Opcode returns the opcode byte for `mov r64, imm64` (0xB8 plus the
// register number; the caller emits the 0x48 REX.W prefix) and whether reg is
// one of the supported 64-bit registers.
func movImm64Opcode(reg string) (byte, bool) {
	switch reg {
	case "rax":
		return 0xb8, true
	case "rcx":
		return 0xb9, true
	case "rdx":
		return 0xba, true
	case "rbx":
		return 0xbb, true
	case "rsp":
		return 0xbc, true
	case "rbp":
		return 0xbd, true
	case "rsi":
		return 0xbe, true
	case "rdi":
		return 0xbf, true
	}
	return 0, false
}
// Finalize is intended to write the assembled program to dest as an ELF file.
// It is not implemented yet and always returns an error.
//
// Planned steps:
//   - write the ELF header
//   - write the section headers
//   - write the binary content
//   - pad each section out to page alignment
func (c *compiler) Finalize(dest io.Writer) error {
	const alignment = 4096 // intended page alignment for section padding

	// Report the missing implementation through the error return instead of
	// panicking, so callers can handle it like any other failure.
	return fmt.Errorf("Finalize is not implemented (sections would be padded to %d bytes)", alignment)
}

42
elf.go Normal file
View File

@ -0,0 +1,42 @@
package main
// The structures below use the field names and integer widths of the ELF-64
// object file format (file header, program header, section header).
type (
	// Elf64_Ehdr is the ELF-64 file header.
	Elf64_Ehdr struct {
		e_ident     [16]byte
		e_type      uint16
		e_machine   uint16
		e_version   uint32
		e_entry     uint64
		e_phoff     uint64
		e_shoff     uint64
		e_flags     uint32
		e_ehsize    uint16
		e_phentsize uint16
		e_phnum     uint16
		e_shentsize uint16
		e_shnum     uint16
		e_shstrndx  uint16
	}

	// Elf64_Phdr is one ELF-64 program header table entry.
	Elf64_Phdr struct {
		p_type   uint32
		p_flags  uint32
		p_offset uint64
		p_vaddr  uint64
		p_paddr  uint64
		p_filesz uint64
		p_memsz  uint64
		p_align  uint64
	}

	// Elf64_Shdr is one ELF-64 section header table entry.
	Elf64_Shdr struct {
		sh_name      uint32
		sh_type      uint32
		sh_flags     uint64
		sh_addr      uint64
		sh_offset    uint64
		sh_size      uint64
		sh_link      uint32
		sh_info      uint32
		sh_addralign uint64
		sh_entsize   uint64
	}
)

3
go.mod Normal file
View File

@ -0,0 +1,3 @@
module pxasme
go 1.19

104
lexer.go Normal file
View File

@ -0,0 +1,104 @@
package main
import (
"bufio"
"fmt"
"io"
"strings"
)
// lexer turns assembly source text into Tokens, reading the input one line at
// a time.
type lexer struct {
r *bufio.Reader // buffered reader over the source text
lineno int // incremented by Next each time it reads a line
peek Token // token buffered by Peek; returned (and cleared) by the following Next
}
// NewLexer returns a lexer that reads assembly source from src through a
// buffered reader. The line counter starts at zero and no token is buffered.
func NewLexer(src io.Reader) *lexer {
	lx := new(lexer)
	lx.r = bufio.NewReader(src)
	return lx
}
// Peek returns the upcoming token without consuming it: the token is fetched
// with Next and buffered so that the following Next call returns it again.
// On error nothing is buffered and the error is returned.
func (l *lexer) Peek() (Token, error) {
	upcoming, err := l.Next()
	if err == nil {
		l.peek = upcoming
		return upcoming, nil
	}
	return nil, err
}
// Next returns the next token from the source.
//
// A token buffered by Peek is returned first. Otherwise lines are read until
// one contains something other than whitespace and a `;` line comment, and
// that line is turned into a single token. At end of input the reader's
// io.EOF is returned; a final line without a trailing newline is still lexed
// before EOF is reported. Malformed lines produce an error that names the
// line number.
//
// Known limitation: a `;` starts a comment even inside a quoted string, and
// runs of spaces inside a variable initializer collapse to one space.
func (l *lexer) Next() (Token, error) {
	if l.peek != nil {
		ret := l.peek
		l.peek = nil
		return ret, nil
	}

	// Loop (rather than recurse) over blank and comment-only lines so that
	// long runs of them cannot grow the stack.
	for {
		l.lineno++
		line, err := l.r.ReadString('\n')
		// ReadString returns the data read so far together with io.EOF when
		// the last line has no trailing newline; that line must still be lexed.
		if err != nil && (err != io.EOF || len(line) == 0) {
			return nil, err
		}

		// Strip trailing line-comments (;)
		line, _, _ = strings.Cut(line, `;`)

		fields := strings.Fields(line)
		if len(fields) == 0 {
			// This line only contained whitespace and/or comments
			continue
		}

		switch strings.ToLower(fields[0]) {
		case "section":
			if len(fields) < 2 {
				return nil, fmt.Errorf("Invalid syntax at line %d (expected section name)", l.lineno)
			}
			return SectionToken{SectionName: fields[1]}, nil

		case "global":
			if len(fields) < 2 {
				return nil, fmt.Errorf("Invalid syntax at line %d (expected label name after global)", l.lineno)
			}
			return LabelToken{LabelName: strings.TrimRight(fields[1], `:`), IsGlobal: true}, nil

		case "mov":
			// Operands are comma-separated; treating commas as whitespace makes
			// `mov rax,1` and `mov rax, 1` lex identically.
			args := strings.Fields(strings.ReplaceAll(line, `,`, ` `))[1:]
			if len(args) != 2 {
				return nil, fmt.Errorf("Invalid syntax at line %d (mov expects 2 operands, got %d)", l.lineno, len(args))
			}
			return MovInstrToken{Args: args}, nil

		case "syscall":
			return SyscallInstrToken{}, nil
		}

		// If the field ends with `:`, it's a (local) label
		if strings.HasSuffix(fields[0], `:`) {
			return LabelToken{LabelName: strings.TrimRight(fields[0], `:`), IsGlobal: false}, nil
		}

		// If the field starts with `$`, it's a "variable":
		//   $name = sizeclass initializer...
		if strings.HasPrefix(fields[0], `$`) {
			if len(fields) < 2 || fields[1] != `=` {
				return nil, fmt.Errorf("Invalid syntax at line %d (expected = in declaration)", l.lineno)
			}
			if len(fields) < 3 {
				return nil, fmt.Errorf("Invalid syntax at line %d (expected size class in declaration)", l.lineno)
			}
			return DataVariableInstrToken{
				VarName:   fields[0][1:],
				Sizeclass: fields[2],
				Value:     strings.Join(fields[3:], " "), // FIXME consecutive spaces are ruined
			}, nil
		}

		return nil, fmt.Errorf("Unknown token '%s' at line %d", fields[0], l.lineno)
	}
}

51
main.go Normal file
View File

@ -0,0 +1,51 @@
package main
import (
"errors"
"flag"
"fmt"
"io"
"io/ioutil"
"os"
)
//
// main assembles the file named by the -Src flag. The assembled output is
// currently discarded. Failure to open the input is reported by panicking.
func main() {
	src := flag.String("Src", "", "Input file")
	flag.Parse()

	fh, err := os.Open(*src)
	if err != nil {
		panic(err)
	}
	// Release the file handle on every exit path, including a panic raised
	// while assembling.
	defer fh.Close()

	assemble(fh, ioutil.Discard)
}
// assemble lexes src token by token and feeds each token to a fresh compiler,
// printing every token together with the lexer's current line number.
//
// It returns normally once the input is exhausted. Any lexing or compilation
// error is reported by panicking, because the signature has no error return.
//
// NOTE: nothing is written to dest yet.
func assemble(src io.Reader, dest io.Writer) {
	lx := NewLexer(src)
	cc := NewCompiler()

	for {
		tok, err := lx.Next()
		if errors.Is(err, io.EOF) {
			// Reached EOF: the whole input was consumed, so compilation is
			// complete. This is the success path, not a failure.
			return
		}
		if err != nil {
			// Real error
			panic(err)
		}

		fmt.Printf("[line %d] %#v\n", lx.lineno, tok)

		if err := cc.Compile(tok); err != nil {
			panic(err)
		}
	}
}

45
main_test.go Normal file
View File

@ -0,0 +1,45 @@
package main
import (
"io/ioutil"
"strings"
"testing"
)
// TestCompile feeds a small sample program (data variables, a global label,
// mov and syscall instructions) through assemble and discards the output.
// It makes no assertions of its own: assemble reports problems by panicking,
// so a panic is the only way this test can fail.
func TestCompile(t *testing.T) {
// @ref https://gist.github.com/armicron/e891709ce8893df2fd5fc74c846dcf20
const src = `
section .data
$msg = sz "Hello, world\n"
$filename = sz "test.txt"
$fd = u64 0
section .text
global _start: ;tell linker entry point
mov rdi, &$filename
mov rsi, 66 ;O_CREAT = 0102o (man open)
mov rdx, 438 ;umode_t = 0666 octal
mov rax, 2
syscall
mov $fd, rax
mov rdx, 13 ;message strlen
mov rsi, &$msg ;message to write
mov rdi, $fd ;file descriptor
mov rax, 1 ;system call number (sys_write)
syscall ;call kernel
mov rdi, $fd
mov rax, 3 ;sys_close
syscall
mov rax, 60 ;system call number (sys_exit)
syscall ;call kernel
`
// The assembled output is not inspected, so it is sent to a discarding writer.
assemble(strings.NewReader(src), ioutil.Discard)
}

8
test_assemble.sh Executable file
View File

@ -0,0 +1,8 @@
#!/bin/bash
# Usage: ./test_assemble.sh $'section .text\n mov rdx, 13370000000\n'
#
# Assembles the given source text with nasm (elf64) and prints the objdump
# disassembly of the resulting object file, for use as a reference encoding.
# All intermediate files live in a private temporary directory that is removed
# on exit, so nothing in the caller's working directory is touched.
set -euo pipefail

if [ "$#" -ne 1 ]; then
	echo "Usage: $0 ASM_SOURCE" >&2
	exit 2
fi

workdir=$(mktemp -d)
trap 'rm -rf "$workdir"' EXIT
cd "$workdir"

# printf, unlike echo, never treats the source text as an option (e.g. "-n").
printf '%s\n' "$1" > src.asm
nasm -f elf64 src.asm
objdump -D src.o

28
token.go Normal file
View File

@ -0,0 +1,28 @@
package main
type (
	// Token is any lexed unit of assembly source. The concrete token types
	// are the structs declared below.
	Token any

	// SectionToken starts, or resumes, the named section.
	SectionToken struct {
		SectionName string
	}

	// GlobalToken names an identifier.
	GlobalToken struct {
		IdentifierName string
	}

	// LabelToken defines a label; IsGlobal distinguishes global labels from
	// local ones.
	LabelToken struct {
		LabelName string
		IsGlobal  bool
	}

	// MovInstrToken is a mov instruction together with its operands.
	MovInstrToken struct {
		Args []string
	}

	// SyscallInstrToken is a syscall instruction; it carries no operands.
	SyscallInstrToken struct{}

	// DataVariableInstrToken declares a named data variable with an initial
	// value.
	DataVariableInstrToken struct {
		VarName   string
		Sizeclass string // sz, u8, u16, u32, u64
		Value     string
	}
)