initial commit
This commit is contained in:
commit
0d05970b5b
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
pxasme
|
190
compile.go
Normal file
190
compile.go
Normal file
@ -0,0 +1,190 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// section is one named output section together with the bytes that have been
// emitted into it so far.
type section struct {
	// name is the section name as given by the section token, e.g. ".data".
	name string

	// buff accumulates the raw bytes generated for this section.
	buff bytes.Buffer
}
|
||||
|
||||
// symtabEntry records where a named symbol (variable or label) lives.
type symtabEntry struct {
	// sectionName is the name of the section that was current when the symbol
	// was defined. kind classifies the symbol: ".label" for labels and
	// ".var." followed by the size class for data variables.
	sectionName, kind string

	// offset is the byte offset of the symbol from the start of its section.
	offset int64

	// global reports whether the symbol was declared global.
	global bool
}
|
||||
|
||||
type compiler struct {
|
||||
symtab map[string]symtabEntry
|
||||
sections []section
|
||||
currentSection *section
|
||||
}
|
||||
|
||||
func NewCompiler() *compiler {
|
||||
return &compiler{
|
||||
symtab: map[string]symtabEntry{}, // starts out empty
|
||||
}
|
||||
}
|
||||
|
||||
func (c *compiler) Must(b []byte) {
|
||||
n, err := c.currentSection.buff.Write(b)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if n != len(b) {
|
||||
panic(io.ErrShortWrite)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *compiler) MustUint64(val uint64) {
|
||||
ret := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(ret, val)
|
||||
c.Must(ret)
|
||||
}
|
||||
|
||||
func (c *compiler) Compile(t Token) error {
|
||||
if c.currentSection == nil {
|
||||
// The only allowable token outside of a section is to start a new section
|
||||
if _, ok := t.(SectionToken); !ok {
|
||||
return fmt.Errorf("Need to start with a section token, got %#t", t)
|
||||
}
|
||||
}
|
||||
|
||||
switch tok := t.(type) {
|
||||
case SectionToken:
|
||||
|
||||
// Check if we are resuming an existing section
|
||||
for i, sec := range c.sections {
|
||||
if sec.name == tok.SectionName {
|
||||
// Found it
|
||||
c.currentSection = &c.sections[i]
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// It's a new section
|
||||
c.sections = append(c.sections, section{
|
||||
name: tok.SectionName,
|
||||
buff: bytes.Buffer{},
|
||||
})
|
||||
c.currentSection = &c.sections[len(c.sections)-1]
|
||||
|
||||
return nil
|
||||
|
||||
case DataVariableInstrToken:
|
||||
// Stash in symbol table for future backreferences
|
||||
if _, ok := c.symtab[tok.VarName]; ok {
|
||||
return fmt.Errorf("variable %q was already defined", tok.VarName)
|
||||
}
|
||||
c.symtab[tok.VarName] = symtabEntry{
|
||||
sectionName: c.currentSection.name,
|
||||
kind: ".var." + tok.Sizeclass,
|
||||
offset: int64(c.currentSection.buff.Len()),
|
||||
global: false, // TODO allow this?
|
||||
}
|
||||
|
||||
// Generate bytes for the symbol
|
||||
switch tok.Sizeclass {
|
||||
case "u8":
|
||||
// 1 byte literal
|
||||
val, err := strconv.ParseUint(tok.Value, 10, 8)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.Must([]byte{byte(val)})
|
||||
return nil
|
||||
|
||||
case "u64":
|
||||
// 8-byte literal
|
||||
val, err := strconv.ParseUint(tok.Value, 10, 64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.MustUint64(val)
|
||||
return nil
|
||||
|
||||
case "sz":
|
||||
// string with null termination
|
||||
ret := []byte(tok.Value)
|
||||
ret = append(ret, 0)
|
||||
c.Must(ret)
|
||||
return nil
|
||||
|
||||
default:
|
||||
return fmt.Errorf("variable %q has unknown size class %q", tok.VarName, tok.Sizeclass)
|
||||
}
|
||||
|
||||
case LabelToken:
|
||||
if _, ok := c.symtab[tok.LabelName]; ok {
|
||||
return fmt.Errorf("name %q was already defined", tok.LabelName)
|
||||
}
|
||||
c.symtab[tok.LabelName] = symtabEntry{
|
||||
sectionName: c.currentSection.name,
|
||||
kind: ".label",
|
||||
offset: int64(c.currentSection.buff.Len()),
|
||||
global: tok.IsGlobal,
|
||||
}
|
||||
return nil
|
||||
|
||||
case MovInstrToken:
|
||||
// TODO encode more cases properly
|
||||
if literal, err := strconv.ParseInt(tok.Args[1], 10, 64); err == nil {
|
||||
|
||||
// Store immediate in register
|
||||
switch tok.Args[0] {
|
||||
case "rax":
|
||||
c.Must([]byte{0x48, 0xb8}) // TODO store in eax with shorter prefix if <32 bit
|
||||
c.MustUint64(uint64(literal))
|
||||
|
||||
case "rbx":
|
||||
c.Must([]byte{0x48, 0xbb}) // TODO store in eax with shorter prefix if <32 bit
|
||||
c.MustUint64(uint64(literal))
|
||||
|
||||
case "rcx":
|
||||
c.Must([]byte{0x48, 0xb9}) // TODO store in eax with shorter prefix if <32 bit
|
||||
c.MustUint64(uint64(literal))
|
||||
|
||||
case "rdx":
|
||||
c.Must([]byte{0x48, 0xba}) // TODO store in eax with shorter prefix if <32 bit
|
||||
c.MustUint64(uint64(literal))
|
||||
|
||||
default:
|
||||
// Store immediate in variable?
|
||||
panic("not implemented: store immediate in ???? thing")
|
||||
}
|
||||
|
||||
} else if _, ok := c.symtab[tok.Args[1]]; ok {
|
||||
// Store variable's contents in register
|
||||
|
||||
} else if _, ok := c.symtab["&"+tok.Args[1]]; ok {
|
||||
// With &; store address of variable in register
|
||||
|
||||
}
|
||||
|
||||
panic("unknown mov type, sorry")
|
||||
|
||||
default:
|
||||
return fmt.Errorf("can't compile token of type %#t", t)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *compiler) Finalize(dest io.Writer) error {
|
||||
|
||||
const alignment = 4096
|
||||
|
||||
// Write ELF header
|
||||
// Write section headers
|
||||
// Write binary content
|
||||
// Pad out section to page alignment
|
||||
// Done
|
||||
|
||||
panic("TODO")
|
||||
}
|
42
elf.go
Normal file
42
elf.go
Normal file
@ -0,0 +1,42 @@
|
||||
package main
|
||||
|
||||
// Elf64_Ehdr is the ELF-64 file header. Field names, order and widths follow
// the Elf64_Ehdr layout of the ELF-64 object file format; the per-field notes
// paraphrase that specification.
type Elf64_Ehdr struct {
	e_ident     [16]byte // identification bytes (magic, class, data encoding, ...)
	e_type      uint16   // object file type
	e_machine   uint16   // target architecture
	e_version   uint32   // object file version
	e_entry     uint64   // entry point virtual address
	e_phoff     uint64   // file offset of the program header table
	e_shoff     uint64   // file offset of the section header table
	e_flags     uint32   // processor-specific flags
	e_ehsize    uint16   // size of this header in bytes
	e_phentsize uint16   // size of one program header table entry
	e_phnum     uint16   // number of program header table entries
	e_shentsize uint16   // size of one section header table entry
	e_shnum     uint16   // number of section header table entries
	e_shstrndx  uint16   // section header index of the section name string table
}
|
||||
|
||||
// Elf64_Phdr is one ELF-64 program header table entry. Field names, order and
// widths follow the Elf64_Phdr layout of the ELF-64 object file format; the
// per-field notes paraphrase that specification.
type Elf64_Phdr struct {
	p_type   uint32 // segment type
	p_flags  uint32 // segment attribute flags
	p_offset uint64 // file offset of the segment
	p_vaddr  uint64 // virtual address of the segment in memory
	p_paddr  uint64 // physical address (reserved on most systems)
	p_filesz uint64 // size of the segment in the file
	p_memsz  uint64 // size of the segment in memory
	p_align  uint64 // required alignment of the segment
}
|
||||
|
||||
// Elf64_Shdr is one ELF-64 section header table entry. Field names, order and
// widths follow the Elf64_Shdr layout of the ELF-64 object file format; the
// per-field notes paraphrase that specification.
type Elf64_Shdr struct {
	sh_name      uint32 // offset of the section name in the section name string table
	sh_type      uint32 // section type
	sh_flags     uint64 // section attribute flags
	sh_addr      uint64 // virtual address of the section in memory
	sh_offset    uint64 // file offset of the section
	sh_size      uint64 // size of the section in bytes
	sh_link      uint32 // link to another section (meaning depends on type)
	sh_info      uint32 // extra information (meaning depends on type)
	sh_addralign uint64 // required address alignment
	sh_entsize   uint64 // entry size, for sections holding fixed-size entries
}
|
104
lexer.go
Normal file
104
lexer.go
Normal file
@ -0,0 +1,104 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type lexer struct {
|
||||
r *bufio.Reader
|
||||
lineno int
|
||||
|
||||
peek Token
|
||||
}
|
||||
|
||||
func NewLexer(src io.Reader) *lexer {
|
||||
return &lexer{
|
||||
r: bufio.NewReader(src),
|
||||
lineno: 0,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *lexer) Peek() (Token, error) {
|
||||
tok, err := l.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
l.peek = tok
|
||||
return tok, nil
|
||||
}
|
||||
|
||||
func (l *lexer) Next() (Token, error) {
|
||||
if l.peek != nil {
|
||||
ret := l.peek
|
||||
l.peek = nil
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
l.lineno++
|
||||
line, err := l.r.ReadString('\n')
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Strip leading spaces
|
||||
line = strings.TrimLeft(line, " \t\r\n")
|
||||
|
||||
// Strip trailing line-comments (;)
|
||||
line, _, _ = strings.Cut(line, `;`)
|
||||
|
||||
if len(line) == 0 {
|
||||
// This line only contained comments
|
||||
// Continue to the next line
|
||||
return l.Next()
|
||||
}
|
||||
|
||||
fields := strings.Fields(line)
|
||||
// FIXME commas!?
|
||||
|
||||
switch strings.ToLower(fields[0]) {
|
||||
case "section":
|
||||
return SectionToken{fields[1]}, nil
|
||||
|
||||
case "global":
|
||||
return LabelToken{strings.TrimRight(fields[1], `:`), true}, nil
|
||||
|
||||
case "mov":
|
||||
for i, _ := range fields {
|
||||
fields[i] = strings.TrimRight(fields[i], `,`)
|
||||
}
|
||||
return MovInstrToken{fields[1:]}, nil
|
||||
|
||||
case "syscall":
|
||||
return SyscallInstrToken{}, nil
|
||||
|
||||
default:
|
||||
// If the field ends with `:`, it's a (local) label
|
||||
if strings.HasSuffix(fields[0], `:`) {
|
||||
return LabelToken{strings.TrimRight(fields[0], `:`), false}, nil
|
||||
}
|
||||
|
||||
// If the field starts with `$`, it's a "variable"
|
||||
if strings.HasPrefix(fields[0], `$`) {
|
||||
// 1: =
|
||||
if fields[1] != `=` {
|
||||
return nil, fmt.Errorf("Invalid syntax at line %d (expected = in declaration)", l.lineno)
|
||||
}
|
||||
|
||||
// 2: sizeclass
|
||||
// 3+++: literal initializer
|
||||
return DataVariableInstrToken{
|
||||
VarName: fields[0][1:],
|
||||
Sizeclass: fields[2],
|
||||
Value: strings.Join(fields[3:], " "), // FIXME consecutive spaces are ruined
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("Unknown token '%s' at line %d", fields[0], l.lineno)
|
||||
}
|
51
main.go
Normal file
51
main.go
Normal file
@ -0,0 +1,51 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
)
|
||||
|
||||
//
|
||||
|
||||
func main() {
|
||||
src := flag.String("Src", "", "Input file")
|
||||
flag.Parse()
|
||||
|
||||
fh, err := os.OpenFile(*src, os.O_RDONLY, 0644)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
assemble(fh, ioutil.Discard)
|
||||
}
|
||||
|
||||
func assemble(src io.Reader, dest io.Writer) {
|
||||
lx := NewLexer(src)
|
||||
cc := NewCompiler()
|
||||
|
||||
for {
|
||||
tok, err := lx.Next()
|
||||
if err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
// Reached EOF
|
||||
// Terminate compilation
|
||||
panic("Completed OK")
|
||||
}
|
||||
|
||||
// Real error
|
||||
panic(err)
|
||||
}
|
||||
|
||||
fmt.Printf("[line %d] %#v\n", lx.lineno, tok)
|
||||
|
||||
err = cc.Compile(tok)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
45
main_test.go
Normal file
45
main_test.go
Normal file
@ -0,0 +1,45 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCompile(t *testing.T) {
|
||||
|
||||
// @ref https://gist.github.com/armicron/e891709ce8893df2fd5fc74c846dcf20
|
||||
const src = `
|
||||
section .data
|
||||
$msg = sz "Hello, world\n"
|
||||
$filename = sz "test.txt"
|
||||
$fd = u64 0
|
||||
|
||||
section .text
|
||||
|
||||
global _start: ;tell linker entry point
|
||||
mov rdi, &$filename
|
||||
mov rsi, 66 ;O_CREAT = 0102o (man open)
|
||||
mov rdx, 438 ;umode_t = 0666 octal
|
||||
mov rax, 2
|
||||
syscall
|
||||
|
||||
mov $fd, rax
|
||||
mov rdx, 13 ;message strlen
|
||||
mov rsi, &$msg ;message to write
|
||||
mov rdi, $fd ;file descriptor
|
||||
mov rax, 1 ;system call number (sys_write)
|
||||
syscall ;call kernel
|
||||
|
||||
mov rdi, $fd
|
||||
mov rax, 3 ;sys_close
|
||||
syscall
|
||||
|
||||
mov rax, 60 ;system call number (sys_exit)
|
||||
syscall ;call kernel
|
||||
|
||||
`
|
||||
|
||||
assemble(strings.NewReader(src), ioutil.Discard)
|
||||
|
||||
}
|
8
test_assemble.sh
Executable file
8
test_assemble.sh
Executable file
@ -0,0 +1,8 @@
|
||||
#!/bin/bash
# Usage: ./test_assemble.sh $'section .text\n mov rdx, 13370000000\n'
#
# Writes the given source text to src.asm, assembles it with nasm into an
# elf64 object and prints objdump's full disassembly of the result. The
# scratch files src.asm and src.o are removed afterwards.

set -eu

if [ "$#" -ne 1 ]; then
	echo "Usage: $0 <assembly source>" >&2
	exit 1
fi

# Clean up the scratch files on every exit path, including a failed nasm or
# objdump run.
trap 'rm -f src.o src.asm' EXIT

# printf rather than echo, so source text such as "-n" is written verbatim.
printf '%s\n' "$1" > src.asm
nasm -f elf64 src.asm
objdump -D src.o
|
28
token.go
Normal file
28
token.go
Normal file
@ -0,0 +1,28 @@
|
||||
package main
|
||||
|
||||
// Token is a single lexed unit passed from the lexer to the compiler. It
// declares no methods; consumers tell the concrete token types apart with a
// type switch.
type Token any
|
||||
|
||||
// SectionToken switches assembly to the named section.
type SectionToken struct {
	// SectionName is the name that followed the section keyword.
	SectionName string
}
|
||||
|
||||
// GlobalToken names an identifier in a global declaration.
//
// NOTE(review): the lexer in this commit reports "global" lines as a
// LabelToken with IsGlobal set and never produces this type — confirm whether
// it is still needed.
type GlobalToken struct {
	// IdentifierName is the identifier being declared global.
	IdentifierName string
}
|
||||
|
||||
// LabelToken marks a label definition at the current position.
type LabelToken struct {
	// LabelName is the label's name without its trailing colon.
	LabelName string

	// IsGlobal is true when the label was introduced with the global keyword.
	IsGlobal bool
}
|
||||
|
||||
// MovInstrToken is a mov instruction together with its operands.
type MovInstrToken struct {
	// Args holds the operands in source order, with trailing commas removed.
	Args []string
}
|
||||
|
||||
// SyscallInstrToken is a syscall instruction; it takes no operands.
type SyscallInstrToken struct{}
|
||||
|
||||
// DataVariableInstrToken declares a named data variable with an initializer,
// written in the source as "$name = sizeclass value".
type DataVariableInstrToken struct {
	// VarName is the variable's name without the leading "$".
	VarName string

	// Sizeclass selects the encoding of the value: sz, u8, u16, u32, u64.
	Sizeclass string

	// Value is the initializer text, exactly as joined by the lexer.
	Value string
}
|
Loading…
Reference in New Issue
Block a user