support shstrtab, symtab, elf output, ret instr, use rodata, more mov types
This commit is contained in:
parent
0d05970b5b
commit
8dde5e6de5
379
compile.go
379
compile.go
@ -6,17 +6,22 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
type section struct {
|
type section struct {
|
||||||
name string
|
name string
|
||||||
buff bytes.Buffer
|
name_shstrtabOffset int
|
||||||
|
buff bytes.Buffer
|
||||||
}
|
}
|
||||||
|
|
||||||
type symtabEntry struct {
|
type symtabEntry struct {
|
||||||
|
symtabSectionIndex int
|
||||||
|
|
||||||
sectionName string
|
sectionName string
|
||||||
kind string
|
kind string
|
||||||
offset int64
|
offset int64
|
||||||
|
length int64
|
||||||
global bool
|
global bool
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -24,12 +29,78 @@ type compiler struct {
|
|||||||
symtab map[string]symtabEntry
|
symtab map[string]symtabEntry
|
||||||
sections []section
|
sections []section
|
||||||
currentSection *section
|
currentSection *section
|
||||||
|
shstrtab *section
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewCompiler() *compiler {
|
func NewCompiler() *compiler {
|
||||||
return &compiler{
|
c := &compiler{
|
||||||
symtab: map[string]symtabEntry{}, // starts out empty
|
symtab: map[string]symtabEntry{},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
c.sections = append(c.sections, section{
|
||||||
|
name: `.shstrtab`, // Mandatory: the table that names sections themselves
|
||||||
|
name_shstrtabOffset: 1,
|
||||||
|
buff: bytes.Buffer{},
|
||||||
|
})
|
||||||
|
c.shstrtab = &c.sections[0]
|
||||||
|
// The first byte in a string table is conventionally expected to be \x00, so that you can reference
|
||||||
|
// null strings with it
|
||||||
|
c.shstrtab.buff.WriteByte(0)
|
||||||
|
|
||||||
|
c.shstrtab.buff.WriteString(c.shstrtab.name)
|
||||||
|
c.shstrtab.buff.WriteByte(0)
|
||||||
|
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *compiler) CreateSymbol(name string, class string, offset int64, length int64, global bool) error {
|
||||||
|
|
||||||
|
if _, ok := c.symtab[name]; ok {
|
||||||
|
return fmt.Errorf("Symbol %q already exists", name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the .symtab section, or create if it does not exist
|
||||||
|
symtabSec := c.FindOrCreateSection(`.symtab`)
|
||||||
|
|
||||||
|
// New entry index = length / len(entry) = length / 24
|
||||||
|
nextIndex := symtabSec.buff.Len() / 24
|
||||||
|
|
||||||
|
// Add to our fast lookup table
|
||||||
|
c.symtab[name] = symtabEntry{
|
||||||
|
symtabSectionIndex: nextIndex,
|
||||||
|
|
||||||
|
sectionName: c.currentSection.name,
|
||||||
|
kind: class,
|
||||||
|
offset: offset,
|
||||||
|
global: global,
|
||||||
|
length: length,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the section index for the section containing this symbol
|
||||||
|
sectionIndex := -1
|
||||||
|
for i, _ := range c.sections {
|
||||||
|
if c.sections[i].name == c.currentSection.name {
|
||||||
|
sectionIndex = i
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if sectionIndex == -1 {
|
||||||
|
return fmt.Errorf("Current section missing index")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add to the .symtab section
|
||||||
|
// This is required for variable references - after our single ELF .o is
|
||||||
|
// created, linking it with any other .o files will create a combined .text
|
||||||
|
// section where all the offsets have shifted
|
||||||
|
esym := Elf64_Sym{}
|
||||||
|
esym.st_name = 0 // Default: unnamed
|
||||||
|
esym.st_info = STT_SECTION | (STB_LOCAL << 4)
|
||||||
|
esym.st_other = STV_HIDDEN // For this translation unit only
|
||||||
|
esym.st_shndx = uint16(sectionIndex)
|
||||||
|
esym.st_size = uint64(length)
|
||||||
|
|
||||||
|
err := binary.Write(&symtabSec.buff, binary.LittleEndian, &esym)
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *compiler) Must(b []byte) {
|
func (c *compiler) Must(b []byte) {
|
||||||
@ -38,7 +109,7 @@ func (c *compiler) Must(b []byte) {
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
if n != len(b) {
|
if n != len(b) {
|
||||||
panic(io.ErrShortWrite)
|
panic(fmt.Errorf("Must: %w", io.ErrShortWrite))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -48,6 +119,59 @@ func (c *compiler) MustUint64(val uint64) {
|
|||||||
c.Must(ret)
|
c.Must(ret)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *compiler) FindOrCreateSection(sectionName string) *section {
|
||||||
|
|
||||||
|
if len(sectionName) == 0 || sectionName[0] != '.' {
|
||||||
|
panic("section name should start with leading period")
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, sec := range c.sections {
|
||||||
|
if sec.name != sectionName {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// found it
|
||||||
|
return &c.sections[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
// No section with this name. Create it
|
||||||
|
c.sections = append(c.sections, section{
|
||||||
|
name: sectionName,
|
||||||
|
name_shstrtabOffset: c.shstrtab.buff.Len(),
|
||||||
|
buff: bytes.Buffer{},
|
||||||
|
})
|
||||||
|
|
||||||
|
c.shstrtab.buff.WriteString(sectionName)
|
||||||
|
c.shstrtab.buff.WriteByte(0)
|
||||||
|
|
||||||
|
return &c.sections[len(c.sections)-1]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *compiler) Reloc(symbolName string, mode ElfRelocationType, addOffset int64) error {
|
||||||
|
// Find '.rela.{currentsection}', creating it if it does not exist
|
||||||
|
var relaSec *section = c.FindOrCreateSection(`.rela` + c.currentSection.name)
|
||||||
|
|
||||||
|
// Find target symbol
|
||||||
|
syminfo, ok := c.symtab[symbolName]
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("Reference to unknown symbol %q", symbolName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the relocation to the .rela section
|
||||||
|
rr := Elf64_Rela{}
|
||||||
|
rr.r_offset = uint64(c.currentSection.buff.Len())
|
||||||
|
rr.r_info = uint64(syminfo.symtabSectionIndex)<<32 | uint64(mode) // high bits: Index of search symbol in the symtab. low bits: mode type
|
||||||
|
rr.r_addend = addOffset
|
||||||
|
|
||||||
|
err := binary.Write(&relaSec.buff, binary.LittleEndian, &rr)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Done
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (c *compiler) Compile(t Token) error {
|
func (c *compiler) Compile(t Token) error {
|
||||||
if c.currentSection == nil {
|
if c.currentSection == nil {
|
||||||
// The only allowable token outside of a section is to start a new section
|
// The only allowable token outside of a section is to start a new section
|
||||||
@ -58,36 +182,15 @@ func (c *compiler) Compile(t Token) error {
|
|||||||
|
|
||||||
switch tok := t.(type) {
|
switch tok := t.(type) {
|
||||||
case SectionToken:
|
case SectionToken:
|
||||||
|
c.currentSection = c.FindOrCreateSection(tok.SectionName)
|
||||||
// Check if we are resuming an existing section
|
|
||||||
for i, sec := range c.sections {
|
|
||||||
if sec.name == tok.SectionName {
|
|
||||||
// Found it
|
|
||||||
c.currentSection = &c.sections[i]
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// It's a new section
|
|
||||||
c.sections = append(c.sections, section{
|
|
||||||
name: tok.SectionName,
|
|
||||||
buff: bytes.Buffer{},
|
|
||||||
})
|
|
||||||
c.currentSection = &c.sections[len(c.sections)-1]
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
||||||
case DataVariableInstrToken:
|
case DataVariableInstrToken:
|
||||||
// Stash in symbol table for future backreferences
|
// Stash in symbol table for future backreferences
|
||||||
if _, ok := c.symtab[tok.VarName]; ok {
|
// TODO allow making global symbols?
|
||||||
return fmt.Errorf("variable %q was already defined", tok.VarName)
|
// CreateSymbol does check for duplicate names already
|
||||||
}
|
|
||||||
c.symtab[tok.VarName] = symtabEntry{
|
position := int64(c.currentSection.buff.Len())
|
||||||
sectionName: c.currentSection.name,
|
|
||||||
kind: ".var." + tok.Sizeclass,
|
|
||||||
offset: int64(c.currentSection.buff.Len()),
|
|
||||||
global: false, // TODO allow this?
|
|
||||||
}
|
|
||||||
|
|
||||||
// Generate bytes for the symbol
|
// Generate bytes for the symbol
|
||||||
switch tok.Sizeclass {
|
switch tok.Sizeclass {
|
||||||
@ -99,7 +202,6 @@ func (c *compiler) Compile(t Token) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
c.Must([]byte{byte(val)})
|
c.Must([]byte{byte(val)})
|
||||||
return nil
|
|
||||||
|
|
||||||
case "u64":
|
case "u64":
|
||||||
// 8-byte literal
|
// 8-byte literal
|
||||||
@ -109,36 +211,32 @@ func (c *compiler) Compile(t Token) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
c.MustUint64(val)
|
c.MustUint64(val)
|
||||||
return nil
|
|
||||||
|
|
||||||
case "sz":
|
case "sz":
|
||||||
// string with null termination
|
// string with null termination
|
||||||
ret := []byte(tok.Value)
|
ret := []byte(tok.Value)
|
||||||
ret = append(ret, 0)
|
ret = append(ret, 0)
|
||||||
c.Must(ret)
|
c.Must(ret)
|
||||||
return nil
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("variable %q has unknown size class %q", tok.VarName, tok.Sizeclass)
|
return fmt.Errorf("variable %q has unknown size class %q", tok.VarName, tok.Sizeclass)
|
||||||
}
|
}
|
||||||
|
|
||||||
case LabelToken:
|
err := c.CreateSymbol(tok.VarName, ".var."+tok.Sizeclass, int64(c.currentSection.buff.Len()), position-int64(c.currentSection.buff.Len()), false)
|
||||||
if _, ok := c.symtab[tok.LabelName]; ok {
|
if err != nil {
|
||||||
return fmt.Errorf("name %q was already defined", tok.LabelName)
|
return err
|
||||||
}
|
|
||||||
c.symtab[tok.LabelName] = symtabEntry{
|
|
||||||
sectionName: c.currentSection.name,
|
|
||||||
kind: ".label",
|
|
||||||
offset: int64(c.currentSection.buff.Len()),
|
|
||||||
global: tok.IsGlobal,
|
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
|
||||||
|
case LabelToken:
|
||||||
|
return c.CreateSymbol(tok.LabelName, ".label", int64(c.currentSection.buff.Len()), 0, tok.IsGlobal)
|
||||||
|
|
||||||
case MovInstrToken:
|
case MovInstrToken:
|
||||||
// TODO encode more cases properly
|
// TODO encode more cases properly
|
||||||
if literal, err := strconv.ParseInt(tok.Args[1], 10, 64); err == nil {
|
if literal, err := strconv.ParseInt(tok.Args[1], 10, 64); err == nil {
|
||||||
|
// mov rxx, imm
|
||||||
// Store immediate in register
|
// Store immediate in register
|
||||||
|
|
||||||
switch tok.Args[0] {
|
switch tok.Args[0] {
|
||||||
case "rax":
|
case "rax":
|
||||||
c.Must([]byte{0x48, 0xb8}) // TODO store in eax with shorter prefix if <32 bit
|
c.Must([]byte{0x48, 0xb8}) // TODO store in eax with shorter prefix if <32 bit
|
||||||
@ -156,35 +254,202 @@ func (c *compiler) Compile(t Token) error {
|
|||||||
c.Must([]byte{0x48, 0xba}) // TODO store in eax with shorter prefix if <32 bit
|
c.Must([]byte{0x48, 0xba}) // TODO store in eax with shorter prefix if <32 bit
|
||||||
c.MustUint64(uint64(literal))
|
c.MustUint64(uint64(literal))
|
||||||
|
|
||||||
|
case "rsi":
|
||||||
|
c.Must([]byte{0x48, 0xbe}) // TODO store in eax with shorter prefix if <32 bit
|
||||||
|
c.MustUint64(uint64(literal))
|
||||||
|
|
||||||
|
case "rdi":
|
||||||
|
c.Must([]byte{0x48, 0xbf}) // TODO store in eax with shorter prefix if <32 bit
|
||||||
|
c.MustUint64(uint64(literal))
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// Store immediate in variable?
|
// Store immediate in variable?
|
||||||
panic("not implemented: store immediate in ???? thing")
|
panic("mov rxx,imm pattern: missing case")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
|
||||||
|
} else if strings.HasPrefix(tok.Args[0], `$`) {
|
||||||
|
// mov $var, rxx
|
||||||
|
// Load register's contents into variable
|
||||||
|
// x86_64 can only really do this in a single instruction with 32-bit displacement, not full 64-bit
|
||||||
|
|
||||||
|
switch tok.Args[1] {
|
||||||
|
case "rax":
|
||||||
|
c.Must([]byte{0x48, 0x89, 0x04, 0x25})
|
||||||
|
default:
|
||||||
|
panic("mov $var,rax pattern: missing case")
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if _, ok := c.symtab[tok.Args[1]]; ok {
|
err = c.Reloc(tok.Args[0][1:], R_X86_64_32S, 0) // Declare that this is a 32-bit reloc, not a 64-bit one
|
||||||
// Store variable's contents in register
|
if err != nil {
|
||||||
|
return fmt.Errorf("mov with relocation: %w", err)
|
||||||
|
}
|
||||||
|
c.Must([]byte{0, 0, 0, 0}) // 32-bit
|
||||||
|
return nil
|
||||||
|
|
||||||
} else if _, ok := c.symtab["&"+tok.Args[1]]; ok {
|
} else if strings.HasPrefix(tok.Args[1], `$`) {
|
||||||
// With &; store address of variable in register
|
// mov rxx, $var
|
||||||
|
// With $; load variable contents into register
|
||||||
|
|
||||||
|
switch tok.Args[0] {
|
||||||
|
case "rax":
|
||||||
|
c.Must([]byte{0x48, 0x8b, 0x04, 0x25})
|
||||||
|
case "rdi":
|
||||||
|
c.Must([]byte{0x48, 0x8b, 0x3c, 0x25})
|
||||||
|
default:
|
||||||
|
panic("mov rxx,$var pattern: missing case")
|
||||||
|
}
|
||||||
|
|
||||||
|
err = c.Reloc(tok.Args[1][1:], R_X86_64_32S, 0) // Declare that this is a 32-bit reloc, not a 64-bit one
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("mov with relocation: %w", err)
|
||||||
|
}
|
||||||
|
c.Must([]byte{0, 0, 0, 0}) // 32-bit
|
||||||
|
|
||||||
|
return nil
|
||||||
|
|
||||||
|
} else if strings.HasPrefix(tok.Args[1], `&$`) {
|
||||||
|
// mov rxx, &$var
|
||||||
|
// With &; assign exact address of variable to register
|
||||||
|
// This creates a movabs literal & a relocation entry
|
||||||
|
// It's always 64-bit
|
||||||
|
|
||||||
|
switch tok.Args[0] {
|
||||||
|
case "rax":
|
||||||
|
c.Must([]byte{0x48, 0xb8}) // TODO store in eax with shorter prefix if <32 bit
|
||||||
|
case "rsi":
|
||||||
|
c.Must([]byte{0x48, 0xbe}) // TODO store in eax with shorter prefix if <32 bit
|
||||||
|
case "rdi":
|
||||||
|
c.Must([]byte{0x48, 0xbf}) // TODO store in eax with shorter prefix if <32 bit
|
||||||
|
|
||||||
|
default:
|
||||||
|
panic("mov $var,rxx pattern: missing case")
|
||||||
|
}
|
||||||
|
|
||||||
|
err = c.Reloc(tok.Args[1][2:], R_X86_64_64, 0)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("mov with relocation: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
c.MustUint64(0)
|
||||||
|
return nil
|
||||||
|
|
||||||
|
} else if strings.HasPrefix(tok.Args[1], `strlen($`) && strings.HasSuffix(tok.Args[1], `)`) {
|
||||||
|
// mov rxx, strlen($var)
|
||||||
|
// With strlen; if this is an sz symbol, supply its length
|
||||||
|
symname := tok.Args[1][8 : len(tok.Args[1])-1]
|
||||||
|
sym, ok := c.symtab[symname]
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("Can't strlen on unknown variable %q", symname)
|
||||||
|
}
|
||||||
|
|
||||||
|
if sym.kind != ".var.sz" {
|
||||||
|
return fmt.Errorf("Can't take the strlen of variable %q with type %q (expected sz)", symname, sym.kind)
|
||||||
|
}
|
||||||
|
|
||||||
|
effective := sym.length
|
||||||
|
return c.Compile(MovInstrToken{Args: []string{tok.Args[0], strconv.Itoa(int(effective))}})
|
||||||
|
|
||||||
|
} else {
|
||||||
|
panic("unknown mov type, sorry")
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
panic("unknown mov type, sorry")
|
case SyscallInstrToken:
|
||||||
|
c.Must([]byte{0x0f, 0x05}) // syscall
|
||||||
|
return nil
|
||||||
|
|
||||||
|
case RetInstrToken:
|
||||||
|
c.Must([]byte{0xc3}) // ret
|
||||||
|
return nil
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("can't compile token of type %#t", t)
|
return fmt.Errorf("can't compile token of type %#t", t)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Finalize exports the compiled sections into an ELF artefact.
|
||||||
|
// The resulting ELF is not executable directly, but it can be once fully
|
||||||
|
// linked (adding a program header and page alignment)
|
||||||
func (c *compiler) Finalize(dest io.Writer) error {
|
func (c *compiler) Finalize(dest io.Writer) error {
|
||||||
|
|
||||||
const alignment = 4096
|
|
||||||
|
|
||||||
// Write ELF header
|
// Write ELF header
|
||||||
// Write section headers
|
ehdr := Elf64_Ehdr{}
|
||||||
// Write binary content
|
ehdr.e_ident[0] = 0x7f
|
||||||
// Pad out section to page alignment
|
ehdr.e_ident[1] = 'E'
|
||||||
// Done
|
ehdr.e_ident[2] = 'L'
|
||||||
|
ehdr.e_ident[3] = 'F'
|
||||||
|
ehdr.e_ident[4] = 2 // 64-bit format
|
||||||
|
ehdr.e_ident[5] = 1 // little endian
|
||||||
|
ehdr.e_ident[6] = 1 // ELFv1 is the only format
|
||||||
|
ehdr.e_ident[7] = 3 // Linux-compatible ABI
|
||||||
|
|
||||||
|
ehdr.e_type = 0 // ET_NONE
|
||||||
|
ehdr.e_machine = 0x3E // x86_64
|
||||||
|
ehdr.e_version = 1 // ELFv1 again
|
||||||
|
|
||||||
|
ehdr.e_shoff = 64 // The Ehdr is 64 bytes long, sections start immediately following
|
||||||
|
ehdr.e_shentsize = 64 // Each Shdr is also 64 bytes long
|
||||||
|
ehdr.e_shnum = uint16(len(c.sections))
|
||||||
|
ehdr.e_shstrndx = 0 // We always put the .shstrtab as the 0th section
|
||||||
|
|
||||||
|
err := binary.Write(dest, binary.LittleEndian, &ehdr)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't declare a program header
|
||||||
|
|
||||||
|
// Write section headers
|
||||||
|
pctr := 64 + (64 * len(c.sections))
|
||||||
|
for _, sec := range c.sections {
|
||||||
|
shdr := Elf64_Shdr{}
|
||||||
|
|
||||||
|
shdr.sh_name = uint32(sec.name_shstrtabOffset)
|
||||||
|
switch sec.name {
|
||||||
|
case ".text":
|
||||||
|
shdr.sh_type = 1 // SHT_PROGBITS, program data
|
||||||
|
shdr.sh_flags = 0x2 | 0x4 | 0x10 // ALLOC|EXECINSTR|MERGE (0x2 is SHF_ALLOC, 0x4 is SHF_EXECINSTR; SHF_WRITE would be 0x1)
|
||||||
|
case ".data":
|
||||||
|
shdr.sh_type = 1 // SHT_PROGBITS, program data
|
||||||
|
shdr.sh_flags = 0x2 | 0x10 // ALLOC|MERGE (0x2 is SHF_ALLOC; SHF_WRITE would be 0x1 and is not set here)
|
||||||
|
case ".symtab":
|
||||||
|
shdr.sh_type = 2 // SHT_SYMTAB
|
||||||
|
shdr.sh_flags = 0x10 | 0x20 // MERGE|STRINGS
|
||||||
|
case ".shstrtab":
|
||||||
|
shdr.sh_type = 3 // SHT_STRTAB
|
||||||
|
shdr.sh_flags = 0x10 | 0x20 // MERGE|STRINGS
|
||||||
|
case ".rodata":
|
||||||
|
fallthrough
|
||||||
|
default: // Treat anything unknown as read-only data
|
||||||
|
shdr.sh_type = 1 // SHT_PROGBITS, program data
|
||||||
|
shdr.sh_flags = 0x10 // MERGE
|
||||||
|
}
|
||||||
|
|
||||||
|
shdr.sh_offset = uint64(pctr)
|
||||||
|
shdr.sh_size = uint64(sec.buff.Len())
|
||||||
|
|
||||||
|
pctr += sec.buff.Len()
|
||||||
|
|
||||||
|
err = binary.Write(dest, binary.LittleEndian, &shdr)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write binary content
|
||||||
|
for _, sec := range c.sections {
|
||||||
|
expectLen := sec.buff.Len()
|
||||||
|
n, err := sec.buff.WriteTo(dest)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if n != int64(expectLen) {
|
||||||
|
return io.ErrShortWrite
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Done
|
||||||
|
return nil
|
||||||
|
|
||||||
panic("TODO")
|
|
||||||
}
|
}
|
||||||
|
83
elf.go
83
elf.go
@ -1,5 +1,6 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
|
// Elf64_Ehdr is the main ELF header
|
||||||
type Elf64_Ehdr struct {
|
type Elf64_Ehdr struct {
|
||||||
e_ident [16]byte
|
e_ident [16]byte
|
||||||
e_type uint16
|
e_type uint16
|
||||||
@ -17,6 +18,7 @@ type Elf64_Ehdr struct {
|
|||||||
e_shstrndx uint16
|
e_shstrndx uint16
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Elf64_Phdr is the Program Header
|
||||||
type Elf64_Phdr struct {
|
type Elf64_Phdr struct {
|
||||||
p_type uint32
|
p_type uint32
|
||||||
p_flags uint32
|
p_flags uint32
|
||||||
@ -28,6 +30,7 @@ type Elf64_Phdr struct {
|
|||||||
p_align uint64
|
p_align uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Elf64_Shdr is the Section header
|
||||||
type Elf64_Shdr struct {
|
type Elf64_Shdr struct {
|
||||||
sh_name uint32
|
sh_name uint32
|
||||||
sh_type uint32
|
sh_type uint32
|
||||||
@ -40,3 +43,83 @@ type Elf64_Shdr struct {
|
|||||||
sh_addralign uint64
|
sh_addralign uint64
|
||||||
sh_entsize uint64
|
sh_entsize uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Values used to fill in the st_info and st_other fields of Elf64_Sym.
const (
|
||||||
|
STB_LOCAL = 0 // STB_*: symbol binding; CreateSymbol shifts it left by 4 before ORing it into st_info
|
||||||
|
STB_GLOBAL = 1
|
||||||
|
STB_WEAK = 2
|
||||||
|
|
||||||
|
STT_NOTYPE = 0 // STT_*: symbol type; CreateSymbol ORs it into the low bits of st_info
|
||||||
|
STT_OBJECT = 1
|
||||||
|
STT_FUNC = 2
|
||||||
|
STT_SECTION = 3 // the type CreateSymbol currently gives every symbol it writes
|
||||||
|
STT_FILE = 4
|
||||||
|
STT_COMMON = 5
|
||||||
|
STT_TLS = 6
|
||||||
|
|
||||||
|
STV_DEFAULT = 0 // STV_*: symbol visibility; CreateSymbol stores it in st_other
|
||||||
|
STV_INTERNAL = 1
|
||||||
|
STV_HIDDEN = 2 // the visibility CreateSymbol currently uses ("for this translation unit only")
|
||||||
|
STV_PROTECTED = 3
|
||||||
|
)
|
||||||
|
|
||||||
|
// Elf64_Sym is a symbol
|
||||||
|
// Entries are serialized little-endian with binary.Write into the .symtab
// section; CreateSymbol derives entry indexes by assuming 24 bytes per entry.
type Elf64_Sym struct {
|
||||||
|
st_name uint32 // CreateSymbol always writes 0 here ("unnamed")
|
||||||
|
st_info byte // symbol type (STT_*) in the low bits, binding (STB_*) shifted left by 4
|
||||||
|
st_other byte // visibility (STV_*)
|
||||||
|
st_shndx uint16 // index in the compiler's section list of the section containing the symbol
|
||||||
|
st_value uint64 // NOTE(review): never assigned by the visible code, so always 0 — presumably meant to hold the symbol's offset; confirm
|
||||||
|
st_size uint64 // the length passed to CreateSymbol
|
||||||
|
}
|
||||||
|
|
||||||
|
// Elf64_Rela is a relocation with addend
|
||||||
|
// Entries are serialized little-endian with binary.Write by Reloc into the
// '.rela.{section}' section that matches the section being patched.
type Elf64_Rela struct {
|
||||||
|
r_offset uint64 // write position in the current section at the time Reloc was called
|
||||||
|
r_info uint64 // high 32 bits: symtab index of the target symbol; low bits: relocation type
|
||||||
|
r_addend int64 // the addOffset argument passed to Reloc
|
||||||
|
}
|
||||||
|
|
||||||
|
// Relocation types
|
||||||
|
type ElfRelocationType int // passed to Reloc as mode and stored in the low bits of Elf64_Rela.r_info
|
||||||
|
|
||||||
|
// Relocation type numbers for x86_64. Only R_X86_64_64 and R_X86_64_32S are
// referenced by the compiler code visible in this change.
const (
|
||||||
|
R_X86_64_NONE ElfRelocationType = 0
|
||||||
|
R_X86_64_64 ElfRelocationType = 1 // used for `mov rxx, &$var`, followed by an 8-byte placeholder
|
||||||
|
R_X86_64_PC32 ElfRelocationType = 2
|
||||||
|
R_X86_64_GOT32 ElfRelocationType = 3
|
||||||
|
R_X86_64_PLT32 ElfRelocationType = 4
|
||||||
|
R_X86_64_COPY ElfRelocationType = 5
|
||||||
|
R_X86_64_GLOB_DAT ElfRelocationType = 6
|
||||||
|
R_X86_64_JUMP_SLOT ElfRelocationType = 7
|
||||||
|
R_X86_64_RELATIVE ElfRelocationType = 8
|
||||||
|
R_X86_64_GOTPCREL ElfRelocationType = 9
|
||||||
|
R_X86_64_32 ElfRelocationType = 10
|
||||||
|
R_X86_64_32S ElfRelocationType = 11 // used for `mov $var, rxx` / `mov rxx, $var`, followed by a 4-byte placeholder
|
||||||
|
R_X86_64_16 ElfRelocationType = 12
|
||||||
|
R_X86_64_PC16 ElfRelocationType = 13
|
||||||
|
R_X86_64_8 ElfRelocationType = 14
|
||||||
|
R_X86_64_PC8 ElfRelocationType = 15
|
||||||
|
R_X86_64_DTPMOD64 ElfRelocationType = 16
|
||||||
|
R_X86_64_DTPOFF64 ElfRelocationType = 17
|
||||||
|
R_X86_64_TPOFF64 ElfRelocationType = 18
|
||||||
|
R_X86_64_TLSGD ElfRelocationType = 19
|
||||||
|
R_X86_64_TLSLD ElfRelocationType = 20
|
||||||
|
R_X86_64_DTPOFF32 ElfRelocationType = 21
|
||||||
|
R_X86_64_GOTTPOFF ElfRelocationType = 22
|
||||||
|
R_X86_64_TPOFF32 ElfRelocationType = 23
|
||||||
|
R_X86_64_PC64 ElfRelocationType = 24
|
||||||
|
R_X86_64_GOTOFF64 ElfRelocationType = 25
|
||||||
|
R_X86_64_GOTPC32 ElfRelocationType = 26
|
||||||
|
R_X86_64_GOT64 ElfRelocationType = 27
|
||||||
|
R_X86_64_GOTPCREL64 ElfRelocationType = 28
|
||||||
|
R_X86_64_GOTPC64 ElfRelocationType = 29
|
||||||
|
R_X86_64_GOTPLT64 ElfRelocationType = 30
|
||||||
|
R_X86_64_PLTOFF64 ElfRelocationType = 31
|
||||||
|
R_X86_64_SIZE32 ElfRelocationType = 32
|
||||||
|
R_X86_64_SIZE64 ElfRelocationType = 33
|
||||||
|
R_X86_64_GOTPC32_TLSDESC ElfRelocationType = 34
|
||||||
|
R_X86_64_TLSDESC_CALL ElfRelocationType = 35
|
||||||
|
R_X86_64_TLSDESC ElfRelocationType = 36
|
||||||
|
R_X86_64_IRELATIVE ElfRelocationType = 37
|
||||||
|
)
|
||||||
|
3
lexer.go
3
lexer.go
@ -77,6 +77,9 @@ func (l *lexer) Next() (Token, error) {
|
|||||||
case "syscall":
|
case "syscall":
|
||||||
return SyscallInstrToken{}, nil
|
return SyscallInstrToken{}, nil
|
||||||
|
|
||||||
|
case "ret":
|
||||||
|
return RetInstrToken{}, nil
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// If the field ends with `:`, it's a (local) label
|
// If the field ends with `:`, it's a (local) label
|
||||||
if strings.HasSuffix(fields[0], `:`) {
|
if strings.HasSuffix(fields[0], `:`) {
|
||||||
|
8
main.go
8
main.go
@ -27,13 +27,14 @@ func assemble(src io.Reader, dest io.Writer) {
|
|||||||
lx := NewLexer(src)
|
lx := NewLexer(src)
|
||||||
cc := NewCompiler()
|
cc := NewCompiler()
|
||||||
|
|
||||||
|
mainloop:
|
||||||
for {
|
for {
|
||||||
tok, err := lx.Next()
|
tok, err := lx.Next()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, io.EOF) {
|
if errors.Is(err, io.EOF) {
|
||||||
// Reached EOF
|
// Reached EOF
|
||||||
// Terminate compilation
|
// Terminate compilation
|
||||||
panic("Completed OK")
|
break mainloop
|
||||||
}
|
}
|
||||||
|
|
||||||
// Real error
|
// Real error
|
||||||
@ -48,4 +49,9 @@ func assemble(src io.Reader, dest io.Writer) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
err := cc.Finalize(dest)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
20
main_test.go
20
main_test.go
@ -1,7 +1,8 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"io/ioutil"
|
// "io/ioutil"
|
||||||
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
@ -10,9 +11,11 @@ func TestCompile(t *testing.T) {
|
|||||||
|
|
||||||
// @ref https://gist.github.com/armicron/e891709ce8893df2fd5fc74c846dcf20
|
// @ref https://gist.github.com/armicron/e891709ce8893df2fd5fc74c846dcf20
|
||||||
const src = `
|
const src = `
|
||||||
section .data
|
section .rodata
|
||||||
$msg = sz "Hello, world\n"
|
$msg = sz "Hello, world\n"
|
||||||
$filename = sz "test.txt"
|
$filename = sz "test.txt"
|
||||||
|
|
||||||
|
section .data
|
||||||
$fd = u64 0
|
$fd = u64 0
|
||||||
|
|
||||||
section .text
|
section .text
|
||||||
@ -25,7 +28,7 @@ global _start: ;tell linker entry point
|
|||||||
syscall
|
syscall
|
||||||
|
|
||||||
mov $fd, rax
|
mov $fd, rax
|
||||||
mov rdx, 13 ;message strlen
|
mov rdx, strlen($msg) ;message strlen
|
||||||
mov rsi, &$msg ;message to write
|
mov rsi, &$msg ;message to write
|
||||||
mov rdi, $fd ;file descriptor
|
mov rdi, $fd ;file descriptor
|
||||||
mov rax, 1 ;system call number (sys_write)
|
mov rax, 1 ;system call number (sys_write)
|
||||||
@ -40,6 +43,15 @@ global _start: ;tell linker entry point
|
|||||||
|
|
||||||
`
|
`
|
||||||
|
|
||||||
assemble(strings.NewReader(src), ioutil.Discard)
|
/*
|
||||||
|
assemble(strings.NewReader(src), ioutil.Discard)
|
||||||
|
*/
|
||||||
|
|
||||||
|
fh, err := os.OpenFile("output.o", os.O_CREATE|os.O_WRONLY, 0644)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
assemble(strings.NewReader(src), fh) // ioutil.Discard)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
echo "$1" > src.asm
|
echo "$1" > src.asm
|
||||||
nasm -f elf64 src.asm
|
nasm -f elf64 src.asm
|
||||||
|
objdump -x src.o
|
||||||
objdump -D src.o
|
objdump -D src.o
|
||||||
rm src.o
|
rm src.o
|
||||||
rm src.asm
|
rm src.asm
|
||||||
|
2
token.go
2
token.go
@ -21,6 +21,8 @@ type MovInstrToken struct {
|
|||||||
|
|
||||||
type SyscallInstrToken struct{}
|
type SyscallInstrToken struct{}
|
||||||
|
|
||||||
|
type RetInstrToken struct{} // produced by the lexer for a `ret` line; Compile emits the single byte 0xc3 for it
|
||||||
|
|
||||||
type DataVariableInstrToken struct {
|
type DataVariableInstrToken struct {
|
||||||
VarName string
|
VarName string
|
||||||
Sizeclass string // sz, u8, u16, u32, u64
|
Sizeclass string // sz, u8, u16, u32, u64
|
||||||
|
Loading…
Reference in New Issue
Block a user