626 lines
17 KiB
Go
626 lines
17 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
"fmt"
|
|
"io"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// section is one ELF section while it is being assembled.
type section struct {
	// name is the section's name, including the leading period
	// (for example `.shstrtab`).
	name string

	// name_shstrtabOffset is the position at which name was written into the
	// string table, as returned by StringTable. It becomes sh_name in the
	// section header.
	name_shstrtabOffset int

	// buff accumulates the raw bytes that make up the section's content.
	buff *bytes.Buffer
}
|
|
|
|
// symtabEntry is the compiler's in-memory record of a symbol that has also
// been written to the `.symtab` section.
type symtabEntry struct {
	// symtabSectionIndex is the index of this symbol within the whole symtab.
	// name_shstrtabOffset is the string-table offset stored as the symbol's
	// St_name (0 when the symbol was written without a name).
	symtabSectionIndex, name_shstrtabOffset int

	// sectionName is the name of the section the symbol belongs to; for a
	// section symbol it is the section's own name.
	// kind is the class string passed to CreateSymbol, e.g. `.section`,
	// `.label` or `.var.sz`.
	sectionName, kind string

	// offset is the symbol's position and length its size in bytes, as passed
	// to CreateSymbol.
	offset, length int64

	// global records whether the symbol was requested as a global one.
	global bool
}
|
|
|
|
type compiler struct {
|
|
symtab map[string]symtabEntry
|
|
sections []section
|
|
currentSection *section
|
|
shstrtab *section
|
|
}
|
|
|
|
func NewCompiler() *compiler {
|
|
c := &compiler{
|
|
symtab: map[string]symtabEntry{},
|
|
}
|
|
|
|
// Fake 0th entry
|
|
// First, there's an all-zero entry that is reserved for extended ELF headers
|
|
c.sections = append(c.sections, section{})
|
|
|
|
// Real entry: shstrtab
|
|
c.sections = append(c.sections, section{
|
|
name: `.shstrtab`, // Mandatory: the table that names sections themselves
|
|
buff: &bytes.Buffer{},
|
|
})
|
|
c.shstrtab = &c.sections[1]
|
|
|
|
// The first byte in a string table is conventionally expected be \x00, so that you can reference
|
|
// null strings with it
|
|
c.StringTable("")
|
|
|
|
c.shstrtab.name_shstrtabOffset = c.StringTable(c.shstrtab.name)
|
|
|
|
return c
|
|
}
|
|
|
|
func (c *compiler) StringTable(text string) int {
|
|
pos := c.shstrtab.buff.Len()
|
|
|
|
c.shstrtab.buff.WriteString(text)
|
|
c.shstrtab.buff.WriteByte(0)
|
|
|
|
return pos
|
|
}
|
|
|
|
func (c *compiler) CreateSymbol(name string, class string, offset int64, length int64, global bool) error {
|
|
|
|
if _, ok := c.symtab[name]; ok {
|
|
return fmt.Errorf("Symbol %q already exists", name)
|
|
}
|
|
|
|
// fmt.Printf("--> CreateSymbol(%s)\n", name)
|
|
|
|
// Find the .symtab section, or create if it does not exist
|
|
symtabSec := c.FindOrCreateSection(`.symtab`)
|
|
if symtabSec.buff.Len() == 0 {
|
|
// First time initialized
|
|
|
|
// Add a zeroth symtab entry - zero is a sentinel, not a usable entry
|
|
symtabSec.buff.Write(make([]byte, 8*3))
|
|
}
|
|
|
|
// New entry index = length / len(entry) = length / 24
|
|
nextIndex := symtabSec.buff.Len() / 24
|
|
|
|
// Add to our fast lookup table
|
|
ste := symtabEntry{
|
|
symtabSectionIndex: nextIndex,
|
|
kind: class,
|
|
offset: offset,
|
|
global: global,
|
|
length: length,
|
|
}
|
|
|
|
// Find the section index for the section containing this symbol
|
|
var srcSectionIdx int = 0
|
|
var sttType uint8 = STT_NOTYPE
|
|
|
|
if class == `.section` {
|
|
ste.sectionName = name
|
|
srcSectionIdx = len(c.sections) - 1 // The most recent added section
|
|
sttType = STT_SECTION
|
|
|
|
} else if c.currentSection != nil {
|
|
ste.sectionName = c.currentSection.name
|
|
var ok bool
|
|
srcSectionIdx, ok = c.FindSectionIndex(c.currentSection.name)
|
|
if !ok {
|
|
panic("current section does not exist?")
|
|
}
|
|
|
|
} else {
|
|
panic("Symbol is neither a section, nor within a section (?)")
|
|
}
|
|
|
|
// Add to the .symtab section
|
|
// This is required for variable references - after our single ELF .o is
|
|
// created, linking it with any other .o files will create a combined .text
|
|
// section where all the offsets have shifted
|
|
esym := Elf64_Sym{}
|
|
esym.St_value = uint64(offset)
|
|
|
|
if class == `.section` {
|
|
esym.St_name = uint32(c.StringTable(name)) // Write name into public string table
|
|
esym.St_info = sttType | (STB_LOCAL << 4)
|
|
esym.St_other = STV_DEFAULT
|
|
esym.St_shndx = uint16(srcSectionIdx)
|
|
|
|
} else if global {
|
|
esym.St_name = uint32(c.StringTable(name)) // Write name into public string table
|
|
esym.St_info = sttType | (STB_GLOBAL << 4)
|
|
esym.St_other = STV_DEFAULT
|
|
esym.St_shndx = uint16(srcSectionIdx)
|
|
|
|
} else {
|
|
// Private variable for this translation unit
|
|
// Needs an entry, but no need to expose the name
|
|
esym.St_name = 0 // uint32(c.StringTable(name)) // Write name into public string table // 0 // Default: unnamed (0th entry in our string table is \x00)
|
|
esym.St_info = sttType | (STB_LOCAL << 4)
|
|
esym.St_other = STV_HIDDEN // For this translation unit only
|
|
esym.St_shndx = uint16(srcSectionIdx)
|
|
}
|
|
|
|
fmt.Printf("-->New symbol %q in section %q (sectionidx %v)\n", name, ste.sectionName, srcSectionIdx)
|
|
|
|
esym.St_size = uint64(length)
|
|
|
|
err := binary.Write(symtabSec.buff, binary.LittleEndian, &esym)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Stash in symtabEntry
|
|
ste.name_shstrtabOffset = int(esym.St_name)
|
|
c.symtab[name] = ste
|
|
|
|
return nil
|
|
}
|
|
|
|
func (c *compiler) Must(b []byte) {
|
|
n, err := c.currentSection.buff.Write(b)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
if n != len(b) {
|
|
panic(fmt.Errorf("Must: %w", io.ErrShortWrite))
|
|
}
|
|
}
|
|
|
|
func (c *compiler) MustUint64(val uint64) {
|
|
ret := make([]byte, 8)
|
|
binary.LittleEndian.PutUint64(ret, val)
|
|
c.Must(ret)
|
|
}
|
|
|
|
func (c *compiler) FindSectionIndex(sectionName string) (int, bool) {
|
|
|
|
for i, sec := range c.sections {
|
|
if sec.name != sectionName {
|
|
continue
|
|
}
|
|
|
|
// found it
|
|
return i, true
|
|
}
|
|
|
|
return 0, false
|
|
}
|
|
|
|
func (c *compiler) FindOrCreateSection(sectionName string) *section {
|
|
|
|
if len(sectionName) == 0 || sectionName[0] != '.' {
|
|
panic("section name should start with leading period")
|
|
}
|
|
|
|
if i, ok := c.FindSectionIndex(sectionName); ok {
|
|
return &c.sections[i]
|
|
}
|
|
|
|
// No section with this name. Create it
|
|
c.sections = append(c.sections, section{
|
|
name: sectionName,
|
|
buff: &bytes.Buffer{},
|
|
})
|
|
sec := &c.sections[len(c.sections)-1]
|
|
|
|
// Create a symbol for it
|
|
// This creates a string table entry for us
|
|
err := c.CreateSymbol(sectionName, ".section", 0, 0, true)
|
|
if err != nil {
|
|
panic("CreateSymbol: " + err.Error())
|
|
}
|
|
|
|
sec.name_shstrtabOffset = c.StringTable(sectionName)
|
|
|
|
return sec
|
|
}
|
|
|
|
func (c *compiler) Reloc(symbolName string, mode ElfRelocationType) error {
|
|
// Find '.rela.{currentsection}', creating it if it does not exist
|
|
var relaSec *section = c.FindOrCreateSection(`.rela` + c.currentSection.name)
|
|
|
|
// Find target symbol
|
|
syminfo, ok := c.symtab[symbolName]
|
|
if !ok {
|
|
return fmt.Errorf("Reference to unknown symbol %q", symbolName)
|
|
}
|
|
|
|
// Find the symbol pointing to its parent section
|
|
/*
|
|
parentSectionSyminfo, ok := c.symtab[syminfo.sectionName]
|
|
if !ok {
|
|
return fmt.Errorf("Bad parent section")
|
|
}
|
|
|
|
fmt.Printf("-->Relocation %q found in %q (sectionidx %d)\n", symbolName, syminfo.sectionName, parentSectionSyminfo.symtabSectionIndex)
|
|
|
|
rootSymbol := parentSectionSyminfo.symtabSectionIndex
|
|
if rootSymbol == 5 {
|
|
rootSymbol = 7
|
|
}
|
|
*/
|
|
rootSymbol := syminfo.symtabSectionIndex
|
|
|
|
// Add the relocation to the .rela section
|
|
rr := Elf64_Rela{}
|
|
rr.R_offset = uint64(c.currentSection.buff.Len())
|
|
rr.R_info = uint64(rootSymbol)<<32 | uint64(mode) // high bits: Index of search symbol in the symtab (the source section). low bits: mode type
|
|
rr.R_addend = 0 // syminfo.offset // Add to the result when relocating (offset within source section)
|
|
|
|
err := binary.Write(relaSec.buff, binary.LittleEndian, &rr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Done
|
|
return nil
|
|
}
|
|
|
|
func (c *compiler) Compile(t Token) error {
|
|
if c.currentSection == nil {
|
|
// The only allowable token outside of a section is to start a new section
|
|
if _, ok := t.(SectionToken); !ok {
|
|
return fmt.Errorf("Need to start with a section token, got %#t", t)
|
|
}
|
|
}
|
|
|
|
switch tok := t.(type) {
|
|
case SectionToken:
|
|
c.currentSection = c.FindOrCreateSection(tok.SectionName)
|
|
return nil
|
|
|
|
case DataVariableInstrToken:
|
|
// Stash in symbol table for future backreferences
|
|
// TODO allow making global symbols?
|
|
// CreateSymbol does check for duplicate names already
|
|
|
|
position := int64(c.currentSection.buff.Len())
|
|
|
|
// Generate bytes for the symbol
|
|
switch tok.Sizeclass {
|
|
case "u8":
|
|
// 1 byte literal
|
|
val, err := strconv.ParseUint(tok.Value, 10, 8)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
c.Must([]byte{byte(val)})
|
|
|
|
case "u64":
|
|
// 8-byte literal
|
|
val, err := strconv.ParseUint(tok.Value, 10, 64)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
c.MustUint64(val)
|
|
|
|
case "sz":
|
|
// string with null termination
|
|
ret := []byte(tok.Value)
|
|
ret = append(ret, 0)
|
|
c.Must(ret)
|
|
|
|
default:
|
|
return fmt.Errorf("variable %q has unknown size class %q", tok.VarName, tok.Sizeclass)
|
|
}
|
|
|
|
err := c.CreateSymbol(tok.VarName, ".var."+tok.Sizeclass, int64(position), int64(c.currentSection.buff.Len())-position, false)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
|
|
case LabelToken:
|
|
return c.CreateSymbol(tok.LabelName, ".label", int64(c.currentSection.buff.Len()), 0, tok.IsGlobal)
|
|
|
|
case MovInstrToken:
|
|
// TODO encode more cases properly
|
|
if literal, err := strconv.ParseInt(tok.Args[1], 10, 64); err == nil {
|
|
// mov rxx, imm
|
|
// Store immediate in register
|
|
|
|
switch tok.Args[0] {
|
|
case "rax":
|
|
c.Must([]byte{0x48, 0xb8}) // TODO store in eax with shorter prefix if <32 bit
|
|
c.MustUint64(uint64(literal))
|
|
|
|
case "rbx":
|
|
c.Must([]byte{0x48, 0xbb}) // TODO store in eax with shorter prefix if <32 bit
|
|
c.MustUint64(uint64(literal))
|
|
|
|
case "rcx":
|
|
c.Must([]byte{0x48, 0xb9}) // TODO store in eax with shorter prefix if <32 bit
|
|
c.MustUint64(uint64(literal))
|
|
|
|
case "rdx":
|
|
c.Must([]byte{0x48, 0xba}) // TODO store in eax with shorter prefix if <32 bit
|
|
c.MustUint64(uint64(literal))
|
|
|
|
case "rsi":
|
|
c.Must([]byte{0x48, 0xbe}) // TODO store in eax with shorter prefix if <32 bit
|
|
c.MustUint64(uint64(literal))
|
|
|
|
case "rdi":
|
|
c.Must([]byte{0x48, 0xbf}) // TODO store in eax with shorter prefix if <32 bit
|
|
c.MustUint64(uint64(literal))
|
|
|
|
default:
|
|
// Store immediate in variable?
|
|
panic("mov rxx,imm pattern: missing case")
|
|
}
|
|
return nil
|
|
|
|
} else if strings.HasPrefix(tok.Args[0], `$`) {
|
|
// mov $var, rxx
|
|
// Load register's contents into variable
|
|
// x86_64 can only really do this in a single instruction with 32-bit displacement, not full 64-bit
|
|
// The PIC alternative is to transform this into `lea symbol(%rip), %rdi`
|
|
|
|
switch tok.Args[1] {
|
|
case "rax":
|
|
c.Must([]byte{0x48, 0x89, 0x04, 0x25})
|
|
default:
|
|
panic("mov $var,rax pattern: missing case")
|
|
}
|
|
|
|
err = c.Reloc(tok.Args[0][1:], R_X86_64_32S) // Declare that this is a 32-bit reloc, not a 64-bit one
|
|
if err != nil {
|
|
return fmt.Errorf("mov with relocation: %w", err)
|
|
}
|
|
c.Must([]byte{0, 0, 0, 0}) // 32-bit
|
|
return nil
|
|
|
|
} else if strings.HasPrefix(tok.Args[1], `$`) {
|
|
// mov rxx, $var
|
|
// With $; load variable contents into register
|
|
|
|
switch tok.Args[0] {
|
|
case "rax":
|
|
c.Must([]byte{0x48, 0x8b, 0x04, 0x25})
|
|
case "rdi":
|
|
c.Must([]byte{0x48, 0x8b, 0x3c, 0x25})
|
|
default:
|
|
panic("mov rxx,$var pattern: missing case")
|
|
}
|
|
|
|
err = c.Reloc(tok.Args[1][1:], R_X86_64_32S) // Declare that this is a 32-bit reloc, not a 64-bit one
|
|
if err != nil {
|
|
return fmt.Errorf("mov with relocation: %w", err)
|
|
}
|
|
c.Must([]byte{0, 0, 0, 0}) // 32-bit
|
|
|
|
return nil
|
|
|
|
} else if strings.HasPrefix(tok.Args[1], `&$`) {
|
|
// mov rxx, &$var
|
|
// With &; assign exact address of variable to register
|
|
// This creates a movabs literal & a relocation entry
|
|
// It's always 64-bit
|
|
|
|
switch tok.Args[0] {
|
|
case "rax":
|
|
c.Must([]byte{0x48, 0xb8}) // TODO store in eax with shorter prefix if <32 bit
|
|
case "rsi":
|
|
c.Must([]byte{0x48, 0xbe}) // TODO store in eax with shorter prefix if <32 bit
|
|
case "rdi":
|
|
c.Must([]byte{0x48, 0xbf}) // TODO store in eax with shorter prefix if <32 bit
|
|
|
|
default:
|
|
panic("mov $var,rxx pattern: missing case")
|
|
}
|
|
|
|
err = c.Reloc(tok.Args[1][2:], R_X86_64_64)
|
|
if err != nil {
|
|
return fmt.Errorf("mov with relocation: %w", err)
|
|
}
|
|
|
|
c.MustUint64(0)
|
|
return nil
|
|
|
|
} else if strings.HasPrefix(tok.Args[1], `strlen($`) && strings.HasSuffix(tok.Args[1], `)`) {
|
|
// mov rxx, strlen($var)
|
|
// With strlen; if this is an sz symbol, supply its length
|
|
symname := tok.Args[1][8 : len(tok.Args[1])-1]
|
|
sym, ok := c.symtab[symname]
|
|
if !ok {
|
|
return fmt.Errorf("Can't strlen on unknown variable %q", symname)
|
|
}
|
|
|
|
if sym.kind != ".var.sz" {
|
|
return fmt.Errorf("Can't take the strlen of variable %q with type %q (expected sz)", symname, sym.kind)
|
|
}
|
|
|
|
effective := sym.length
|
|
return c.Compile(MovInstrToken{Args: []string{tok.Args[0], strconv.Itoa(int(effective))}})
|
|
|
|
} else {
|
|
panic("unknown mov type, sorry")
|
|
|
|
}
|
|
|
|
case SyscallInstrToken:
|
|
c.Must([]byte{0x0f, 0x05}) // syscall
|
|
return nil
|
|
|
|
case RetInstrToken:
|
|
c.Must([]byte{0xc3}) // ret
|
|
return nil
|
|
|
|
default:
|
|
return fmt.Errorf("can't compile token of type %#t", t)
|
|
}
|
|
}
|
|
|
|
// Finalize exports the compiled sections into an ELF artefact.
|
|
// The resulting ELF is not executable directly, but it can be once fully
|
|
// linked (adding a program header and page alignment)
|
|
func (c *compiler) Finalize(dest io.Writer) error {
|
|
|
|
// Find some well-known section indexes
|
|
symtabSectionIndex, ok := c.FindSectionIndex(`.symtab`)
|
|
if !ok {
|
|
return fmt.Errorf("No symbol table present")
|
|
}
|
|
|
|
shstrtabSectionIndex, ok := c.FindSectionIndex(`.shstrtab`)
|
|
if !ok {
|
|
return fmt.Errorf("No string table present")
|
|
}
|
|
|
|
// (Safely) move all global symtab to the end
|
|
// Because there may be existing references to global symtab entries (e.g. relocs)
|
|
// just duplicate them in place
|
|
tmp := c.sections[symtabSectionIndex].buff.Bytes()
|
|
extraSymtabContent := bytes.Buffer{}
|
|
for i := 0; i < len(tmp); i += 24 {
|
|
sym := Elf64_Sym{}
|
|
err := binary.Read(bytes.NewReader(tmp[i:i+24]), binary.LittleEndian, &sym)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if sym.St_info&(STB_GLOBAL<<4) == 0 {
|
|
continue // not a global symbol
|
|
}
|
|
|
|
// Was a global symbol
|
|
// Re-add the global symbol at the end
|
|
extraSymtabContent.Write(tmp[i : i+24])
|
|
|
|
// Patch the existing symbol
|
|
sym.St_name = 0
|
|
sym.St_info &= ^uint8(STB_GLOBAL << 4)
|
|
replacement := bytes.Buffer{}
|
|
err = binary.Write(&replacement, binary.LittleEndian, &sym)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
copy(tmp[i:i+24], replacement.Bytes())
|
|
}
|
|
numLocalSymbols := len(tmp) / 24
|
|
c.sections[symtabSectionIndex].buff.Write(extraSymtabContent.Bytes())
|
|
|
|
// Write ELF header
|
|
ehdr := Elf64_Ehdr{}
|
|
ehdr.e_ident[0] = 0x7f
|
|
ehdr.e_ident[1] = 'E'
|
|
ehdr.e_ident[2] = 'L'
|
|
ehdr.e_ident[3] = 'F'
|
|
ehdr.e_ident[4] = 2 // 64-bit format
|
|
ehdr.e_ident[5] = 1 // little endian
|
|
ehdr.e_ident[6] = 1 // ELFv1 is the only format
|
|
ehdr.e_ident[7] = 0 // Don't declare any ABI
|
|
|
|
ehdr.e_type = ET_REL
|
|
ehdr.e_machine = 0x3E // x86_64
|
|
ehdr.e_version = 1 // ELFv1 again
|
|
//ehdr.e_flags = 11 // ????
|
|
ehdr.e_ehsize = 64
|
|
|
|
ehdr.e_shoff = 64 // The Ehdr is 64 bytes long, sections start immediately following
|
|
ehdr.e_shentsize = 64 // Each Shdr is also 64 bytes long
|
|
ehdr.e_shnum = uint16(len(c.sections))
|
|
ehdr.e_shstrndx = uint16(shstrtabSectionIndex)
|
|
|
|
err := binary.Write(dest, binary.LittleEndian, &ehdr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Don't declare a program header
|
|
|
|
// Write fake 0th section header
|
|
dest.Write(make([]byte, 64))
|
|
|
|
// Write remaining section headers
|
|
pctr := 64 + (64 * len(c.sections))
|
|
for _, sec := range c.sections[1:] {
|
|
shdr := Elf64_Shdr{}
|
|
|
|
shdr.sh_name = uint32(sec.name_shstrtabOffset)
|
|
|
|
switch sec.name {
|
|
case ".text":
|
|
shdr.sh_type = SHT_PROGBITS
|
|
shdr.sh_flags = SHF_ALLOC | SHF_EXECINSTR
|
|
shdr.sh_addralign = 16 // Request for final linking
|
|
|
|
case ".data":
|
|
shdr.sh_type = SHT_PROGBITS
|
|
shdr.sh_flags = SHF_WRITE | SHF_ALLOC
|
|
shdr.sh_addralign = 4 // Request for final linking
|
|
|
|
case ".symtab":
|
|
shdr.sh_type = SHT_SYMTAB
|
|
shdr.sh_flags = 0
|
|
shdr.sh_info = uint32(numLocalSymbols) // sh_info points to the first global symbol. Global symbols must go after local symbols
|
|
shdr.sh_entsize = 24 // Size in bytes of each entry
|
|
shdr.sh_link = 1 // The index of the section containing the actual strings. We reuse shstrtab(!?!)
|
|
shdr.sh_addralign = 8 // Request for final linking
|
|
|
|
case ".shstrtab":
|
|
shdr.sh_type = SHT_STRTAB
|
|
shdr.sh_flags = 0
|
|
shdr.sh_addralign = 1 // Not doing any proper alignment
|
|
|
|
case ".rodata":
|
|
shdr.sh_type = SHT_PROGBITS
|
|
shdr.sh_flags = SHF_ALLOC
|
|
shdr.sh_addralign = 4 // Request for final linking
|
|
|
|
default:
|
|
if strings.HasPrefix(sec.name, ".rela.") {
|
|
shdr.sh_type = SHT_RELA
|
|
shdr.sh_flags = 0 // ?
|
|
shdr.sh_link = uint32(symtabSectionIndex)
|
|
shdr.sh_entsize = 24 // Size in bytes of each entry
|
|
|
|
// Find the index of the section for which this relocates. Match by name
|
|
srcSectionIdx, ok := c.FindSectionIndex(sec.name[5:])
|
|
if !ok {
|
|
return fmt.Errorf("Missing parent section for relocation section %q", sec.name)
|
|
}
|
|
shdr.sh_info = uint32(srcSectionIdx)
|
|
shdr.sh_addralign = 8 // Request for final linking
|
|
} else {
|
|
return fmt.Errorf("don't know the right flags to use for section %q", sec.name)
|
|
}
|
|
}
|
|
|
|
shdr.sh_offset = uint64(pctr)
|
|
shdr.sh_size = uint64(sec.buff.Len())
|
|
|
|
pctr += sec.buff.Len()
|
|
|
|
err = binary.Write(dest, binary.LittleEndian, &shdr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
// Write binary content
|
|
for _, sec := range c.sections[1:] {
|
|
expectLen := sec.buff.Len()
|
|
n, err := sec.buff.WriteTo(dest)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if n != int64(expectLen) {
|
|
return io.ErrShortWrite
|
|
}
|
|
}
|
|
|
|
// Done
|
|
return nil
|
|
|
|
}
|