From e03434bd5029c028992e18eb8a6a4e5f9d97411e Mon Sep 17 00:00:00 2001 From: mappu Date: Mon, 11 Dec 2023 14:24:25 +1300 Subject: [PATCH] work on elf output --- compile.go | 91 +++++++++++++++++++++++++++++++++++++--------------- elf.go | 33 +++++++++++++++++-- main_test.go | 2 +- 3 files changed, 97 insertions(+), 29 deletions(-) diff --git a/compile.go b/compile.go index b35d88e..69f98d4 100644 --- a/compile.go +++ b/compile.go @@ -12,7 +12,7 @@ import ( type section struct { name string name_shstrtabOffset int - buff bytes.Buffer + buff *bytes.Buffer } type symtabEntry struct { @@ -37,12 +37,16 @@ func NewCompiler() *compiler { symtab: map[string]symtabEntry{}, } + // Fake 0th entry + // First, there's an all-zero entry that is reserved for extended ELF headers + c.sections = append(c.sections, section{}) + c.sections = append(c.sections, section{ name: `.shstrtab`, // Mandatory: the table that names sections themselves name_shstrtabOffset: 1, - buff: bytes.Buffer{}, + buff: &bytes.Buffer{}, }) - c.shstrtab = &c.sections[0] + c.shstrtab = &c.sections[1] // The first byte in a string table is conventionally expected be \x00, so that you can reference // null strings with it c.shstrtab.buff.WriteByte(0) @@ -99,7 +103,7 @@ func (c *compiler) CreateSymbol(name string, class string, offset int64, length esym.st_shndx = uint16(sectionIndex) esym.st_size = uint64(length) - err := binary.Write(&symtabSec.buff, binary.LittleEndian, &esym) + err := binary.Write(symtabSec.buff, binary.LittleEndian, &esym) return err } @@ -119,11 +123,7 @@ func (c *compiler) MustUint64(val uint64) { c.Must(ret) } -func (c *compiler) FindOrCreateSection(sectionName string) *section { - - if len(sectionName) == 0 || sectionName[0] != '.' { - panic("section name should start with leading period") - } +func (c *compiler) FindSectionIndex(sectionName string) (int, bool) { for i, sec := range c.sections { if sec.name != sectionName { @@ -131,6 +131,19 @@ func (c *compiler) FindOrCreateSection(sectionName string) *section { } // found it + return i, true + } + + return 0, false +} + +func (c *compiler) FindOrCreateSection(sectionName string) *section { + + if len(sectionName) == 0 || sectionName[0] != '.' { + panic("section name should start with leading period") + } + + if i, ok := c.FindSectionIndex(sectionName); ok { return &c.sections[i] } @@ -138,7 +151,7 @@ func (c *compiler) FindOrCreateSection(sectionName string) *section { c.sections = append(c.sections, section{ name: sectionName, name_shstrtabOffset: c.shstrtab.buff.Len(), - buff: bytes.Buffer{}, + buff: &bytes.Buffer{}, }) c.shstrtab.buff.WriteString(sectionName) @@ -163,7 +176,7 @@ func (c *compiler) Reloc(symbolName string, mode ElfRelocationType, addOffset in rr.r_info = uint64(syminfo.symtabSectionIndex)<<32 | uint64(mode) // high bits: Index of search symbol in the symtab. low bits: mode type rr.r_addend = addOffset - err := binary.Write(&relaSec.buff, binary.LittleEndian, &rr) + err := binary.Write(relaSec.buff, binary.LittleEndian, &rr) if err != nil { return err } @@ -373,6 +386,10 @@ func (c *compiler) Compile(t Token) error { // linked (adding a program header and page alignment) func (c *compiler) Finalize(dest io.Writer) error { + // Find some well-known section indexes + symtabSectionIndex, _ := c.FindSectionIndex(`.symtab`) + shstrtabSectionIndex, _ := c.FindSectionIndex(`.shstrtab`) + // Write ELF header ehdr := Elf64_Ehdr{} ehdr.e_ident[0] = 0x7f @@ -382,16 +399,18 @@ func (c *compiler) Finalize(dest io.Writer) error { ehdr.e_ident[4] = 2 // 64-bit format ehdr.e_ident[5] = 1 // little endian ehdr.e_ident[6] = 1 // ELFv1 is the only format - ehdr.e_ident[7] = 3 // Linux-compatible ABI + ehdr.e_ident[7] = 0 // Don't declare any ABI - ehdr.e_type = 0 // ET_NONE + ehdr.e_type = ET_REL ehdr.e_machine = 0x3E // x86_64 ehdr.e_version = 1 // ELFv1 again + //ehdr.e_flags = 11 // ???? + ehdr.e_ehsize = 64 ehdr.e_shoff = 64 // The Ehdr is 64 bytes long, sections start immediately following ehdr.e_shentsize = 64 // Each Shdr is also 64 bytes long ehdr.e_shnum = uint16(len(c.sections)) - ehdr.e_shstrndx = 0 // We always put the .shstrtab as the 0th section + ehdr.e_shstrndx = uint16(shstrtabSectionIndex) err := binary.Write(dest, binary.LittleEndian, &ehdr) if err != nil { @@ -400,30 +419,50 @@ func (c *compiler) Finalize(dest io.Writer) error { // Don't declare a program header - // Write section headers + // Write fake 0th section header + dest.Write(make([]byte, 64)) + + // Write remaining section headers pctr := 64 + (64 * len(c.sections)) - for _, sec := range c.sections { + for _, sec := range c.sections[1:] { shdr := Elf64_Shdr{} shdr.sh_name = uint32(sec.name_shstrtabOffset) + + // Default (unknown section): + // Treat as extra read-only data (.rodata) + shdr.sh_type = 1 // SHT_PROGBITS, program data + shdr.sh_flags = 0x10 // MERGE + switch sec.name { case ".text": - shdr.sh_type = 1 // SHT_PROGBITS, program data + shdr.sh_type = SHT_PROGBITS shdr.sh_flags = 0x2 | 0x4 | 0x10 // WRITE|ALLOC|MERGE case ".data": - shdr.sh_type = 1 // SHT_PROGBITS, program data + shdr.sh_type = SHT_PROGBITS shdr.sh_flags = 0x2 | 0x10 // WRITE|MERGE case ".symtab": - shdr.sh_type = 2 // SHT_SYMTAB + shdr.sh_type = SHT_SYMTAB shdr.sh_flags = 0x10 | 0x20 // MERGE|STRINGS + shdr.sh_info = uint32(len(c.symtab)) + shdr.sh_entsize = uint64(sec.buff.Len()) // Number of fixed-sized symtab entries + shdr.sh_link = 1 // The index of the section containing the actual strings. We reuse shstrtab(!?!) case ".shstrtab": - shdr.sh_type = 3 // SHT_STRTAB + shdr.sh_type = SHT_STRTAB shdr.sh_flags = 0x10 | 0x20 // MERGE|STRINGS - case ".rodata": - fallthrough - default: // Treat anything unknown as read-only data - shdr.sh_type = 1 // SHT_PROGBITS, program data - shdr.sh_flags = 0x10 // MERGE + default: + if strings.HasPrefix(sec.name, ".rela.") { + shdr.sh_type = SHT_RELA + shdr.sh_flags = 0 // ? + shdr.sh_link = uint32(symtabSectionIndex) // The index of the symtab section + + // Find the index of the section for which this relocates. Match by name + srcSectionIdx, ok := c.FindSectionIndex(sec.name[5:]) + if !ok { + return fmt.Errorf("Missing parent section for relocation section %q", sec.name) + } + shdr.sh_info = uint32(srcSectionIdx) + } } shdr.sh_offset = uint64(pctr) @@ -438,7 +477,7 @@ func (c *compiler) Finalize(dest io.Writer) error { } // Write binary content - for _, sec := range c.sections { + for _, sec := range c.sections[1:] { expectLen := sec.buff.Len() n, err := sec.buff.WriteTo(dest) if err != nil { diff --git a/elf.go b/elf.go index 83e26ef..9f34653 100644 --- a/elf.go +++ b/elf.go @@ -18,6 +18,15 @@ type Elf64_Ehdr struct { e_shstrndx uint16 } +// File types +const ( + ET_NONE = 0 + ET_REL = 1 + ET_EXEC = 2 + ET_DYN = 3 + ET_CORE = 4 +) + // Elf64_Phdr is the Program Header type Elf64_Phdr struct { p_type uint32 @@ -30,6 +39,26 @@ type Elf64_Phdr struct { p_align uint64 } +const ( + SHT_NULL = 0 + SHT_PROGBITS = 1 + SHT_SYMTAB = 2 + SHT_STRTAB = 3 + SHT_RELA = 4 + SHT_HASH = 5 + SHT_DYNAMIC = 6 + SHT_NOTE = 7 + SHT_NOBITS = 8 + SHT_REL = 9 + SHT_SHLIB = 10 + SHT_DYNSYM = 11 + SHT_INIT_ARRAY = 14 + SHT_FINI_ARRAY = 15 + SHT_PREINIT_ARRAY = 16 + SHT_GROUP = 17 + SHT_SYMTAB_SHNDX = 18 +) + // Elf64_Shdr is the Section header type Elf64_Shdr struct { sh_name uint32 @@ -66,8 +95,8 @@ const ( // Elf64_Sym is a symbol type Elf64_Sym struct { st_name uint32 - st_info byte - st_other byte + st_info uint8 + st_other uint8 st_shndx uint16 st_value uint64 st_size uint64 diff --git a/main_test.go b/main_test.go index 32c215f..022a7d0 100644 --- a/main_test.go +++ b/main_test.go @@ -47,7 +47,7 @@ global _start: ;tell linker entry point assemble(strings.NewReader(src), ioutil.Discard) */ - fh, err := os.OpenFile("output.o", os.O_CREATE|os.O_WRONLY, 0644) + fh, err := os.OpenFile("output.o", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) if err != nil { panic(err) }