From d95fa7e564c5b3973c6d9f2f84bed8b230ae103d Mon Sep 17 00:00:00 2001 From: mappu Date: Mon, 11 Dec 2023 17:26:59 +1300 Subject: [PATCH] work on elf output (2) --- compile.go | 183 +++++++++++++++++++++++++++++++++++++---------------- elf.go | 11 ++++ 2 files changed, 140 insertions(+), 54 deletions(-) diff --git a/compile.go b/compile.go index 69f98d4..bb58baf 100644 --- a/compile.go +++ b/compile.go @@ -16,7 +16,9 @@ type section struct { } type symtabEntry struct { - symtabSectionIndex int + // The index of this symbol within the whole symtab + symtabSectionIndex int + name_shstrtabOffset int sectionName string kind string @@ -41,55 +43,79 @@ func NewCompiler() *compiler { // First, there's an all-zero entry that is reserved for extended ELF headers c.sections = append(c.sections, section{}) + // Real entry: shstrtab c.sections = append(c.sections, section{ - name: `.shstrtab`, // Mandatory: the table that names sections themselves - name_shstrtabOffset: 1, - buff: &bytes.Buffer{}, + name: `.shstrtab`, // Mandatory: the table that names sections themselves + buff: &bytes.Buffer{}, }) c.shstrtab = &c.sections[1] + // The first byte in a string table is conventionally expected be \x00, so that you can reference // null strings with it - c.shstrtab.buff.WriteByte(0) + c.StringTable("") - c.shstrtab.buff.WriteString(c.shstrtab.name) - c.shstrtab.buff.WriteByte(0) + c.shstrtab.name_shstrtabOffset = c.StringTable(c.shstrtab.name) return c } +func (c *compiler) StringTable(text string) int { + pos := c.shstrtab.buff.Len() + + c.shstrtab.buff.WriteString(text) + c.shstrtab.buff.WriteByte(0) + + return pos +} + func (c *compiler) CreateSymbol(name string, class string, offset int64, length int64, global bool) error { if _, ok := c.symtab[name]; ok { return fmt.Errorf("Symbol %q already exists", name) } + // fmt.Printf("--> CreateSymbol(%s)\n", name) + // Find the .symtab section, or create if it does not exist symtabSec := c.FindOrCreateSection(`.symtab`) + if symtabSec.buff.Len() == 0 { + // First time initialized + + // Add a zeroth symtab entry - zero is a sentinel, not a usable entry + symtabSec.buff.Write(make([]byte, 8*3)) + } // New entry index = length / len(entry) = length / 24 nextIndex := symtabSec.buff.Len() / 24 // Add to our fast lookup table - c.symtab[name] = symtabEntry{ + ste := symtabEntry{ symtabSectionIndex: nextIndex, - - sectionName: c.currentSection.name, - kind: class, - offset: offset, - global: global, - length: length, + kind: class, + offset: offset, + global: global, + length: length, } // Find the section index for the section containing this symbol - sectionIndex := -1 - for i, _ := range c.sections { - if c.sections[i].name == c.currentSection.name { - sectionIndex = i - break + var srcSectionIdx int = 0 + var sttType uint8 = STT_NOTYPE + + if class == `.section` { + ste.sectionName = name + srcSectionIdx = len(c.sections) - 1 // The most recent added section + sttType = STT_SECTION + + } else if c.currentSection != nil { + ste.sectionName = c.currentSection.name + var ok bool + srcSectionIdx, ok = c.FindSectionIndex(c.currentSection.name) + if !ok { + panic("current section does not exist?") } - } - if sectionIndex == -1 { - return fmt.Errorf("Current section missing index") + + } else { + panic("Symbol is neither a section, nor within a section (?)") } // Add to the .symtab section @@ -97,14 +123,42 @@ func (c *compiler) CreateSymbol(name string, class string, offset int64, length // created, linking it with any other .o files will create a combined .text // section where all the offsets have shifted esym := Elf64_Sym{} - esym.st_name = 0 // Default: unnamed - esym.st_info = STT_SECTION | (STB_LOCAL << 4) - esym.st_other = STV_HIDDEN // For this translation unit only - esym.st_shndx = uint16(sectionIndex) + + if class == `.section` { + esym.st_name = uint32(c.StringTable(name)) // Write name into public string table + esym.st_info = sttType | (STB_LOCAL << 4) + esym.st_other = STV_DEFAULT + esym.st_shndx = uint16(srcSectionIdx) + + } else if global { + esym.st_name = uint32(c.StringTable(name)) // Write name into public string table + esym.st_info = sttType | (STB_GLOBAL << 4) + esym.st_other = STV_DEFAULT + esym.st_shndx = uint16(srcSectionIdx) + + } else { + // Private variable for this translation unit + // Needs an entry, but no need to expose the name + esym.st_name = 0 // uint32(c.StringTable(name)) // Write name into public string table // 0 // Default: unnamed (0th entry in our string table is \x00) + esym.st_info = sttType | (STB_LOCAL << 4) + esym.st_other = STV_HIDDEN // For this translation unit only + esym.st_shndx = uint16(srcSectionIdx) + } + + fmt.Printf("-->New symbol %q in section %q (sectionidx %v)\n", name, ste.sectionName, srcSectionIdx) + esym.st_size = uint64(length) err := binary.Write(symtabSec.buff, binary.LittleEndian, &esym) - return err + if err != nil { + return err + } + + // Stash in symtabEntry + ste.name_shstrtabOffset = int(esym.st_name) + c.symtab[name] = ste + + return nil } func (c *compiler) Must(b []byte) { @@ -149,18 +203,24 @@ func (c *compiler) FindOrCreateSection(sectionName string) *section { // No section with this name. Create it c.sections = append(c.sections, section{ - name: sectionName, - name_shstrtabOffset: c.shstrtab.buff.Len(), - buff: &bytes.Buffer{}, + name: sectionName, + buff: &bytes.Buffer{}, }) + sec := &c.sections[len(c.sections)-1] - c.shstrtab.buff.WriteString(sectionName) - c.shstrtab.buff.WriteByte(0) + // Create a symbol for it + // This creates a string table entry for us + err := c.CreateSymbol(sectionName, ".section", 0, 0, true) + if err != nil { + panic("CreateSymbol: " + err.Error()) + } - return &c.sections[len(c.sections)-1] + sec.name_shstrtabOffset = c.StringTable(sectionName) + + return sec } -func (c *compiler) Reloc(symbolName string, mode ElfRelocationType, addOffset int64) error { +func (c *compiler) Reloc(symbolName string, mode ElfRelocationType) error { // Find '.rela.{currentsection}', creating it if it does not exist var relaSec *section = c.FindOrCreateSection(`.rela` + c.currentSection.name) @@ -170,11 +230,19 @@ func (c *compiler) Reloc(symbolName string, mode ElfRelocationType, addOffset in return fmt.Errorf("Reference to unknown symbol %q", symbolName) } + // Find the symbol pointing to its parent section + parentSectionSyminfo, ok := c.symtab[syminfo.sectionName] + if !ok { + return fmt.Errorf("Bad parent section") + } + + fmt.Printf("-->Relocation %q found in %q (sectionidx %d)\n", symbolName, syminfo.sectionName, parentSectionSyminfo.symtabSectionIndex) + // Add the relocation to the .rela section rr := Elf64_Rela{} rr.r_offset = uint64(c.currentSection.buff.Len()) - rr.r_info = uint64(syminfo.symtabSectionIndex)<<32 | uint64(mode) // high bits: Index of search symbol in the symtab. low bits: mode type - rr.r_addend = addOffset + rr.r_info = uint64(parentSectionSyminfo.symtabSectionIndex)<<32 | uint64(mode) // high bits: Index of search symbol in the symtab (the source section). low bits: mode type + rr.r_addend = syminfo.offset // Add to the result when relocating (offset within source section) err := binary.Write(relaSec.buff, binary.LittleEndian, &rr) if err != nil { @@ -235,7 +303,7 @@ func (c *compiler) Compile(t Token) error { return fmt.Errorf("variable %q has unknown size class %q", tok.VarName, tok.Sizeclass) } - err := c.CreateSymbol(tok.VarName, ".var."+tok.Sizeclass, int64(c.currentSection.buff.Len()), position-int64(c.currentSection.buff.Len()), false) + err := c.CreateSymbol(tok.VarName, ".var."+tok.Sizeclass, int64(position), int64(c.currentSection.buff.Len())-position, false) if err != nil { return err } @@ -293,7 +361,7 @@ func (c *compiler) Compile(t Token) error { panic("mov $var,rax pattern: missing case") } - err = c.Reloc(tok.Args[0][1:], R_X86_64_32S, 0) // Declare that this is a 32-bit reloc, not a 64-bit one + err = c.Reloc(tok.Args[0][1:], R_X86_64_32S) // Declare that this is a 32-bit reloc, not a 64-bit one if err != nil { return fmt.Errorf("mov with relocation: %w", err) } @@ -313,7 +381,7 @@ func (c *compiler) Compile(t Token) error { panic("mov rxx,$var pattern: missing case") } - err = c.Reloc(tok.Args[1][1:], R_X86_64_32S, 0) // Declare that this is a 32-bit reloc, not a 64-bit one + err = c.Reloc(tok.Args[1][1:], R_X86_64_32S) // Declare that this is a 32-bit reloc, not a 64-bit one if err != nil { return fmt.Errorf("mov with relocation: %w", err) } @@ -339,7 +407,7 @@ func (c *compiler) Compile(t Token) error { panic("mov $var,rxx pattern: missing case") } - err = c.Reloc(tok.Args[1][2:], R_X86_64_64, 0) + err = c.Reloc(tok.Args[1][2:], R_X86_64_64) if err != nil { return fmt.Errorf("mov with relocation: %w", err) } @@ -428,33 +496,38 @@ func (c *compiler) Finalize(dest io.Writer) error { shdr := Elf64_Shdr{} shdr.sh_name = uint32(sec.name_shstrtabOffset) - - // Default (unknown section): - // Treat as extra read-only data (.rodata) - shdr.sh_type = 1 // SHT_PROGBITS, program data - shdr.sh_flags = 0x10 // MERGE + shdr.sh_addralign = 1 // Not doing any proper alignment switch sec.name { case ".text": shdr.sh_type = SHT_PROGBITS - shdr.sh_flags = 0x2 | 0x4 | 0x10 // WRITE|ALLOC|MERGE + shdr.sh_flags = SHF_ALLOC | SHF_EXECINSTR + case ".data": shdr.sh_type = SHT_PROGBITS - shdr.sh_flags = 0x2 | 0x10 // WRITE|MERGE + shdr.sh_flags = SHF_WRITE | SHF_ALLOC + case ".symtab": shdr.sh_type = SHT_SYMTAB - shdr.sh_flags = 0x10 | 0x20 // MERGE|STRINGS - shdr.sh_info = uint32(len(c.symtab)) - shdr.sh_entsize = uint64(sec.buff.Len()) // Number of fixed-sized symtab entries - shdr.sh_link = 1 // The index of the section containing the actual strings. We reuse shstrtab(!?!) + shdr.sh_flags = 0 + shdr.sh_info = uint32(len(c.symtab) + 1) + shdr.sh_entsize = 24 // Size in bytes of each entry + shdr.sh_link = 1 // The index of the section containing the actual strings. We reuse shstrtab(!?!) + case ".shstrtab": shdr.sh_type = SHT_STRTAB - shdr.sh_flags = 0x10 | 0x20 // MERGE|STRINGS + shdr.sh_flags = 0 + + case ".rodata": + shdr.sh_type = SHT_PROGBITS + shdr.sh_flags = SHF_ALLOC + default: if strings.HasPrefix(sec.name, ".rela.") { shdr.sh_type = SHT_RELA - shdr.sh_flags = 0 // ? - shdr.sh_link = uint32(symtabSectionIndex) // The index of the symtab section + shdr.sh_flags = 0 // ? + shdr.sh_link = uint32(symtabSectionIndex) + shdr.sh_entsize = 24 // Size in bytes of each entry // Find the index of the section for which this relocates. Match by name srcSectionIdx, ok := c.FindSectionIndex(sec.name[5:]) @@ -462,6 +535,8 @@ func (c *compiler) Finalize(dest io.Writer) error { return fmt.Errorf("Missing parent section for relocation section %q", sec.name) } shdr.sh_info = uint32(srcSectionIdx) + } else { + return fmt.Errorf("don't know the right flags to use for section %q", sec.name) } } diff --git a/elf.go b/elf.go index 9f34653..dbefd9f 100644 --- a/elf.go +++ b/elf.go @@ -57,6 +57,17 @@ const ( SHT_PREINIT_ARRAY = 16 SHT_GROUP = 17 SHT_SYMTAB_SHNDX = 18 + + SHF_WRITE = 0x1 + SHF_ALLOC = 0x2 + SHF_EXECINSTR = 0x4 + SHF_MERGE = 0x10 + SHF_STRINGS = 0x20 + SHF_INFO_LINK = 0x40 + SHF_LINK_ORDER = 0x80 + SHF_OS_NONCONFORMING = 0x100 + SHF_GROUP = 0x200 + SHF_TLS = 0x400 ) // Elf64_Shdr is the Section header