202 lines
3.7 KiB
Go
202 lines
3.7 KiB
Go
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io"
|
|
"strings"
|
|
)
|
|
|
|
type lexer struct {
|
|
r *bufio.Reader
|
|
lineno int
|
|
|
|
peek Token
|
|
}
|
|
|
|
func NewLexer(src io.Reader) *lexer {
|
|
return &lexer{
|
|
r: bufio.NewReader(src),
|
|
lineno: 0,
|
|
}
|
|
}
|
|
|
|
func (l *lexer) Peek() (Token, error) {
|
|
tok, err := l.Next()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
l.peek = tok
|
|
return tok, nil
|
|
}
|
|
|
|
func (l *lexer) Fields(line string) ([]string, error) {
|
|
const (
|
|
STATE_START = 0
|
|
STATE_IN_WORD = 1
|
|
STATE_IN_QUOTED_STRING = 3
|
|
STATE_BACKSLASH = 4
|
|
STATE_LINE_COMMENT = 5
|
|
)
|
|
|
|
var (
|
|
ret = []string{}
|
|
state = STATE_START
|
|
buff = ""
|
|
)
|
|
|
|
for _, c := range line {
|
|
|
|
switch state {
|
|
case STATE_START:
|
|
if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == ',' {
|
|
continue
|
|
} else if c == ';' {
|
|
state = STATE_LINE_COMMENT
|
|
} else if c == '"' {
|
|
state = STATE_IN_QUOTED_STRING
|
|
} else {
|
|
buff = string(c)
|
|
state = STATE_IN_WORD
|
|
}
|
|
|
|
case STATE_IN_WORD:
|
|
if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == ',' {
|
|
ret = append(ret, buff)
|
|
buff = ""
|
|
state = STATE_START
|
|
} else if c == ';' {
|
|
ret = append(ret, buff)
|
|
buff = ""
|
|
state = STATE_LINE_COMMENT
|
|
} else {
|
|
buff += string(c)
|
|
}
|
|
|
|
case STATE_IN_QUOTED_STRING:
|
|
if c == '"' {
|
|
ret = append(ret, buff)
|
|
buff = ""
|
|
state = STATE_START
|
|
} else if c == '\\' {
|
|
state = STATE_BACKSLASH
|
|
} else {
|
|
buff += string(c)
|
|
}
|
|
|
|
case STATE_BACKSLASH:
|
|
if c == 'n' {
|
|
buff += "\n"
|
|
} else if c == 'r' {
|
|
buff += "\r"
|
|
} else if c == 't' {
|
|
buff += "\t"
|
|
} else if c == 's' {
|
|
buff += " "
|
|
} else if c == '\\' {
|
|
buff += `\`
|
|
} else {
|
|
return nil, fmt.Errorf("Unknown backslash escape sequence")
|
|
}
|
|
state = STATE_IN_QUOTED_STRING
|
|
|
|
case STATE_LINE_COMMENT:
|
|
if c == '\n' {
|
|
state = STATE_START
|
|
} // ignore all else
|
|
|
|
default:
|
|
panic("impossible state")
|
|
}
|
|
}
|
|
|
|
if state == STATE_IN_QUOTED_STRING {
|
|
return nil, fmt.Errorf("unterminated string literal")
|
|
}
|
|
|
|
if state == STATE_IN_WORD {
|
|
ret = append(ret, buff)
|
|
buff = ""
|
|
state = STATE_START
|
|
}
|
|
|
|
return ret, nil
|
|
}
|
|
|
|
func (l *lexer) Next() (Token, error) {
|
|
if l.peek != nil {
|
|
ret := l.peek
|
|
l.peek = nil
|
|
return ret, nil
|
|
}
|
|
|
|
//
|
|
|
|
l.lineno++
|
|
line, err := l.r.ReadString('\n')
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
//
|
|
|
|
fields, err := l.Fields(line)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if len(fields) == 0 {
|
|
// This line only contained comments
|
|
// Continue to the next line
|
|
return l.Next()
|
|
}
|
|
|
|
switch strings.ToLower(fields[0]) {
|
|
case "section":
|
|
return SectionToken{fields[1]}, nil
|
|
|
|
case "global":
|
|
return LabelToken{strings.TrimRight(fields[1], `:`), true}, nil
|
|
|
|
case "mov":
|
|
for i, _ := range fields {
|
|
fields[i] = strings.TrimRight(fields[i], `,`)
|
|
}
|
|
return MovInstrToken{fields[1:]}, nil
|
|
|
|
case "syscall":
|
|
return SyscallInstrToken{}, nil
|
|
|
|
case "ret":
|
|
return RetInstrToken{}, nil
|
|
|
|
case "nop":
|
|
return NopInstrToken{}, nil
|
|
|
|
default:
|
|
// If the field ends with `:`, it's a (local) label
|
|
if strings.HasSuffix(fields[0], `:`) {
|
|
return LabelToken{strings.TrimRight(fields[0], `:`), false}, nil
|
|
}
|
|
|
|
// If the field starts with `$`, it's a "variable"
|
|
if strings.HasPrefix(fields[0], `$`) {
|
|
// 1: =
|
|
if fields[1] != `=` {
|
|
return nil, fmt.Errorf("Invalid syntax at line %d (expected = in declaration)", l.lineno)
|
|
}
|
|
|
|
// 2: sizeclass
|
|
// 3+++: literal initializer
|
|
return DataVariableInstrToken{
|
|
VarName: fields[0][1:],
|
|
Sizeclass: fields[2],
|
|
Value: strings.Join(fields[3:], " "), // FIXME consecutive spaces are ruined
|
|
}, nil
|
|
}
|
|
}
|
|
|
|
return nil, fmt.Errorf("Unknown token '%s' at line %d", fields[0], l.lineno)
|
|
}
|