pxasme/lexer.go

202 lines
3.7 KiB
Go

package main
import (
"bufio"
"fmt"
"io"
"strings"
)
// lexer turns assembly source text into Tokens, reading the input one line
// at a time.
type lexer struct {
// r is the buffered reader the source lines are read from.
r *bufio.Reader
// lineno counts the lines read so far; it is reported in error messages.
lineno int
// peek holds a token stored by Peek until the next call to Next hands it
// out; it is nil when no token is buffered.
peek Token
}
// NewLexer creates a lexer that reads its source from src through a buffered
// reader. The line counter starts at zero and no token is buffered.
func NewLexer(src io.Reader) *lexer {
	lx := new(lexer)
	lx.r = bufio.NewReader(src)
	lx.lineno = 0
	return lx
}
// Peek returns the upcoming token without consuming it. The token is obtained
// through Next and then stored on the lexer, so the following call to Next
// (or Peek) yields the same token again. If Next fails, the error is returned
// and nothing is stored.
func (l *lexer) Peek() (Token, error) {
	upcoming, err := l.Next()
	if err == nil {
		l.peek = upcoming
		return upcoming, nil
	}
	return nil, err
}
// Fields splits a line of assembly source into its fields.
//
// The input is scanned rune by rune with these rules:
//   - Spaces, tabs, '\r', '\n' and commas separate fields and are dropped.
//   - A ';' outside a quoted string starts a comment that runs up to the next
//     '\n' or the end of the input.
//   - A '"' where a new field would start opens a quoted string, which forms
//     one field (possibly empty) and may contain separators and ';'. Inside
//     it the escapes \n, \r, \t, \s (a space), \\ and \" are recognised.
//   - A '"' in the middle of a bare word is kept as part of that word.
//
// The returned slice is never nil on success. An error is returned for an
// unknown escape sequence and for a string literal that is still open when
// the input ends, including input that stops right after a backslash.
func (l *lexer) Fields(line string) ([]string, error) {
	const (
		stateStart = iota
		stateInWord
		stateInQuotedString
		stateBackslash
		stateLineComment
	)
	var (
		ret   = []string{}
		state = stateStart
		buff  strings.Builder
	)
	// emit finishes the field collected so far and clears the buffer.
	emit := func() {
		ret = append(ret, buff.String())
		buff.Reset()
	}
	for _, c := range line {
		switch state {
		case stateStart:
			switch c {
			case ' ', '\t', '\r', '\n', ',':
				// Separator between fields: nothing to collect.
			case ';':
				state = stateLineComment
			case '"':
				state = stateInQuotedString
			default:
				buff.WriteRune(c)
				state = stateInWord
			}
		case stateInWord:
			switch c {
			case ' ', '\t', '\r', '\n', ',':
				emit()
				state = stateStart
			case ';':
				emit()
				state = stateLineComment
			default:
				buff.WriteRune(c)
			}
		case stateInQuotedString:
			switch c {
			case '"':
				emit()
				state = stateStart
			case '\\':
				state = stateBackslash
			default:
				buff.WriteRune(c)
			}
		case stateBackslash:
			switch c {
			case 'n':
				buff.WriteByte('\n')
			case 'r':
				buff.WriteByte('\r')
			case 't':
				buff.WriteByte('\t')
			case 's':
				buff.WriteByte(' ')
			case '\\':
				buff.WriteByte('\\')
			case '"':
				// Without this escape a string could never contain a quote.
				buff.WriteByte('"')
			default:
				return nil, fmt.Errorf("Unknown backslash escape sequence")
			}
			state = stateInQuotedString
		case stateLineComment:
			if c == '\n' {
				state = stateStart
			} // ignore all else
		default:
			panic("impossible state")
		}
	}
	switch state {
	case stateInQuotedString, stateBackslash:
		// Input that ends right after a backslash is still inside the string
		// literal, so it is just as unterminated as one missing its quote.
		return nil, fmt.Errorf("unterminated string literal")
	case stateInWord:
		emit()
	}
	return ret, nil
}
// Next returns the next token of the input.
//
// A token stored by a preceding Peek is handed out first. Otherwise lines are
// read until one produces at least one field; lines that are empty or hold
// only a comment are skipped. The first field, compared case-insensitively,
// selects the token:
//
//	section NAME    -> SectionToken
//	global NAME[:]  -> LabelToken (global)
//	mov ...         -> MovInstrToken carrying the remaining fields
//	syscall/ret/nop -> the matching instruction token
//	NAME:           -> LabelToken (local)
//	$NAME = SIZE... -> DataVariableInstrToken
//
// Errors from the underlying reader are returned unchanged, so callers see
// io.EOF once the input is exhausted. A final line without a trailing
// newline is still tokenised. Errors from Fields are wrapped with the line
// number, and a line that is malformed or starts with an unknown word yields
// an error instead of a token.
func (l *lexer) Next() (Token, error) {
	if l.peek != nil {
		ret := l.peek
		l.peek = nil
		return ret, nil
	}

	// Read until a line yields fields; a loop keeps long runs of blank or
	// comment-only lines from deepening the call stack.
	var fields []string
	for len(fields) == 0 {
		l.lineno++
		line, err := l.r.ReadString('\n')
		// ReadString returns the data read so far together with io.EOF when
		// the last line has no trailing newline; that line must not be lost.
		if err != nil && (err != io.EOF || line == "") {
			return nil, err
		}
		fields, err = l.Fields(line)
		if err != nil {
			return nil, fmt.Errorf("%w at line %d", err, l.lineno)
		}
	}

	switch strings.ToLower(fields[0]) {
	case "section":
		if len(fields) < 2 {
			return nil, fmt.Errorf("Invalid syntax at line %d (expected name after section)", l.lineno)
		}
		return SectionToken{fields[1]}, nil
	case "global":
		if len(fields) < 2 {
			return nil, fmt.Errorf("Invalid syntax at line %d (expected label after global)", l.lineno)
		}
		return LabelToken{strings.TrimRight(fields[1], `:`), true}, nil
	case "mov":
		for i := range fields {
			fields[i] = strings.TrimRight(fields[i], `,`)
		}
		return MovInstrToken{fields[1:]}, nil
	case "syscall":
		return SyscallInstrToken{}, nil
	case "ret":
		return RetInstrToken{}, nil
	case "nop":
		return NopInstrToken{}, nil
	default:
		// If the field ends with `:`, it's a (local) label
		if strings.HasSuffix(fields[0], `:`) {
			return LabelToken{strings.TrimRight(fields[0], `:`), false}, nil
		}
		// If the field starts with `$`, it's a "variable"
		if strings.HasPrefix(fields[0], `$`) {
			// 1: =
			if len(fields) < 2 || fields[1] != `=` {
				return nil, fmt.Errorf("Invalid syntax at line %d (expected = in declaration)", l.lineno)
			}
			// 2: sizeclass
			if len(fields) < 3 {
				return nil, fmt.Errorf("Invalid syntax at line %d (expected sizeclass in declaration)", l.lineno)
			}
			// 3+++: literal initializer
			return DataVariableInstrToken{
				VarName:   fields[0][1:],
				Sizeclass: fields[2],
				Value:     strings.Join(fields[3:], " "), // FIXME consecutive spaces are ruined
			}, nil
		}
	}
	return nil, fmt.Errorf("Unknown token '%s' at line %d", fields[0], l.lineno)
}