package main import ( "bufio" "fmt" "io" "strings" ) type lexer struct { r *bufio.Reader lineno int peek Token } func NewLexer(src io.Reader) *lexer { return &lexer{ r: bufio.NewReader(src), lineno: 0, } } func (l *lexer) Peek() (Token, error) { tok, err := l.Next() if err != nil { return nil, err } l.peek = tok return tok, nil } func (l *lexer) Fields(line string) ([]string, error) { const ( STATE_START = 0 STATE_IN_WORD = 1 STATE_IN_QUOTED_STRING = 3 STATE_BACKSLASH = 4 STATE_LINE_COMMENT = 5 ) var ( ret = []string{} state = STATE_START buff = "" ) for _, c := range line { switch state { case STATE_START: if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == ',' { continue } else if c == ';' { state = STATE_LINE_COMMENT } else if c == '"' { state = STATE_IN_QUOTED_STRING } else { buff = string(c) state = STATE_IN_WORD } case STATE_IN_WORD: if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == ',' { ret = append(ret, buff) buff = "" state = STATE_START } else if c == ';' { ret = append(ret, buff) buff = "" state = STATE_LINE_COMMENT } else { buff += string(c) } case STATE_IN_QUOTED_STRING: if c == '"' { ret = append(ret, buff) buff = "" state = STATE_START } else if c == '\\' { state = STATE_BACKSLASH } else { buff += string(c) } case STATE_BACKSLASH: if c == 'n' { buff += "\n" } else if c == 'r' { buff += "\r" } else if c == 't' { buff += "\t" } else if c == 's' { buff += " " } else if c == '\\' { buff += `\` } else { return nil, fmt.Errorf("Unknown backslash escape sequence") } state = STATE_IN_QUOTED_STRING case STATE_LINE_COMMENT: if c == '\n' { state = STATE_START } // ignore all else default: panic("impossible state") } } if state == STATE_IN_QUOTED_STRING { return nil, fmt.Errorf("unterminated string literal") } if state == STATE_IN_WORD { ret = append(ret, buff) buff = "" state = STATE_START } return ret, nil } func (l *lexer) Next() (Token, error) { if l.peek != nil { ret := l.peek l.peek = nil return ret, nil } // l.lineno++ line, err := l.r.ReadString('\n') if err != nil { return nil, err } // fields, err := l.Fields(line) if err != nil { return nil, err } if len(fields) == 0 { // This line only contained comments // Continue to the next line return l.Next() } switch strings.ToLower(fields[0]) { case "section": return SectionToken{fields[1]}, nil case "global": return LabelToken{strings.TrimRight(fields[1], `:`), true}, nil case "mov": for i, _ := range fields { fields[i] = strings.TrimRight(fields[i], `,`) } return MovInstrToken{fields[1:]}, nil case "syscall": return SyscallInstrToken{}, nil case "ret": return RetInstrToken{}, nil case "nop": return NopInstrToken{}, nil default: // If the field ends with `:`, it's a (local) label if strings.HasSuffix(fields[0], `:`) { return LabelToken{strings.TrimRight(fields[0], `:`), false}, nil } // If the field starts with `$`, it's a "variable" if strings.HasPrefix(fields[0], `$`) { // 1: = if fields[1] != `=` { return nil, fmt.Errorf("Invalid syntax at line %d (expected = in declaration)", l.lineno) } // 2: sizeclass // 3+++: literal initializer return DataVariableInstrToken{ VarName: fields[0][1:], Sizeclass: fields[2], Value: strings.Join(fields[3:], " "), // FIXME consecutive spaces are ruined }, nil } } return nil, fmt.Errorf("Unknown token '%s' at line %d", fields[0], l.lineno) }