This repository was archived on 2026-05-13. You can view and clone its files, but you cannot open issues or pull requests, or push commits.
Files
sfdl/scanner/scanner.go
2025-12-03 09:57:17 +08:00

203 lines
3.3 KiB
Go

package scanner
import (
"os"
"unicode"
)
// EOF is the sentinel rune returned by peek/peekNext/next once the
// input is exhausted (the scanner never stores rune 0 as a real token
// boundary marker; it is produced only at end of input).
const EOF rune = 0
// Scanner is a lexical scanner over an in-memory rune buffer whose
// contents were upper-cased at construction time.
type Scanner struct {
	src []rune // upper-cased source text
	pos int    // index of the next unread rune in src
}
// NewScannerFromFile reads the named file and returns a Scanner over its
// contents. Every rune is normalized to upper case, mirroring
// NewScannerFromString. The os.ReadFile error is returned unchanged on
// failure.
func NewScannerFromFile(filename string) (*Scanner, error) {
	data, err := os.ReadFile(filename)
	if err != nil {
		return nil, err
	}
	runes := []rune(string(data))
	for i, r := range runes {
		runes[i] = unicode.ToUpper(r)
	}
	// pos starts at its zero value, the beginning of the buffer.
	return &Scanner{src: runes}, nil
}
// NewScannerFromString returns a Scanner over the given text, normalizing
// every rune to upper case so later scanning is case-insensitive.
func NewScannerFromString(data string) *Scanner {
	runes := []rune(data)
	for i, r := range runes {
		runes[i] = unicode.ToUpper(r)
	}
	// pos starts at its zero value, the beginning of the buffer.
	return &Scanner{src: runes}
}
// peek returns the current rune without advancing the position;
// it returns EOF once the input is exhausted.
func (s *Scanner) peek() rune {
	if s.pos < len(s.src) {
		return s.src[s.pos]
	}
	return EOF
}
// peekNext returns the rune one past the current position without
// advancing; it returns EOF when no such rune exists.
func (s *Scanner) peekNext() rune {
	if next := s.pos + 1; next < len(s.src) {
		return s.src[next]
	}
	return EOF
}
// next returns the current rune and advances the position by one.
// At end of input it returns EOF and leaves the position unchanged.
func (s *Scanner) next() rune {
	if s.pos < len(s.src) {
		r := s.src[s.pos]
		s.pos++
		return r
	}
	return EOF
}
// skipSpacesAndComments advances past any run of whitespace and line
// comments (introduced by "//" or "--"), stopping at the first
// significant rune or at end of input.
func (s *Scanner) skipSpacesAndComments() {
	for {
		ch := s.peek()
		switch {
		case ch == EOF:
			return
		case (ch == '/' && s.peekNext() == '/') || (ch == '-' && s.peekNext() == '-'):
			// Line comment: consume through the line terminator
			// (or stop if the input ends first).
			for {
				c := s.next()
				if c == '\n' || c == '\r' || c == EOF {
					break
				}
			}
		case unicode.IsSpace(ch):
			s.next()
		default:
			return
		}
	}
}
// GetToken skips leading whitespace/comments and returns the next token.
// It yields NONTOKEN at end of input, delegates numbers and identifiers
// to scanNumber/scanIdentOrKeyword, recognizes the operator and
// punctuation set (including the two-rune "**"), and wraps anything
// else in an ERRTOKEN carrying the offending rune.
func (s *Scanner) GetToken() Token {
	s.skipSpacesAndComments()
	ch := s.peek()
	switch {
	case ch == EOF:
		return Token{Type: NONTOKEN}
	case unicode.IsDigit(ch):
		return s.scanNumber()
	case unicode.IsLetter(ch):
		return s.scanIdentOrKeyword()
	}
	// Every remaining case consumes exactly this one rune first.
	s.next()
	switch ch {
	case '+':
		return Token{Type: PLUS, Lexeme: "+"}
	case '-':
		return Token{Type: MINUS, Lexeme: "-"}
	case '*':
		// "**" is the power operator; a lone '*' is multiplication.
		if s.peek() == '*' {
			s.next()
			return Token{Type: POWER, Lexeme: "**"}
		}
		return Token{Type: MUL, Lexeme: "*"}
	case '/':
		return Token{Type: DIV, Lexeme: "/"}
	case '(':
		return Token{Type: L_BRACKET, Lexeme: "("}
	case ')':
		return Token{Type: R_BRACKET, Lexeme: ")"}
	case ',':
		return Token{Type: COMMA, Lexeme: ","}
	case ';':
		return Token{Type: SEMICO, Lexeme: ";"}
	default:
		return Token{Type: ERRTOKEN, Lexeme: string(ch)}
	}
}
// scanNumber scans a numeric literal of the form digit+("."digit*)? and
// returns a CONST_ID token whose Value is accumulated digit by digit.
// A second '.' or a letter glued onto the number aborts the scan with an
// ERRTOKEN whose Lexeme includes the offending rune.
func (s *Scanner) scanNumber() Token {
	tok := Token{Type: CONST_ID, Lexeme: "", Value: 0.0}
	seenDot := false
	scale := 0.1 // weight of the next fractional digit
	for {
		c := s.peek()
		switch {
		case unicode.IsDigit(c):
			s.next()
			if seenDot {
				tok.Value += float64(c-'0') * scale
				scale *= 0.1
			} else {
				tok.Value *= 10
				tok.Value += float64(c - '0')
			}
			tok.Lexeme += string(c)
		case c == '.':
			s.next()
			if seenDot {
				// Two dots in one literal, e.g. "1.2.".
				return Token{Type: ERRTOKEN, Lexeme: tok.Lexeme + string(c)}
			}
			seenDot = true
			tok.Lexeme += string(c)
		case unicode.IsLetter(c):
			// A letter directly after digits, e.g. "12a".
			s.next()
			return Token{Type: ERRTOKEN, Lexeme: tok.Lexeme + string(c)}
		default:
			return tok
		}
	}
}
// scanIdentOrKeyword scans letter(letter|digit)* and looks the result up
// in the keywords table. A known keyword returns its table entry; any
// other identifier is reported as an ERRTOKEN carrying the lexeme.
// (Scanning stops at EOF, whitespace, or any other non-alphanumeric rune
// — none of those satisfy IsLetter/IsDigit.)
func (s *Scanner) scanIdentOrKeyword() Token {
	lexeme := ""
	for {
		c := s.peek()
		if !unicode.IsLetter(c) && !unicode.IsDigit(c) {
			break
		}
		lexeme += string(c)
		s.next()
	}
	if tok, ok := keywords[lexeme]; ok {
		return tok
	}
	return Token{Type: ERRTOKEN, Lexeme: lexeme}
}