203 lines
3.3 KiB
Go
203 lines
3.3 KiB
Go
package scanner
|
|
|
|
import (
|
|
"os"
|
|
"unicode"
|
|
)
|
|
|
|
const EOF rune = 0
|
|
|
|
type Scanner struct {
|
|
src []rune
|
|
pos int
|
|
}
|
|
|
|
func NewScannerFromFile(filename string) (*Scanner, error) {
|
|
data, err := os.ReadFile(filename)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
src := []rune(string(data))
|
|
for idx := range src {
|
|
src[idx] = unicode.ToUpper(src[idx])
|
|
}
|
|
return &Scanner{
|
|
src: src,
|
|
pos: 0,
|
|
}, nil
|
|
}
|
|
|
|
func NewScannerFromString(data string) *Scanner {
|
|
src := []rune(data)
|
|
for idx := range src {
|
|
src[idx] = unicode.ToUpper(src[idx])
|
|
}
|
|
return &Scanner{
|
|
src: src,
|
|
pos: 0,
|
|
}
|
|
}
|
|
|
|
// get now char, no pos++
|
|
func (s *Scanner) peek() rune {
|
|
if s.pos >= len(s.src) {
|
|
return EOF
|
|
}
|
|
return s.src[s.pos]
|
|
}
|
|
|
|
// get next char, no pos++
|
|
func (s *Scanner) peekNext() rune {
|
|
if s.pos+1 >= len(s.src) {
|
|
return EOF
|
|
}
|
|
return s.src[s.pos+1]
|
|
}
|
|
|
|
// get now char, pos++
|
|
func (s *Scanner) next() rune {
|
|
if s.pos >= len(s.src) {
|
|
return EOF
|
|
}
|
|
ch := s.src[s.pos]
|
|
s.pos++
|
|
return ch
|
|
}
|
|
|
|
func (s *Scanner) skipSpacesAndComments() {
|
|
for {
|
|
c := s.peek()
|
|
n := s.peekNext()
|
|
if c == EOF {
|
|
return
|
|
}
|
|
if (c == '/' && n == '/') || (c == '-' && n == '-') {
|
|
for {
|
|
ch := s.next()
|
|
if ch == '\n' || ch == '\r' || ch == EOF {
|
|
break
|
|
}
|
|
}
|
|
continue
|
|
}
|
|
if !unicode.IsSpace(c) {
|
|
break
|
|
}
|
|
s.next()
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) GetToken() Token {
|
|
s.skipSpacesAndComments()
|
|
ch := s.peek()
|
|
if ch == EOF {
|
|
return Token{Type: NONTOKEN}
|
|
}
|
|
|
|
// is number?
|
|
if unicode.IsDigit(ch) {
|
|
return s.scanNumber()
|
|
}
|
|
|
|
// is id / keyword?
|
|
if unicode.IsLetter(ch) {
|
|
return s.scanIdentOrKeyword()
|
|
}
|
|
|
|
// is operator?
|
|
switch ch {
|
|
case '+':
|
|
s.next()
|
|
return Token{Type: PLUS, Lexeme: "+"}
|
|
case '-':
|
|
s.next()
|
|
return Token{Type: MINUS, Lexeme: "-"}
|
|
case '*':
|
|
s.next()
|
|
if s.peek() == '*' {
|
|
s.next()
|
|
return Token{Type: POWER, Lexeme: "**"}
|
|
}
|
|
return Token{Type: MUL, Lexeme: "*"}
|
|
case '/':
|
|
s.next()
|
|
return Token{Type: DIV, Lexeme: "/"}
|
|
case '(':
|
|
s.next()
|
|
return Token{Type: L_BRACKET, Lexeme: "("}
|
|
case ')':
|
|
s.next()
|
|
return Token{Type: R_BRACKET, Lexeme: ")"}
|
|
case ',':
|
|
s.next()
|
|
return Token{Type: COMMA, Lexeme: ","}
|
|
case ';':
|
|
s.next()
|
|
return Token{Type: SEMICO, Lexeme: ";"}
|
|
default:
|
|
s.next()
|
|
return Token{Type: ERRTOKEN, Lexeme: string(ch)}
|
|
}
|
|
}
|
|
|
|
// digit+("."digit*)?
|
|
func (s *Scanner) scanNumber() Token {
|
|
haveDot := false
|
|
mask := 0.1
|
|
token := Token{Type: CONST_ID, Lexeme: "", Value: 0.0}
|
|
for {
|
|
ch := s.peek()
|
|
if unicode.IsDigit(ch) {
|
|
s.next()
|
|
if haveDot {
|
|
token.Value += float64(ch-'0') * mask
|
|
mask *= 0.1
|
|
} else {
|
|
token.Value *= 10
|
|
token.Value += float64(ch - '0')
|
|
}
|
|
token.Lexeme += string(ch)
|
|
continue
|
|
}
|
|
|
|
if ch == '.' {
|
|
s.next()
|
|
if haveDot {
|
|
return Token{Type: ERRTOKEN, Lexeme: token.Lexeme + string(ch)}
|
|
}
|
|
haveDot = true
|
|
token.Lexeme += string(ch)
|
|
continue
|
|
}
|
|
|
|
if unicode.IsLetter(ch) {
|
|
s.next()
|
|
return Token{Type: ERRTOKEN, Lexeme: token.Lexeme + string(ch)}
|
|
}
|
|
return token
|
|
}
|
|
}
|
|
|
|
// letter(letter|digit)*
|
|
func (s *Scanner) scanIdentOrKeyword() Token {
|
|
var lexeme string
|
|
for {
|
|
ch := s.peek()
|
|
if ch == EOF || unicode.IsSpace(ch) {
|
|
break
|
|
}
|
|
if unicode.IsDigit(ch) || unicode.IsLetter(ch) {
|
|
lexeme += string(ch)
|
|
s.next()
|
|
continue
|
|
}
|
|
break
|
|
}
|
|
token, ok := keywords[lexeme]
|
|
if ok {
|
|
return token
|
|
} else {
|
|
return Token{Type: ERRTOKEN, Lexeme: lexeme}
|
|
}
|
|
}
|