// Package scanner implements the lexical analyzer: it turns source
// text into a stream of tokens. The language is case-insensitive, so
// all input is upper-cased before scanning.
package scanner

import (
	"os"
	"strings"
	"unicode"
)

// EOF marks the end of the input rune stream.
const EOF rune = 0

// Scanner walks an upper-cased rune slice and produces tokens.
type Scanner struct {
	src []rune // upper-cased source runes
	pos int    // index of the next rune to read
}

// NewScannerFromFile reads filename and returns a Scanner over its
// contents, or an error if the file cannot be read.
func NewScannerFromFile(filename string) (*Scanner, error) {
	data, err := os.ReadFile(filename)
	if err != nil {
		return nil, err
	}
	return NewScannerFromString(string(data)), nil
}

// NewScannerFromString returns a Scanner over data. The text is
// upper-cased because the language is case-insensitive.
func NewScannerFromString(data string) *Scanner {
	return &Scanner{src: []rune(strings.ToUpper(data))}
}

// peek returns the current rune without advancing; EOF at end of input.
func (s *Scanner) peek() rune {
	if s.pos >= len(s.src) {
		return EOF
	}
	return s.src[s.pos]
}

// peekNext returns the rune after the current one without advancing;
// EOF when there is no such rune.
func (s *Scanner) peekNext() rune {
	if s.pos+1 >= len(s.src) {
		return EOF
	}
	return s.src[s.pos+1]
}

// next returns the current rune and advances past it; EOF at end of
// input (without advancing further).
func (s *Scanner) next() rune {
	if s.pos >= len(s.src) {
		return EOF
	}
	ch := s.src[s.pos]
	s.pos++
	return ch
}

// skipSpacesAndComments advances past whitespace and line comments.
// Both "//" and "--" start a comment that runs to the end of the line.
func (s *Scanner) skipSpacesAndComments() {
	for {
		c := s.peek()
		if c == EOF {
			return
		}
		n := s.peekNext()
		if (c == '/' && n == '/') || (c == '-' && n == '-') {
			// Consume the rest of the line (or the rest of the input).
			for {
				ch := s.next()
				if ch == '\n' || ch == '\r' || ch == EOF {
					break
				}
			}
			continue
		}
		if !unicode.IsSpace(c) {
			return
		}
		s.next()
	}
}

// GetToken scans and returns the next token. It returns a NONTOKEN
// token at end of input and an ERRTOKEN token for invalid input.
func (s *Scanner) GetToken() Token {
	s.skipSpacesAndComments()

	ch := s.peek()
	switch {
	case ch == EOF:
		return Token{Type: NONTOKEN}
	case unicode.IsDigit(ch):
		return s.scanNumber()
	case unicode.IsLetter(ch):
		return s.scanIdentOrKeyword()
	}

	// Operator or unrecognized character: always consumes at least
	// one rune, so the scanner makes progress even on bad input.
	s.next()
	switch ch {
	case '+':
		return Token{Type: PLUS, Lexeme: "+"}
	case '-':
		return Token{Type: MINUS, Lexeme: "-"}
	case '*':
		// "**" is the power operator; a lone '*' is multiplication.
		if s.peek() == '*' {
			s.next()
			return Token{Type: POWER, Lexeme: "**"}
		}
		return Token{Type: MUL, Lexeme: "*"}
	case '/':
		return Token{Type: DIV, Lexeme: "/"}
	case '(':
		return Token{Type: L_BRACKET, Lexeme: "("}
	case ')':
		return Token{Type: R_BRACKET, Lexeme: ")"}
	case ',':
		return Token{Type: COMMA, Lexeme: ","}
	case ';':
		return Token{Type: SEMICO, Lexeme: ";"}
	default:
		return Token{Type: ERRTOKEN, Lexeme: string(ch)}
	}
}

// scanNumber scans digit+('.'digit*)? and returns a CONST_ID token
// carrying the numeric value. A second decimal point or a letter
// immediately following the digits yields ERRTOKEN.
func (s *Scanner) scanNumber() Token {
	token := Token{Type: CONST_ID}
	haveDot := false
	scale := 0.1 // weight of the next fractional digit
	for {
		ch := s.peek()
		switch {
		case unicode.IsDigit(ch):
			s.next()
			if haveDot {
				token.Value += float64(ch-'0') * scale
				scale *= 0.1
			} else {
				token.Value = token.Value*10 + float64(ch-'0')
			}
			token.Lexeme += string(ch)
		case ch == '.':
			s.next()
			if haveDot {
				// Second decimal point: malformed number.
				return Token{Type: ERRTOKEN, Lexeme: token.Lexeme + string(ch)}
			}
			haveDot = true
			token.Lexeme += string(ch)
		case unicode.IsLetter(ch):
			// A letter glued onto a number is not a valid token.
			s.next()
			return Token{Type: ERRTOKEN, Lexeme: token.Lexeme + string(ch)}
		default:
			return token
		}
	}
}

// scanIdentOrKeyword scans letter(letter|digit)* and looks the lexeme
// up in the keyword table. Identifiers not present in the table are
// returned as ERRTOKEN.
func (s *Scanner) scanIdentOrKeyword() Token {
	var b strings.Builder
	for {
		ch := s.peek()
		// EOF (rune 0) is neither a letter nor a digit, so it also
		// terminates the loop here.
		if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) {
			break
		}
		b.WriteRune(ch)
		s.next()
	}
	lexeme := b.String()
	if token, ok := keywords[lexeme]; ok {
		return token
	}
	return Token{Type: ERRTOKEN, Lexeme: lexeme}
}