diff options
author | Aaditya Dhruv <[email protected]> | 2025-07-07 18:16:35 -0500 |
---|---|---|
committer | Aaditya Dhruv <[email protected]> | 2025-07-07 18:16:35 -0500 |
commit | 63dbc54db1c33e341f2e843a6c71807e51a4dd7b (patch) | |
tree | 880ddfea2639360d07430d10403747fa1c9b5823 | |
parent | 348e190427c28d3985ad4c8453eeb2fdee5d4d22 (diff) |
Add Lexer
Add a really basic lexer, which only lexes HASH (headings), TEXT
(paragraphs), and newlines. These generated tokens will be fed into the
parser
-rw-r--r-- | src/parser/lexer.rs | 112 | ||||
-rw-r--r-- | src/parser/mod.rs | 1 | ||||
-rw-r--r-- | src/parser/parser.rs | 3 |
3 files changed, 116 insertions, 0 deletions
use std::{iter::Peekable, str::Chars};

/// Kinds of tokens the lexer can produce.
#[derive(Debug, PartialEq)]
enum TokenType {
    /// A run of plain text: everything up to (not including) the next newline.
    Text,
    /// A single `\n` character.
    Newline,
    /// A single `#` heading marker.
    Hash,
    /// Reserved for code spans/fences; `scan` does not emit this yet.
    Backtick,
}

/// One lexed token together with the position where it started.
#[derive(Debug, PartialEq)]
struct Token {
    token_type: TokenType,
    value: String,
    /// (line, column), both 0-based. `usize` so long documents cannot
    /// overflow (the previous `u8` wrapped after 255 lines/columns).
    location: (usize, usize),
}

/// Scanner that turns a source string into a flat list of tokens.
#[derive(Debug)]
struct Scanner {
    tokens: Vec<Token>,
    input_string: String,
    position: (usize, usize), // (line, column), 0-based
}

impl Scanner {
    /// Create a scanner over `source`. No tokens exist until `scan` is called.
    fn new(source: &str) -> Self {
        // Note: the owned string is moved straight into the struct;
        // the old extra `.clone()` here was a pure waste of an allocation.
        Scanner {
            input_string: source.to_string(),
            tokens: Vec::new(),
            position: (0, 0),
        }
    }

    /// Tokenize the entire input, appending tokens to `self.tokens`.
    fn scan(&mut self) {
        // Clone the input so the char iterator does not borrow `self`
        // while `scan_token` needs `&mut self`.
        // NOTE(review): an index-based scanner would avoid this copy —
        // worth revisiting if inputs get large.
        let string = self.input_string.clone();
        let mut chars = string.chars().peekable();
        while let Some(c) = chars.next() {
            self.scan_token(c, &mut chars);
        }
    }

    /// Lex one token that starts at character `c`, consuming any
    /// continuation characters from `iter`, and record it with the
    /// position at which it began.
    fn scan_token(&mut self, c: char, iter: &mut Peekable<Chars>) {
        match c {
            '#' => {
                self.tokens.push(Token {
                    token_type: TokenType::Hash,
                    value: c.to_string(),
                    location: self.position,
                });
                self.position.1 += 1;
            }

            '\n' => {
                self.tokens.push(Token {
                    token_type: TokenType::Newline,
                    value: c.to_string(),
                    location: self.position,
                });
                // A newline moves to the next line, column 0.
                self.position.0 += 1;
                self.position.1 = 0;
            }

            // Anything else starts a TEXT token that greedily consumes
            // the rest of the line (including any later `#` characters,
            // matching the original behavior).
            _ => {
                let mut text = String::from(c);
                while let Some(&peek) = iter.peek() {
                    if peek == '\n' {
                        break;
                    }
                    text.push(peek);
                    iter.next();
                }
                // Advance the column by the number of chars consumed
                // (the initial `c` plus every peeked-and-taken char).
                let consumed = text.chars().count();
                self.tokens.push(Token {
                    token_type: TokenType::Text,
                    value: text,
                    location: self.position,
                });
                self.position.1 += consumed;
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn init() {
        let source =
            "## This is a heading\nI am a bunch of paragraph text. I can get pretty long.";
        let mut scanner = Scanner::new(source);
        scanner.scan();
        let tokens = vec![
            Token { token_type: TokenType::Hash, value: "#".to_string(), location: (0, 0) },
            Token { token_type: TokenType::Hash, value: "#".to_string(), location: (0, 1) },
            Token {
                token_type: TokenType::Text,
                value: " This is a heading".to_string(),
                location: (0, 2),
            },
            Token { token_type: TokenType::Newline, value: "\n".to_string(), location: (0, 20) },
            Token {
                token_type: TokenType::Text,
                value: "I am a bunch of paragraph text. I can get pretty long.".to_string(),
                location: (1, 0),
            },
        ];
        assert_eq!(tokens.len(), scanner.tokens.len());
        for (index, token) in scanner.tokens.iter().enumerate() {
            assert_eq!(tokens.get(index).unwrap(), token);
        }
    }
}

// NOTE(review): the same commit also touches two sibling files:
//   src/parser/mod.rs    — adds `pub mod lexer;` after `pub mod parser;`
//   src/parser/parser.rs — adds an empty `pub struct Exp {}` stub