diff options
author | Aaditya Dhruv <[email protected]> | 2025-07-07 18:16:35 -0500 |
---|---|---|
committer | Aaditya Dhruv <[email protected]> | 2025-07-07 18:16:35 -0500 |
commit | 63dbc54db1c33e341f2e843a6c71807e51a4dd7b (patch) | |
tree | 880ddfea2639360d07430d10403747fa1c9b5823 | |
parent | 348e190427c28d3985ad4c8453eeb2fdee5d4d22 (diff) |
Add Lexer
Add a really basic lexer, which only lexes HASH (headings), TEXT
(paragraphs), and newlines. These generated tokens will be fed into the
parser
-rw-r--r-- | src/parser/lexer.rs | 112 | ||||
-rw-r--r-- | src/parser/mod.rs | 1 | ||||
-rw-r--r-- | src/parser/parser.rs | 3 |
3 files changed, 116 insertions, 0 deletions
use std::{iter::Peekable, str::Chars};

/// Kinds of tokens the lexer can produce.
#[derive(Debug, PartialEq)]
enum TokenType {
    /// A run of plain text: everything up to (not including) the next newline.
    Text,
    /// A single `\n` character.
    Newline,
    /// A single `#` heading marker.
    Hash,
    /// Reserved for code spans/fences; `scan` does not emit this yet.
    Backtick,
}

/// One lexed token together with the position where it started.
#[derive(Debug, PartialEq)]
struct Token {
    token_type: TokenType,
    value: String,
    /// (line, column), both 0-based. `usize` so long documents cannot
    /// overflow (the previous `u8` wrapped after 255 lines/columns).
    location: (usize, usize),
}

/// Scanner that turns a source string into a flat list of tokens.
#[derive(Debug)]
struct Scanner {
    tokens: Vec<Token>,
    input_string: String,
    position: (usize, usize), // (line, column), 0-based
}

impl Scanner {
    /// Create a scanner over `source`. No tokens exist until `scan` is called.
    fn new(source: &str) -> Self {
        // Note: the owned string is moved straight into the struct;
        // the old extra `.clone()` here was a pure waste of an allocation.
        Scanner {
            input_string: source.to_string(),
            tokens: Vec::new(),
            position: (0, 0),
        }
    }

    /// Tokenize the entire input, appending tokens to `self.tokens`.
    fn scan(&mut self) {
        // Clone the input so the char iterator does not borrow `self`
        // while `scan_token` needs `&mut self`.
        // NOTE(review): an index-based scanner would avoid this copy —
        // worth revisiting if inputs get large.
        let string = self.input_string.clone();
        let mut chars = string.chars().peekable();
        while let Some(c) = chars.next() {
            self.scan_token(c, &mut chars);
        }
    }

    /// Lex one token that starts at character `c`, consuming any
    /// continuation characters from `iter`, and record it with the
    /// position at which it began.
    fn scan_token(&mut self, c: char, iter: &mut Peekable<Chars>) {
        match c {
            '#' => {
                self.tokens.push(Token {
                    token_type: TokenType::Hash,
                    value: c.to_string(),
                    location: self.position,
                });
                self.position.1 += 1;
            }

            '\n' => {
                self.tokens.push(Token {
                    token_type: TokenType::Newline,
                    value: c.to_string(),
                    location: self.position,
                });
                // A newline moves to the next line, column 0.
                self.position.0 += 1;
                self.position.1 = 0;
            }

            // Anything else starts a TEXT token that greedily consumes
            // the rest of the line (including any later `#` characters,
            // matching the original behavior).
            _ => {
                let mut text = String::from(c);
                while let Some(&peek) = iter.peek() {
                    if peek == '\n' {
                        break;
                    }
                    text.push(peek);
                    iter.next();
                }
                // Advance the column by the number of chars consumed
                // (the initial `c` plus every peeked-and-taken char).
                let consumed = text.chars().count();
                self.tokens.push(Token {
                    token_type: TokenType::Text,
                    value: text,
                    location: self.position,
                });
                self.position.1 += consumed;
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn init() {
        let source =
            "## This is a heading\nI am a bunch of paragraph text. I can get pretty long.";
        let mut scanner = Scanner::new(source);
        scanner.scan();
        let tokens = vec![
            Token { token_type: TokenType::Hash, value: "#".to_string(), location: (0, 0) },
            Token { token_type: TokenType::Hash, value: "#".to_string(), location: (0, 1) },
            Token {
                token_type: TokenType::Text,
                value: " This is a heading".to_string(),
                location: (0, 2),
            },
            Token { token_type: TokenType::Newline, value: "\n".to_string(), location: (0, 20) },
            Token {
                token_type: TokenType::Text,
                value: "I am a bunch of paragraph text. I can get pretty long.".to_string(),
                location: (1, 0),
            },
        ];
        assert_eq!(tokens.len(), scanner.tokens.len());
        for (index, token) in scanner.tokens.iter().enumerate() {
            assert_eq!(tokens.get(index).unwrap(), token);
        }
    }
}

// NOTE(review): the same commit also touches two sibling files:
//   src/parser/mod.rs    — adds `pub mod lexer;` after `pub mod parser;`
//   src/parser/parser.rs — adds an empty `pub struct Exp {}` stub