aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAaditya Dhruv <[email protected]>2025-07-07 18:16:35 -0500
committerAaditya Dhruv <[email protected]>2025-07-07 18:16:35 -0500
commit63dbc54db1c33e341f2e843a6c71807e51a4dd7b (patch)
tree880ddfea2639360d07430d10403747fa1c9b5823
parent348e190427c28d3985ad4c8453eeb2fdee5d4d22 (diff)
Add Lexer
Add a really basic lexer, which only lexes HASH (headings), TEXT (paragraphs), and newlines. These generated tokens will be fed into the parser
-rw-r--r--src/parser/lexer.rs112
-rw-r--r--src/parser/mod.rs1
-rw-r--r--src/parser/parser.rs3
3 files changed, 116 insertions, 0 deletions
diff --git a/src/parser/lexer.rs b/src/parser/lexer.rs
new file mode 100644
index 0000000..78bfa3b
--- /dev/null
+++ b/src/parser/lexer.rs
@@ -0,0 +1,112 @@
+use std::{str::Chars, iter::Peekable};
+
/// The kinds of lexical tokens the `Scanner` can produce.
#[derive(Debug, PartialEq)]
enum TokenType {
    // A run of plain characters, ending before the next newline.
    TEXT,
    // A single '\n' character.
    NEWLINE,
    // A single '#' character (heading marker).
    HASH,
    // Declared for future use; `scan_token` does not emit this yet.
    BACKTICK,
}
+
/// One lexed token together with the source position it starts at.
#[derive(Debug, PartialEq)]
struct Token {
    token_type: TokenType, // which kind of token this is
    value: String,         // the exact text this token covers
    location: (u8, u8),    // (line, column) of the token's first char, both 0-based
}
+
+
/// Hand-written lexer state: walks the input string once and collects tokens.
#[derive(Debug)]
struct Scanner {
    tokens: Vec<Token>,   // tokens produced so far, in source order
    input_string: String, // full source text being scanned
    position: (u8, u8),   // Line, index — where the next token will start
}
+
+impl Scanner {
+ fn new(source: &str) -> Self {
+ let string = source.to_string();
+ Scanner { input_string: string.clone(), tokens: Vec::new(), position: (0, 0) }
+ }
+
+
+ fn scan(&mut self) {
+ let string = self.input_string.clone();
+ let mut chars = string.chars().peekable();
+
+ loop {
+ let character = chars.next();
+ match character {
+ Some(c) => {
+ self.scan_token(c, &mut chars);
+ },
+ None => { break }
+ }
+ }
+ }
+
+ fn scan_token(&mut self, token: char, iter: &mut Peekable<Chars>) {
+ match token {
+
+ '#' => {
+ let token = Token { token_type: TokenType::HASH, value: token.to_string(), location: self.position };
+ self.position.1 += 1;
+ self.tokens.push(token);
+ },
+
+ '\n' => {
+ let token = Token { token_type: TokenType::NEWLINE, value: token.to_string(), location: self.position };
+ self.position.0 += 1;
+ self.position.1 = 0;
+ self.tokens.push(token);
+ },
+
+ _ => {
+ let mut text = String::from(token);
+ let mut position = 1;
+ loop {
+ let peeked = iter.peek();
+ match peeked {
+ Some(peek) => {
+ if *peek == '\n' {
+ break;
+ }
+ text.push(peek.to_owned());
+ position += 1;
+ iter.next();
+ },
+ None => {break;}
+ };
+ };
+ let token = Token { token_type: TokenType::TEXT, value: text, location: self.position };
+ self.position.1 += position;
+ self.tokens.push(token);
+ },
+ }
+ }
+}
+
+
+
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end scan of a heading line followed by a paragraph:
    /// expect HASH, HASH, TEXT, NEWLINE, TEXT with correct locations.
    #[test]
    fn init() {
        let source = "## This is a heading\nI am a bunch of paragraph text. I can get pretty long.";
        let mut scanner = Scanner::new(source);
        scanner.scan();

        let expected = vec![
            Token { token_type: TokenType::HASH, value: "#".to_string(), location: (0, 0) },
            Token { token_type: TokenType::HASH, value: "#".to_string(), location: (0, 1) },
            Token { token_type: TokenType::TEXT, value: " This is a heading".to_string(), location: (0, 2) },
            Token { token_type: TokenType::NEWLINE, value: "\n".to_string(), location: (0, 20) },
            Token { token_type: TokenType::TEXT, value: "I am a bunch of paragraph text. I can get pretty long.".to_string(), location: (1, 0) },
        ];

        assert_eq!(expected.len(), scanner.tokens.len());
        for (want, got) in expected.iter().zip(scanner.tokens.iter()) {
            assert_eq!(want, got);
        }
    }
}
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 67c567f..da03ab1 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1 +1,2 @@
pub mod parser;
+pub mod lexer;
diff --git a/src/parser/parser.rs b/src/parser/parser.rs
index 017ec83..53aa751 100644
--- a/src/parser/parser.rs
+++ b/src/parser/parser.rs
@@ -8,3 +8,6 @@ pub struct Heading {
size: u8,
value: Text,
}
+
// Placeholder for a parser expression node; no fields defined yet.
pub struct Exp {
}