about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAaditya Dhruv <[email protected]>2025-08-10 22:10:11 -0500
committerAaditya Dhruv <[email protected]>2025-08-10 22:10:11 -0500
commitd862b84ce868353e3482b380d0b68495de12eb6b (patch)
treed114c4b44460b9d1eaf689ea2ffed10c21943ecc
parent63dbc54db1c33e341f2e843a6c71807e51a4dd7b (diff)
Add Parser support
- Parser is able to take Lexer tokens and generate AST with Text (Paragraph) and Headings
- Scanner has been renamed to Lexer
- main.rs has been updated to use the Lexer and Parser
-rw-r--r--out.html7
-rw-r--r--src/main.rs19
-rw-r--r--src/parser/lexer.rs26
-rw-r--r--src/parser/parser.rs151
-rw-r--r--src/types/elements.rs13
5 files changed, 189 insertions, 27 deletions
diff --git a/out.html b/out.html
index 6c70bcf..eb22d77 100644
--- a/out.html
+++ b/out.html
@@ -1 +1,6 @@
-<html></html> \ No newline at end of file
+<html>
+<h2> Title</h2>
+<p>Text of a paragraph</p>
+<h5> Second heading</h5>
+<p>Some more text</p>
+</html> \ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
index 5d2610d..3ca140c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,11 +3,28 @@ mod parser;
use std::fs::File;
use std::io::prelude::*;
+use parser::lexer::Lexer;
+use parser::parser::{Parser, AST};
+
use crate::types::elements;
use crate::types::elements::Renderable;
fn main() {
- let doc = elements::HTML::new();
+ // let doc = elements::HTML::new();
+ // let doc = doc.render();
+ // let mut file = File::create("out.html").unwrap();
+ // file.write_all(doc.as_bytes()).unwrap();
+ let md = "## Title\nText of a paragraph\n\n##### Second heading\nSome more text";
+ let mut lexer = Lexer::new(md);
+ lexer.scan();
+ let mut parser = Parser::new(lexer);
+ parser.parse();
+ let root = parser.tree;
+ let mut doc = elements::HTML::new();
+ for child in root.children {
+ let ast = child.item;
+ doc.items.push(ast.convert_to_renderable());
+ }
let doc = doc.render();
let mut file = File::create("out.html").unwrap();
file.write_all(doc.as_bytes()).unwrap();
diff --git a/src/parser/lexer.rs b/src/parser/lexer.rs
index 78bfa3b..125470d 100644
--- a/src/parser/lexer.rs
+++ b/src/parser/lexer.rs
@@ -1,36 +1,36 @@
use std::{str::Chars, iter::Peekable};
-#[derive(Debug, PartialEq)]
-enum TokenType {
+#[derive(Debug, PartialEq, Clone)]
+pub enum TokenType {
TEXT,
NEWLINE,
HASH,
BACKTICK,
}
-#[derive(Debug, PartialEq)]
-struct Token {
- token_type: TokenType,
- value: String,
+#[derive(Debug, PartialEq, Clone)]
+pub struct Token {
+ pub token_type: TokenType,
+ pub value: String,
location: (u8, u8),
}
#[derive(Debug)]
-struct Scanner {
- tokens: Vec<Token>,
+pub struct Lexer {
+ pub tokens: Vec<Token>,
input_string: String,
position: (u8, u8), //Line, index
}
-impl Scanner {
- fn new(source: &str) -> Self {
+impl Lexer {
+ pub fn new(source: &str) -> Self {
let string = source.to_string();
- Scanner { input_string: string.clone(), tokens: Vec::new(), position: (0, 0) }
+ Lexer { input_string: string.clone(), tokens: Vec::new(), position: (0, 0) }
}
- fn scan(&mut self) {
+ pub fn scan(&mut self) {
let string = self.input_string.clone();
let mut chars = string.chars().peekable();
@@ -95,7 +95,7 @@ mod tests {
#[test]
fn init() {
let source = "## This is a heading\nI am a bunch of paragraph text. I can get pretty long.";
- let mut scanner = Scanner::new(source);
+ let mut scanner = Lexer::new(source);
scanner.scan();
let tokens = vec![
Token { token_type: TokenType::HASH, value: "#".to_string(), location: (0, 0) },
diff --git a/src/parser/parser.rs b/src/parser/parser.rs
index 53aa751..3ac6e82 100644
--- a/src/parser/parser.rs
+++ b/src/parser/parser.rs
@@ -1,13 +1,152 @@
-pub struct Parser {
+use std::iter::Peekable;
+
+use super::lexer::{TokenType, Token, Lexer};
+use crate::types::elements;
+
+struct Text {
+ token: Token,
+ text: String,
+}
+impl Into<elements::Paragraph> for &Text {
+ fn into(self) -> elements::Paragraph {
+ elements::Paragraph::new(self.text.clone())
+ }
+}
+
+struct Heading {
+ level: u8,
+ text: Text,
+}
+impl Into<elements::Heading> for &Heading {
+ fn into(self) -> elements::Heading {
+ elements::Heading::new(self.text.text.clone(), self.level)
+ }
}
-pub struct Text {
- value: String,
+struct Noop {}
+
+pub trait AST {
+ fn convert_to_renderable(&self) -> Box<dyn elements::Renderable>;
+}
+
+impl AST for Text {
+ fn convert_to_renderable(&self) -> Box<dyn elements::Renderable> {
+ let text: elements::Paragraph = self.into();
+ return Box::new(text);
+ }
+}
+impl AST for Heading {
+ fn convert_to_renderable(&self) -> Box<dyn elements::Renderable> {
+ let heading: elements::Heading = self.into();
+ return Box::new(heading);
+ }
}
-pub struct Heading {
- size: u8,
- value: Text,
+impl AST for Noop {
+ fn convert_to_renderable(&self) -> Box<dyn elements::Renderable> {
+ let blank = elements::Paragraph::new(String::new());
+ return Box::new(blank);
+ }
}
pub struct Exp {
+ pub item: Box<dyn AST>,
+}
+
+pub struct Node {
+ pub children: Vec<Exp>,
+}
+
+pub struct Parser {
+ lexer: Lexer,
+ tokens: Peekable<std::vec::IntoIter<Token>>,
+ pub tree: Node,
+}
+
+
+impl Parser {
+ pub fn new(lexer: Lexer) -> Self {
+ let root = Node { children: vec![] };
+ let input_lexer = lexer;
+ let input_tokens = input_lexer.tokens.clone().into_iter().peekable();
+ Parser { lexer: input_lexer, tokens: input_tokens, tree: root }
+ }
+
+ /* Parse a Text block
+ * TEXT
+ */
+ fn text(&mut self) -> Text {
+ match &self.tokens.next() {
+ Some(token) => {
+ if token.token_type == TokenType::TEXT {
+ return Text { token: token.clone(), text: token.value.clone() }
+ }
+ panic!("Invalid expression for text!");
+ },
+ None => { panic!("Invalid expression for text!") }
+ }
+ }
+
+ /* Parse a Heading
+ * HASH heading | HASH text
+ */
+ fn heading(&mut self) -> Heading {
+ let mut heading_size = 0;
+ while self.tokens.peek().unwrap().token_type == TokenType::HASH {
+ heading_size += 1;
+ self.tokens.next();
+ };
+ let heading_text = self.text();
+ return Heading { level: heading_size, text: heading_text }
+ }
+
+ /* exp
+ * text | heading
+ */
+ fn exp(&mut self) -> Exp {
+ let token = self.tokens.peek();
+ if token.is_none() {
+ return Exp {item: Box::new(Noop{}) };
+ }
+ let token = token.unwrap().clone();
+ if token.token_type == TokenType::TEXT {
+ let tree = self.text();
+ return Exp { item: Box::new(tree) };
+ }
+
+ if token.token_type == TokenType::HASH {
+ let tree = self.heading();
+ return Exp { item: Box::new(tree) };
+ }
+
+ panic!("Invalid Exp type!")
+
+ }
+
+ /* Node
+ * exp | exp NEWLINE node
+ */
+ fn node(&mut self) -> Node {
+ let mut node = Node { children: vec![] };
+ let exp = self.exp();
+ node.children.push(exp);
+
+ loop {
+ match self.tokens.peek() {
+ Some(token) => {
+ if token.token_type == TokenType::NEWLINE {
+ self.tokens.next();
+ continue
+ } else {
+ node.children.push(self.exp());
+ }
+ },
+ None => { break }
+ }
+ }
+ return node;
+ }
+
+ pub fn parse(&mut self) {
+ self.tree = self.node();
+ }
}
diff --git a/src/types/elements.rs b/src/types/elements.rs
index 0f766e4..3e81b9a 100644
--- a/src/types/elements.rs
+++ b/src/types/elements.rs
@@ -35,8 +35,8 @@ pub struct Paragraph {
}
impl Paragraph {
- pub fn new() -> Self {
- Paragraph { text: String::new() }
+ pub fn new(text: String) -> Self {
+ Paragraph { text: text }
}
}
impl Renderable for Paragraph {
@@ -80,8 +80,8 @@ pub struct Heading {
}
impl Heading {
- pub fn new() -> Self {
- Heading { text: String::new(), level: 1 }
+ pub fn new(text: String, level: u8) -> Self {
+ Heading { text: text, level: level }
}
}
impl Renderable for Heading {
@@ -99,7 +99,7 @@ impl Renderable for Heading {
// Heading
pub struct HTML {
- items: Vec<Box<dyn Renderable>>,
+ pub items: Vec<Box<dyn Renderable>>,
}
impl HTML {
@@ -111,13 +111,14 @@ impl HTML {
impl Renderable for HTML {
fn render(&self) -> String {
let mut master = String::new();
- let start_tag = "<html>";
+ let start_tag = "<html>\n";
let end_tag = "</html>";
master.push_str(&start_tag);
for child in &self.items {
let text = child.render();
master.push_str(text.as_str());
+ master.push_str("\n");
}
master.push_str(&end_tag);
master