diff options
author | Aaditya Dhruv <[email protected]> | 2025-08-10 22:10:11 -0500 |
---|---|---|
committer | Aaditya Dhruv <[email protected]> | 2025-08-10 22:10:11 -0500 |
commit | d862b84ce868353e3482b380d0b68495de12eb6b (patch) | |
tree | d114c4b44460b9d1eaf689ea2ffed10c21943ecc | |
parent | 63dbc54db1c33e341f2e843a6c71807e51a4dd7b (diff) |
Add Parser support
- Parser is able to take Lexer tokens and generate AST with Text
(Paragraph) and Headings
- Scanner has been renamed to Lexer
- main.rs has been updated to use the Lexer and Parser
-rw-r--r-- | out.html | 7 | ||||
-rw-r--r-- | src/main.rs | 19 | ||||
-rw-r--r-- | src/parser/lexer.rs | 26 | ||||
-rw-r--r-- | src/parser/parser.rs | 151 | ||||
-rw-r--r-- | src/types/elements.rs | 13 |
5 files changed, 189 insertions, 27 deletions
@@ -1 +1,6 @@ -<html></html>
\ No newline at end of file +<html> +<h2> Title</h2> +<p>Text of a paragraph</p> +<h5> Second heading</h5> +<p>Some more text</p> +</html>
\ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 5d2610d..3ca140c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,11 +3,28 @@ mod parser; use std::fs::File; use std::io::prelude::*; +use parser::lexer::Lexer; +use parser::parser::{Parser, AST}; + use crate::types::elements; use crate::types::elements::Renderable; fn main() { - let doc = elements::HTML::new(); + // let doc = elements::HTML::new(); + // let doc = doc.render(); + // let mut file = File::create("out.html").unwrap(); + // file.write_all(doc.as_bytes()).unwrap(); + let md = "## Title\nText of a paragraph\n\n##### Second heading\nSome more text"; + let mut lexer = Lexer::new(md); + lexer.scan(); + let mut parser = Parser::new(lexer); + parser.parse(); + let root = parser.tree; + let mut doc = elements::HTML::new(); + for child in root.children { + let ast = child.item; + doc.items.push(ast.convert_to_renderable()); + } let doc = doc.render(); let mut file = File::create("out.html").unwrap(); file.write_all(doc.as_bytes()).unwrap(); diff --git a/src/parser/lexer.rs b/src/parser/lexer.rs index 78bfa3b..125470d 100644 --- a/src/parser/lexer.rs +++ b/src/parser/lexer.rs @@ -1,36 +1,36 @@ use std::{str::Chars, iter::Peekable}; -#[derive(Debug, PartialEq)] -enum TokenType { +#[derive(Debug, PartialEq, Clone)] +pub enum TokenType { TEXT, NEWLINE, HASH, BACKTICK, } -#[derive(Debug, PartialEq)] -struct Token { - token_type: TokenType, - value: String, +#[derive(Debug, PartialEq, Clone)] +pub struct Token { + pub token_type: TokenType, + pub value: String, location: (u8, u8), } #[derive(Debug)] -struct Scanner { - tokens: Vec<Token>, +pub struct Lexer { + pub tokens: Vec<Token>, input_string: String, position: (u8, u8), //Line, index } -impl Scanner { - fn new(source: &str) -> Self { +impl Lexer { + pub fn new(source: &str) -> Self { let string = source.to_string(); - Scanner { input_string: string.clone(), tokens: Vec::new(), position: (0, 0) } + Lexer { input_string: string.clone(), tokens: Vec::new(), position: (0, 0) } } - fn scan(&mut self) { + pub fn scan(&mut self) { let string = self.input_string.clone(); let mut chars = string.chars().peekable(); @@ -95,7 +95,7 @@ mod tests { #[test] fn init() { let source = "## This is a heading\nI am a bunch of paragraph text. I can get pretty long."; - let mut scanner = Scanner::new(source); + let mut scanner = Lexer::new(source); scanner.scan(); let tokens = vec![ Token { token_type: TokenType::HASH, value: "#".to_string(), location: (0, 0) }, diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 53aa751..3ac6e82 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -1,13 +1,152 @@ -pub struct Parser { +use std::iter::Peekable; + +use super::lexer::{TokenType, Token, Lexer}; +use crate::types::elements; + +struct Text { + token: Token, + text: String, +} +impl Into<elements::Paragraph> for &Text { + fn into(self) -> elements::Paragraph { + elements::Paragraph::new(self.text.clone()) + } +} + +struct Heading { + level: u8, + text: Text, +} +impl Into<elements::Heading> for &Heading { + fn into(self) -> elements::Heading { + elements::Heading::new(self.text.text.clone(), self.level) + } } -pub struct Text { - value: String, +struct Noop {} + +pub trait AST { + fn convert_to_renderable(&self) -> Box<dyn elements::Renderable>; +} + +impl AST for Text { + fn convert_to_renderable(&self) -> Box<dyn elements::Renderable> { + let text: elements::Paragraph = self.into(); + return Box::new(text); + } +} +impl AST for Heading { + fn convert_to_renderable(&self) -> Box<dyn elements::Renderable> { + let heading: elements::Heading = self.into(); + return Box::new(heading); + } } -pub struct Heading { - size: u8, - value: Text, +impl AST for Noop { + fn convert_to_renderable(&self) -> Box<dyn elements::Renderable> { + let blank = elements::Paragraph::new(String::new()); + return Box::new(blank); + } } pub struct Exp { + pub item: Box<dyn AST>, +} + +pub struct Node { + pub children: Vec<Exp>, +} + +pub struct Parser { + lexer: Lexer, + tokens: Peekable<std::vec::IntoIter<Token>>, + pub tree: Node, +} + + +impl Parser { + pub fn new(lexer: Lexer) -> Self { + let root = Node { children: vec![] }; + let input_lexer = lexer; + let input_tokens = input_lexer.tokens.clone().into_iter().peekable(); + Parser { lexer: input_lexer, tokens: input_tokens, tree: root } + } + + /* Parse a Text block + * TEXT + */ + fn text(&mut self) -> Text { + match &self.tokens.next() { + Some(token) => { + if token.token_type == TokenType::TEXT { + return Text { token: token.clone(), text: token.value.clone() } + } + panic!("Invalid expression for text!"); + }, + None => { panic!("Invalid expression for text!") } + } + } + + /* Parse a Heading + * HASH heading | HASH text + */ + fn heading(&mut self) -> Heading { + let mut heading_size = 0; + while self.tokens.peek().unwrap().token_type == TokenType::HASH { + heading_size += 1; + self.tokens.next(); + }; + let heading_text = self.text(); + return Heading { level: heading_size, text: heading_text } + } + + /* exp + * text | heading + */ + fn exp(&mut self) -> Exp { + let token = self.tokens.peek(); + if token.is_none() { + return Exp {item: Box::new(Noop{}) }; + } + let token = token.unwrap().clone(); + if token.token_type == TokenType::TEXT { + let tree = self.text(); + return Exp { item: Box::new(tree) }; + } + + if token.token_type == TokenType::HASH { + let tree = self.heading(); + return Exp { item: Box::new(tree) }; + } + + panic!("Invalid Exp type!") + + } + + /* Node + * exp | exp NEWLINE node + */ + fn node(&mut self) -> Node { + let mut node = Node { children: vec![] }; + let exp = self.exp(); + node.children.push(exp); + + loop { + match self.tokens.peek() { + Some(token) => { + if token.token_type == TokenType::NEWLINE { + self.tokens.next(); + continue + } else { + node.children.push(self.exp()); + } + }, + None => { break } + } + } + return node; + } + + pub fn parse(&mut self) { + self.tree = self.node(); + } } diff --git a/src/types/elements.rs b/src/types/elements.rs index 0f766e4..3e81b9a 100644 --- a/src/types/elements.rs +++ b/src/types/elements.rs @@ -35,8 +35,8 @@ pub struct Paragraph { } impl Paragraph { - pub fn new() -> Self { - Paragraph { text: String::new() } + pub fn new(text: String) -> Self { + Paragraph { text: text } } } impl Renderable for Paragraph { @@ -80,8 +80,8 @@ pub struct Heading { } impl Heading { - pub fn new() -> Self { - Heading { text: String::new(), level: 1 } + pub fn new(text: String, level: u8) -> Self { + Heading { text: text, level: level } } } impl Renderable for Heading { @@ -99,7 +99,7 @@ impl Renderable for Heading { // Heading pub struct HTML { - items: Vec<Box<dyn Renderable>>, + pub items: Vec<Box<dyn Renderable>>, } impl HTML { @@ -111,13 +111,14 @@ impl HTML { impl Renderable for HTML { fn render(&self) -> String { let mut master = String::new(); - let start_tag = "<html>"; + let start_tag = "<html>\n"; let end_tag = "</html>"; master.push_str(&start_tag); for child in &self.items { let text = child.render(); master.push_str(text.as_str()); + master.push_str("\n"); } master.push_str(&end_tag); master |