// src/parser/lexer.rs

use std::{str::Chars, iter::Peekable};

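/// The kinds of tokens the Markdown lexer currently recognises.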
#[derive(Debug, PartialEq, Clone)]
pub enum TokenType {
    Text,
    Newline,
    Hash,
    Backtick,
}

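/// A single lexeme plus its (line, column) location in the source.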
#[derive(Debug, PartialEq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    pub value: String,
    location: (usize, usize), // (line, column); usize so long inputs cannot overflow
}


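/// Scans a Markdown source string into a flat list of tokens.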
#[derive(Debug)]
pub struct Lexer {
    pub tokens: Vec<Token>,
    input_string: String,
    position: (usize, usize), // (line, column)
}

impl Lexer {
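    /// Creates a lexer over `source`; nothing is tokenised until `scan` runs.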
    pub fn new(source: &str) -> Self {
        Lexer { input_string: source.to_string(), tokens: Vec::new(), position: (0, 0) }
    }


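    /// Consumes the whole input, appending tokens to `self.tokens`.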
    pub fn scan(&mut self) {
        // Work on a clone so the character iterator does not hold an
        // immutable borrow of `self` while `scan_token` takes `&mut self`.
        let string = self.input_string.clone();
        let mut chars = string.chars().peekable();

        while let Some(character) = chars.next() {
            self.scan_token(character, &mut chars);
        }
    }

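    /// Dispatches on `character`, pulling more characters from `iter`
    /// when a token spans a run of text.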
    fn scan_token(&mut self, character: char, iter: &mut Peekable<Chars>) {
        match character {
            // Each '#' is emitted on its own so the parser can count
            // consecutive hashes to determine the heading level.
            '#' => {
                let token = Token { token_type: TokenType::Hash, value: character.to_string(), location: self.position };
                self.position.1 += 1;
                self.tokens.push(token);
            },

            // Assumption: the otherwise-unused `Backtick` variant suggests
            // backticks were meant to be lexed as their own tokens.
            '`' => {
                let token = Token { token_type: TokenType::Backtick, value: character.to_string(), location: self.position };
                self.position.1 += 1;
                self.tokens.push(token);
            },

            '\n' => {
                let token = Token { token_type: TokenType::Newline, value: character.to_string(), location: self.position };
                self.position.0 += 1; // advance to the next line
                self.position.1 = 0;  // and reset the column
                self.tokens.push(token);
            },

            // Anything else starts a text run that extends until the next
            // character with its own token type.
            _ => {
                let mut text = String::from(character);
                let mut length = 1;
                while let Some(&peeked) = iter.peek() {
                    if peeked == '\n' || peeked == '`' {
                        break;
                    }
                    text.push(peeked);
                    length += 1;
                    iter.next();
                }
                let token = Token { token_type: TokenType::Text, value: text, location: self.position };
                self.position.1 += length;
                self.tokens.push(token);
            },
        }
    }
}



#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn scans_heading_and_text() {
        let source = "## This is a heading\nI am a bunch of paragraph text. I can get pretty long.";
        let mut scanner = Lexer::new(source);
        scanner.scan();
        let tokens = vec![
            Token { token_type: TokenType::Hash, value: "#".to_string(), location: (0, 0) },
            Token { token_type: TokenType::Hash, value: "#".to_string(), location: (0, 1) },
            Token { token_type: TokenType::Text, value: " This is a heading".to_string(), location: (0, 2) },
            Token { token_type: TokenType::Newline, value: "\n".to_string(), location: (0, 20) },
            Token { token_type: TokenType::Text, value: "I am a bunch of paragraph text. I can get pretty long.".to_string(), location: (1, 0) },
        ];
        assert_eq!(tokens, scanner.tokens);
    }
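
    // A small check for the backtick arm sketched in `scan_token`; the
    // expected tokens here are an assumption about intended behaviour,
    // not part of the original suite.
    #[test]
    fn backtick_token() {
        let mut scanner = Lexer::new("`code`");
        scanner.scan();
        let expected = vec![
            Token { token_type: TokenType::Backtick, value: "`".to_string(), location: (0, 0) },
            Token { token_type: TokenType::Text, value: "code".to_string(), location: (0, 1) },
            Token { token_type: TokenType::Backtick, value: "`".to_string(), location: (0, 5) },
        ];
        assert_eq!(expected, scanner.tokens);
    }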
}