From 83cc98907e67db3dfbb7240bb150c7a0129835b4 Mon Sep 17 00:00:00 2001
From: benoitlx
Date: Tue, 28 Jan 2025 20:45:34 +0100
Subject: [PATCH 1/5] wip: error-handling for tokenizer

---
 Cargo.toml     |  3 ++-
 src/main.rs    | 53 +++++++++++++++++++++++++++++++++++++++++---------
 tests/test.asm | 10 ++++++++++
 3 files changed, 56 insertions(+), 10 deletions(-)
 create mode 100644 tests/test.asm
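
Notes: the TokenError/TokenizerError pair below is the usual thiserror + miette
combination: thiserror derives std::error::Error, and miette's Diagnostic derive
attaches the offending source text plus a labeled span so the "fancy" reporter
can underline it. A minimal, self-contained sketch of the same pattern (the
error name, file name, and span here are illustrative, not part of this patch):

use miette::{Diagnostic, NamedSource, Result, SourceSpan};
use thiserror::Error;

#[derive(Error, Debug, Diagnostic)]
#[error("Unrecognized token")]
#[diagnostic(code(example::bad_token), help("check the token list"))]
struct ExampleError {
    // the full input, so the reporter can print a snippet around the error
    #[source_code]
    src: NamedSource<String>,
    // (offset, length) of the offending slice
    #[label("problem here")]
    span: SourceSpan,
}

fn main() -> Result<()> {
    let source = "A <-- 1".to_string();
    // `?` converts ExampleError into a miette::Report; the fancy handler
    // then renders the snippet with the labeled span underlined.
    Err(ExampleError {
        src: NamedSource::new("example.asm", source),
        span: (2, 3).into(),
    })?;
    Ok(())
}
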
diff --git a/Cargo.toml b/Cargo.toml
index 470e8ad..4d44534 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,7 +5,8 @@ edition = "2021"
 
 [dependencies]
 logos = "0.15.0"
-miette = "7.4.0"
+miette = { version = "7.4.0", features = ["fancy"] }
+thiserror = "2.0.11"
 
 [dev-dependencies]
 rusty-hook = "^0.11.2"
diff --git a/src/main.rs b/src/main.rs
index c06fec6..c574dae 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,29 +1,64 @@
 mod constants;
 mod tokenizer;
 
-use tokenizer::Token;
 use logos::Logos;
+use miette::{Diagnostic, NamedSource, SourceSpan};
+use std::env;
 use std::fs::File;
 use std::io::prelude::*;
-use std::env;
+use thiserror::Error;
+use tokenizer::Token;
+
+#[derive(Error, Debug, Diagnostic)]
+#[error("Unrecognized token")]
+#[diagnostic(code(oops), url("https://rezoleo.fr"), help("Try with A *A V *V or C for a register"))]
+pub struct TokenError {
+    #[source_code]
+    src: NamedSource<String>,
+
+    #[label("problem here")]
+    bad_bit: SourceSpan,
+}
+
+#[derive(Error, Diagnostic, Debug)]
+pub enum TokenizerError {
+    #[error(transparent)]
+    #[diagnostic(code(tokenizer::io_error))]
+    IoError(#[from] std::io::Error),
 
-fn main() -> std::io::Result<()> {
-    let args: Vec<String> = env::args().collect();
+    #[error(transparent)]
+    #[diagnostic(transparent)]
+    TokenError(#[from] TokenError),
+}
+
+use miette::Result;
+fn tokenizer_app(args: std::env::Args) -> Result<(), TokenizerError> {
+    let args: Vec<String> = args.collect();
 
-    let mut file = File::open(&args[1])?;
+    let filename: &str = &args[1];
+
+    let mut file = File::open(filename)?;
     let mut contents = String::new();
 
     file.read_to_string(&mut contents)?;
+    let mut lex = Token::lexer(contents.as_str());
 
-    let lex = Token::lexer(contents.as_str());
-
-    for result in lex {
+    while let Some(result) = lex.next() {
         match result {
             Ok(token) => println!("{:#?}", token),
-            Err(_) => panic!("Err occurred"),
+            Err(_) => Err(TokenError {
+                src: NamedSource::new(filename, contents.clone()),
+                bad_bit: lex.span().into()
+            })?
         }
     }
 
     Ok(())
 }
+
+fn main() -> Result<()> {
+    let _ = tokenizer_app(env::args())?;
+
+    Ok(())
+}
diff --git a/tests/test.asm b/tests/test.asm
new file mode 100644
index 0000000..078c405
--- /dev/null
+++ b/tests/test.asm
@@ -0,0 +1,10 @@
+A <-- 1 + C
+A <- 256
+*A <- C
+A >= 3
+A
+D
+
+thisis_a_label:
+
+0x333
\ No newline at end of file
From 4fe1fc5595c42abbff26395e19669a24b98354ea Mon Sep 17 00:00:00 2001
From: benoitlx
Date: Tue, 28 Jan 2025 21:56:23 +0100
Subject: [PATCH 2/5] refactor: error definition separated from main

---
 src/error_handling.rs | 28 ++++++++++++++++++++++++++++
 src/main.rs           | 33 ++++++---------------------------
 2 files changed, 34 insertions(+), 27 deletions(-)
 create mode 100644 src/error_handling.rs
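
Notes: the extracted module keeps a two-layer shape: leaf errors own the source
text and the span, while the top-level enum only forwards them. A minimal sketch
of how #[error(transparent)] / #[diagnostic(transparent)] forwarding behaves
(the names are illustrative, not from this patch):

use miette::Diagnostic;
use thiserror::Error;

#[derive(Error, Debug, Diagnostic)]
#[error("leaf failure")]
struct LeafError;

#[derive(Error, Debug, Diagnostic)]
enum TopError {
    // transparent: Display and Diagnostic output both come from the wrapped error
    #[error(transparent)]
    #[diagnostic(transparent)]
    Leaf(#[from] LeafError),

    // std::io::Error has no Diagnostic impl, so only Display is forwarded here
    #[error(transparent)]
    Io(#[from] std::io::Error),
}

fn main() {
    let top: TopError = LeafError.into(); // conversion generated by #[from]
    assert_eq!(top.to_string(), "leaf failure");
}
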
diff --git a/src/error_handling.rs b/src/error_handling.rs
new file mode 100644
index 0000000..042ba93
--- /dev/null
+++ b/src/error_handling.rs
@@ -0,0 +1,28 @@
+use miette::{Diagnostic, NamedSource, SourceSpan};
+use thiserror::Error;
+
+#[derive(Error, Debug, Diagnostic)]
+#[error("Unrecognized token")]
+#[diagnostic(
+    code(tokenizer::no_matching_pattern),
+    url("https://my-incredible-doc.fr"),
+    help("TODO provide the closest pattern")
+)]
+pub struct UnrecognizedToken {
+    #[source_code]
+    pub src: NamedSource<String>,
+
+    #[label("This doesn't match any Token pattern")]
+    pub src_span: SourceSpan,
+}
+
+#[derive(Error, Diagnostic, Debug)]
+pub enum TokenizerError {
+    #[error(transparent)]
+    #[diagnostic(code(tokenizer::io_error), help("try this filename:"))]
+    IoError(#[from] std::io::Error),
+
+    #[error(transparent)]
+    #[diagnostic(transparent)]
+    TokenError(#[from] UnrecognizedToken),
+}
diff --git a/src/main.rs b/src/main.rs
index c574dae..00ba6b3 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,36 +1,15 @@
 mod constants;
+mod error_handling;
 mod tokenizer;
 
+use error_handling::{TokenizerError, UnrecognizedToken};
 use logos::Logos;
-use miette::{Diagnostic, NamedSource, SourceSpan};
+use miette::NamedSource;
 use std::env;
 use std::fs::File;
 use std::io::prelude::*;
-use thiserror::Error;
 use tokenizer::Token;
 
-#[derive(Error, Debug, Diagnostic)]
-#[error("Unrecognized token")]
-#[diagnostic(code(oops), url("https://rezoleo.fr"), help("Try with A *A V *V or C for a register"))]
-pub struct TokenError {
-    #[source_code]
-    src: NamedSource<String>,
-
-    #[label("problem here")]
-    bad_bit: SourceSpan,
-}
-
-#[derive(Error, Diagnostic, Debug)]
-pub enum TokenizerError {
-    #[error(transparent)]
-    #[diagnostic(code(tokenizer::io_error))]
-    IoError(#[from] std::io::Error),
-
-    #[error(transparent)]
-    #[diagnostic(transparent)]
-    TokenError(#[from] TokenError),
-}
-
 use miette::Result;
 fn tokenizer_app(args: std::env::Args) -> Result<(), TokenizerError> {
     let args: Vec<String> = args.collect();
@@ -47,10 +26,10 @@ fn tokenizer_app(args: std::env::Args) -> Result<(), TokenizerError> {
     while let Some(result) = lex.next() {
         match result {
             Ok(token) => println!("{:#?}", token),
-            Err(_) => Err(TokenError {
+            Err(_) => Err(UnrecognizedToken{
                 src: NamedSource::new(filename, contents.clone()),
-                bad_bit: lex.span().into()
-            })?
+                src_span: lex.span().into(),
+            })?,
         }
     }
 
From 356fb1ceb30bf1681f0ec1b2254e245f1d206451 Mon Sep 17 00:00:00 2001
From: benoitlx
Date: Fri, 31 Jan 2025 17:51:50 +0100
Subject: [PATCH 3/5] feat!: better error handling

- catch multiple label definitions when lexing

---
 src/constants.rs              |   1 +
 src/error_handling.rs         |  28 -----
 src/lexer/lexer.rs            | 206 ++++++++++++++++++++++++++++++++++
 src/lexer/lexer_error.rs      |  92 +++++++++++++++
 src/lexer/token_definition.rs |  89 +++++++++++++++
 src/main.rs                   |  39 +------
 src/tokenizer.rs              | 140 -----------------------
 tests/test.asm                |  15 ++-
 8 files changed, 398 insertions(+), 212 deletions(-)
 delete mode 100644 src/error_handling.rs
 create mode 100644 src/lexer/lexer.rs
 create mode 100644 src/lexer/lexer_error.rs
 create mode 100644 src/lexer/token_definition.rs
 delete mode 100644 src/tokenizer.rs
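
Notes: load values are capped because a load only has 15 payload bits, so
parse_value rejects anything above MAX_LOAD_VALUE = 2^15 on top of the u16
overflow that from_str_radix already reports as PosOverflow. A standalone
sketch of that check, with the miette error types simplified to strings:

use std::num::IntErrorKind;

const MAX_LOAD_VALUE: u16 = 2_u16.pow(15);

fn parse_value(prefix: &str, base: u32, slice: &str) -> Result<u16, String> {
    let raw_bits = slice.trim_start_matches(prefix);
    match u16::from_str_radix(raw_bits, base) {
        // fits in u16 but exceeds the 15-bit load limit
        Ok(n) if n > MAX_LOAD_VALUE => Err(format!("{n} exceeds the load limit")),
        // does not even fit in u16
        Err(e) if *e.kind() == IntErrorKind::PosOverflow => {
            Err("literal does not fit in 16 bits".into())
        }
        Ok(n) => Ok(n),
        Err(e) => Err(e.to_string()),
    }
}

fn main() {
    assert_eq!(parse_value("0x", 16, "0x5fa4"), Ok(0x5fa4));
    assert!(parse_value("", 10, "99999").is_err()); // PosOverflow
    assert!(parse_value("0x", 16, "0xffff").is_err()); // above 2^15
}
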
diff --git a/src/constants.rs b/src/constants.rs
index 96fa0e9..dc24172 100644
--- a/src/constants.rs
+++ b/src/constants.rs
@@ -1 +1,2 @@
+#[allow(dead_code)]
 pub const MAX_LOAD_VALUE: u16 = 2_u16.pow(15);
\ No newline at end of file
diff --git a/src/error_handling.rs b/src/error_handling.rs
deleted file mode 100644
index 042ba93..0000000
--- a/src/error_handling.rs
+++ /dev/null
@@ -1,28 +0,0 @@
-use miette::{Diagnostic, NamedSource, SourceSpan};
-use thiserror::Error;
-
-#[derive(Error, Debug, Diagnostic)]
-#[error("Unrecognized token")]
-#[diagnostic(
-    code(tokenizer::no_matching_pattern),
-    url("https://my-incredible-doc.fr"),
-    help("TODO provide the closest pattern")
-)]
-pub struct UnrecognizedToken {
-    #[source_code]
-    pub src: NamedSource<String>,
-
-    #[label("This doesn't match any Token pattern")]
-    pub src_span: SourceSpan,
-}
-
-#[derive(Error, Diagnostic, Debug)]
-pub enum TokenizerError {
-    #[error(transparent)]
-    #[diagnostic(code(tokenizer::io_error), help("try this filename:"))]
-    IoError(#[from] std::io::Error),
-
-    #[error(transparent)]
-    #[diagnostic(transparent)]
-    TokenError(#[from] UnrecognizedToken),
-}
diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs
new file mode 100644
index 0000000..621c7d1
--- /dev/null
+++ b/src/lexer/lexer.rs
@@ -0,0 +1,206 @@
+#[path = "../constants.rs"]
+mod constants;
+mod lexer_error;
+
+use lexer_error::{AppError, LexingError, UnrecognizedToken};
+use logos::{Lexer, Logos};
+use miette::NamedSource;
+use std::io::Read;
+
+pub fn lex_from_file(filename: &str) -> miette::Result<(), AppError> {
+    if let Ok(mut file) = std::fs::File::open(filename) {
+        let mut content = String::new();
+
+        let _ = file.read_to_string(&mut content);
+
+        let mut lex = Token::lexer_with_extras(content.as_str(), filename.to_owned());
+
+        while let Some(result) = lex.next() {
+            match result {
+                Ok(token) => println!("{:#?}", token),
+                Err(e) => match e {
+                    LexingError::Utoken(_) => {
+                        Err(AppError::A(LexingError::Utoken(UnrecognizedToken {
+                            src: NamedSource::new(filename, content.clone()),
+                            src_span: lex.span().into(),
+                        })))?
+                    }
+                    any_error => Err(AppError::A(any_error))?,
+                },
+            }
+        }
+
+        return Ok(());
+    }
+    Err(AppError::IoError)
+}
+
+#[derive(Logos, Debug, PartialEq, Clone)]
+#[logos(skip r"[ \t\n\f]+")]
+#[logos(error = LexingError)]
+#[logos(extras = String)]
+enum Token {
+    // Operations
+    #[token("+")]
+    #[token("ADD")]
+    Add,
+
+    #[token("-")]
+    #[token("SUB")]
+    Sub,
+
+    #[token("&")]
+    #[token("AND")]
+    And,
+
+    #[token("|")]
+    #[token("OR")]
+    Or,
+
+    #[token("^")]
+    #[token("XOR")]
+    Xor,
+
+    #[token("~")]
+    #[token("NOT")]
+    Not,
+
+    #[token("<-")]
+    Assignment,
+
+    // Branch
+    #[token("JMP")]
+    Jmp,
+
+    #[token(">")]
+    Gt,
+
+    #[token("<")]
+    Lt,
+
+    #[token("==")]
+    Eq,
+
+    #[token("!=")]
+    Neq,
+
+    #[token(">=")]
+    Gtoeq,
+
+    #[token("<=")]
+    Ltoeq,
+
+    // Registers
+    #[token("A")]
+    A,
+
+    #[token("*A")]
+    StarA,
+
+    #[token("V")]
+    V,
+
+    #[token("*V")]
+    StarV,
+
+    #[token("C")]
+    C,
+
+    // Values
+    #[regex("[0-9]+", |lex| parse_value("", 10, lex))]
+    #[regex("(0x|0X){1}[a-fA-F0-9]+", |lex| parse_value("0x", 16, lex))]
+    #[regex("(0b|0B){1}(0|1)+", |lex| parse_value("0b", 2, lex))]
+    Number(u16),
+
+    // Labels
+    #[regex("[a-zA-Z_]+:", parse_label)]
+    Label(String),
+}
+
+fn parse_label(lex: &mut Lexer<Token>) -> Result<String, lexer_error::ParseLabelError> {
+    let slice = lex.slice().replace(":", "");
+
+    for maybe_token in lex.clone().spanned() {
+        match maybe_token.0 {
+            Ok(Token::Label(s)) if s == slice.clone() => Err(lexer_error::ParseLabelError {
+                src: NamedSource::new(lex.extras.clone(), lex.source().to_owned()),
+                src_span: lex.span().into(),
+                previous_label_span: maybe_token.1.into(),
+            })?,
+            Ok(_) | Err(_) => (),
+        }
+    }
+
+    Ok(slice)
+}
+
+fn parse_value(
+    prefix: &str,
+    base: u32,
+    lex: &mut Lexer<Token>,
+) -> Result<u16, lexer_error::ParseValueError> {
+    use constants::MAX_LOAD_VALUE;
+    use lexer_error::LoadValueOverflowError;
+    use lexer_error::ParseValueError;
+    use std::num::IntErrorKind::PosOverflow;
+
+    let slice = lex.slice();
+    let raw_bits = slice.trim_start_matches(prefix);
+
+    return match u16::from_str_radix(raw_bits, base) {
+        Ok(n) if n > MAX_LOAD_VALUE => {
+            Err(ParseValueError::OverflowError(LoadValueOverflowError {
+                src: NamedSource::new(lex.extras.clone(), lex.source().to_owned()),
+                src_span: lex.span().into(),
+            }))
+        }
+        Err(e) if *e.kind() == PosOverflow => {
+            println!("value should fit in 16 bits");
+            Err(ParseValueError::OverflowError(LoadValueOverflowError {
+                src: NamedSource::new(lex.extras.clone(), lex.source().to_owned()),
+                src_span: lex.span().into(),
+            }))
+        }
+        Ok(n) => Ok(n),
+        Err(e) => Err(ParseValueError::ParseIntError(e)),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_add_token() {
+        let mut lex = Token::lexer("+ ADD");
+        assert_eq!(lex.next(), Some(Ok(Token::Add)));
+        assert_eq!(lex.next(), Some(Ok(Token::Add)));
+    }
+
+    #[test]
+    fn test_labels_token() {
+        let mut lex = Token::lexer("some_label:");
+        assert_eq!(
+            lex.next(),
+            Some(Ok(Token::Label(String::from("some_label"))))
+        );
+
+        let mut lex = Token::lexer("SOME_LABEL:");
+        assert_eq!(
+            lex.next(),
+            Some(Ok(Token::Label(String::from("SOME_LABEL"))))
+        );
+    }
+
+    #[test]
+    fn test_values() {
+        let inputs = ["554", "0x5fa4", "0b1000110"];
+        let expected_numbers = [554, 0x5fa4, 0b1000110];
+
+        for (l, r) in std::iter::zip(inputs, expected_numbers) {
+            let mut lex = Token::lexer(l);
+
+            assert_eq!(lex.next(), Some(Ok(Token::Number(r))))
+        }
+    }
+}
diff --git a/src/lexer/lexer_error.rs b/src/lexer/lexer_error.rs
new file mode 100644
index 0000000..dace221
--- /dev/null
+++ b/src/lexer/lexer_error.rs
@@ -0,0 +1,92 @@
+#[path = "../constants.rs"]
"../constants.rs"] +mod constants; + +use miette::{Diagnostic, NamedSource, SourceSpan}; +use thiserror::Error; + +#[derive(Error, Diagnostic, Debug, Clone, PartialEq)] +#[error(transparent)] +#[diagnostic(transparent)] +pub enum LexingError { + Utoken(#[from] UnrecognizedToken), + + LabelError(#[from] ParseLabelError), + + ValueError(#[from] ParseValueError), +} + +impl Default for LexingError { + fn default() -> Self { + LexingError::Utoken(UnrecognizedToken { + src: NamedSource::new("", String::new()), + src_span: (0, 1).into(), + }) + } +} + +#[derive(Error, Debug, Diagnostic, Clone, PartialEq)] +#[error("Unrecognized Token")] +#[diagnostic( + code(token_definition::Token), + help("See the list of tokens in src/lexer/token_definition.rs (todo: give the closest token to the slice given)") +)] +pub struct UnrecognizedToken { + #[source_code] + pub src: NamedSource, + + #[label("This doesn't match any token")] + pub src_span: SourceSpan, +} + +#[derive(Error, Debug, Diagnostic, Clone, PartialEq)] +#[error("Multiple Definitions of the same label")] +#[diagnostic(code(lexer::parse_label))] +pub struct ParseLabelError { + #[source_code] + pub src: NamedSource, + + #[label("Can't declare this label")] + pub previous_label_span: SourceSpan, + + #[label("This label is already defined here")] + pub src_span: SourceSpan, +} + +#[derive(Error, Diagnostic, Debug, PartialEq, Clone)] +pub enum ParseValueError { + #[error(transparent)] + #[diagnostic( + code(lexer::parse_value), + help("try finding clues in std::num::IntErrorKind") + )] + ParseIntError(#[from] std::num::ParseIntError), + + #[error(transparent)] + #[diagnostic(transparent)] + OverflowError(#[from] LoadValueOverflowError), +} + +#[derive(Error, Debug, Diagnostic, Clone, PartialEq)] +#[error("Value Load Overflow")] +#[diagnostic( + code(lexer::parse_value), + help( + "- The value should be under 0x8000 in hexadecimal\n- The value should be under 32768 in decimal\n- The value should fit in 15 bits\n\nnote: future note on how to quickfix this problem" + ) +)] +pub struct LoadValueOverflowError { + #[source_code] + pub src: NamedSource, + + #[label("This value should be under {}", constants::MAX_LOAD_VALUE)] + pub src_span: SourceSpan, +} + +#[derive(Error, Debug, Diagnostic)] +pub enum AppError { + #[error(transparent)] + #[diagnostic(transparent)] + A(LexingError), + #[error("Io error")] + IoError, +} diff --git a/src/lexer/token_definition.rs b/src/lexer/token_definition.rs new file mode 100644 index 0000000..be8bf68 --- /dev/null +++ b/src/lexer/token_definition.rs @@ -0,0 +1,89 @@ +mod lexer_error; + +use logos::Logos; +use lexer_error::LexingError; + +#[derive(Logos, Debug, PartialEq, Clone)] +#[logos(skip r"[ \t\n\f]+")] +#[logos(error = LexingError)] +#[logos(extras = String)] +// #[logos(extras = (filename, contents))] +// see: https://docs.rs/logos/latest/logos/trait.Logos.html +pub enum Token { + // Operations + #[token("+")] + #[token("ADD")] + Add, + + #[token("-")] + #[token("SUB")] + Sub, + + #[token("&")] + #[token("AND")] + And, + + #[token("|")] + #[token("OR")] + Or, + + #[token("^")] + #[token("XOR")] + Xor, + + #[token("~")] + #[token("NOT")] + Not, + + #[token("<-")] + Assignment, + + // Branch + #[token("JMP")] + Jmp, + + #[token(">")] + Gt, + + #[token("<")] + Lt, + + #[token("==")] + Eq, + + #[token("!=")] + Neq, + + #[token(">=")] + Gtoeq, + + #[token("<=")] + Ltoeq, + + // Registers + #[token("A")] + A, + + #[token("*A")] + StarA, + + #[token("V")] + V, + + #[token("*V")] + StarV, + + #[token("C")] + C, + + // 
+    #[regex("[0-9]+", |lex| parse_value("", 10, lex))]
+    #[regex("(0x|0X){1}[a-fA-F0-9]+", |lex| parse_value("0x", 16, lex))]
+    #[regex("(0b|0B){1}(0|1)+", |lex| parse_value("0b", 2, lex))]
+    Number(u16),
+
+    // Labels
+    // #[regex("[a-zA-Z_]+:", |lex| lex.slice().replace(":", ""))]
+    #[regex("[a-zA-Z_]+:", parse_label)]
+    Label(String),
+}
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
index 00ba6b3..573000f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,43 +1,10 @@
 mod constants;
-mod error_handling;
-mod tokenizer;
-
-use error_handling::{TokenizerError, UnrecognizedToken};
-use logos::Logos;
-use miette::NamedSource;
-use std::env;
-use std::fs::File;
-use std::io::prelude::*;
-use tokenizer::Token;
+#[path = "lexer/lexer.rs"]
+mod lexer;
 
 use miette::Result;
-fn tokenizer_app(args: std::env::Args) -> Result<(), TokenizerError> {
-    let args: Vec<String> = args.collect();
-
-    let filename: &str = &args[1];
-
-    let mut file = File::open(filename)?;
-    let mut contents = String::new();
-
-    file.read_to_string(&mut contents)?;
-
-    let mut lex = Token::lexer(contents.as_str());
-
-    while let Some(result) = lex.next() {
-        match result {
-            Ok(token) => println!("{:#?}", token),
-            Err(_) => Err(UnrecognizedToken{
-                src: NamedSource::new(filename, contents.clone()),
-                src_span: lex.span().into(),
-            })?,
-        }
-    }
-
-    Ok(())
-}
-
 fn main() -> Result<()> {
-    let _ = tokenizer_app(env::args())?;
+    let _ = lexer::lex_from_file("tests/test.asm")?;
 
     Ok(())
 }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
deleted file mode 100644
index 7a2a7d5..0000000
--- a/src/tokenizer.rs
+++ /dev/null
@@ -1,140 +0,0 @@
-use crate::constants::MAX_LOAD_VALUE;
-use logos::{Lexer, Logos};
-
-fn parse_values(prefix: &str, base: u32, lex: &mut Lexer<Token>) -> Option<u16> {
-    let slice = lex.slice();
-    let raw_bits = slice.trim_start_matches(prefix);
-    let n: u16 = u16::from_str_radix(raw_bits, base).ok()?;
-    assert!(
-        n <= MAX_LOAD_VALUE,
-        "Can't load data exceeding {} from ram",
-        MAX_LOAD_VALUE
-    );
-    Some(n)
-}
-
-#[derive(Logos, Debug, PartialEq)]
-#[logos(skip r"[ \t\n\f]+")]
-pub enum Token {
-    // Operations
-    #[token("+")]
-    #[token("ADD")]
-    Add,
-
-    #[token("-")]
-    #[token("SUB")]
-    Sub,
-
-    #[token("&")]
-    #[token("AND")]
-    And,
-
-    #[token("|")]
-    #[token("OR")]
-    Or,
-
-    #[token("^")]
-    #[token("XOR")]
-    Xor,
-
-    #[token("~")]
-    #[token("NOT")]
-    Not,
-
-    #[token("<-")]
-    Assignment,
-
-    // Branch
-    #[token("JMP")]
-    Jmp,
-
-    #[token(">")]
-    Gt,
-
-    #[token("<")]
-    Lt,
-
-    #[token("==")]
-    Eq,
-
-    #[token("!=")]
-    Neq,
-
-    #[token(">=")]
-    Gtoeq,
-
-    #[token("<=")]
-    Ltoeq,
-
-    // Registers
-    #[token("A")]
-    A,
-
-    #[token("*A")]
-    StarA,
-
-    #[token("V")]
-    V,
-
-    #[token("*V")]
-    StarV,
-
-    #[token("C")]
-    C,
-
-    // Values
-    #[regex("[0-9]+", |lex| parse_values("", 10, lex))]
-    #[regex("(0x|0X){1}[a-fA-F0-9]+", |lex| parse_values("0x", 16, lex))]
-    #[regex("(0b|0B){1}(0|1)+", |lex| parse_values("0b", 2, lex))]
-    Number(u16),
-
-    // Labels
-    #[regex("[a-zA-Z_]+:", |lex| lex.slice().replace(":", ""))]
-    Label(String),
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_add_token() {
-        let mut lex = Token::lexer("+ ADD");
-        assert_eq!(lex.next(), Some(Ok(Token::Add)));
-        assert_eq!(lex.next(), Some(Ok(Token::Add)));
-    }
-
-    #[test]
-    fn test_labels_token() {
-        let mut lex = Token::lexer("some_label:");
-        assert_eq!(
-            lex.next(),
-            Some(Ok(Token::Label(String::from("some_label"))))
-        );
-
-        let mut lex = Token::lexer("SOME_LABEL:");
-        assert_eq!(
-            lex.next(),
-            Some(Ok(Token::Label(String::from("SOME_LABEL"))))
-        );
-    }
-
-    #[test]
-    fn test_not_a_label() {
-        let mut lex = Token::lexer("Centrale Lille");
-
-        assert_eq!(lex.next(), Some(Err(())));
-    }
-
-    #[test]
-    fn test_values() {
-        let inputs = ["554", "0x5fa4", "0b1000110"];
-        let expected_numbers = [554, 0x5fa4, 0b1000110];
-
-        for (l, r) in std::iter::zip(inputs, expected_numbers) {
-            let mut lex = Token::lexer(l);
-
-            assert_eq!(lex.next(), Some(Ok(Token::Number(r))))
-        }
-    }
-}
diff --git a/tests/test.asm b/tests/test.asm
index 078c405..7640c86 100644
--- a/tests/test.asm
+++ b/tests/test.asm
@@ -1,10 +1,9 @@
-A <-- 1 + C
-A <- 256
-*A <- C
-A >= 3
-A
-D
+0x8000
+A <- A majhkdf & 3
+V <- V + 1
 
-thisis_a_label:
-
-0x333
\ No newline at end of file
+main:
+0x033
+
+main:
\ No newline at end of file
From a1f61555da8997525a3901929934d4a47233ab77 Mon Sep 17 00:00:00 2001
From: benoitlx
Date: Sat, 1 Feb 2025 21:54:52 +0100
Subject: [PATCH 4/5] feat: support define and single line comments

---
 Cargo.toml               |   1 +
 README.md                |   8 ++
 src/constants.rs         |   2 +-
 src/lexer/lexer.rs       | 242 +++++++++++++++++++++++++++++++++++++--
 src/lexer/lexer_error.rs |  83 ++++++++++++--
 tests/test.asm           |  32 +++++-
 6 files changed, 340 insertions(+), 28 deletions(-)
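
Notes: DEFINE is matched as a single regex covering the whole line, so
parse_define re-splits the slice itself: operand one must match ^[a-zA-Z_]+$,
operand two is a decimal/hex/binary literal, and a missing or extra operand is
its own error. A standalone sketch of that operand handling (the duplicate-name
scan and the miette error types are simplified out, errors are plain strings):

fn parse_define(slice: &str) -> Result<(String, u16), String> {
    let mut parts = slice.trim_end().split_whitespace();
    let _keyword = parts.next(); // always "DEFINE", guaranteed by the regex

    let name = parts.next().ok_or("too few operands")?;
    if !name.chars().all(|c| c.is_ascii_alphabetic() || c == '_') {
        return Err("name should be of the form [a-zA-Z_]+".into());
    }

    let value = parts.next().ok_or("too few operands")?;
    // detect the base from an optional 0x/0b prefix, default to decimal
    let (raw, base) = match value.get(0..2) {
        Some("0x") => (&value[2..], 16),
        Some("0b") => (&value[2..], 2),
        _ => (value, 10),
    };
    let n = u16::from_str_radix(raw, base).map_err(|e| e.to_string())?;

    if parts.next().is_some() {
        return Err("too many operands".into());
    }
    Ok((name.to_owned(), n))
}

fn main() {
    assert_eq!(parse_define("DEFINE toto 0b11"), Ok(("toto".into(), 3)));
    assert!(parse_define("DEFINE t/est 0x0").is_err());
    assert!(parse_define("DEFINE many 0x0 argument").is_err());
}
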
diff --git a/Cargo.toml b/Cargo.toml
index 4d44534..0f8d0fc 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,6 +6,7 @@ edition = "2021"
 [dependencies]
 logos = "0.15.0"
 miette = { version = "7.4.0", features = ["fancy"] }
+regex = "1.11.1"
 thiserror = "2.0.11"
 
 [dev-dependencies]
diff --git a/README.md b/README.md
index 3329925..102a995 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,10 @@
 # simple-assembler
+
 Simple assembler for my custom cpu
+
+## TODO
+
+- [ ] improve test coverage
+- [ ] export graph from the lexer
+- [ ] refactor lexer_error (might introduce breaking changes as error names can change)
+- [ ] refactor parsing code
\ No newline at end of file
diff --git a/src/constants.rs b/src/constants.rs
index dc24172..b4adc80 100644
--- a/src/constants.rs
+++ b/src/constants.rs
@@ -1,2 +1,2 @@
 #[allow(dead_code)]
-pub const MAX_LOAD_VALUE: u16 = 2_u16.pow(15);
\ No newline at end of file
+pub const MAX_LOAD_VALUE: u16 = 2_u16.pow(15) - 1;
\ No newline at end of file
diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs
index 621c7d1..4c9b270 100644
--- a/src/lexer/lexer.rs
+++ b/src/lexer/lexer.rs
@@ -115,18 +115,55 @@ enum Token {
     // Labels
     #[regex("[a-zA-Z_]+:", parse_label)]
     Label(String),
+
+    // Define
+    // #[regex("DEFINE [a-zA-Z_]+ [0-9]+", |lex| parse_define("", 10, lex))]
+    // #[regex("DEFINE [a-zA-Z_]+ (0x|0X){1}[a-fA-F0-9]+", |lex| parse_define("0x", 16, lex))]
+    // #[regex("DEFINE [a-zA-Z_]+ (0b|0B){1}(0|1)+", |lex| parse_define("0b", 2, lex))]
+    // #[regex(r"DEFINE\s*[^\s]*", define_too_few_arguments)]
+    #[regex(r"DEFINE [^[\n(//)]]*", parse_define)] // tofix: wrong error with "DEFINE t/est 0x0"
+    Define((String, u16)),
+
+    // Comments
+    // #[regex(r"\s*/*.**/")] // multiline comments
+    #[regex(r"\s*//.*")]
+    Comments,
 }
 
 fn parse_label(lex: &mut Lexer<Token>) -> Result<String, lexer_error::ParseLabelError> {
+    // check for regex [a-zA-Z_]+
+    // if it fails => NameError
+
     let slice = lex.slice().replace(":", "");
 
-    for maybe_token in lex.clone().spanned() {
+    parse_text_raw(
+        slice,
+        lex.span(),
+        lex.extras.clone(),
+        lex.source(),
+        lex.clone().spanned(),
+    )
+}
+
+fn parse_text_raw(
+    slice: String,
+    span: logos::Span,
+    f: String,
+    source: &str,
+    spanned: logos::SpannedIter<'_, Token>,
+) -> Result<String, lexer_error::ParseLabelError> {
+    for maybe_token in spanned {
         match maybe_token.0 {
             Ok(Token::Label(s)) if s == slice.clone() => Err(lexer_error::ParseLabelError {
-                src: NamedSource::new(lex.extras.clone(), lex.source().to_owned()),
-                src_span: lex.span().into(),
+                src: NamedSource::new(&f, source.to_owned()),
+                src_span: span.clone().into(),
                 previous_label_span: maybe_token.1.into(),
             })?,
+            Ok(Token::Define((s, _))) if s == slice.clone() => Err(lexer_error::ParseLabelError {
+                src: NamedSource::new(&f, source.to_owned()),
+                src_span: span.clone().into(),
+                previous_label_span: (maybe_token.1.start + 7, s.len()).into(),
+            })?,
             Ok(_) | Err(_) => (),
         }
     }
@@ -138,36 +175,151 @@ fn parse_value(
     prefix: &str,
     base: u32,
     lex: &mut Lexer<Token>,
+) -> Result<u16, lexer_error::ParseValueError> {
+    let slice = lex.slice();
+
+    parse_value_raw(
+        prefix,
+        base,
+        slice,
+        lex.span(),
+        lex.extras.clone(),
+        lex.source(),
+    )
+}
+
+fn parse_value_raw(
+    prefix: &str,
+    base: u32,
+    slice: &str,
+    span: logos::Span,
+    f: String,
+    s: &str,
 ) -> Result<u16, lexer_error::ParseValueError> {
     use constants::MAX_LOAD_VALUE;
+    use lexer_error::InvalidDigitError;
     use lexer_error::LoadValueOverflowError;
     use lexer_error::ParseValueError;
+    use std::num::IntErrorKind::InvalidDigit;
     use std::num::IntErrorKind::PosOverflow;
 
-    let slice = lex.slice();
     let raw_bits = slice.trim_start_matches(prefix);
 
     return match u16::from_str_radix(raw_bits, base) {
         Ok(n) if n > MAX_LOAD_VALUE => {
             Err(ParseValueError::OverflowError(LoadValueOverflowError {
-                src: NamedSource::new(lex.extras.clone(), lex.source().to_owned()),
-                src_span: lex.span().into(),
+                src: NamedSource::new(f, s.to_owned()),
+                src_span: span.into(),
             }))
         }
         Err(e) if *e.kind() == PosOverflow => {
-            println!("value should fit in 16 bits");
             Err(ParseValueError::OverflowError(LoadValueOverflowError {
-                src: NamedSource::new(lex.extras.clone(), lex.source().to_owned()),
-                src_span: lex.span().into(),
+                src: NamedSource::new(f, s.to_owned()),
+                src_span: span.into(),
+            }))
+        }
+        Err(e) if *e.kind() == InvalidDigit => {
+            Err(ParseValueError::WrongDigitError(InvalidDigitError {
+                src: NamedSource::new(f, s.to_owned()),
+                src_span: span.into(),
             }))
         }
         Ok(n) => Ok(n),
         Err(e) => Err(ParseValueError::ParseIntError(e)),
-    }
+    };
+}
+
+fn parse_define(lex: &mut Lexer<Token>) -> Result<(String, u16), lexer_error::ParseDefineError> {
+    use lexer_error::DefineFewOperandError;
+    use lexer_error::DefineManyOperandError;
+    use lexer_error::NameError;
+    use lexer_error::ParseDefineError;
+    use regex::Regex;
+
+    let mut slices = lex.slice().trim_end().split_whitespace();
+    let mut result: (String, u16) = (String::from(""), 0);
+
+    let _ = slices.next();
+
+    let mut arg_number = 0;
+    if let Some(label) = slices.next() {
+        arg_number += 1;
+
+        let label_length = label.len();
+        let span_start = lex.span().start + 7;
+        let span_range = span_start..(span_start + label_length);
+
+        let re = Regex::new(r"^[a-zA-Z_]+$").unwrap();
+
+        if !re.is_match(label) {
+            return Err(ParseDefineError::InvalidName(NameError {
+                src: NamedSource::new(lex.extras.clone(), lex.source().to_owned()),
+                src_span: span_range.into(),
+            }));
+        }
+
+        result.0 = parse_text_raw(
+            label.to_owned(),
+            span_range,
+            lex.extras.clone(),
+            lex.source(),
+            lex.clone().spanned(),
+        )?;
+
+        if let Some(value) = slices.next() {
+            let mut prefix = "";
+            let mut base = 10;
+
+            if value.len() >= 2 && &value[0..2] == "0x" {
+                prefix = "0x";
+                base = 16;
+            }
+            if value.len() >= 2 && &value[0..2] == "0b" {
+                prefix = "0b";
+                base = 2;
+            }
+
+            arg_number += 1;
+
+            let value_length = value.len();
+
+            result.1 = parse_value_raw(
+                prefix,
+                base,
+                value,
+                (lex.span().start + 8 + label_length)
+                    ..(lex.span().start + 8 + label_length + value_length),
+                lex.extras.clone(),
+                lex.source(),
+            )?;
+        }
+    }
+
+    if slices.next() != None {
+        return Err(ParseDefineError::TooManyOperandError(
+            DefineManyOperandError {
+                src: NamedSource::new(lex.extras.clone(), lex.source().to_owned()),
+                src_span: lex.span().into(),
+            },
+        ));
+    }
+
+    if arg_number != 2 {
+        return Err(ParseDefineError::TooFewOperandError(
+            DefineFewOperandError {
+                src: NamedSource::new(lex.extras.clone(), lex.source().to_owned()),
+                src_span: lex.span().into(),
+            },
+        ));
    }
+
+    Ok(result)
+}
 
 #[cfg(test)]
 mod tests {
+    use crate::lexer::lexer_error::ParseDefineError;
+
     use super::*;
 
     #[test]
     fn test_add_token() {
         let mut lex = Token::lexer("+ ADD");
         assert_eq!(lex.next(), Some(Ok(Token::Add)));
         assert_eq!(lex.next(), Some(Ok(Token::Add)));
     }
 
     #[test]
-    fn test_labels_token() {
+    fn test_labels() {
+        use LexingError::LabelError;
+
         let mut lex = Token::lexer("some_label:");
         assert_eq!(
             lex.next(),
             Some(Ok(Token::Label(String::from("some_label"))))
         );
 
         let mut lex = Token::lexer("SOME_LABEL:");
         assert_eq!(
             lex.next(),
             Some(Ok(Token::Label(String::from("SOME_LABEL"))))
         );
+
+        // wrong syntax
+
+        // Multiple use of the same name
+        let mut lex = Token::lexer("test:\nDEFINE test 0");
+        assert!(matches!(lex.next(), Some(Err(LabelError(_)))));
+
+        let mut lex = Token::lexer("test:\ntest:");
+        assert!(matches!(lex.next(), Some(Err(LabelError(_)))));
+
+        // wrong label name
+        /* let mut lex = Token::lexer("te/st:");
+        assert!(matches!(lex.next(), Some(Err(LabelError(_))))); // to fix: return Err(LexingError::Utoken) */
     }
 
     #[test]
     fn test_values() {
         let inputs = ["554", "0x5fa4", "0b1000110"];
         let expected_numbers = [554, 0x5fa4, 0b1000110];
 
         for (l, r) in std::iter::zip(inputs, expected_numbers) {
             let mut lex = Token::lexer(l);
 
             assert_eq!(lex.next(), Some(Ok(Token::Number(r))))
         }
     }
+
+    #[test]
+    fn test_defines() {
+        use LexingError::DefineError;
+        use Token::Define;
+        use ParseDefineError::LabelError;
+        use ParseDefineError::ValueError;
+
+        // good syntax
+        let mut lex = Token::lexer("DEFINE test 0");
+        assert_eq!(lex.next(), Some(Ok(Define((String::from("test"), 0)))));
+
+        let mut lex = Token::lexer("DEFINE test 1");
+        assert_eq!(lex.next(), Some(Ok(Define((String::from("test"), 1)))));
+
+        let mut lex = Token::lexer("DEFINE test 32767");
+        assert_eq!(lex.next(), Some(Ok(Define((String::from("test"), 32767)))));
+
+        let mut lex = Token::lexer("DEFINE test 0x0");
+        assert_eq!(lex.next(), Some(Ok(Define((String::from("test"), 0)))));
+
+        let mut lex = Token::lexer("DEFINE test 0xff");
+        assert_eq!(lex.next(), Some(Ok(Define((String::from("test"), 255)))));
+
+        let mut lex = Token::lexer("DEFINE test 0x7fff");
+        assert_eq!(lex.next(), Some(Ok(Define((String::from("test"), 32767)))));
+
+        let mut lex = Token::lexer("DEFINE test 0b0");
+        assert_eq!(lex.next(), Some(Ok(Define((String::from("test"), 0)))));
+
+        let mut lex = Token::lexer("DEFINE TOTO 0b11");
+        assert_eq!(lex.next(), Some(Ok(Define((String::from("TOTO"), 3)))));
+
+        let mut lex = Token::lexer("DEFINE titi_test 0b111111111111111");
+        assert_eq!(
+            lex.next(),
+            Some(Ok(Define((String::from("titi_test"), 32767))))
+        );
+
+        // wrong syntax
+
+        // Multiple use of the same name
+        let mut lex = Token::lexer("DEFINE test 0\ntest:");
+        assert!(matches!(lex.next(), Some(Err(DefineError(LabelError(_))))));
+
+        let mut lex = Token::lexer("DEFINE test 0\nDEFINE test 0");
+        assert!(matches!(lex.next(), Some(Err(DefineError(LabelError(_))))));
+
+        // Value Error
+        let mut lex = Token::lexer("DEFINE test 0feaj138");
+        assert!(matches!(lex.next(), Some(Err(DefineError(ValueError(_))))));
+
+        let mut lex = Token::lexer("DEFINE test 0x8000"); // load value overflow
+        assert!(matches!(lex.next(), Some(Err(DefineError(ValueError(_))))));
+    }
 }
diff --git a/src/lexer/lexer_error.rs b/src/lexer/lexer_error.rs
index dace221..3e5dd37 100644
--- a/src/lexer/lexer_error.rs
+++ b/src/lexer/lexer_error.rs
@@ -13,6 +13,8 @@ pub enum LexingError {
     LabelError(#[from] ParseLabelError),
 
     ValueError(#[from] ParseValueError),
+
+    DefineError(#[from] ParseDefineError),
 }
 
 impl Default for LexingError {
@@ -24,6 +26,51 @@ impl Default for LexingError {
     }
 }
 
+#[derive(Error, Diagnostic, Debug, Clone, PartialEq)]
+#[error(transparent)]
+#[diagnostic(transparent)]
+pub enum ParseDefineError {
+    LabelError(#[from] ParseLabelError),
+
+    ValueError(#[from] ParseValueError),
+
+    TooFewOperandError(#[from] DefineFewOperandError),
+
+    TooManyOperandError(#[from] DefineManyOperandError),
+
+    InvalidName(#[from] NameError),
+}
+
+#[derive(Error, Debug, Diagnostic, Clone, PartialEq)]
+#[error("Name should be of the form [a-zA-Z_]+")]
+pub struct NameError {
+    #[source_code]
+    pub src: NamedSource<String>,
+
+    #[label("here")]
+    pub src_span: SourceSpan,
+}
+
+#[derive(Error, Debug, Diagnostic, Clone, PartialEq)]
+#[error("Too few operands after DEFINE")]
+pub struct DefineFewOperandError {
+    #[source_code]
+    pub src: NamedSource<String>,
+
+    #[label("missing operand for define")]
+    pub src_span: SourceSpan,
+}
+
+#[derive(Error, Debug, Diagnostic, Clone, PartialEq)]
+#[error("Too many operands after DEFINE")]
+pub struct DefineManyOperandError {
+    #[source_code]
+    pub src: NamedSource<String>,
+
+    #[label("additional operand here")]
+    pub src_span: SourceSpan,
+}
+
 #[derive(Error, Debug, Diagnostic, Clone, PartialEq)]
 #[error("Unrecognized Token")]
 #[diagnostic(
@@ -39,26 +86,26 @@ pub struct UnrecognizedToken {
 }
 
 #[derive(Error, Debug, Diagnostic, Clone, PartialEq)]
-#[error("Multiple Definitions of the same label")]
-#[diagnostic(code(lexer::parse_label))]
+#[error("Multiple Definitions/Labels with the same name")]
+#[diagnostic(code(lexer::parse_text_raw))]
 pub struct ParseLabelError {
     #[source_code]
     pub src: NamedSource<String>,
 
-    #[label("Can't declare this label")]
-    pub previous_label_span: SourceSpan,
-
-    #[label("This label is already defined here")]
+    #[label("Can't use this name")]
     pub src_span: SourceSpan,
+
+    #[label("the name is already declared here")]
+    pub previous_label_span: SourceSpan,
 }
 
 #[derive(Error, Diagnostic, Debug, PartialEq, Clone)]
 pub enum ParseValueError {
     #[error(transparent)]
-    #[diagnostic(
-        code(lexer::parse_value),
-        help("try finding clues in std::num::IntErrorKind")
-    )]
+    #[diagnostic(transparent)]
+    WrongDigitError(#[from] InvalidDigitError),
+
+    #[error(transparent)]
     ParseIntError(#[from] std::num::ParseIntError),
 
     #[error(transparent)]
@@ -66,6 +113,20 @@ pub enum ParseValueError {
     OverflowError(#[from] LoadValueOverflowError),
 }
 
+#[derive(Error, Debug, Diagnostic, Clone, PartialEq)]
+#[error("Invalid digit found in string")]
+#[diagnostic(
+    code(lexer::parse_value),
+    help("Verify the base prefix and the digits")
+)]
+pub struct InvalidDigitError {
+    #[source_code]
+    pub src: NamedSource<String>,
+
+    #[label("Invalid digit here")]
+    pub src_span: SourceSpan,
+}
+
 #[derive(Error, Debug, Diagnostic, Clone, PartialEq)]
 #[error("Value Load Overflow")]
 #[diagnostic(
@@ -86,7 +147,7 @@ pub struct LoadValueOverflowError {
 pub enum AppError {
     #[error(transparent)]
     #[diagnostic(transparent)]
-    A(LexingError),
+    A(LexingError), // todo: change this name
     #[error("Io error")]
     IoError,
 }
diff --git a/tests/test.asm b/tests/test.asm
index 7640c86..23cfdc0 100644
--- a/tests/test.asm
+++ b/tests/test.asm
@@ -1,9 +1,29 @@
-0x8000
-A <- A majhkdf & 3
-V <- V + 1
+// testing decimal numbers
+DEFINE titi 0
+DEFINE tito 1
+DEFINE toto 32767
+// DEFINE tata 32768 // error here
+// DEFINE big_number 99999999999999 // error here
+
+// testing hexadecimal numbers
+DEFINE a 0xff
+DEFINE b 0x0
+DEFINE c 0x7fff
+// DEFINE d 0x8001 // error here
 
-main:
-0x033
+// testing binary numbers
+DEFINE d 0b0
+DEFINE e 0b11001
+DEFINE f 0b111111111111111
+// DEFINE g 0b1111111111111111 // error here
 
-main:
\ No newline at end of file
+// error below
+// DEFINE foo 0x0
+// DEFINE foo 0x0
+
+DEFINE test // error here
+// DEFINE many 0x0 argument // error here
+
+// DEFINE test test // error here
+// DEFINE t*est 0x0 // error here
+// DEFINE t/est 0x0 // error here
From 42d04ee9003d560983ed9873b93892b085e28dd5 Mon Sep 17 00:00:00 2001
From: benoitlx
Date: Sun, 2 Feb 2025 14:02:52 +0100
Subject: [PATCH 5/5] chore: not pursuing this branch

---
 .gitignore                 | 2 ++
 tests/test.asm             | 4 +++-
 tests/two_labels_error.asm | 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 tests/two_labels_error.asm

diff --git a/.gitignore b/.gitignore
index efe3eb1..f594f86 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,5 @@ Cargo.lock
 
 # Added by cargo
 /target
+
+.vscode/
diff --git a/tests/test.asm b/tests/test.asm
index 23cfdc0..5ca6970 100644
--- a/tests/test.asm
+++ b/tests/test.asm
@@ -21,9 +21,11 @@ DEFINE f 0b111111111111111
 // DEFINE foo 0x0
 // DEFINE foo 0x0
 
-DEFINE test // error here
+// DEFINE test // error here
 // DEFINE many 0x0 argument // error here
 
 // DEFINE test test // error here
 // DEFINE t*est 0x0 // error here
 // DEFINE t/est 0x0 // error here
+
+A:
diff --git a/tests/two_labels_error.asm b/tests/two_labels_error.asm
new file mode 100644
index 0000000..90b4924
--- /dev/null
+++ b/tests/two_labels_error.asm
@@ -0,0 +1,2 @@
+A_aaAZ:
+A_aaAZ:
\ No newline at end of file