microsoft · joao-boechat · Jun 4, 2026 · Jun 5, 2026 · Jun 9, 2026 · Jun 9, 2026
@@ -16,6 +16,7 @@ members = [
     "source/compiler/qsc_hir",
     "source/compiler/qsc_openqasm_compiler",
     "source/compiler/qsc_openqasm_parser",
+    "source/compiler/qsc_stim_parser",
     "source/compiler/qsc_linter",
     "source/compiler/qsc_lowerer",
     "source/compiler/qsc_parse",

@@ -0,0 +1,10 @@
+[package]
+name = "qsc_stim_parser"
+edition.workspace = true
+version.workspace = true
+
+[dependencies]
+enum-iterator.workspace = true
+qsc_data_structures = { path = "../qsc_data_structures" }
+
+[dev-dependencies]
@@ -0,0 +1,33 @@
+use qsc_stim_parser::lex::Lexer;
+use std::fs;
+use std::io::Write;
+
+/// Lexes `examples/example.stim` and writes one row per token (kind, span, source text)
+/// to `examples/lex_output.txt`.
+///
+/// # Panics
+///
+/// Panics if the input file cannot be read, or if the output file cannot be created,
+/// written, or flushed.
+fn main() {
+    let stim_code =
+        fs::read_to_string("examples/example.stim").expect("Failed to read examples/example.stim");
+
+    // Buffer the output so that every row does not turn into its own write call on the file.
+    let mut out = std::io::BufWriter::new(
+        fs::File::create("examples/lex_output.txt").expect("Failed to create output file"),
+    );
+
+    writeln!(out, "{:<20} {:<10} {:}", "TOKEN KIND", "SPAN", "TEXT").unwrap();
+    writeln!(out, "{:-<50}", "").unwrap();
+
+    for token in Lexer::new(&stim_code) {
+        let text = &stim_code[token.span.lo as usize..token.span.hi as usize];
+        // A newline token is shown as a literal `\n` instead of the debug form of its text.
+        let text_display = match token.kind {
+            qsc_stim_parser::lex::TokenKind::Newline => "\\n".to_string(),
+            _ => format!("{text:?}"),
+        };
+        // The kind and span are rendered to `String`s first so that the column widths apply
+        // to the finished text.
+        writeln!(
+            out,
+            "{:<20} {:<10} {}",
+            token.kind.to_string(),
+            format!("{}..{}", token.span.lo, token.span.hi),
+            text_display
+        )
+        .unwrap();
+    }
+
+    // A `BufWriter` ignores write errors when it is flushed on drop, so flush explicitly.
+    out.flush().expect("Failed to flush examples/lex_output.txt");
+
+    println!("Wrote examples/lex_output.txt");
+}
@@ -0,0 +1,89 @@
+use qsc_stim_parser::parser::{Circuit, Instruction, Item, Pauli, Target, TargetKind, parse};
+use std::fs;
+use std::io::Write;
+
+/// Writes `circuit` to `out` as an S-expression: a `(circuit` line, every top-level item
+/// at indentation depth 1, then a closing `)` line.
+///
+/// # Panics
+///
+/// Panics if writing to `out` fails.
+fn write_circuit(out: &mut impl Write, circuit: &Circuit) {
+    writeln!(out, "(circuit").unwrap();
+    circuit
+        .items
+        .iter()
+        .for_each(|item| write_item(out, item, 1));
+    writeln!(out, ")").unwrap();
+}
+
+/// Writes one item at the given indentation depth (two spaces per level).
+///
+/// A line item becomes `(<instruction>)` on a single line. A block item becomes
+/// `(<block instruction>` followed by its nested items one level deeper and a closing `)`
+/// on its own line.
+///
+/// # Panics
+///
+/// Panics if writing to `out` fails.
+fn write_item(out: &mut impl Write, item: &Item, indent: usize) {
+    let pad = "  ".repeat(indent);
+    // Both kinds of item open with the padding and a parenthesis.
+    write!(out, "{pad}(").unwrap();
+    match item {
+        Item::Line(line) => {
+            write_instruction(out, &line.instruction);
+            writeln!(out, ")").unwrap();
+        }
+        Item::Block(block) => {
+            write_instruction(out, &block.block_instruction);
+            writeln!(out).unwrap();
+            for child in &block.items {
+                write_item(out, child, indent + 1);
+            }
+            writeln!(out, "{pad})").unwrap();
+        }
+    }
+}
+
+/// Writes an instruction as `name`, an optional `[tag]`, then each argument and each
+/// target preceded by a single space.
+///
+/// # Panics
+///
+/// Panics if writing to `out` fails.
+fn write_instruction(out: &mut impl Write, instr: &Instruction) {
+    write!(out, "{}", instr.name).unwrap();
+    if let Some(tag) = instr.tag.as_ref() {
+        write!(out, "[{tag}]").unwrap();
+    }
+    // Iterating an empty argument list writes nothing, so no emptiness check is needed.
+    for arg in &instr.args {
+        write!(out, " {arg}").unwrap();
+    }
+    for target in &instr.targets {
+        write!(out, " ").unwrap();
+        write_target(out, target);
+    }
+}
+
+/// Writes a single target in its textual form: `5` / `!5` for qubits, `rec[-N]` for
+/// measurement records, `sweep[N]` for sweep bits, `X5` / `!Z5` for Pauli targets and
+/// `*` for the combiner.
+///
+/// # Panics
+///
+/// Panics if writing to `out` fails.
+fn write_target(out: &mut impl Write, target: &Target) {
+    // Prefix emitted in front of negated targets.
+    let bang = |negated: bool| if negated { "!" } else { "" };
+    let written = match &target.kind {
+        TargetKind::Qubit { negated, value } => write!(out, "{}{}", bang(*negated), value),
+        TargetKind::MeasurementRecord { value } => write!(out, "rec[-{value}]"),
+        TargetKind::SweepBit { value } => write!(out, "sweep[{value}]"),
+        TargetKind::Pauli {
+            negated,
+            pauli,
+            value,
+        } => {
+            let letter = match pauli {
+                Pauli::X => "X",
+                Pauli::Y => "Y",
+                Pauli::Z => "Z",
+            };
+            write!(out, "{}{}{}", bang(*negated), letter, value)
+        }
+        TargetKind::Combiner => write!(out, "*"),
+    };
+    written.unwrap();
+}
+
+/// Parses `examples/example.stim` and writes the resulting circuit as an S-expression to
+/// `examples/parse_output.txt`.
+///
+/// # Panics
+///
+/// Panics if the input file cannot be read, or if the output file cannot be created,
+/// written, or flushed.
+fn main() {
+    let stim_code =
+        fs::read_to_string("examples/example.stim").expect("Failed to read examples/example.stim");
+
+    let circuit = parse(&stim_code);
+
+    // The printer issues many tiny writes; buffer them instead of hitting the file each time.
+    let file =
+        fs::File::create("examples/parse_output.txt").expect("Failed to create output file");
+    let mut out = std::io::BufWriter::new(file);
+    write_circuit(&mut out, &circuit);
+
+    // A `BufWriter` ignores write errors when it is flushed on drop, so flush explicitly.
+    out.flush()
+        .expect("Failed to flush examples/parse_output.txt");
+
+    println!("Wrote examples/parse_output.txt");
+}
@@ -0,0 +1,195 @@
+use enum_iterator::Sequence;
+use qsc_data_structures::span::Span;
+use std::str::CharIndices;
+use std::{
+    fmt::{self, Display, Formatter},
+    iter::Peekable,
+};
+
+/// A single lexed token: what kind it is and where it sits in the source text.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct Token {
+    /// The category of the token.
+    pub kind: TokenKind,
+    /// Byte offsets `lo..hi` of the token within the lexer's input.
+    pub span: Span,
+}
+
+/// The kinds of token the lexer can produce.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)]
+pub enum TokenKind {
+    Newline,         // `\n`, together with the whitespace that directly follows it
+    Uint,            // unsigned integers (digits only)
+    Double,          // numbers with a fractional part and/or an exponent
+    InstructionName, // identifiers such as H, X, CNOT, and words introduced by `#!`
+    Rec,             // `rec` plus its bracket group, e.g. rec[-1]
+    Sweep,           // `sweep` plus its bracket group, e.g. sweep[5]
+    Tag,             // a bracket group on its own: "[...]"
+    Open(Delim),     // ( {
+    Close(Delim),    // ) }
+    Star,            // *
+    Bang,            // !
+    Comma,           // ,
+    Unknown,         // any character that no other rule recognizes
+}
+
+impl Display for TokenKind {
+    /// Writes the lowercase label of the token kind; delimiter kinds include the
+    /// delimiter's own label, e.g. `open(paren)`.
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        let label = match self {
+            // The two delimiter kinds are the only ones that need formatting.
+            TokenKind::Open(delim) => return write!(f, "open({delim})"),
+            TokenKind::Close(delim) => return write!(f, "close({delim})"),
+            TokenKind::Newline => "newline",
+            TokenKind::Uint => "uint",
+            TokenKind::Double => "double",
+            TokenKind::InstructionName => "instruction_name",
+            TokenKind::Rec => "rec",
+            TokenKind::Sweep => "sweep",
+            TokenKind::Tag => "tag",
+            TokenKind::Star => "star",
+            TokenKind::Bang => "bang",
+            TokenKind::Comma => "comma",
+            TokenKind::Unknown => "unknown",
+        };
+        f.write_str(label)
+    }
+}
+
+/// The delimiter pairs recognized by the lexer.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)]
+pub enum Delim {
+    /// Parentheses: `(` and `)`.
+    Paren,
+    /// Braces: `{` and `}`.
+    Brace,
+}
+
+impl Display for Delim {
+    /// Writes the lowercase label of the delimiter: `paren` or `brace`.
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        f.write_str(match *self {
+            Delim::Paren => "paren",
+            Delim::Brace => "brace",
+        })
+    }
+}
+
+/// An iterator that splits source text into [`Token`]s.
+pub struct Lexer<'a> {
+    /// The complete source text being lexed.
+    input: &'a str,
+    /// Byte length of `input`; used as the end offset once the characters run out.
+    input_len: u32,
+    /// The remaining characters paired with their byte offsets, with one-item lookahead.
+    chars: Peekable<CharIndices<'a>>,
+}
+
+impl<'a> Lexer<'a> {
+    /// Creates a lexer over `input`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `input` is longer than `u32::MAX` bytes, since token offsets are stored
+    /// as `u32`.
+    pub fn new(input: &'a str) -> Self {
+        Self {
+            input,
+            input_len: input
+                .len()
+                .try_into()
+                .expect("input length should fit into u32"),
+            chars: input.char_indices().peekable(),
+        }
+    }
+
+    /// Consumes characters for as long as `f` accepts the next one.
+    fn eat_while(&mut self, f: impl Fn(char) -> bool) {
+        while self.chars.next_if(|&(_, c)| f(c)).is_some() {}
+    }
+
+    /// Consumes a run of whitespace, line breaks included.
+    fn whitespace(&mut self) {
+        self.eat_while(char::is_whitespace);
+    }
+
+    /// Consumes the rest of the current line and the whitespace that follows it.
+    fn comment(&mut self) {
+        self.eat_while(|c| c != '\n');
+        self.whitespace();
+    }
+
+    /// Scans the remainder of a number whose first digit the caller already consumed.
+    ///
+    /// Returns [`TokenKind::Double`] when a `.` fraction or an `e`/`E` exponent (with an
+    /// optional sign) is present, and [`TokenKind::Uint`] otherwise.
+    fn scan_number(&mut self) -> TokenKind {
+        self.eat_while(|c| c.is_ascii_digit());
+        let mut is_double = false;
+        if self.chars.next_if(|&(_, c)| c == '.').is_some() {
+            self.eat_while(|c| c.is_ascii_digit());
+            is_double = true;
+        }
+        if self
+            .chars
+            .next_if(|&(_, c)| c == 'e' || c == 'E')
+            .is_some()
+        {
+            // Scientific notation: an optional sign followed by the exponent digits.
+            self.chars.next_if(|&(_, c)| c == '+' || c == '-');
+            self.eat_while(|c| c.is_ascii_digit());
+            is_double = true;
+        }
+        if is_double {
+            TokenKind::Double
+        } else {
+            TokenKind::Uint
+        }
+    }
+
+    /// Scans the remainder of an identifier that starts at byte offset `lo`; the caller
+    /// already consumed its first character.
+    ///
+    /// The exact words `rec` and `sweep` become [`TokenKind::Rec`] / [`TokenKind::Sweep`]
+    /// when a `[` follows immediately; the token then extends through the next `]`, or to
+    /// the end of input if there is none. Every other identifier, including a bare `rec`
+    /// or `sweep` with no bracket, is a [`TokenKind::InstructionName`].
+    fn scan_identifier(&mut self, lo: usize) -> TokenKind {
+        self.eat_while(|c| c.is_alphanumeric() || c == '_');
+        let hi: usize = self
+            .chars
+            .peek()
+            .map_or(self.input_len as usize, |&(i, _)| i);
+        let bracketed_kind = match &self.input[lo..hi] {
+            "rec" => TokenKind::Rec,
+            "sweep" => TokenKind::Sweep,
+            _ => return TokenKind::InstructionName,
+        };
+        // Without an opening bracket there is no group to consume; scanning for `]` anyway
+        // would swallow unrelated text up to some later bracket.
+        if self.chars.next_if(|&(_, c)| c == '[').is_none() {
+            return TokenKind::InstructionName;
+        }
+        self.eat_while(|c| c != ']');
+        self.chars.next_if(|&(_, c)| c == ']');
+        bracketed_kind
+    }
+}
+
+impl Lexer<'_> {
+    /// Returns the byte offset of the next unread character, or the input length when
+    /// the input is exhausted.
+    fn peek_offset(&mut self) -> u32 {
+        let end = self.input_len;
+        self.chars.peek().map_or(end, |&(offset, _)| {
+            u32::try_from(offset).expect("offset should fit into u32")
+        })
+    }
+
+    /// Returns true when the next character starts a plain `#` comment rather than a
+    /// `#!` word.
+    fn at_plain_comment(&mut self) -> bool {
+        match self.chars.peek() {
+            Some(&(offset, '#')) => !self.input[offset..].starts_with("#!"),
+            _ => false,
+        }
+    }
+
+    /// Skips consecutive plain comments along with the whitespace after each of them.
+    fn skip_comment_lines(&mut self) {
+        while self.at_plain_comment() {
+            self.comment();
+        }
+    }
+}
+
+impl Iterator for Lexer<'_> {
+    type Item = Token;
+
+    /// Returns the next token, or `None` once the input is exhausted.
+    ///
+    /// Spaces, tabs and `#` comments produce no tokens. A line that contains a token always
+    /// ends with a [`TokenKind::Newline`], even when trailing spaces or a trailing comment
+    /// sit between the last token and the line break. Blank lines and comment-only lines
+    /// after a newline are folded into that newline, and blank or comment-only lines at the
+    /// very start of the input produce nothing.
+    fn next(&mut self) -> Option<Self::Item> {
+        use Delim::{Brace, Paren};
+        // Skipped text loops back here instead of recursing, so the stack depth does not
+        // grow with the number of skipped runs.
+        loop {
+            let (offset, c) = self.chars.next()?;
+            let lo: u32 = offset.try_into().expect("offset should fit into u32");
+            let token_kind = match c {
+                '\n' => {
+                    self.whitespace();
+                    // The span covers the line break and the whitespace after it; the
+                    // comment-only lines skipped next are not part of it.
+                    let hi = self.peek_offset();
+                    self.skip_comment_lines();
+                    return Some(Token {
+                        kind: TokenKind::Newline,
+                        span: Span { lo, hi },
+                    });
+                }
+                ' ' | '\t' => {
+                    if offset == 0 {
+                        // No token precedes this whitespace, so the blank and comment-only
+                        // lines that follow it can be dropped entirely.
+                        self.whitespace();
+                        self.skip_comment_lines();
+                    } else {
+                        // Stop in front of the line break so that it still yields a newline.
+                        self.eat_while(|c| c != '\n' && c.is_whitespace());
+                    }
+                    continue;
+                }
+                '#' => {
+                    if self.chars.next_if(|&(_, c)| c == '!').is_some() {
+                        self.eat_while(|c| !c.is_whitespace());
+                        TokenKind::InstructionName
+                    } else {
+                        if offset == 0 {
+                            // Comment at the very start of the input: nothing to terminate.
+                            self.comment();
+                            self.skip_comment_lines();
+                        } else {
+                            // Leave the line break in place so the line is still terminated.
+                            self.eat_while(|c| c != '\n');
+                        }
+                        continue;
+                    }
+                }
+                '(' => TokenKind::Open(Paren),
+                ')' => TokenKind::Close(Paren),
+                '{' => TokenKind::Open(Brace),
+                '}' => TokenKind::Close(Brace),
+                '*' => TokenKind::Star,
+                '!' => TokenKind::Bang,
+                ',' => TokenKind::Comma,
+                '0'..='9' => self.scan_number(),
+                'A'..='Z' | 'a'..='z' => self.scan_identifier(lo as usize),
+                '[' => {
+                    self.eat_while(|c| c != ']');
+                    self.chars.next_if(|&(_, c)| c == ']');
+                    TokenKind::Tag
+                }
+                _ => TokenKind::Unknown,
+            };
+
+            let hi = self.peek_offset();
+            return Some(Token {
+                kind: token_kind,
+                span: Span { lo, hi },
+            });
+        }
+    }
+}
+
+//TODO: Deal with escaping
@@ -0,0 +1,2 @@
+pub mod lex;
+pub mod parser;