-
Notifications
You must be signed in to change notification settings - Fork 206
Stim compiler #3305
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Stim compiler #3305
Changes from 12 commits
cacd362
8716674
35804ba
d250224
5462765
c66a59d
2c03a10
561e3e9
c07ee11
a73fc12
85b0955
a6cbe23
9c724bb
3a30469
ff29da3
8bcb3e7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| [package] | ||
| name = "qsc_stim_parser" | ||
| edition.workspace = true | ||
| version.workspace = true | ||
|
|
||
| [dependencies] | ||
| enum-iterator.workspace = true | ||
| qsc_data_structures = { path = "../qsc_data_structures" } | ||
|
|
||
| [dev-dependencies] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| use qsc_stim_parser::lex::Lexer; | ||
| use std::fs; | ||
| use std::io::Write; | ||
|
|
||
| fn main() { | ||
| let stim_code = | ||
| fs::read_to_string("examples/example.stim").expect("Failed to read examples/example.stim"); | ||
|
|
||
| let mut out = | ||
| fs::File::create("examples/lex_output.txt").expect("Failed to create output file"); | ||
|
|
||
| writeln!(out, "{:<20} {:<10} {:}", "TOKEN KIND", "SPAN", "TEXT").unwrap(); | ||
| writeln!(out, "{:-<50}", "").unwrap(); | ||
|
|
||
| let lexer = Lexer::new(&stim_code); | ||
| for token in lexer { | ||
| let text = &stim_code[token.span.lo as usize..token.span.hi as usize]; | ||
| let text_display = match token.kind { | ||
| qsc_stim_parser::lex::TokenKind::Newline => "\\n".to_string(), | ||
| _ => format!("{:?}", text), | ||
| }; | ||
| writeln!( | ||
| out, | ||
| "{:<20} {:<10} {}", | ||
| token.kind.to_string(), | ||
| format!("{}..{}", token.span.lo, token.span.hi), | ||
| text_display | ||
| ) | ||
| .unwrap(); | ||
| } | ||
|
|
||
| println!("Wrote examples/lex_output.txt"); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| use qsc_stim_parser::parser::{Circuit, Instruction, Item, Pauli, Target, TargetKind, parse}; | ||
| use std::fs; | ||
| use std::io::Write; | ||
|
|
||
| fn write_circuit(out: &mut impl Write, circuit: &Circuit) { | ||
| writeln!(out, "(circuit").unwrap(); | ||
| for item in &circuit.items { | ||
| write_item(out, item, 1); | ||
| } | ||
| writeln!(out, ")").unwrap(); | ||
| } | ||
|
|
||
| fn write_item(out: &mut impl Write, item: &Item, indent: usize) { | ||
| let pad = " ".repeat(indent); | ||
| match item { | ||
| Item::Line(line) => { | ||
| write!(out, "{pad}(").unwrap(); | ||
| write_instruction(out, &line.instruction); | ||
| writeln!(out, ")").unwrap(); | ||
| } | ||
| Item::Block(block) => { | ||
| write!(out, "{pad}(").unwrap(); | ||
| write_instruction(out, &block.block_instruction); | ||
| writeln!(out).unwrap(); | ||
| for item in &block.items { | ||
| write_item(out, item, indent + 1); | ||
| } | ||
| writeln!(out, "{pad})").unwrap(); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| fn write_instruction(out: &mut impl Write, instr: &Instruction) { | ||
| write!(out, "{}", instr.name).unwrap(); | ||
| if let Some(tag) = &instr.tag { | ||
| write!(out, "[{}]", tag).unwrap(); | ||
| } | ||
| if !instr.args.is_empty() { | ||
| for arg in &instr.args { | ||
| write!(out, " {}", arg).unwrap(); | ||
| } | ||
| } | ||
| for target in &instr.targets { | ||
| write!(out, " ").unwrap(); | ||
| write_target(out, target); | ||
| } | ||
| } | ||
|
|
||
| fn write_target(out: &mut impl Write, target: &Target) { | ||
| match &target.kind { | ||
| TargetKind::Qubit { negated, value } => { | ||
| if *negated { | ||
| write!(out, "!").unwrap(); | ||
| } | ||
| write!(out, "{}", value).unwrap(); | ||
| } | ||
| TargetKind::MeasurementRecord { value } => write!(out, "rec[-{}]", value).unwrap(), | ||
| TargetKind::SweepBit { value } => write!(out, "sweep[{}]", value).unwrap(), | ||
| TargetKind::Pauli { | ||
| negated, | ||
| pauli, | ||
| value, | ||
| } => { | ||
| if *negated { | ||
| write!(out, "!").unwrap(); | ||
| } | ||
| let p = match pauli { | ||
| Pauli::X => "X", | ||
| Pauli::Y => "Y", | ||
| Pauli::Z => "Z", | ||
| }; | ||
| write!(out, "{}{}", p, value).unwrap(); | ||
| } | ||
| TargetKind::Combiner => write!(out, "*").unwrap(), | ||
| } | ||
| } | ||
|
|
||
| fn main() { | ||
| let stim_code = | ||
| fs::read_to_string("examples/example.stim").expect("Failed to read examples/example.stim"); | ||
|
|
||
| let circuit = parse(&stim_code); | ||
|
|
||
| let mut out = | ||
| fs::File::create("examples/parse_output.txt").expect("Failed to create output file"); | ||
| write_circuit(&mut out, &circuit); | ||
|
|
||
| println!("Wrote examples/parse_output.txt"); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,195 @@ | ||
| use enum_iterator::Sequence; | ||
| use qsc_data_structures::span::Span; | ||
| use std::str::CharIndices; | ||
| use std::{ | ||
| fmt::{self, Display, Formatter}, | ||
| iter::Peekable, | ||
| }; | ||
|
|
||
| #[derive(Clone, Copy, Debug, Eq, PartialEq)] | ||
| pub struct Token { | ||
| pub kind: TokenKind, | ||
| pub span: Span, | ||
| } | ||
|
|
||
| #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] | ||
| pub enum TokenKind { | ||
| Newline, // \n | ||
| Uint, // unsigned integers | ||
| Double, // floating-point numbers | ||
| InstructionName, // H, X, CNOT, etc. | ||
| Rec, // rec[- ...] | ||
| Sweep, // sweep[...] | ||
| Tag, // "[...]" | ||
| Open(Delim), // ( { | ||
| Close(Delim), // ) } | ||
| Star, // * | ||
| Bang, // ! | ||
| Comma, // , | ||
| Unknown, // unknown token | ||
| } | ||
|
|
||
| impl Display for TokenKind { | ||
| fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { | ||
| match self { | ||
| TokenKind::Newline => f.write_str("newline"), | ||
| TokenKind::Uint => f.write_str("uint"), | ||
| TokenKind::Double => f.write_str("double"), | ||
| TokenKind::InstructionName => f.write_str("instruction_name"), | ||
| TokenKind::Rec => f.write_str("rec"), | ||
| TokenKind::Sweep => f.write_str("sweep"), | ||
| TokenKind::Tag => write!(f, "tag"), | ||
| TokenKind::Open(delim) => write!(f, "open({})", delim), | ||
| TokenKind::Close(delim) => write!(f, "close({})", delim), | ||
| TokenKind::Star => write!(f, "star"), | ||
| TokenKind::Bang => write!(f, "bang"), | ||
| TokenKind::Comma => write!(f, "comma"), | ||
| TokenKind::Unknown => write!(f, "unknown"), | ||
| } | ||
| } | ||
| } | ||
|
|
||
| #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] | ||
| pub enum Delim { | ||
| Paren, | ||
| Brace, | ||
| } | ||
|
|
||
| impl Display for Delim { | ||
| fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { | ||
| match self { | ||
| Delim::Paren => f.write_str("paren"), | ||
| Delim::Brace => f.write_str("brace"), | ||
| } | ||
| } | ||
| } | ||
|
|
||
/// A lexer over a borrowed source string.
pub struct Lexer<'a> {
    /// The complete source text being lexed.
    input: &'a str,
    /// Byte length of `input`; used as the end offset once `chars` is exhausted.
    input_len: u32,
    /// Cursor over the remaining `(byte offset, char)` pairs of `input`.
    chars: Peekable<CharIndices<'a>>,
}
|
|
||
| impl<'a> Lexer<'a> { | ||
| pub fn new(input: &'a str) -> Self { | ||
| Self { | ||
| input, | ||
| input_len: input | ||
| .len() | ||
| .try_into() | ||
| .expect("input length should fit into u32"), | ||
| chars: input.char_indices().peekable(), | ||
| } | ||
| } | ||
|
|
||
| fn eat_while(&mut self, mut f: impl Fn(char) -> bool) { | ||
| while self.chars.next_if(|i| f(i.1)).is_some() {} | ||
| } | ||
|
|
||
| fn whitespace(&mut self) { | ||
| self.eat_while(char::is_whitespace); | ||
| } | ||
|
|
||
| fn comment(&mut self) { | ||
| self.eat_while(|c| c != '\n'); | ||
| self.whitespace(); | ||
| } | ||
|
|
||
| fn scan_number(&mut self) -> TokenKind { | ||
| self.eat_while(|c| c.is_ascii_digit()); | ||
| let mut is_double = false; | ||
| if self.chars.next_if(|(_, c)| *c == '.').is_some() { | ||
| self.eat_while(|c| c.is_ascii_digit()); | ||
| is_double = true; | ||
| } | ||
| if self | ||
| .chars | ||
| .next_if(|(_, c)| *c == 'e' || *c == 'E') | ||
| .is_some() | ||
| { | ||
| // scientific notation | ||
| self.chars.next_if(|(_, c)| *c == '+' || *c == '-'); | ||
| self.eat_while(|c| c.is_ascii_digit()); | ||
| is_double = true; | ||
| } | ||
| if is_double { | ||
| TokenKind::Double | ||
| } else { | ||
| TokenKind::Uint | ||
| } | ||
| } | ||
|
|
||
| fn scan_identifier(&mut self, lo: usize) -> TokenKind { | ||
| self.eat_while(|c| c.is_alphanumeric() || c == '_'); | ||
| let hi: usize = self | ||
| .chars | ||
| .peek() | ||
| .map_or(self.input_len as usize, |(i, _)| *i); | ||
| // TODO: What if some identifier starts with "rec" but is not a rec token? | ||
Check noticeCode scanning / devskim A "TODO" or similar was left in source code, possibly indicating incomplete functionality Note
Suspicious comment
|
||
|
|
||
| match &self.input[lo..hi] { | ||
| "rec" => { | ||
| self.eat_while(|c| c != ']'); | ||
| self.chars.next_if(|(_, c)| *c == ']'); | ||
| TokenKind::Rec | ||
| } | ||
| "sweep" => { | ||
| self.eat_while(|c| c != ']'); | ||
| self.chars.next_if(|(_, c)| *c == ']'); | ||
| TokenKind::Sweep | ||
| } | ||
| _ => TokenKind::InstructionName, | ||
| } | ||
| } | ||
| } | ||
|
|
||
| impl Iterator for Lexer<'_> { | ||
| type Item = Token; | ||
|
|
||
| fn next(&mut self) -> Option<Self::Item> { | ||
| use Delim::{Brace, Paren}; | ||
| let (offset, c) = self.chars.next()?; | ||
| let lo: u32 = offset.try_into().expect("offset should fit into u32"); | ||
| let token_kind = match c { | ||
| '\n' => { | ||
| self.whitespace(); | ||
| TokenKind::Newline | ||
| } | ||
| ' ' | '\t' => { | ||
| self.whitespace(); | ||
| return self.next(); | ||
| } | ||
| '#' => { | ||
| if self.chars.next_if(|(_, c)| *c == '!').is_some() { | ||
| self.eat_while(|c| !c.is_whitespace()); | ||
| TokenKind::InstructionName | ||
| } else { | ||
| self.comment(); | ||
| return self.next(); | ||
| } | ||
| } | ||
| '(' => TokenKind::Open(Paren), | ||
| ')' => TokenKind::Close(Paren), | ||
| '{' => TokenKind::Open(Brace), | ||
| '}' => TokenKind::Close(Brace), | ||
| '*' => TokenKind::Star, | ||
| '!' => TokenKind::Bang, | ||
| ',' => TokenKind::Comma, | ||
| '0'..='9' => self.scan_number(), | ||
| 'A'..='Z' | 'a'..='z' => self.scan_identifier(lo as usize), | ||
| '[' => { | ||
| self.eat_while(|c| c != ']'); | ||
| self.chars.next_if(|(_, c)| *c == ']'); | ||
| TokenKind::Tag | ||
| } | ||
| _ => TokenKind::Unknown, | ||
| }; | ||
|
|
||
| let hi: u32 = self.chars.peek().map_or(self.input_len, |(i, _)| *i as u32); | ||
| Some(Token { | ||
| kind: token_kind, | ||
| span: Span { lo, hi }, | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| //TODO: Deal with escaping | ||
Check noticeCode scanning / devskim A "TODO" or similar was left in source code, possibly indicating incomplete functionality Note
Suspicious comment
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| pub mod lex; | ||
| pub mod parser; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see this file anywhere in the PR. Also, most of our tests use inline input/output strings for verification, rather than external files.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is for my manual testing, so that I can parse whole files at a time without having to edit the code every time. I will make sure to remove it before the official PR.