Skip to content
Draft
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ members = [
"source/compiler/qsc_hir",
"source/compiler/qsc_openqasm_compiler",
"source/compiler/qsc_openqasm_parser",
"source/compiler/qsc_stim_parser",
"source/compiler/qsc_linter",
"source/compiler/qsc_lowerer",
"source/compiler/qsc_parse",
Expand Down
10 changes: 10 additions & 0 deletions source/compiler/qsc_stim_parser/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[package]
name = "qsc_stim_parser"
edition.workspace = true
version.workspace = true

[dependencies]
enum-iterator.workspace = true
qsc_data_structures = { path = "../qsc_data_structures" }

[dev-dependencies]
33 changes: 33 additions & 0 deletions source/compiler/qsc_stim_parser/examples/lex_stim.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
use qsc_stim_parser::lex::Lexer;
use std::fs;
use std::io::Write;

/// Manual-testing helper: lexes `examples/example.stim` and dumps one line per
/// token (kind, span, source text) into `examples/lex_output.txt`.
///
/// Panics if the input file cannot be read, the output file cannot be created,
/// or any write to the output file fails.
fn main() {
let stim_code =
fs::read_to_string("examples/example.stim").expect("Failed to read examples/example.stim");

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see this file anywhere in the PR. Also, most of our tests use inline string of input/output to verify, rather than external files.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is for my manual testing, just so I can parse whole files at a time without having to edit the code every time. I will make sure to remove it before the official PR


let mut out =
fs::File::create("examples/lex_output.txt").expect("Failed to create output file");

// Column header followed by a 50-character dashed separator line.
writeln!(out, "{:<20} {:<10} {:}", "TOKEN KIND", "SPAN", "TEXT").unwrap();
writeln!(out, "{:-<50}", "").unwrap();

let lexer = Lexer::new(&stim_code);
for token in lexer {
// The span bounds are used directly as byte indices into the source text.
let text = &stim_code[token.span.lo as usize..token.span.hi as usize];
// Newline tokens are shown as a literal backslash-n so every token stays
// on a single output row; all other tokens use the `Debug` form of their text.
let text_display = match token.kind {
qsc_stim_parser::lex::TokenKind::Newline => "\\n".to_string(),
_ => format!("{:?}", text),
};
// Kind and span are turned into `String`s first and then padded by the
// width specifiers in the format string.
writeln!(
out,
"{:<20} {:<10} {}",
token.kind.to_string(),
format!("{}..{}", token.span.lo, token.span.hi),
text_display
)
.unwrap();
}

println!("Wrote examples/lex_output.txt");
}
89 changes: 89 additions & 0 deletions source/compiler/qsc_stim_parser/examples/parse_stim.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
use qsc_stim_parser::parser::{Circuit, Instruction, Item, Pauli, Target, TargetKind, parse};
use std::fs;
use std::io::Write;

/// Writes `circuit` to `out` as a parenthesised tree: a `(circuit` header line,
/// every top-level item rendered at indent level 1, then a closing `)` line.
///
/// Panics if writing to `out` fails.
fn write_circuit(out: &mut impl Write, circuit: &Circuit) {
    writeln!(out, "(circuit").unwrap();
    circuit
        .items
        .iter()
        .for_each(|entry| write_item(out, entry, 1));
    writeln!(out, ")").unwrap();
}

/// Writes one circuit item to `out`, prefixed by `indent` spaces.
///
/// A line item becomes `(<instruction>)` on a single row. A block item becomes
/// `(<instruction>` on its own row, followed by its children one indent level
/// deeper, followed by a padded closing `)` row.
///
/// Panics if writing to `out` fails.
fn write_item(out: &mut impl Write, item: &Item, indent: usize) {
    let pad = " ".repeat(indent);

    // Both variants open with the same padded parenthesis.
    write!(out, "{pad}(").unwrap();

    match item {
        Item::Line(single) => {
            write_instruction(out, &single.instruction);
            writeln!(out, ")").unwrap();
        }
        Item::Block(nested) => {
            write_instruction(out, &nested.block_instruction);
            writeln!(out).unwrap();
            let child_indent = indent + 1;
            for child in &nested.items {
                write_item(out, child, child_indent);
            }
            writeln!(out, "{pad})").unwrap();
        }
    }
}

/// Writes an instruction as `NAME[tag] arg... target...` without a trailing
/// line break.
///
/// The `[tag]` part appears only when the instruction carries a tag; every
/// argument and every target is preceded by a single space.
///
/// Panics if writing to `out` fails.
fn write_instruction(out: &mut impl Write, instr: &Instruction) {
    write!(out, "{}", instr.name).unwrap();

    if let Some(label) = instr.tag.as_ref() {
        write!(out, "[{label}]").unwrap();
    }

    // An empty argument list simply produces no output here.
    for argument in &instr.args {
        write!(out, " {argument}").unwrap();
    }

    for operand in &instr.targets {
        write!(out, " ").unwrap();
        write_target(out, operand);
    }
}

/// Writes a single instruction target in its textual form:
///
/// * qubit: optional `!` followed by the value,
/// * measurement record: `rec[-<value>]`,
/// * sweep bit: `sweep[<value>]`,
/// * Pauli target: optional `!`, then `X`/`Y`/`Z`, then the value,
/// * combiner: `*`.
///
/// Panics if writing to `out` fails.
fn write_target(out: &mut impl Write, target: &Target) {
    // Prefix emitted for negated targets; empty otherwise.
    let bang = |negated: bool| if negated { "!" } else { "" };

    match &target.kind {
        TargetKind::Combiner => write!(out, "*").unwrap(),
        TargetKind::SweepBit { value } => write!(out, "sweep[{value}]").unwrap(),
        TargetKind::MeasurementRecord { value } => write!(out, "rec[-{value}]").unwrap(),
        TargetKind::Qubit { negated, value } => {
            write!(out, "{}{value}", bang(*negated)).unwrap();
        }
        TargetKind::Pauli {
            negated,
            pauli,
            value,
        } => {
            let axis = match pauli {
                Pauli::X => "X",
                Pauli::Y => "Y",
                Pauli::Z => "Z",
            };
            write!(out, "{}{axis}{value}", bang(*negated)).unwrap();
        }
    }
}

/// Manual-testing helper: parses `examples/example.stim` and writes the
/// resulting circuit tree to `examples/parse_output.txt`.
///
/// Panics if the input cannot be read or the output file cannot be created or
/// written.
fn main() {
    let source = fs::read_to_string("examples/example.stim")
        .expect("Failed to read examples/example.stim");

    // Parse before touching the output file so a failing parse leaves any
    // previous dump in place.
    let parsed = parse(&source);

    let mut sink =
        fs::File::create("examples/parse_output.txt").expect("Failed to create output file");
    write_circuit(&mut sink, &parsed);

    println!("Wrote examples/parse_output.txt");
}
195 changes: 195 additions & 0 deletions source/compiler/qsc_stim_parser/src/lex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
use enum_iterator::Sequence;
use qsc_data_structures::span::Span;
use std::str::CharIndices;
use std::{
fmt::{self, Display, Formatter},
iter::Peekable,
};

/// A single lexed token: its category plus where it sits in the source text.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Token {
/// The category of the token.
pub kind: TokenKind,
/// Location of the token in the input. The lexer fills `lo` with the byte
/// offset of the token's first character and `hi` with the byte offset of the
/// first character after it (or the input length at end of input).
pub span: Span,
}

/// The category of a [`Token`] produced by the lexer.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)]
pub enum TokenKind {
/// A line break (`\n`).
Newline,
/// An unsigned integer: a run of ASCII digits with no fraction or exponent.
Uint,
/// A floating-point number: digits with a `.` fraction and/or an `e`/`E` exponent.
Double,
/// An instruction name such as `H`, `X` or `CNOT`.
InstructionName,
/// A measurement-record target of the form `rec[- ...]`.
Rec,
/// A sweep-bit target of the form `sweep[...]`.
Sweep,
/// A bracketed tag: `[...]`.
Tag,
/// An opening delimiter: `(` or `{`.
Open(Delim),
/// A closing delimiter: `)` or `}`.
Close(Delim),
/// The `*` character.
Star,
/// The `!` character.
Bang,
/// The `,` character.
Comma,
/// A character the lexer does not recognise.
Unknown,
}

/// Human-readable, lower-case label for each token kind. Delimiter tokens
/// embed the delimiter's own label, e.g. `open(paren)`.
impl Display for TokenKind {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // Delimiter variants need formatting; every other variant maps to a
        // fixed label.
        let label = match self {
            TokenKind::Open(delim) => return write!(f, "open({delim})"),
            TokenKind::Close(delim) => return write!(f, "close({delim})"),
            TokenKind::Newline => "newline",
            TokenKind::Uint => "uint",
            TokenKind::Double => "double",
            TokenKind::InstructionName => "instruction_name",
            TokenKind::Rec => "rec",
            TokenKind::Sweep => "sweep",
            TokenKind::Tag => "tag",
            TokenKind::Star => "star",
            TokenKind::Bang => "bang",
            TokenKind::Comma => "comma",
            TokenKind::Unknown => "unknown",
        };
        f.write_str(label)
    }
}

/// The delimiter pairs the lexer recognises for `TokenKind::Open` and
/// `TokenKind::Close`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)]
pub enum Delim {
/// Parentheses: `(` and `)`.
Paren,
/// Curly braces: `{` and `}`.
Brace,
}

/// Lower-case label for the delimiter kind (`paren` or `brace`).
impl Display for Delim {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let label = match *self {
            Delim::Brace => "brace",
            Delim::Paren => "paren",
        };
        f.write_str(label)
    }
}

/// Iterator that turns source text into a stream of [`Token`]s.
pub struct Lexer<'a> {
/// The complete source text being lexed; sliced to inspect identifier text.
input: &'a str,
/// Byte length of `input`, used as the end offset once the character
/// iterator is exhausted.
input_len: u32,
/// Remaining characters paired with their byte offsets, with one character
/// of lookahead.
chars: Peekable<CharIndices<'a>>,
}

impl<'a> Lexer<'a> {
    /// Creates a lexer positioned at the start of `input`.
    ///
    /// # Panics
    ///
    /// Panics if `input` is longer than `u32::MAX` bytes.
    pub fn new(input: &'a str) -> Self {
        Self {
            input,
            input_len: input
                .len()
                .try_into()
                .expect("input length should fit into u32"),
            chars: input.char_indices().peekable(),
        }
    }

    /// Consumes characters for as long as `f` returns `true` for the next one.
    /// The first rejected character is left unconsumed.
    fn eat_while(&mut self, f: impl Fn(char) -> bool) {
        while self.chars.next_if(|i| f(i.1)).is_some() {}
    }

    /// Consumes a run of whitespace, line breaks included.
    fn whitespace(&mut self) {
        self.eat_while(char::is_whitespace);
    }

    /// Skips the remainder of a `#` comment; expected to be called right after
    /// the `#` has been consumed.
    ///
    /// When the comment is the only thing on its line, the line break and any
    /// whitespace after it are skipped as well, so the line yields no tokens.
    /// When the comment trails other tokens, the line break is left in place so
    /// that it is still reported as a newline token and the line stays
    /// terminated.
    fn comment(&mut self) {
        let cursor = self
            .chars
            .peek()
            .map_or(self.input_len as usize, |(i, _)| *i);
        let consumed = &self.input[..cursor];
        let before_hash = consumed.strip_suffix('#').unwrap_or(consumed);
        let line_start = before_hash.rfind('\n').map_or(0, |i| i + 1);
        let owns_line = before_hash[line_start..].chars().all(char::is_whitespace);

        self.eat_while(|c| c != '\n');
        if owns_line {
            self.whitespace();
        }
    }

    /// Scans the rest of a number whose first digit has already been consumed.
    ///
    /// Returns [`TokenKind::Double`] when a `.` fraction or an `e`/`E` exponent
    /// (with optional sign) is present, otherwise [`TokenKind::Uint`]. The scan
    /// is lenient: digits after the `.` or the exponent marker are not required.
    fn scan_number(&mut self) -> TokenKind {
        self.eat_while(|c| c.is_ascii_digit());
        let mut is_double = false;
        if self.chars.next_if(|(_, c)| *c == '.').is_some() {
            self.eat_while(|c| c.is_ascii_digit());
            is_double = true;
        }
        if self
            .chars
            .next_if(|(_, c)| *c == 'e' || *c == 'E')
            .is_some()
        {
            // scientific notation
            self.chars.next_if(|(_, c)| *c == '+' || *c == '-');
            self.eat_while(|c| c.is_ascii_digit());
            is_double = true;
        }
        if is_double {
            TokenKind::Double
        } else {
            TokenKind::Uint
        }
    }

    /// Scans the rest of an identifier that starts at byte offset `lo` (its
    /// first character has already been consumed).
    ///
    /// The identifiers `rec` and `sweep` become [`TokenKind::Rec`] /
    /// [`TokenKind::Sweep`] only when a `[` follows immediately; the token then
    /// extends through the next `]` (or to end of input if there is none).
    /// Everything else, including a bare `rec` or `sweep`, is an
    /// [`TokenKind::InstructionName`], so a missing bracket can no longer
    /// swallow unrelated input up to some later `]`.
    fn scan_identifier(&mut self, lo: usize) -> TokenKind {
        self.eat_while(|c| c.is_alphanumeric() || c == '_');
        let hi: usize = self
            .chars
            .peek()
            .map_or(self.input_len as usize, |(i, _)| *i);

        let bracketed = match &self.input[lo..hi] {
            "rec" => TokenKind::Rec,
            "sweep" => TokenKind::Sweep,
            _ => return TokenKind::InstructionName,
        };

        if self.chars.next_if(|(_, c)| *c == '[').is_none() {
            return TokenKind::InstructionName;
        }
        self.eat_while(|c| c != ']');
        self.chars.next_if(|(_, c)| *c == ']');
        bracketed
    }
}

/// Produces tokens until the input is exhausted. Blank space and `#` comments
/// are skipped and never yield a token.
impl Iterator for Lexer<'_> {
    type Item = Token;

    /// Returns the next token, or `None` at end of input.
    ///
    /// Skipping is done in a loop rather than by recursion so that long runs of
    /// comment lines cannot grow the call stack.
    ///
    /// # Panics
    ///
    /// Panics if a character offset does not fit into `u32`.
    fn next(&mut self) -> Option<Self::Item> {
        use Delim::{Brace, Paren};

        loop {
            let (offset, c) = self.chars.next()?;
            let lo: u32 = offset.try_into().expect("offset should fit into u32");

            let token_kind = match c {
                '\n' => {
                    // Fold blank lines and leading indentation into this token.
                    self.whitespace();
                    TokenKind::Newline
                }
                // Horizontal whitespace, including the `\r` of a CRLF line
                // ending. Stop before `\n` so the line break is still reported
                // as a `Newline` token.
                ' ' | '\t' | '\r' => {
                    self.eat_while(|c| c != '\n' && c.is_whitespace());
                    continue;
                }
                '#' => {
                    if self.chars.next_if(|(_, c)| *c == '!').is_some() {
                        // `#!name` is lexed as an instruction name running up
                        // to the next whitespace character.
                        self.eat_while(|c| !c.is_whitespace());
                        TokenKind::InstructionName
                    } else {
                        self.comment();
                        continue;
                    }
                }
                '(' => TokenKind::Open(Paren),
                ')' => TokenKind::Close(Paren),
                '{' => TokenKind::Open(Brace),
                '}' => TokenKind::Close(Brace),
                '*' => TokenKind::Star,
                '!' => TokenKind::Bang,
                ',' => TokenKind::Comma,
                '0'..='9' => self.scan_number(),
                'A'..='Z' | 'a'..='z' => self.scan_identifier(offset),
                '[' => {
                    // A tag runs through the next `]` (or to end of input).
                    self.eat_while(|c| c != ']');
                    self.chars.next_if(|(_, c)| *c == ']');
                    TokenKind::Tag
                }
                _ => TokenKind::Unknown,
            };

            // The token ends where the next unconsumed character starts. The
            // cast cannot truncate: every offset is below `input_len`, which is
            // itself a `u32`.
            let hi: u32 = self.chars.peek().map_or(self.input_len, |(i, _)| *i as u32);
            return Some(Token {
                kind: token_kind,
                span: Span { lo, hi },
            });
        }
    }
}

//TODO: Deal with escaping

Check notice

Code scanning / devskim

A "TODO" or similar was left in source code, possibly indicating incomplete functionality Note

Suspicious comment
2 changes: 2 additions & 0 deletions source/compiler/qsc_stim_parser/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pub mod lex;
pub mod parser;
Loading
Loading