//! Macro expansion for the utf8 parser state table use std::fmt; use syntex::Registry; use syntex_syntax::ast::{self, ExprKind, Arm, Expr, PatKind, LitKind, Pat}; use syntex_syntax::codemap::Span; use syntex_syntax::ext::base::{ExtCtxt, MacEager, MacResult, DummyResult}; use syntex_syntax::ext::build::AstBuilder; use syntex_syntax::parse::token::{Token, DelimToken}; use syntex_syntax::parse::parser::Parser; use syntex_syntax::parse::PResult; use syntex_syntax::ptr::P; use syntex_syntax::tokenstream::TokenTree; #[path="../../../utf8parse/src/types.rs"] mod types; use self::types::{State, Action, pack}; pub fn register(registry: &mut Registry) { registry.add_macro("utf8_state_table", expand_state_table); } fn state_from_str(s: &S) -> Result where S: AsRef { Ok(match s.as_ref() { "State::Ground" => State::Ground, "State::Tail3" => State::Tail3, "State::Tail2" => State::Tail2, "State::Tail1" => State::Tail1, "State::U3_2_e0" => State::U3_2_e0, "State::U3_2_ed" => State::U3_2_ed, "State::Utf8_4_3_f0" => State::Utf8_4_3_f0, "State::Utf8_4_3_f4" => State::Utf8_4_3_f4, _ => return Err(()) }) } fn action_from_str(s: &S) -> Result where S: AsRef { Ok(match s.as_ref() { "Action::InvalidSequence" => Action::InvalidSequence, "Action::EmitByte" => Action::EmitByte, "Action::SetByte1" => Action::SetByte1, "Action::SetByte2" => Action::SetByte2, "Action::SetByte2Top" => Action::SetByte2Top, "Action::SetByte3" => Action::SetByte3, "Action::SetByte3Top" => Action::SetByte3Top, "Action::SetByte4" => Action::SetByte4, _ => return Err(()) }) } fn parse_table_input_mappings<'a>(parser: &mut Parser<'a>) -> PResult<'a, Vec> { // Must start on open brace try!(parser.expect(&Token::OpenDelim(DelimToken::Brace))); let mut arms: Vec = Vec::new(); while parser.token != Token::CloseDelim(DelimToken::Brace) { match parser.parse_arm() { Ok(arm) => arms.push(arm), Err(e) => { // Recover by skipping to the end of the block. return Err(e); } } } // Consume the closing brace parser.bump(); Ok(arms) } /// Expressions describing state transitions and actions #[derive(Debug)] struct TableDefinitionExprs { state_expr: P, mapping_arms: Vec, } fn state_from_expr(expr: P, cx: &mut ExtCtxt) -> Result { let s = match expr.node { ExprKind::Path(ref _qself, ref path) => { path.to_string() }, _ => { cx.span_err(expr.span, "expected State"); return Err(()) } }; state_from_str(&s).map_err(|_| { cx.span_err(expr.span, "expected State"); () }) } fn u8_lit_from_expr(expr: &Expr, cx: &mut ExtCtxt) -> Result { static MSG: &'static str = "expected u8 int literal"; match expr.node { ExprKind::Lit(ref lit) => { match lit.node { LitKind::Int(val, _) => { Ok(val as u8) }, _ => { cx.span_err(lit.span, MSG); return Err(()); } } }, _ => { cx.span_err(expr.span, MSG); return Err(()); } } } fn input_mapping_from_arm(arm: Arm, cx: &mut ExtCtxt) -> Result { let Arm { pats, body, .. } = arm; let input = try!(InputDefinition::from_pat(&pats[0], cx)); let transition = try!(Transition::from_expr(&body, cx)); Ok(InputMapping { input: input, transition: transition, }) } /// What happens when certain input is received #[derive(Copy, Clone)] enum Transition { State(State), Action(Action), StateAction(State, Action), } impl fmt::Debug for Transition { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match *self { Transition::State(state) => try!(write!(f, "State({:?})", state)), Transition::Action(action) => try!(write!(f, "Action({:?})", action)), Transition::StateAction(state, action) => { try!(write!(f, "StateAction({:?}, {:?})", state, action)); } } write!(f, " -> {:?}", self.pack_u8()) } } impl Transition { // State is stored in the top 4 bits fn pack_u8(&self) -> u8 { match *self { Transition::State(state) => pack(state, Action::InvalidSequence), Transition::Action(action) => pack(State::Ground, action), Transition::StateAction(state, action) => pack(state, action), } } } impl Transition { fn from_expr(expr: &Expr, cx: &mut ExtCtxt) -> Result { match expr.node { ExprKind::Tup(ref tup_exprs) => { let mut action = None; let mut state = None; for tup_expr in tup_exprs { if let ExprKind::Path(_, ref path) = tup_expr.node { let path_str = path.to_string(); if path_str.starts_with('A') { action = Some(try!(action_from_str(&path_str) .map_err(|_| { cx.span_err(expr.span, "invalid action"); }))); } else { state = Some(try!(state_from_str(&path_str) .map_err(|_| { cx.span_err(expr.span, "invalid state"); }))); } } } match (action, state) { (Some(action), Some(state)) => Ok(Transition::StateAction(state, action)), (None, Some(state)) => Ok(Transition::State(state)), (Some(action), None) => Ok(Transition::Action(action)), _ => { cx.span_err(expr.span, "expected Action and/or State"); Err(()) } } }, ExprKind::Path(_, ref path) => { // Path can be Action or State let path_str = path.to_string(); if path_str.starts_with('A') { let action = try!(action_from_str(&path_str) .map_err(|_| { cx.span_err(expr.span, "invalid action"); })); Ok(Transition::Action(action)) } else { let state = try!(state_from_str(&path_str) .map_err(|_| { cx.span_err(expr.span, "invalid state"); })); Ok(Transition::State(state)) } }, _ => { cx.span_err(expr.span, "expected Action and/or State"); Err(()) } } } } #[derive(Debug)] enum InputDefinition { Specific(u8), Range { start: u8, end: u8 } } impl InputDefinition { fn from_pat(pat: &Pat, cx: &mut ExtCtxt) -> Result { Ok(match pat.node { PatKind::Lit(ref lit_expr) => { InputDefinition::Specific(try!(u8_lit_from_expr(&lit_expr, cx))) }, PatKind::Range(ref start_expr, ref end_expr) => { InputDefinition::Range { start: try!(u8_lit_from_expr(start_expr, cx)), end: try!(u8_lit_from_expr(end_expr, cx)), } }, _ => { cx.span_err(pat.span, "expected literal or range expression"); return Err(()) } }) } } #[derive(Debug)] struct InputMapping { input: InputDefinition, transition: Transition, } #[derive(Debug)] struct TableDefinition { state: State, mappings: Vec, } fn parse_raw_definitions( definitions: Vec, cx: &mut ExtCtxt ) -> Result, ()> { let mut out = Vec::new(); for raw in definitions { let TableDefinitionExprs { state_expr, mapping_arms } = raw; let state = try!(state_from_expr(state_expr, cx)); let mut mappings = Vec::new(); for arm in mapping_arms { mappings.push(try!(input_mapping_from_arm(arm, cx))); } out.push(TableDefinition { state: state, mappings: mappings, }) } Ok(out) } fn parse_table_definition<'a>(parser: &mut Parser<'a>) -> PResult<'a, TableDefinitionExprs> { let state_expr = try!(parser.parse_expr()); try!(parser.expect(&Token::FatArrow)); let mappings = try!(parse_table_input_mappings(parser)); Ok(TableDefinitionExprs { state_expr: state_expr, mapping_arms: mappings }) } fn parse_table_definition_list<'a>(parser: &mut Parser<'a>) -> PResult<'a, Vec> { let mut definitions = Vec::new(); while parser.token != Token::Eof { definitions.push(try!(parse_table_definition(parser))); parser.eat(&Token::Comma); } Ok(definitions) } fn build_state_tables(defs: T) -> [[u8; 256]; 8] where T: AsRef<[TableDefinition]> { let mut result = [[0u8; 256]; 8]; for def in defs.as_ref() { let state = def.state; let state = state as u8; let transitions = &mut result[state as usize]; for mapping in &def.mappings { let trans = mapping.transition.pack_u8(); match mapping.input { InputDefinition::Specific(idx) => { transitions[idx as usize] = trans; }, InputDefinition::Range { start, end } => { for idx in start..end { transitions[idx as usize] = trans; } transitions[end as usize] = trans; }, } } } result } fn build_table_ast(cx: &mut ExtCtxt, sp: Span, table: [[u8; 256]; 8]) -> P { let table = table.iter() .map(|list| { let exprs = list.iter() .map(|num| cx.expr_u8(sp, *num)) .collect(); cx.expr_vec(sp, exprs) }) .collect(); cx.expr_vec(sp, table) } fn expand_state_table<'cx>( cx: &'cx mut ExtCtxt, sp: Span, args: &[TokenTree]) -> Box { macro_rules! ptry { ($pres:expr) => { match $pres { Ok(val) => val, Err(mut err) => { err.emit(); return DummyResult::any(sp); } } } } // Parse the lookup spec let mut parser: Parser = cx.new_parser_from_tts(args); let definitions = ptry!(parse_table_definition_list(&mut parser)); let definitions = match parse_raw_definitions(definitions, cx) { Ok(definitions) => definitions, Err(_) => return DummyResult::any(sp), }; let table = build_state_tables(&definitions); let ast = build_table_ast(cx, sp, table); MacEager::expr(ast) }