#![no_std]
extern crate utf8parse as utf8;
use core::mem;
mod table;
mod definitions;
use definitions::{Action, State, unpack};
use table::{EXIT_ACTIONS, ENTRY_ACTIONS, STATE_CHANGE};
impl State {
    /// Look up the action associated with leaving this state.
    #[inline(always)]
    pub fn exit_action(&self) -> Action {
        let index = *self as usize;
        // SAFETY: relies on `EXIT_ACTIONS` holding an entry for every `State`
        // discriminant. The table lives in the `table` module and is not
        // visible from here -- TODO confirm its length matches the enum.
        unsafe { *EXIT_ACTIONS.get_unchecked(index) }
    }

    /// Look up the action associated with entering this state.
    #[inline(always)]
    pub fn entry_action(&self) -> Action {
        let index = *self as usize;
        // SAFETY: relies on `ENTRY_ACTIONS` holding an entry for every `State`
        // discriminant. The table lives in the `table` module and is not
        // visible from here -- TODO confirm its length matches the enum.
        unsafe { *ENTRY_ACTIONS.get_unchecked(index) }
    }
}
/// Number of intermediate bytes the parser stores per sequence. Once this
/// many have been collected, any further one sets the `ignoring` flag instead.
const MAX_INTERMEDIATES: usize = 2;
/// Capacity, in bytes, of the buffer holding the raw contents of an OSC
/// string. Bytes arriving once the buffer is full are dropped.
const MAX_OSC_RAW: usize = 1024;
/// Capacity of both the numeric parameter array and the OSC parameter index
/// array.
const MAX_PARAMS: usize = 16;
/// Adapter handed to the UTF-8 decoder: field 0 is the performer that
/// receives decoded characters, field 1 is the parser state to reset once a
/// sequence has been consumed.
struct VtUtf8Receiver<'a, P>(&'a mut P, &'a mut State)
where
    P: Perform + 'a;
impl<'a, P: Perform> utf8::Receiver for VtUtf8Receiver<'a, P> {
fn codepoint(&mut self, c: char) {
self.0.print(c);
*self.1 = State::Ground;
}
fn invalid_sequence(&mut self) {
self.0.print('�');
*self.1 = State::Ground;
}
}
/// Byte-at-a-time parser driven by the state-change table.
///
/// Feed bytes through `advance`; the parser reports what it recognises by
/// calling methods on a `Perform` implementation.
pub struct Parser {
    /// Current state of the state machine.
    state: State,
    /// Intermediate bytes stored by the `Collect` action.
    intermediates: [u8; MAX_INTERMEDIATES],
    /// Number of valid entries in `intermediates`.
    intermediate_idx: usize,
    /// Numeric parameters that have been completed so far.
    params: [i64; MAX_PARAMS],
    /// Value of the parameter currently being accumulated digit by digit.
    param: i64,
    /// True once at least one digit has been folded into `param` since the
    /// last separator.
    collecting_param: bool,
    /// Number of valid entries in `params`.
    num_params: usize,
    /// Raw bytes of the current OSC string; `;` separators are not stored.
    osc_raw: [u8; MAX_OSC_RAW],
    /// `(start, end)` index pairs into `osc_raw`, one per OSC parameter.
    osc_params: [(usize, usize); MAX_PARAMS],
    /// Next write position in `osc_raw`.
    osc_idx: usize,
    /// Number of valid entries in `osc_params`.
    osc_num_params: usize,
    /// Set when an intermediate byte arrives after `MAX_INTERMEDIATES` have
    /// already been stored; passed to the performer as the `ignore` flag.
    ignoring: bool,
    /// Decoder used while the state machine is in `State::Utf8`.
    utf8_parser: utf8::Parser,
}
impl Parser {
    /// Create a new parser in the ground state with all buffers empty.
    pub fn new() -> Parser {
        Parser {
            state: State::Ground,
            intermediates: [0u8; MAX_INTERMEDIATES],
            intermediate_idx: 0,
            params: [0i64; MAX_PARAMS],
            param: 0,
            collecting_param: false,
            num_params: 0,
            osc_raw: [0; MAX_OSC_RAW],
            osc_params: [(0, 0); MAX_PARAMS],
            osc_idx: 0,
            osc_num_params: 0,
            ignoring: false,
            utf8_parser: utf8::Parser::new(),
        }
    }

    /// Numeric parameters completed so far for the current sequence.
    #[inline]
    fn params(&self) -> &[i64] {
        &self.params[..self.num_params]
    }

    /// Intermediate bytes collected so far for the current sequence.
    #[inline]
    fn intermediates(&self) -> &[u8] {
        &self.intermediates[..self.intermediate_idx]
    }

    /// Advance the parser by a single byte.
    ///
    /// Any action triggered by the byte is reported through `performer`.
    /// While a UTF-8 sequence is in progress the byte bypasses the state
    /// table and goes straight to the UTF-8 decoder.
    #[inline]
    pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) {
        if let State::Utf8 = self.state {
            self.process_utf8(performer, byte);
            return;
        }

        // Transitions listed under `Anywhere` take priority; a zero entry
        // means "nothing defined", so fall back to the current state's row.
        let mut change = STATE_CHANGE[State::Anywhere as usize][byte as usize];
        if change == 0 {
            change = STATE_CHANGE[self.state as usize][byte as usize];
        }

        let (state, action) = unpack(change);
        self.perform_state_change(performer, state, action, byte);
    }

    /// Feed one byte to the UTF-8 decoder. The receiver prints the decoded
    /// character (or a replacement) and resets `self.state` to ground.
    #[inline]
    fn process_utf8<P>(&mut self, performer: &mut P, byte: u8)
        where P: Perform
    {
        let mut receiver = VtUtf8Receiver(performer, &mut self.state);
        let utf8_parser = &mut self.utf8_parser;
        utf8_parser.advance(&mut receiver, byte);
    }

    /// Apply an unpacked table entry.
    ///
    /// A target of `State::Anywhere` means "no transition": only `action` is
    /// performed. Otherwise the current state's exit action, `action`, and
    /// the new state's entry action run in that order before the state is
    /// updated. `Action::None` is skipped in every position.
    #[inline]
    fn perform_state_change<P>(&mut self, performer: &mut P, state: State, action: Action, byte: u8)
        where P: Perform
    {
        macro_rules! maybe_action {
            ($action:expr, $arg:expr) => {
                match $action {
                    Action::None => (),
                    action => {
                        self.perform_action(performer, action, $arg);
                    },
                }
            }
        }

        match state {
            State::Anywhere => {
                self.perform_action(performer, action, byte);
            },
            state => {
                let exit_action = self.state.exit_action();
                maybe_action!(exit_action, 0);
                maybe_action!(action, byte);
                maybe_action!(state.entry_action(), 0);
                self.state = state;
            }
        }
    }

    /// Hand the recorded OSC parameters to the performer as byte slices
    /// borrowed from `osc_raw`.
    #[inline]
    fn osc_dispatch<P: Perform>(&self, performer: &mut P) {
        // Start from empty slices rather than uninitialized memory: creating
        // uninitialized references is undefined behaviour.
        let empty: &[u8] = &[];
        let mut slices: [&[u8]; MAX_PARAMS] = [empty; MAX_PARAMS];

        let recorded = &self.osc_params[..self.osc_num_params];
        for (slot, &(start, end)) in slices.iter_mut().zip(recorded.iter()) {
            *slot = &self.osc_raw[start..end];
        }

        performer.osc_dispatch(&slices[..self.osc_num_params]);
    }

    /// Close the OSC parameter currently being written: it spans from the
    /// end of the previous parameter (or 0) up to `osc_idx`. Does nothing
    /// once `MAX_PARAMS` parameters have been recorded.
    #[inline]
    fn osc_end_param(&mut self) {
        let param_idx = self.osc_num_params;
        if param_idx == MAX_PARAMS {
            return;
        }

        let begin = if param_idx == 0 {
            0
        } else {
            self.osc_params[param_idx - 1].1
        };
        self.osc_params[param_idx] = (begin, self.osc_idx);
        self.osc_num_params += 1;
    }

    /// Commit the numeric parameter being accumulated, if any, to `params`
    /// and reset the accumulator. The value is dropped if `params` is full.
    #[inline]
    fn finish_param(&mut self) {
        if !self.collecting_param {
            return;
        }

        let idx = self.num_params;
        if idx < MAX_PARAMS {
            self.params[idx] = self.param;
            self.num_params += 1;
        }
        self.param = 0;
        self.collecting_param = false;
    }

    /// Carry out a single action, reporting to `performer` where applicable.
    ///
    /// `byte` is the input byte that triggered the action; entry and exit
    /// actions are invoked with 0.
    #[inline]
    fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) {
        match action {
            Action::Print => performer.print(byte as char),
            Action::Execute => performer.execute(byte),
            Action::Hook => {
                // The last parameter has no trailing `;`, so it is still
                // pending in the accumulator; commit it before reporting.
                self.finish_param();
                performer.hook(
                    self.params(),
                    self.intermediates(),
                    self.ignoring,
                );
            },
            Action::Put => performer.put(byte),
            Action::OscStart => {
                self.osc_idx = 0;
                self.osc_num_params = 0;
            },
            Action::OscPut => {
                // Buffer full: drop everything else, separators included.
                if self.osc_idx == self.osc_raw.len() {
                    return;
                }

                if byte == b';' {
                    // Separators delimit parameters and are not stored.
                    self.osc_end_param();
                } else {
                    let idx = self.osc_idx;
                    self.osc_raw[idx] = byte;
                    self.osc_idx += 1;
                }
            },
            Action::OscEnd => {
                // Close the final parameter, which has no trailing `;`.
                self.osc_end_param();
                self.osc_dispatch(performer);
            },
            Action::Unhook => performer.unhook(),
            Action::CsiDispatch => {
                self.finish_param();
                performer.csi_dispatch(
                    self.params(),
                    self.intermediates(),
                    self.ignoring,
                    byte as char
                );

                self.num_params = 0;
                self.param = 0;
                self.collecting_param = false;
            }
            Action::EscDispatch => {
                performer.esc_dispatch(
                    self.params(),
                    self.intermediates(),
                    self.ignoring,
                    byte
                );
            },
            Action::Ignore | Action::None => (),
            Action::Collect => {
                if self.intermediate_idx == MAX_INTERMEDIATES {
                    self.ignoring = true;
                } else {
                    self.intermediates[self.intermediate_idx] = byte;
                    self.intermediate_idx += 1;
                }
            },
            Action::Param => {
                if byte == b';' {
                    let idx = self.num_params;
                    // Keep the last slot free for the parameter committed at
                    // dispatch time.
                    // NOTE(review): separators past this limit are ignored
                    // without resetting `param`, so digits of later
                    // parameters accumulate into the final one.
                    if idx >= MAX_PARAMS - 1 {
                        return;
                    }

                    self.params[idx] = self.param;
                    self.param = 0;
                    self.num_params += 1;
                    self.collecting_param = false;
                } else {
                    // Assumes the state table routes only ASCII digits (and
                    // `;`) to this action -- TODO confirm against `table`.
                    self.param = self.param.saturating_mul(10);
                    self.param = self.param.saturating_add((byte - b'0') as i64);
                    self.collecting_param = true;
                }
            },
            Action::Clear => {
                self.intermediate_idx = 0;
                self.num_params = 0;
                self.ignoring = false;
                // Also drop a half-accumulated parameter so that an aborted
                // sequence cannot leak its value into the next one.
                self.param = 0;
                self.collecting_param = false;
            },
            Action::BeginUtf8 => {
                self.process_utf8(performer, byte);
            },
        }
    }
}
/// Receives the actions recognised by the `Parser`.
///
/// The parser itself only tracks state; everything it recognises in the
/// input is reported by calling one of these methods.
///
/// In the methods that take them, `params` holds the numeric parameters
/// collected for the sequence, `intermediates` holds the stored intermediate
/// bytes, and `ignore` is true when more intermediate bytes arrived than the
/// parser stores.
pub trait Perform {
    /// A printable character was recognised. Invalid UTF-8 input is reported
    /// as the replacement character.
    fn print(&mut self, c: char);

    /// A byte mapped to the execute action by the state table was received.
    fn execute(&mut self, byte: u8);

    /// The hook action fired; subsequent data bytes arrive via `put` until
    /// `unhook` is called.
    fn hook(&mut self, params: &[i64], intermediates: &[u8], ignore: bool);

    /// A data byte for the hooked sequence.
    fn put(&mut self, byte: u8);

    /// The hooked sequence ended.
    fn unhook(&mut self);

    /// An OSC string ended. `params` holds its `;`-separated parts as raw
    /// bytes, without the separators.
    fn osc_dispatch(&mut self, params: &[&[u8]]);

    /// A CSI sequence ended with final character `c`.
    fn csi_dispatch(&mut self, params: &[i64], intermediates: &[u8], ignore: bool, c: char);

    /// An escape sequence ended with final byte `byte`.
    fn esc_dispatch(&mut self, params: &[i64], intermediates: &[u8], ignore: bool, byte: u8);
}
#[cfg(test)]
#[macro_use]
extern crate std;
#[cfg(test)]
mod tests {
    use std::vec::Vec;
    use super::{Parser, Perform};
    use super::MAX_PARAMS;
    use core::i64;

    /// ESC ] followed by a two-part OSC string and terminated by BEL.
    static OSC_BYTES: &'static [u8] = b"\x1b]2;jwilm@jwilm-desk: ~/code/alacritty\x07";

    /// Run a fresh parser over `input`, reporting to `performer`.
    fn feed<P: Perform>(performer: &mut P, input: &[u8]) {
        let mut parser = Parser::new();
        for &byte in input {
            parser.advance(performer, byte);
        }
    }

    /// Performer that records only OSC dispatches.
    #[derive(Default)]
    struct OscDispatcher {
        dispatched_osc: bool,
        params: Vec<Vec<u8>>,
    }

    impl Perform for OscDispatcher {
        fn print(&mut self, _: char) {}
        fn execute(&mut self, _byte: u8) {}
        fn hook(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool) {}
        fn put(&mut self, _byte: u8) {}
        fn unhook(&mut self) {}
        fn osc_dispatch(&mut self, params: &[&[u8]]) {
            self.dispatched_osc = true;
            self.params = params.iter().map(|part| part.to_vec()).collect();
        }
        fn csi_dispatch(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool, _c: char) {}
        fn esc_dispatch(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool, _byte: u8) {}
    }

    /// Performer that records the parameters of every CSI dispatch.
    #[derive(Default)]
    struct CsiDispatcher {
        dispatched_csi: bool,
        params: Vec<Vec<i64>>,
    }

    impl Perform for CsiDispatcher {
        fn print(&mut self, _: char) {}
        fn execute(&mut self, _byte: u8) {}
        fn hook(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool) {}
        fn put(&mut self, _byte: u8) {}
        fn unhook(&mut self) {}
        fn osc_dispatch(&mut self, _params: &[&[u8]]) {}
        fn csi_dispatch(&mut self, params: &[i64], _intermediates: &[u8], _ignore: bool, _c: char) {
            self.dispatched_csi = true;
            self.params.push(params.to_vec());
        }
        fn esc_dispatch(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool, _byte: u8) {}
    }

    #[test]
    fn parse_osc() {
        let mut dispatcher = OscDispatcher::default();
        assert!(!dispatcher.dispatched_osc);

        feed(&mut dispatcher, OSC_BYTES);

        // Both parts arrive without the separator or the terminator.
        assert!(dispatcher.dispatched_osc);
        assert_eq!(dispatcher.params.len(), 2);
        assert_eq!(dispatcher.params[0], &OSC_BYTES[2..3]);
        assert_eq!(dispatcher.params[1], &OSC_BYTES[4..(OSC_BYTES.len() - 1)]);
    }

    #[test]
    fn parse_empty_osc() {
        let mut dispatcher = OscDispatcher::default();
        assert!(!dispatcher.dispatched_osc);

        feed(&mut dispatcher, &[0x1b, 0x5d, 0x07]);

        assert!(dispatcher.dispatched_osc);
    }

    #[test]
    fn parse_osc_max_params() {
        // More separators than the parser has parameter slots.
        static INPUT: &'static [u8] = b"\x1b];;;;;;;;;;;;;;;;;\x1b";

        let mut dispatcher = OscDispatcher::default();
        assert!(!dispatcher.dispatched_osc);

        feed(&mut dispatcher, INPUT);

        assert!(dispatcher.dispatched_osc);
        assert_eq!(dispatcher.params.len(), MAX_PARAMS);
        for part in &dispatcher.params {
            assert_eq!(part.len(), 0);
        }
    }

    #[test]
    fn parse_csi_max_params() {
        // More parameters than the parser has slots.
        static INPUT: &'static [u8] = b"\x1b[1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;p";

        let mut dispatcher = CsiDispatcher::default();
        assert!(!dispatcher.dispatched_csi);

        feed(&mut dispatcher, INPUT);

        assert!(dispatcher.dispatched_csi);
        assert_eq!(dispatcher.params.len(), 1);
        assert_eq!(dispatcher.params[0].len(), MAX_PARAMS);
    }

    #[test]
    fn parse_semi_set_underline() {
        let mut dispatcher = CsiDispatcher::default();

        feed(&mut dispatcher, b"\x1b[;4m");

        // The empty leading parameter is reported as 0.
        assert_eq!(dispatcher.params[0], &[0, 4]);
    }

    #[test]
    fn parse_long_csi_param() {
        // The parameter is i64::MAX + 1, so accumulation must saturate.
        static INPUT: &'static [u8] = b"\x1b[9223372036854775808m";

        let mut dispatcher = CsiDispatcher::default();

        feed(&mut dispatcher, INPUT);

        assert_eq!(dispatcher.params[0], &[i64::MAX]);
    }

    #[test]
    fn parse_osc_with_utf8_arguments() {
        static INPUT: &'static [u8] = &[
            0x0d, 0x1b, 0x5d, 0x32, 0x3b, 0x65, 0x63, 0x68, 0x6f, 0x20, 0x27,
            0xc2, 0xaf, 0x5c, 0x5f, 0x28, 0xe3, 0x83, 0x84, 0x29, 0x5f, 0x2f,
            0xc2, 0xaf, 0x27, 0x20, 0x26, 0x26, 0x20, 0x73, 0x6c, 0x65, 0x65,
            0x70, 0x20, 0x31, 0x07
        ];

        let mut dispatcher = OscDispatcher { params: vec![], dispatched_osc: false };

        feed(&mut dispatcher, INPUT);

        // Multi-byte sequences inside the OSC string are passed through raw.
        assert_eq!(dispatcher.params[0], &[b'2']);
        assert_eq!(dispatcher.params[1], &INPUT[5..(INPUT.len() - 1)]);
    }
}