1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
//! Types supporting the UTF-8 parser
#![allow(non_camel_case_types)]
use core::mem;

/// States the parser can be in.
///
/// There is a state for each initial input of the 3 and 4 byte sequences since
/// the following bytes are subject to different conditions than a tail byte.
///
/// The enum is `#[repr(u8)]` so that its in-memory representation is exactly
/// the discriminant byte; [`unpack`] relies on this when it transmutes a raw
/// `u8` back into a `State`.
// NOTE(review): `dead_code` is allowed because not every variant appears to be
// constructed by name in every build configuration — confirm against callers.
#[allow(dead_code)]
#[repr(u8)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum State {
    /// Ground state; expect anything
    Ground = 0,
    /// 3 tail bytes
    Tail3 = 1,
    /// 2 tail bytes
    Tail2 = 2,
    /// 1 tail byte
    Tail1 = 3,
    /// UTF8-3 starting with E0
    U3_2_e0 = 4,
    /// UTF8-3 starting with ED
    U3_2_ed = 5,
    /// UTF8-4 starting with F0
    Utf8_4_3_f0 = 6,
    /// UTF8-4 starting with F4
    Utf8_4_3_f4 = 7,
}

/// Action to take when receiving a byte
///
/// The enum is `#[repr(u8)]` so that its in-memory representation is exactly
/// the discriminant byte; [`unpack`] relies on this when it transmutes a raw
/// `u8` back into an `Action`.
// NOTE(review): `dead_code` is allowed because not every variant appears to be
// constructed by name in every build configuration — confirm against callers.
#[allow(dead_code)]
#[repr(u8)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Action {
    /// Unexpected byte; sequence is invalid
    InvalidSequence = 0,
    /// Received valid 7-bit ASCII byte which can be directly emitted.
    EmitByte = 1,
    /// Set the bottom continuation byte
    SetByte1 = 2,
    /// Set the 2nd-from-last continuation byte
    SetByte2 = 3,
    /// Set the 2nd-from-last byte which is part of a two byte sequence
    SetByte2Top = 4,
    /// Set the 3rd-from-last continuation byte
    SetByte3 = 5,
    /// Set the 3rd-from-last byte which is part of a three byte sequence
    SetByte3Top = 6,
    /// Set the top byte of a four byte sequence.
    SetByte4 = 7,
}

/// Convert a state and action to a u8
///
/// State will be the bottom 4 bits and action the top 4, so the result can be
/// split apart again with [`unpack`].
// NOTE(review): `dead_code` is allowed presumably because this is only needed
// when (re)generating packed tables — confirm against callers.
#[inline]
#[allow(dead_code)]
pub fn pack(state: State, action: Action) -> u8 {
    ((action as u8) << 4) | (state as u8)
}

/// Convert a u8 to a state and action
///
/// The bottom 4 bits are interpreted as the [`State`] discriminant and the top
/// 4 bits as the [`Action`] discriminant.
///
/// # Safety
///
/// `val` must have been produced by the [`pack`] function in this module (or
/// otherwise hold a valid `State` discriminant in its low nibble and a valid
/// `Action` discriminant in its high nibble). For any other byte there is no
/// guarantee that a valid state and action can be produced, and calling this
/// function is undefined behavior.
///
/// In builds with debug assertions enabled, an out-of-range nibble triggers a
/// panic instead.
#[inline]
pub unsafe fn unpack(val: u8) -> (State, Action) {
    // State is stored in bottom 4 bits
    let state_bits = val & 0x0f;
    // Action is stored in top 4 bits
    let action_bits = val >> 4;

    // Each nibble can hold 0..=15 but only 0..=7 name a variant; catch
    // contract violations early in debug builds.
    debug_assert!(
        state_bits <= State::Utf8_4_3_f4 as u8,
        "low nibble is not a valid State discriminant"
    );
    debug_assert!(
        action_bits <= Action::SetByte4 as u8,
        "high nibble is not a valid Action discriminant"
    );

    // SAFETY: both enums are `#[repr(u8)]` with contiguous discriminants
    // starting at 0, and the caller guarantees that each nibble of `val` is
    // one of those discriminants.
    unsafe {
        (
            mem::transmute::<u8, State>(state_bits),
            mem::transmute::<u8, Action>(action_bits),
        )
    }
}