Line data Source code
1 : //! Helpers for serializing escaped strings.
2 : //!
3 : //! ## License
4 : //!
5 : //! <https://github.com/serde-rs/json/blob/c1826ebcccb1a520389c6b78ad3da15db279220d/src/ser.rs#L1514-L1552>
6 : //! <https://github.com/serde-rs/json/blob/c1826ebcccb1a520389c6b78ad3da15db279220d/src/ser.rs#L2081-L2157>
7 : //! Licensed by David Tolnay under MIT or Apache-2.0.
8 : //!
9 : //! With modifications by Conrad Ludgate on behalf of Databricks.
10 :
11 : use std::fmt::{self, Write};
12 :
/// Represents a character escape code in a type-safe manner.
///
/// There is deliberately no variant for the solidus (`/`): the escape table in
/// this module leaves that byte unescaped.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CharEscape {
    /// An escaped quote `"`
    Quote,
    /// An escaped reverse solidus `\`
    ReverseSolidus,
    /// An escaped backspace character (usually escaped as `\b`)
    Backspace,
    /// An escaped form feed character (usually escaped as `\f`)
    FormFeed,
    /// An escaped line feed character (usually escaped as `\n`)
    LineFeed,
    /// An escaped carriage return character (usually escaped as `\r`)
    CarriageReturn,
    /// An escaped tab character (usually escaped as `\t`)
    Tab,
    /// An escaped ASCII plane control character (usually escaped as
    /// `\u00XX` where `XX` are two hex characters)
    AsciiControl(u8),
}
35 :
36 : impl CharEscape {
37 : #[inline]
38 5 : fn from_escape_table(escape: u8, byte: u8) -> CharEscape {
39 5 : match escape {
40 0 : self::BB => CharEscape::Backspace,
41 0 : self::TT => CharEscape::Tab,
42 1 : self::NN => CharEscape::LineFeed,
43 0 : self::FF => CharEscape::FormFeed,
44 0 : self::RR => CharEscape::CarriageReturn,
45 3 : self::QU => CharEscape::Quote,
46 1 : self::BS => CharEscape::ReverseSolidus,
47 0 : self::UU => CharEscape::AsciiControl(byte),
48 0 : _ => unreachable!(),
49 : }
50 5 : }
51 : }
52 :
53 44 : pub(crate) fn format_escaped_str(writer: &mut Vec<u8>, value: &str) {
54 44 : writer.reserve(2 + value.len());
55 :
56 44 : writer.push(b'"');
57 :
58 44 : let rest = format_escaped_str_contents(writer, value);
59 44 : writer.extend_from_slice(rest);
60 :
61 44 : writer.push(b'"');
62 44 : }
63 :
64 1 : pub(crate) fn format_escaped_fmt(writer: &mut Vec<u8>, args: fmt::Arguments) {
65 1 : writer.push(b'"');
66 :
67 1 : Collect { buf: writer }
68 1 : .write_fmt(args)
69 1 : .expect("formatting should not error");
70 :
71 1 : writer.push(b'"');
72 1 : }
73 :
74 : struct Collect<'buf> {
75 : buf: &'buf mut Vec<u8>,
76 : }
77 :
78 : impl fmt::Write for Collect<'_> {
79 3 : fn write_str(&mut self, s: &str) -> fmt::Result {
80 3 : let last = format_escaped_str_contents(self.buf, s);
81 3 : self.buf.extend(last);
82 3 : Ok(())
83 3 : }
84 : }
85 :
86 : // writes any escape sequences, and returns the suffix still needed to be written.
87 47 : fn format_escaped_str_contents<'a>(writer: &mut Vec<u8>, value: &'a str) -> &'a [u8] {
88 47 : let bytes = value.as_bytes();
89 :
90 47 : let mut start = 0;
91 :
92 159 : for (i, &byte) in bytes.iter().enumerate() {
93 159 : let escape = ESCAPE[byte as usize];
94 159 : if escape == 0 {
95 154 : continue;
96 5 : }
97 :
98 5 : writer.extend_from_slice(&bytes[start..i]);
99 :
100 5 : let char_escape = CharEscape::from_escape_table(escape, byte);
101 5 : write_char_escape(writer, char_escape);
102 :
103 5 : start = i + 1;
104 : }
105 :
106 47 : &bytes[start..]
107 47 : }
108 :
/// Escape code for backspace (`\x08`).
const BB: u8 = b'b';
/// Escape code for horizontal tab (`\x09`).
const TT: u8 = b't';
/// Escape code for line feed (`\x0A`).
const NN: u8 = b'n';
/// Escape code for form feed (`\x0C`).
const FF: u8 = b'f';
/// Escape code for carriage return (`\x0D`).
const RR: u8 = b'r';
/// Escape code for the double quote (`\x22`).
const QU: u8 = b'"';
/// Escape code for the reverse solidus (`\x5C`).
const BS: u8 = b'\\';
/// Escape code for `\x00...\x1F`, except the bytes that have a code above.
const UU: u8 = b'u';
/// Marker for a byte that is not escaped.
const __: u8 = 0;
118 :
119 : // Lookup table of escape sequences. A value of b'x' at index i means that byte
120 : // i is escaped as "\x" in JSON. A value of 0 means that byte i is not escaped.
121 : static ESCAPE: [u8; 256] = [
122 : // 1 2 3 4 5 6 7 8 9 A B C D E F
123 : UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0
124 : UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, // 1
125 : __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
126 : __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
127 : __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
128 : __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
129 : __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
130 : __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
131 : __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
132 : __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
133 : __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
134 : __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
135 : __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
136 : __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
137 : __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
138 : __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
139 : ];
140 :
141 5 : fn write_char_escape(writer: &mut Vec<u8>, char_escape: CharEscape) {
142 5 : let s = match char_escape {
143 3 : CharEscape::Quote => b"\\\"",
144 1 : CharEscape::ReverseSolidus => b"\\\\",
145 : // CharEscape::Solidus => b"\\/",
146 0 : CharEscape::Backspace => b"\\b",
147 0 : CharEscape::FormFeed => b"\\f",
148 1 : CharEscape::LineFeed => b"\\n",
149 0 : CharEscape::CarriageReturn => b"\\r",
150 0 : CharEscape::Tab => b"\\t",
151 0 : CharEscape::AsciiControl(byte) => {
152 : static HEX_DIGITS: [u8; 16] = *b"0123456789abcdef";
153 0 : let bytes = &[
154 0 : b'\\',
155 0 : b'u',
156 0 : b'0',
157 0 : b'0',
158 0 : HEX_DIGITS[(byte >> 4) as usize],
159 0 : HEX_DIGITS[(byte & 0xF) as usize],
160 0 : ];
161 0 : return writer.extend_from_slice(bytes);
162 : }
163 : };
164 :
165 5 : writer.extend_from_slice(s);
166 5 : }
|