Line data Source code
1 : use json::{ListSer, ObjectSer, ValueSer};
2 : use postgres_client::Row;
3 : use postgres_client::types::{Kind, Type};
4 : use serde_json::Value;
5 :
6 : //
7 : // Convert json non-string types to strings, so that they can be passed to Postgres
8 : // as parameters.
9 : //
10 10 : pub(crate) fn json_to_pg_text(json: Vec<Value>) -> Vec<Option<String>> {
11 10 : json.iter().map(json_value_to_pg_text).collect()
12 10 : }
13 :
14 11 : fn json_value_to_pg_text(value: &Value) -> Option<String> {
15 11 : match value {
16 : // special care for nulls
17 1 : Value::Null => None,
18 :
19 : // convert to text with escaping
20 3 : v @ (Value::Bool(_) | Value::Number(_) | Value::Object(_)) => Some(v.to_string()),
21 :
22 : // avoid escaping here, as we pass this as a parameter
23 4 : Value::String(s) => Some(s.clone()),
24 :
25 : // special care for arrays
26 3 : Value::Array(_) => json_array_to_pg_array(value),
27 : }
28 11 : }
29 :
30 : //
31 : // Serialize a JSON array to a Postgres array. Contrary to the strings in the params
32 : // in the array we need to escape the strings. Postgres is okay with arrays of form
33 : // '{1,"2",3}'::int[], so we don't check that array holds values of the same type, leaving
34 : // it for Postgres to check.
35 : //
36 : // Example of the same escaping in node-postgres: packages/pg/lib/utils.js
37 : //
38 23 : fn json_array_to_pg_array(value: &Value) -> Option<String> {
39 23 : match value {
40 : // special care for nulls
41 2 : Value::Null => None,
42 :
43 : // convert to text with escaping
44 : // here string needs to be escaped, as it is part of the array
45 13 : v @ (Value::Bool(_) | Value::Number(_) | Value::String(_)) => Some(v.to_string()),
46 2 : v @ Value::Object(_) => json_array_to_pg_array(&Value::String(v.to_string())),
47 :
48 : // recurse into array
49 6 : Value::Array(arr) => {
50 6 : let vals = arr
51 6 : .iter()
52 6 : .map(json_array_to_pg_array)
53 18 : .map(|v| v.unwrap_or_else(|| "NULL".to_string()))
54 6 : .collect::<Vec<_>>()
55 6 : .join(",");
56 :
57 6 : Some(format!("{{{vals}}}"))
58 : }
59 : }
60 23 : }
61 :
62 : #[derive(Debug, thiserror::Error)]
63 : pub(crate) enum JsonConversionError {
64 : #[error("internal error compute returned invalid data: {0}")]
65 : AsTextError(postgres_client::Error),
66 : #[error("parse int error: {0}")]
67 : ParseIntError(#[from] std::num::ParseIntError),
68 : #[error("parse float error: {0}")]
69 : ParseFloatError(#[from] std::num::ParseFloatError),
70 : #[error("parse json error: {0}")]
71 : ParseJsonError(#[from] serde_json::Error),
72 : #[error("unbalanced array")]
73 : UnbalancedArray,
74 : #[error("unbalanced quoted string")]
75 : UnbalancedString,
76 : }
77 :
78 : enum OutputMode<'a> {
79 : Array(ListSer<'a>),
80 : Object(ObjectSer<'a>),
81 : }
82 :
83 : impl OutputMode<'_> {
84 0 : fn key(&mut self, key: &str) -> ValueSer<'_> {
85 0 : match self {
86 0 : OutputMode::Array(values) => values.entry(),
87 0 : OutputMode::Object(map) => map.key(key),
88 : }
89 0 : }
90 :
91 0 : fn finish(self) {
92 0 : match self {
93 0 : OutputMode::Array(values) => values.finish(),
94 0 : OutputMode::Object(map) => map.finish(),
95 : }
96 0 : }
97 : }
98 :
99 : //
100 : // Convert postgres row with text-encoded values to JSON object
101 : //
102 0 : pub(crate) fn pg_text_row_to_json(
103 0 : output: ValueSer,
104 0 : row: &Row,
105 0 : raw_output: bool,
106 0 : array_mode: bool,
107 0 : ) -> Result<(), JsonConversionError> {
108 0 : let mut entries = if array_mode {
109 0 : OutputMode::Array(output.list())
110 : } else {
111 0 : OutputMode::Object(output.object())
112 : };
113 :
114 0 : for (i, column) in row.columns().iter().enumerate() {
115 0 : let pg_value = row.as_text(i).map_err(JsonConversionError::AsTextError)?;
116 :
117 0 : let value = entries.key(column.name());
118 :
119 0 : match pg_value {
120 0 : Some(v) if raw_output => value.value(v),
121 0 : Some(v) => pg_text_to_json(value, v, column.type_())?,
122 0 : None => value.value(json::Null),
123 : }
124 : }
125 :
126 0 : entries.finish();
127 0 : Ok(())
128 0 : }
129 :
130 : //
131 : // Convert postgres text-encoded value to JSON value
132 : //
133 76 : fn pg_text_to_json(output: ValueSer, val: &str, pg_type: &Type) -> Result<(), JsonConversionError> {
134 76 : if let Kind::Array(elem_type) = pg_type.kind() {
135 : // todo: we should fetch this from postgres.
136 0 : let delimiter = ',';
137 :
138 0 : json::value_as_list!(|output| pg_array_parse(output, val, elem_type, delimiter)?);
139 0 : return Ok(());
140 76 : }
141 :
142 76 : match *pg_type {
143 12 : Type::BOOL => output.value(val == "t"),
144 : Type::INT2 | Type::INT4 => {
145 14 : let val = val.parse::<i32>()?;
146 14 : output.value(val);
147 : }
148 : Type::FLOAT4 | Type::FLOAT8 => {
149 23 : let fval = val.parse::<f64>()?;
150 23 : if fval.is_finite() {
151 14 : output.value(fval);
152 14 : } else {
153 9 : // Pass Nan, Inf, -Inf as strings
154 9 : // JS JSON.stringify() does converts them to null, but we
155 9 : // want to preserve them, so we pass them as strings
156 9 : output.value(val);
157 9 : }
158 : }
159 : // we assume that the string value is valid json.
160 7 : Type::JSON | Type::JSONB => output.write_raw_json(val.as_bytes()),
161 20 : _ => output.value(val),
162 : }
163 :
164 76 : Ok(())
165 76 : }
166 :
167 : /// Parse postgres array into JSON array.
168 : ///
169 : /// This is a bit involved because we need to handle nested arrays and quoted
170 : /// values. Unlike postgres we don't check that all nested arrays have the same
171 : /// dimensions, we just return them as is.
172 : ///
173 : /// <https://www.postgresql.org/docs/current/arrays.html#ARRAYS-IO>
174 : ///
175 : /// The external text representation of an array value consists of items that are interpreted
176 : /// according to the I/O conversion rules for the array's element type, plus decoration that
177 : /// indicates the array structure. The decoration consists of curly braces (`{` and `}`) around
178 : /// the array value plus delimiter characters between adjacent items. The delimiter character
179 : /// is usually a comma (,) but can be something else: it is determined by the typdelim setting
180 : /// for the array's element type. Among the standard data types provided in the PostgreSQL
181 : /// distribution, all use a comma, except for type box, which uses a semicolon (;).
182 : ///
183 : /// In a multidimensional array, each dimension (row, plane, cube, etc.)
184 : /// gets its own level of curly braces, and delimiters must be written between adjacent
185 : /// curly-braced entities of the same level.
186 22 : fn pg_array_parse(
187 22 : elements: &mut ListSer,
188 22 : mut pg_array: &str,
189 22 : elem: &Type,
190 22 : delim: char,
191 22 : ) -> Result<(), JsonConversionError> {
192 : // skip bounds decoration, eg:
193 : // `[1:1][-2:-1][3:5]={{{1,2,3},{4,5,6}}}`
194 : // technically these are significant, but we have no way to represent them in json.
195 22 : if let Some('[') = pg_array.chars().next() {
196 1 : let Some((_bounds, array)) = pg_array.split_once('=') else {
197 0 : return Err(JsonConversionError::UnbalancedArray);
198 : };
199 1 : pg_array = array;
200 21 : }
201 :
202 : // whitespace might preceed a `{`.
203 22 : let pg_array = pg_array.trim_start();
204 :
205 22 : let rest = pg_array_parse_inner(elements, pg_array, elem, delim)?;
206 22 : if !rest.is_empty() {
207 0 : return Err(JsonConversionError::UnbalancedArray);
208 22 : }
209 :
210 22 : Ok(())
211 22 : }
212 :
213 : /// reads a single array from the `pg_array` string and pushes each values to `elements`.
214 : /// returns the rest of the `pg_array` string that was not read.
215 39 : fn pg_array_parse_inner<'a>(
216 39 : elements: &mut ListSer,
217 39 : mut pg_array: &'a str,
218 39 : elem: &Type,
219 39 : delim: char,
220 39 : ) -> Result<&'a str, JsonConversionError> {
221 : // array should have a `{` prefix.
222 39 : pg_array = pg_array
223 39 : .strip_prefix('{')
224 39 : .ok_or(JsonConversionError::UnbalancedArray)?;
225 :
226 39 : let mut q = String::new();
227 :
228 : loop {
229 90 : let value = elements.entry();
230 90 : pg_array = pg_array_parse_item(value, &mut q, pg_array, elem, delim)?;
231 :
232 : // check for separator.
233 90 : if let Some(next) = pg_array.strip_prefix(delim) {
234 51 : // next item.
235 51 : pg_array = next;
236 51 : } else {
237 39 : break;
238 : }
239 : }
240 :
241 39 : let Some(next) = pg_array.strip_prefix('}') else {
242 : // missing `}` terminator.
243 0 : return Err(JsonConversionError::UnbalancedArray);
244 : };
245 :
246 : // whitespace might follow a `}`.
247 39 : Ok(next.trim_start())
248 39 : }
249 :
250 : /// reads a single item from the `pg_array` string.
251 : /// returns the rest of the `pg_array` string that was not read.
252 : ///
253 : /// `quoted` is a scratch allocation that has no defined output.
254 90 : fn pg_array_parse_item<'a>(
255 90 : output: ValueSer,
256 90 : quoted: &mut String,
257 90 : mut pg_array: &'a str,
258 90 : elem: &Type,
259 90 : delim: char,
260 90 : ) -> Result<&'a str, JsonConversionError> {
261 : // We are trying to parse an array item.
262 : // This could be a new array, if this is a multi-dimentional array.
263 : // This could be a quoted string representing `elem`.
264 : // This could be an unquoted string representing `elem`.
265 :
266 : // whitespace might preceed an item.
267 90 : pg_array = pg_array.trim_start();
268 :
269 90 : if pg_array.starts_with('{') {
270 : // nested array.
271 : pg_array =
272 17 : json::value_as_list!(|output| pg_array_parse_inner(output, pg_array, elem, delim))?;
273 17 : return Ok(pg_array);
274 73 : }
275 :
276 73 : if let Some(mut pg_array) = pg_array.strip_prefix('"') {
277 : // the parsed string is un-escaped and written into quoted.
278 15 : pg_array = pg_array_parse_quoted(quoted, pg_array)?;
279 :
280 : // we have un-escaped the string, parse it as pgtext.
281 15 : pg_text_to_json(output, quoted, elem)?;
282 :
283 15 : return Ok(pg_array);
284 58 : }
285 :
286 : // we need to parse an item. read until we find a delimiter or `}`.
287 58 : let index = pg_array
288 58 : .find([delim, '}'])
289 58 : .ok_or(JsonConversionError::UnbalancedArray)?;
290 :
291 : let item;
292 58 : (item, pg_array) = pg_array.split_at(index);
293 :
294 : // item might have trailing whitespace that we need to ignore.
295 58 : let item = item.trim_end();
296 :
297 : // we might have an item string:
298 : // check for null
299 58 : if item == "NULL" {
300 7 : output.value(json::Null);
301 7 : } else {
302 51 : pg_text_to_json(output, item, elem)?;
303 : }
304 :
305 58 : Ok(pg_array)
306 90 : }
307 :
308 : /// reads a single quoted item from the `pg_array` string.
309 : ///
310 : /// Returns the rest of the `pg_array` string that was not read.
311 : /// The output is written into `quoted`.
312 : ///
313 : /// The pg_array string must have a `"` terminator, but the `"` initial value
314 : /// must have already been removed from the input. The terminator is removed.
315 15 : fn pg_array_parse_quoted<'a>(
316 15 : quoted: &mut String,
317 15 : mut pg_array: &'a str,
318 15 : ) -> Result<&'a str, JsonConversionError> {
319 : // The array output routine will put double quotes around element values if they are empty strings,
320 : // contain curly braces, delimiter characters, double quotes, backslashes, or white space,
321 : // or match the word `NULL`. Double quotes and backslashes embedded in element values will be backslash-escaped.
322 : // For numeric data types it is safe to assume that double quotes will never appear,
323 : // but for textual data types one should be prepared to cope with either the presence or absence of quotes.
324 :
325 15 : quoted.clear();
326 :
327 : // We write to quoted in chunks terminated by an escape character.
328 : // Eg if we have the input `foo\"bar"`, then we write `foo`, then `"`, then finally `bar`.
329 :
330 : loop {
331 : // we need to parse an chunk. read until we find a '\\' or `"`.
332 30 : let i = pg_array
333 30 : .find(['\\', '"'])
334 30 : .ok_or(JsonConversionError::UnbalancedString)?;
335 :
336 : let chunk: &str;
337 30 : (chunk, pg_array) = pg_array
338 30 : .split_at_checked(i)
339 30 : .expect("i is guaranteed to be in-bounds of pg_array");
340 :
341 : // push the chunk.
342 30 : quoted.push_str(chunk);
343 :
344 : // consume the chunk_end character.
345 : let chunk_end: char;
346 30 : (chunk_end, pg_array) =
347 30 : split_first_char(pg_array).expect("pg_array should start with either '\\\\' or '\"'");
348 :
349 : // finished.
350 30 : if chunk_end == '"' {
351 : // whitespace might follow the '"'.
352 15 : pg_array = pg_array.trim_start();
353 :
354 15 : break Ok(pg_array);
355 15 : }
356 :
357 : // consume the escaped character.
358 : let escaped: char;
359 15 : (escaped, pg_array) =
360 15 : split_first_char(pg_array).ok_or(JsonConversionError::UnbalancedString)?;
361 :
362 15 : quoted.push(escaped);
363 : }
364 15 : }
365 :
366 45 : fn split_first_char(s: &str) -> Option<(char, &str)> {
367 45 : let mut chars = s.chars();
368 45 : let c = chars.next()?;
369 45 : Some((c, chars.as_str()))
370 45 : }
371 :
372 : #[cfg(test)]
373 : mod tests {
374 : use serde_json::json;
375 :
376 : use super::*;
377 :
378 : #[test]
379 1 : fn test_atomic_types_to_pg_params() {
380 1 : let json = vec![Value::Bool(true), Value::Bool(false)];
381 1 : let pg_params = json_to_pg_text(json);
382 1 : assert_eq!(
383 : pg_params,
384 1 : vec![Some("true".to_owned()), Some("false".to_owned())]
385 : );
386 :
387 1 : let json = vec![Value::Number(serde_json::Number::from(42))];
388 1 : let pg_params = json_to_pg_text(json);
389 1 : assert_eq!(pg_params, vec![Some("42".to_owned())]);
390 :
391 1 : let json = vec![Value::String("foo\"".to_string())];
392 1 : let pg_params = json_to_pg_text(json);
393 1 : assert_eq!(pg_params, vec![Some("foo\"".to_owned())]);
394 :
395 1 : let json = vec![Value::Null];
396 1 : let pg_params = json_to_pg_text(json);
397 1 : assert_eq!(pg_params, vec![None]);
398 1 : }
399 :
400 : #[test]
401 1 : fn test_json_array_to_pg_array() {
402 : // atoms and escaping
403 1 : let json = "[true, false, null, \"NULL\", 42, \"foo\", \"bar\\\"-\\\\\"]";
404 1 : let json: Value = serde_json::from_str(json).unwrap();
405 1 : let pg_params = json_to_pg_text(vec![json]);
406 1 : assert_eq!(
407 : pg_params,
408 1 : vec![Some(
409 1 : "{true,false,NULL,\"NULL\",42,\"foo\",\"bar\\\"-\\\\\"}".to_owned()
410 1 : )]
411 : );
412 :
413 : // nested arrays
414 1 : let json = "[[true, false], [null, 42], [\"foo\", \"bar\\\"-\\\\\"]]";
415 1 : let json: Value = serde_json::from_str(json).unwrap();
416 1 : let pg_params = json_to_pg_text(vec![json]);
417 1 : assert_eq!(
418 : pg_params,
419 1 : vec![Some(
420 1 : "{{true,false},{NULL,42},{\"foo\",\"bar\\\"-\\\\\"}}".to_owned()
421 1 : )]
422 : );
423 : // array of objects
424 1 : let json = r#"[{"foo": 1},{"bar": 2}]"#;
425 1 : let json: Value = serde_json::from_str(json).unwrap();
426 1 : let pg_params = json_to_pg_text(vec![json]);
427 1 : assert_eq!(
428 : pg_params,
429 1 : vec![Some(r#"{"{\"foo\":1}","{\"bar\":2}"}"#.to_owned())]
430 : );
431 1 : }
432 :
433 10 : fn pg_text_to_json(val: &str, pg_type: &Type) -> Value {
434 10 : let output = json::value_to_string!(|v| super::pg_text_to_json(v, val, pg_type).unwrap());
435 10 : serde_json::from_str(&output).unwrap()
436 10 : }
437 :
438 22 : fn pg_array_parse(pg_array: &str, pg_type: &Type) -> Value {
439 22 : let output = json::value_to_string!(|v| json::value_as_list!(|v| {
440 22 : super::pg_array_parse(v, pg_array, pg_type, ',').unwrap();
441 22 : }));
442 22 : serde_json::from_str(&output).unwrap()
443 22 : }
444 :
445 : #[test]
446 1 : fn test_atomic_types_parse() {
447 1 : assert_eq!(pg_text_to_json("foo", &Type::TEXT), json!("foo"));
448 1 : assert_eq!(pg_text_to_json("42", &Type::INT4), json!(42));
449 1 : assert_eq!(pg_text_to_json("42", &Type::INT2), json!(42));
450 1 : assert_eq!(pg_text_to_json("42", &Type::INT8), json!("42"));
451 1 : assert_eq!(pg_text_to_json("42.42", &Type::FLOAT8), json!(42.42));
452 1 : assert_eq!(pg_text_to_json("42.42", &Type::FLOAT4), json!(42.42));
453 1 : assert_eq!(pg_text_to_json("NaN", &Type::FLOAT4), json!("NaN"));
454 1 : assert_eq!(
455 1 : pg_text_to_json("Infinity", &Type::FLOAT4),
456 1 : json!("Infinity")
457 : );
458 1 : assert_eq!(
459 1 : pg_text_to_json("-Infinity", &Type::FLOAT4),
460 1 : json!("-Infinity")
461 : );
462 :
463 1 : let json: Value =
464 1 : serde_json::from_str("{\"s\":\"str\",\"n\":42,\"f\":4.2,\"a\":[null,3,\"a\"]}")
465 1 : .unwrap();
466 1 : assert_eq!(
467 1 : pg_text_to_json(
468 1 : r#"{"s":"str","n":42,"f":4.2,"a":[null,3,"a"]}"#,
469 1 : &Type::JSONB
470 : ),
471 : json
472 : );
473 1 : }
474 :
475 : #[test]
476 1 : fn test_pg_array_parse_text() {
477 4 : fn pt(pg_arr: &str) -> Value {
478 4 : pg_array_parse(pg_arr, &Type::TEXT)
479 4 : }
480 1 : assert_eq!(
481 1 : pt(r#"{"aa\"\\\,a",cha,"bbbb"}"#),
482 1 : json!(["aa\"\\,a", "cha", "bbbb"])
483 : );
484 1 : assert_eq!(
485 1 : pt(r#"{{"foo","bar"},{"bee","bop"}}"#),
486 1 : json!([["foo", "bar"], ["bee", "bop"]])
487 : );
488 1 : assert_eq!(
489 1 : pt(r#"{{{{"foo",NULL,"bop",bup}}}}"#),
490 1 : json!([[[["foo", null, "bop", "bup"]]]])
491 : );
492 1 : assert_eq!(
493 1 : pt(r#"{{"1",2,3},{4,NULL,6},{NULL,NULL,NULL}}"#),
494 1 : json!([["1", "2", "3"], ["4", null, "6"], [null, null, null]])
495 : );
496 1 : }
497 :
498 : #[test]
499 1 : fn test_pg_array_parse_bool() {
500 4 : fn pb(pg_arr: &str) -> Value {
501 4 : pg_array_parse(pg_arr, &Type::BOOL)
502 4 : }
503 1 : assert_eq!(pb(r#"{t,f,t}"#), json!([true, false, true]));
504 1 : assert_eq!(pb(r#"{{t,f,t}}"#), json!([[true, false, true]]));
505 1 : assert_eq!(
506 1 : pb(r#"{{t,f},{f,t}}"#),
507 1 : json!([[true, false], [false, true]])
508 : );
509 1 : assert_eq!(
510 1 : pb(r#"{{t,NULL},{NULL,f}}"#),
511 1 : json!([[true, null], [null, false]])
512 : );
513 1 : }
514 :
515 : #[test]
516 1 : fn test_pg_array_parse_numbers() {
517 9 : fn pn(pg_arr: &str, ty: &Type) -> Value {
518 9 : pg_array_parse(pg_arr, ty)
519 9 : }
520 1 : assert_eq!(pn(r#"{1,2,3}"#, &Type::INT4), json!([1, 2, 3]));
521 1 : assert_eq!(pn(r#"{1,2,3}"#, &Type::INT2), json!([1, 2, 3]));
522 1 : assert_eq!(pn(r#"{1,2,3}"#, &Type::INT8), json!(["1", "2", "3"]));
523 1 : assert_eq!(pn(r#"{1,2,3}"#, &Type::FLOAT4), json!([1.0, 2.0, 3.0]));
524 1 : assert_eq!(pn(r#"{1,2,3}"#, &Type::FLOAT8), json!([1.0, 2.0, 3.0]));
525 1 : assert_eq!(
526 1 : pn(r#"{1.1,2.2,3.3}"#, &Type::FLOAT4),
527 1 : json!([1.1, 2.2, 3.3])
528 : );
529 1 : assert_eq!(
530 1 : pn(r#"{1.1,2.2,3.3}"#, &Type::FLOAT8),
531 1 : json!([1.1, 2.2, 3.3])
532 : );
533 1 : assert_eq!(
534 1 : pn(r#"{NaN,Infinity,-Infinity}"#, &Type::FLOAT4),
535 1 : json!(["NaN", "Infinity", "-Infinity"])
536 : );
537 1 : assert_eq!(
538 1 : pn(r#"{NaN,Infinity,-Infinity}"#, &Type::FLOAT8),
539 1 : json!(["NaN", "Infinity", "-Infinity"])
540 : );
541 1 : }
542 :
543 : #[test]
544 1 : fn test_pg_array_with_decoration() {
545 1 : fn p(pg_arr: &str) -> Value {
546 1 : pg_array_parse(pg_arr, &Type::INT2)
547 1 : }
548 1 : assert_eq!(
549 1 : p(r#"[1:1][-2:-1][3:5]={{{1,2,3},{4,5,6}}}"#),
550 1 : json!([[[1, 2, 3], [4, 5, 6]]])
551 : );
552 1 : }
553 :
554 : #[test]
555 1 : fn test_pg_array_parse_json() {
556 4 : fn pt(pg_arr: &str) -> Value {
557 4 : pg_array_parse(pg_arr, &Type::JSONB)
558 4 : }
559 1 : assert_eq!(pt(r#"{"{}"}"#), json!([{}]));
560 1 : assert_eq!(
561 1 : pt(r#"{"{\"foo\": 1, \"bar\": 2}"}"#),
562 1 : json!([{"foo": 1, "bar": 2}])
563 : );
564 1 : assert_eq!(
565 1 : pt(r#"{"{\"foo\": 1}", "{\"bar\": 2}"}"#),
566 1 : json!([{"foo": 1}, {"bar": 2}])
567 : );
568 1 : assert_eq!(
569 1 : pt(r#"{{"{\"foo\": 1}", "{\"bar\": 2}"}}"#),
570 1 : json!([[{"foo": 1}, {"bar": 2}]])
571 : );
572 1 : }
573 : }
|