detail/charconv/detail/parser.hpp

Line	Hits	Source Code
1		// Copyright 2023 Matt Borland
2		// Distributed under the Boost Software License, Version 1.0.
3		// https://www.boost.org/LICENSE_1_0.txt
4
5		#ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_PARSER_HPP
6		#define BOOST_JSON_DETAIL_CHARCONV_DETAIL_PARSER_HPP
7
8		#include <boost/json/detail/charconv/detail/config.hpp>
9		#include <boost/json/detail/charconv/detail/from_chars_result.hpp>
10		#include <boost/json/detail/charconv/detail/from_chars_integer_impl.hpp>
11		#include <boost/json/detail/charconv/detail/integer_search_trees.hpp>
12		#include <boost/json/detail/charconv/limits.hpp>
13		#include <boost/json/detail/charconv/chars_format.hpp>
14		#include <system_error>
15		#include <type_traits>
16		#include <limits>
17		#include <cerrno>
18		#include <cstdint>
19		#include <cstring>
20
21		#if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
22		# pragma GCC diagnostic push
23		# pragma GCC diagnostic ignored "-Wmissing-field-initializers"
24		#endif
25
26		namespace boost { namespace json { namespace detail { namespace charconv { namespace detail {
27
// Reports whether c is one of the ASCII decimal digits '0' through '9'.
inline bool is_integer_char(char c) noexcept
{
    const bool below_zero = c < '0';
    const bool above_nine = c > '9';
    return !(below_zero || above_nine);
}
32
33	✗	inline bool is_hex_char(char c) noexcept
34		{
35	✗	return is_integer_char(c) \|\| (((c >= 'a') && (c <= 'f')) \|\| ((c >= 'A') && (c <= 'F')));
36		}
37
38	✗	inline bool is_delimiter(char c, chars_format fmt) noexcept
39		{
40	✗	if (fmt != chars_format::hex)
41		{
42	✗	return !is_integer_char(c) && c != 'e' && c != 'E';
43		}
44
45	✗	return !is_hex_char(c) && c != 'p' && c != 'P';
46		}
47
48		template <typename Unsigned_Integer, typename Integer>
49	✗	inline from_chars_result parser(const char* first, const char* last, bool& sign, Unsigned_Integer& significand, Integer& exponent, chars_format fmt = chars_format::general) noexcept
50		{
51	✗	if (first > last)
52		{
53	✗	return {first, std::errc::invalid_argument};
54		}
55
56	✗	auto next = first;
57	✗	bool all_zeros = true;
58
59		// First extract the sign
60	✗	if (*next == '-')
61		{
62	✗	sign = true;
63	✗	++next;
64		}
65	✗	else if (*next == '+')
66		{
67	✗	return {next, std::errc::invalid_argument};
68		}
69		else
70		{
71	✗	sign = false;
72		}
73
74		// Ignore leading zeros (e.g. 00005 or -002.3e+5)
75	✗	while (*next == '0' && next != last)
76		{
77	✗	++next;
78		}
79
80		// If the number is 0 we can abort now
81		char exp_char;
82		char capital_exp_char;
83	✗	if (fmt != chars_format::hex)
84		{
85	✗	exp_char = 'e';
86	✗	capital_exp_char = 'E';
87		}
88		else
89		{
90	✗	exp_char = 'p';
91	✗	capital_exp_char = 'P';
92		}
93
94	✗	if (next == last \|\| next == exp_char \|\| next == -capital_exp_char)
95		{
96	✗	significand = 0;
97	✗	exponent = 0;
98	✗	return {next, std::errc()};
99		}
100
101		// Next we get the significand
102	✗	constexpr std::size_t significand_buffer_size = limits<Unsigned_Integer>::max_chars10 - 1; // Base 10 or 16
103	✗	char significand_buffer[significand_buffer_size] {};
104	✗	std::size_t i = 0;
105	✗	std::size_t dot_position = 0;
106	✗	Integer extra_zeros = 0;
107	✗	Integer leading_zero_powers = 0;
108	✗	const auto char_validation_func = (fmt != charconv::chars_format::hex) ? is_integer_char : is_hex_char;
109	✗	const int base = (fmt != charconv::chars_format::hex) ? 10 : 16;
110
111	✗	while (char_validation_func(*next) && next != last && i < significand_buffer_size)
112		{
113	✗	all_zeros = false;
114	✗	significand_buffer[i] = *next;
115	✗	++next;
116	✗	++i;
117		}
118
119	✗	bool fractional = false;
120	✗	if (next == last)
121		{
122		// if fmt is chars_format::scientific the e is required
123	✗	if (fmt == chars_format::scientific)
124		{
125	✗	return {first, std::errc::invalid_argument};
126		}
127
128	✗	exponent = 0;
129	✗	std::size_t offset = i;
130
131	✗	from_chars_result r = from_chars(significand_buffer, significand_buffer + offset, significand, base);
132	✗	switch (r.ec)
133		{
134	✗	case std::errc::invalid_argument:
135	✗	return {first, std::errc::invalid_argument};
136	✗	case std::errc::result_out_of_range:
137	✗	return {next, std::errc::result_out_of_range};
138	✗	default:
139	✗	return {next, std::errc()};
140		}
141		}
142	✗	else if (*next == '.')
143		{
144	✗	++next;
145	✗	fractional = true;
146	✗	dot_position = i;
147
148		// Process the fractional part if we have it
149		//
150		// if fmt is chars_format::scientific the e is required
151		// if fmt is chars_format::fixed and not scientific the e is disallowed
152		// if fmt is chars_format::general (which is scientific and fixed) the e is optional
153
154		// If we have the value 0.00001 we can continue to chop zeros and adjust the exponent
155		// so that we get the useful parts of the fraction
156	✗	if (all_zeros)
157		{
158	✗	while (*next == '0' && next != last)
159		{
160	✗	++next;
161	✗	--leading_zero_powers;
162		}
163
164	✗	if (next == last)
165		{
166	✗	return {last, std::errc()};
167		}
168		}
169
170	✗	while (char_validation_func(*next) && next != last && i < significand_buffer_size)
171		{
172	✗	significand_buffer[i] = *next;
173	✗	++next;
174	✗	++i;
175		}
176		}
177
178	✗	if (i == significand_buffer_size)
179		{
180		// We can not process any more significant figures into the significand so skip to the end
181		// or the exponent part and capture the additional orders of magnitude for the exponent
182	✗	bool found_dot = false;
183	✗	while ((char_validation_func(next) \|\| next == '.') && next != last)
184		{
185	✗	++next;
186	✗	if (!fractional && !found_dot)
187		{
188	✗	++extra_zeros;
189		}
190	✗	if (*next == '.')
191		{
192	✗	found_dot = true;
193		}
194		}
195		}
196
197	✗	if (next == last \|\| is_delimiter(*next, fmt))
198		{
199	✗	if (fmt == chars_format::scientific)
200		{
201	✗	return {first, std::errc::invalid_argument};
202		}
203	✗	if (dot_position != 0 \|\| fractional)
204		{
205	✗	exponent = static_cast<Integer>(dot_position) - i + extra_zeros + leading_zero_powers;
206		}
207		else
208		{
209	✗	exponent = extra_zeros + leading_zero_powers;
210		}
211	✗	std::size_t offset = i;
212
213	✗	from_chars_result r = from_chars(significand_buffer, significand_buffer + offset, significand, base);
214	✗	switch (r.ec)
215		{
216	✗	case std::errc::invalid_argument:
217	✗	return {first, std::errc::invalid_argument};
218	✗	case std::errc::result_out_of_range:
219	✗	return {next, std::errc::result_out_of_range};
220	✗	default:
221	✗	return {next, std::errc()};
222		}
223		}
224	✗	else if (next == exp_char \|\| next == capital_exp_char)
225		{
226		// Would be a number without a significand e.g. e+03
227	✗	if (next == first)
228		{
229	✗	return {next, std::errc::invalid_argument};
230		}
231
232	✗	++next;
233	✗	if (fmt == chars_format::fixed)
234		{
235	✗	return {first, std::errc::invalid_argument};
236		}
237
238	✗	exponent = i - 1;
239	✗	std::size_t offset = i;
240	✗	bool round = false;
241		// If more digits are present than representable in the significand of the target type
242		// we set the maximum
243	✗	if (offset > significand_buffer_size)
244		{
245	✗	offset = significand_buffer_size - 1;
246	✗	i = significand_buffer_size;
247	✗	if (significand_buffer[offset] == '5' \|\|
248	✗	significand_buffer[offset] == '6' \|\|
249	✗	significand_buffer[offset] == '7' \|\|
250	✗	significand_buffer[offset] == '8' \|\|
251	✗	significand_buffer[offset] == '9')
252		{
253	✗	round = true;
254		}
255		}
256
257		// If the significand is 0 from chars will return std::errc::invalid_argument because there is nothing in the buffer,
258		// but it is a valid value. We need to continue parsing to get the correct value of ptr even
259		// though we know we could bail now.
260		//
261		// See GitHub issue #29: https://github.com/cppalliance/charconv/issues/29
262	✗	if (offset != 0)
263		{
264	✗	from_chars_result r = from_chars(significand_buffer, significand_buffer + offset, significand, base);
265	✗	switch (r.ec)
266		{
267	✗	case std::errc::invalid_argument:
268	✗	return {first, std::errc::invalid_argument};
269	✗	case std::errc::result_out_of_range:
270	✗	return {next, std::errc::result_out_of_range};
271	✗	default:
272	✗	break;
273		}
274
275	✗	if (round)
276		{
277	✗	significand += 1;
278		}
279		}
280	✗	}
281		else
282		{
283	✗	return {first, std::errc::invalid_argument};
284		}
285
286		// Finally we get the exponent
287	✗	constexpr std::size_t exponent_buffer_size = 6; // Float128 min exp is −16382
288	✗	char exponent_buffer[exponent_buffer_size] {};
289	✗	Integer significand_digits = i;
290	✗	i = 0;
291
292		// Get the sign first
293	✗	if (*next == '-')
294		{
295	✗	exponent_buffer[i] = *next;
296	✗	++next;
297	✗	++i;
298		}
299	✗	else if (*next == '+')
300		{
301	✗	++next;
302		}
303
304		// Next strip any leading zeros
305	✗	while (*next == '0')
306		{
307	✗	++next;
308		}
309
310		// Process the significant values
311	✗	while (is_integer_char(*next) && next != last && i < exponent_buffer_size)
312		{
313	✗	exponent_buffer[i] = *next;
314	✗	++next;
315	✗	++i;
316		}
317
318		// If the exponent can't fit in the buffer the number is not representable
319	✗	if (next != last && i == exponent_buffer_size)
320		{
321	✗	return {next, std::errc::result_out_of_range};
322		}
323
324		// If the exponent was e+00 or e-00
325	✗	if (i == 0 \|\| (i == 1 && exponent_buffer[0] == '-'))
326		{
327	✗	if (fractional)
328		{
329	✗	exponent = static_cast<Integer>(dot_position) - significand_digits;
330		}
331		else
332		{
333	✗	exponent = extra_zeros;
334		}
335
336	✗	return {next, std::errc()};
337		}
338
339	✗	const auto r = from_chars(exponent_buffer, exponent_buffer + i, exponent);
340
341	✗	exponent += leading_zero_powers;
342
343	✗	switch (r.ec)
344		{
345	✗	case std::errc::invalid_argument:
346	✗	return {first, std::errc::invalid_argument};
347	✗	case std::errc::result_out_of_range:
348	✗	return {next, std::errc::result_out_of_range};
349	✗	default:
350	✗	if (fractional)
351		{
352		// Need to take the offset from 1.xxx because compute_floatXXX assumes the significand is an integer
353		// so the exponent is off by the number of digits in the significand - 1
354	✗	if (fmt == chars_format::hex)
355		{
356		// In hex the number of digits parsed is possibly less than the number of digits in base10
357	✗	exponent -= num_digits(significand) - dot_position;
358		}
359		else
360		{
361	✗	exponent -= significand_digits - dot_position;
362		}
363		}
364		else
365		{
366	✗	exponent += extra_zeros;
367		}
368	✗	return {next, std::errc()};
369		}
370		}
371
372		}}}}} // Namespaces
373
374		#if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
375		# pragma GCC diagnostic pop
376		#endif
377
378		#endif // BOOST_JSON_DETAIL_CHARCONV_DETAIL_PARSER_HPP
379

1

2

// Distributed under the Boost Software License, Version 1.0.

3

// https://www.boost.org/LICENSE_1_0.txt

4

5

#ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_PARSER_HPP

6

#define BOOST_JSON_DETAIL_CHARCONV_DETAIL_PARSER_HPP

7

8

#include <boost/json/detail/charconv/detail/config.hpp>

9

#include <boost/json/detail/charconv/detail/from_chars_result.hpp>

10

#include <boost/json/detail/charconv/detail/from_chars_integer_impl.hpp>

11

#include <boost/json/detail/charconv/detail/integer_search_trees.hpp>

12

#include <boost/json/detail/charconv/limits.hpp>

13

#include <boost/json/detail/charconv/chars_format.hpp>

14

#include <system_error>

15

#include <type_traits>

16

#include <limits>

17

#include <cerrno>

18

#include <cstdint>

19

#include <cstring>

20

21

#if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)

22

# pragma GCC diagnostic push

23

# pragma GCC diagnostic ignored "-Wmissing-field-initializers"

24

#endif

25

26

namespace boost { namespace json { namespace detail { namespace charconv { namespace detail {

27

28

✗

// Reports whether c is one of the ASCII decimal digits '0' through '9'.
inline bool is_integer_char(char c) noexcept
{
    const bool below_zero = c < '0';
    const bool above_nine = c > '9';
    return !(below_zero || above_nine);
}

32

33

✗

inline bool is_hex_char(char c) noexcept

34

{

35

✗

return is_integer_char(c) || (((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F')));

36

}

37

38

✗

inline bool is_delimiter(char c, chars_format fmt) noexcept

39

{

40

✗

if (fmt != chars_format::hex)

41

{

42

✗

return !is_integer_char(c) && c != 'e' && c != 'E';

43

}

44

45

✗

return !is_hex_char(c) && c != 'p' && c != 'P';

46

}

47

48

template <typename Unsigned_Integer, typename Integer>

49

✗

inline from_chars_result parser(const char* first, const char* last, bool& sign, Unsigned_Integer& significand, Integer& exponent, chars_format fmt = chars_format::general) noexcept

50

{

51

✗

if (first > last)

52

{

53

✗

return {first, std::errc::invalid_argument};

54

}

55

56

✗

auto next = first;

57

✗

bool all_zeros = true;

58

59

// First extract the sign

60

✗

if (*next == '-')

61

{

62

✗

sign = true;

63

✗

++next;

64

}

65

✗

else if (*next == '+')

66

{

67

✗

return {next, std::errc::invalid_argument};

68

}

69

else

70

{

71

✗

sign = false;

72

}

73

74

// Ignore leading zeros (e.g. 00005 or -002.3e+5)

75

✗

while (*next == '0' && next != last)

76

{

77

✗

++next;

78

}

79

80

// If the number is 0 we can abort now

81

char exp_char;

82

char capital_exp_char;

83

✗

if (fmt != chars_format::hex)

84

{

85

✗

exp_char = 'e';

86

✗

capital_exp_char = 'E';

87

}

88

else

89

{

90

✗

exp_char = 'p';

91

✗

capital_exp_char = 'P';

92

}

93

94

✗

if (next == last || *next == exp_char || *next == -capital_exp_char)

95

{

96

✗

significand = 0;

97

✗

exponent = 0;

98

✗

return {next, std::errc()};

99

}

100

101

// Next we get the significand

102

✗

constexpr std::size_t significand_buffer_size = limits<Unsigned_Integer>::max_chars10 - 1; // Base 10 or 16

103

✗

char significand_buffer[significand_buffer_size] {};

104

✗

std::size_t i = 0;

105

✗

std::size_t dot_position = 0;

106

✗

Integer extra_zeros = 0;

107

✗

Integer leading_zero_powers = 0;

108

✗

const auto char_validation_func = (fmt != charconv::chars_format::hex) ? is_integer_char : is_hex_char;

109

✗

const int base = (fmt != charconv::chars_format::hex) ? 10 : 16;

110

111

✗

while (char_validation_func(*next) && next != last && i < significand_buffer_size)

112

{

113

✗

all_zeros = false;

114

✗

significand_buffer[i] = *next;

115

✗

++next;

116

✗

++i;

117

}

118

119

✗

bool fractional = false;

120

✗

if (next == last)

121

{

122

// if fmt is chars_format::scientific the e is required

123

✗

if (fmt == chars_format::scientific)

124

{

125

✗

return {first, std::errc::invalid_argument};

126

}

127

128

✗

exponent = 0;

129

✗

std::size_t offset = i;

130

131

✗

from_chars_result r = from_chars(significand_buffer, significand_buffer + offset, significand, base);

132

✗

switch (r.ec)

133

{

134

✗

case std::errc::invalid_argument:

135

✗

return {first, std::errc::invalid_argument};

136

✗

case std::errc::result_out_of_range:

137

✗

return {next, std::errc::result_out_of_range};

138

✗

default:

139

✗

return {next, std::errc()};

140

}

141

}

142

✗

else if (*next == '.')

143

{

144

✗

++next;

145

✗

fractional = true;

146

✗

dot_position = i;

147

148

// Process the fractional part if we have it

149

//

150

// if fmt is chars_format::scientific the e is required

151

// if fmt is chars_format::fixed and not scientific the e is disallowed

152

// if fmt is chars_format::general (which is scientific and fixed) the e is optional

153

154

// If we have the value 0.00001 we can continue to chop zeros and adjust the exponent

155

// so that we get the useful parts of the fraction

156

✗

if (all_zeros)

157

{

158

✗

while (*next == '0' && next != last)

159

{

160

✗

++next;

161

✗

--leading_zero_powers;

162

}

163

164

✗

if (next == last)

165

{

166

✗

return {last, std::errc()};

167

}

168

}

169

170

✗

while (char_validation_func(*next) && next != last && i < significand_buffer_size)

171

{

172

✗

significand_buffer[i] = *next;

173

✗

++next;

174

✗

++i;

175

}

176

}

177

178

✗

if (i == significand_buffer_size)

179

{

180

// We can not process any more significant figures into the significand so skip to the end

181

// or the exponent part and capture the additional orders of magnitude for the exponent

182

✗

bool found_dot = false;

183

✗

while ((char_validation_func(*next) || *next == '.') && next != last)

184

{

185

✗

++next;

186

✗

if (!fractional && !found_dot)

187

{

188

✗

++extra_zeros;

189

}

190

✗

if (*next == '.')

191

{

192

✗

found_dot = true;

193

}

194

}

195

}

196

197

✗

if (next == last || is_delimiter(*next, fmt))

198

{

199

✗

if (fmt == chars_format::scientific)

200

{

201

✗

return {first, std::errc::invalid_argument};

202

}

203

✗

if (dot_position != 0 || fractional)

204

{

205

✗

exponent = static_cast<Integer>(dot_position) - i + extra_zeros + leading_zero_powers;

206

}

207

else

208

{

209

✗

exponent = extra_zeros + leading_zero_powers;

210

}

211

✗

std::size_t offset = i;

212

213

✗

from_chars_result r = from_chars(significand_buffer, significand_buffer + offset, significand, base);

214

✗

switch (r.ec)

215

{

216

✗

case std::errc::invalid_argument:

217

✗

return {first, std::errc::invalid_argument};

218

✗

case std::errc::result_out_of_range:

219

✗

return {next, std::errc::result_out_of_range};

220

✗

default:

221

✗

return {next, std::errc()};

222

}

223

}

224

✗

else if (*next == exp_char || *next == capital_exp_char)

225

{

226

// Would be a number without a significand e.g. e+03

227

✗

if (next == first)

228

{

229

✗

return {next, std::errc::invalid_argument};

230

}

231

232

✗

++next;

233

✗

if (fmt == chars_format::fixed)

234

{

235

✗

return {first, std::errc::invalid_argument};

236

}

237

238

✗

exponent = i - 1;

239

✗

std::size_t offset = i;

240

✗

bool round = false;

241

// If more digits are present than representable in the significand of the target type

242

// we set the maximum

243

✗

if (offset > significand_buffer_size)

244

{

245

✗

offset = significand_buffer_size - 1;

246

✗

i = significand_buffer_size;

247

✗

if (significand_buffer[offset] == '5' ||

248

✗

significand_buffer[offset] == '6' ||

249

✗

significand_buffer[offset] == '7' ||

250

✗

significand_buffer[offset] == '8' ||

251

✗

significand_buffer[offset] == '9')

252

{

253

✗

round = true;

254

}

255

}

256

257

// If the significand is 0 from chars will return std::errc::invalid_argument because there is nothing in the buffer,

258

// but it is a valid value. We need to continue parsing to get the correct value of ptr even

259

// though we know we could bail now.

260

//

261

// See GitHub issue #29: https://github.com/cppalliance/charconv/issues/29

262

✗

if (offset != 0)

263

{

264

✗

from_chars_result r = from_chars(significand_buffer, significand_buffer + offset, significand, base);

265

✗

switch (r.ec)

266

{

267

✗

case std::errc::invalid_argument:

268

✗

return {first, std::errc::invalid_argument};

269

✗

case std::errc::result_out_of_range:

270

✗

return {next, std::errc::result_out_of_range};

271

✗

default:

272

✗

break;

273

}

274

275

✗

if (round)

276

{

277

✗

significand += 1;

278

}

279

}

280

✗

}

281

else

282

{

283

✗

return {first, std::errc::invalid_argument};

284

}

285

286

// Finally we get the exponent

287

✗

constexpr std::size_t exponent_buffer_size = 6; // Float128 min exp is −16382

288

✗

char exponent_buffer[exponent_buffer_size] {};

289

✗

Integer significand_digits = i;

290

✗

i = 0;

291

292

// Get the sign first

293

✗

if (*next == '-')

294

{

295

✗

exponent_buffer[i] = *next;

296

✗

++next;

297

✗

++i;

298

}

299

✗

else if (*next == '+')

300

{

301

✗

++next;

302

}

303

304

// Next strip any leading zeros

305

✗

while (*next == '0')

306

{

307

✗

++next;

308

}

309

310

// Process the significant values

311

✗

while (is_integer_char(*next) && next != last && i < exponent_buffer_size)

312

{

313

✗

exponent_buffer[i] = *next;

314

✗

++next;

315

✗

++i;

316

}

317

318

// If the exponent can't fit in the buffer the number is not representable

319

✗

if (next != last && i == exponent_buffer_size)

320

{

321

✗

return {next, std::errc::result_out_of_range};

322

}

323

324

// If the exponent was e+00 or e-00

325

✗

if (i == 0 || (i == 1 && exponent_buffer[0] == '-'))

326

{

327

✗

if (fractional)

328

{

329

✗

exponent = static_cast<Integer>(dot_position) - significand_digits;

330

}

331

else

332

{

333

✗

exponent = extra_zeros;

334

}

335

336

✗

return {next, std::errc()};

337

}

338

339

✗

const auto r = from_chars(exponent_buffer, exponent_buffer + i, exponent);

340

341

✗

exponent += leading_zero_powers;

342

343

✗

switch (r.ec)

344

{

345

✗

case std::errc::invalid_argument:

346

✗

return {first, std::errc::invalid_argument};

347

✗

case std::errc::result_out_of_range:

348

✗

return {next, std::errc::result_out_of_range};

349

✗

default:

350

✗

if (fractional)

351

{

352

// Need to take the offset from 1.xxx because compute_floatXXX assumes the significand is an integer

353

// so the exponent is off by the number of digits in the significand - 1

354

✗

if (fmt == chars_format::hex)

355

{

356

// In hex the number of digits parsed is possibly less than the number of digits in base10

357

✗

exponent -= num_digits(significand) - dot_position;

358

}

359

else

360

{

361

✗

exponent -= significand_digits - dot_position;

362

}

363

}

364

else

365

{

366

✗

exponent += extra_zeros;

367

}

368

✗

return {next, std::errc()};

369

}

370

}

371

372

}}}}} // Namespaces

373

374

#if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)

375

# pragma GCC diagnostic pop

376

#endif

377

378

#endif // BOOST_JSON_DETAIL_CHARCONV_DETAIL_PARSER_HPP

379