Bug Summary

File:out/../deps/icu-small/source/common/ucnvlat1.cpp
Warning:line 414, column 42
Although the value stored to 'sourceLimit' is used in the enclosing expression, the value is never actually read from 'sourceLimit'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ucnvlat1.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D U_COMMON_IMPLEMENTATION=1 -D U_ATTRIBUTE_DEPRECATED= -D _CRT_SECURE_NO_DEPRECATE= -D U_STATIC_IMPLEMENTATION=1 -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -I ../deps/icu-small/source/common -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem /usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include -internal-externc-isystem 
/include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-deprecated-declarations -Wno-strict-aliasing -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../deps/icu-small/source/common/ucnvlat1.cpp
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 2000-2015, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8* file name: ucnvlat1.cpp
9* encoding: UTF-8
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2000feb07
14* created by: Markus W. Scherer
15*/
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_CONVERSION0
20
21#include "unicode/ucnv.h"
22#include "unicode/uset.h"
23#include "unicode/utf8.h"
24#include "ucnv_bld.h"
25#include "ucnv_cnv.h"
26#include "ustr_imp.h"
27
/*
 * control optimizations according to the platform
 *
 * When non-zero, _Latin1FromUnicodeWithOffsets() compiles in its 16-way
 * unrolled fast path (guarded by #if LATIN1_UNROLL_FROM_UNICODE).
 */
#define LATIN1_UNROLL_FROM_UNICODE 1
30
31/* ISO 8859-1 --------------------------------------------------------------- */
32
33/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
34U_CDECL_BEGINextern "C" {
35static void U_CALLCONV
36_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
37 UErrorCode *pErrorCode) {
38 const uint8_t *source;
39 UChar *target;
40 int32_t targetCapacity, length;
41 int32_t *offsets;
42
43 int32_t sourceIndex;
44
45 /* set up the local pointers */
46 source=(const uint8_t *)pArgs->source;
47 target=pArgs->target;
48 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
49 offsets=pArgs->offsets;
50
51 sourceIndex=0;
52
53 /*
54 * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
55 * for the minimum of the sourceLength and targetCapacity
56 */
57 length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
58 if(length<=targetCapacity) {
59 targetCapacity=length;
60 } else {
61 /* target will be full */
62 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
63 length=targetCapacity;
64 }
65
66 if(targetCapacity>=8) {
67 /* This loop is unrolled for speed and improved pipelining. */
68 int32_t count, loops;
69
70 loops=count=targetCapacity>>3;
71 length=targetCapacity&=0x7;
72 do {
73 target[0]=source[0];
74 target[1]=source[1];
75 target[2]=source[2];
76 target[3]=source[3];
77 target[4]=source[4];
78 target[5]=source[5];
79 target[6]=source[6];
80 target[7]=source[7];
81 target+=8;
82 source+=8;
83 } while(--count>0);
84
85 if(offsets!=NULL__null) {
86 do {
87 offsets[0]=sourceIndex++;
88 offsets[1]=sourceIndex++;
89 offsets[2]=sourceIndex++;
90 offsets[3]=sourceIndex++;
91 offsets[4]=sourceIndex++;
92 offsets[5]=sourceIndex++;
93 offsets[6]=sourceIndex++;
94 offsets[7]=sourceIndex++;
95 offsets+=8;
96 } while(--loops>0);
97 }
98 }
99
100 /* conversion loop */
101 while(targetCapacity>0) {
102 *target++=*source++;
103 --targetCapacity;
104 }
105
106 /* write back the updated pointers */
107 pArgs->source=(const char *)source;
108 pArgs->target=target;
109
110 /* set offsets */
111 if(offsets!=NULL__null) {
112 while(length>0) {
113 *offsets++=sourceIndex++;
114 --length;
115 }
116 pArgs->offsets=offsets;
117 }
118}
119
120/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
121static UChar32 U_CALLCONV
122_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
123 UErrorCode *pErrorCode) {
124 const uint8_t *source=(const uint8_t *)pArgs->source;
125 if(source<(const uint8_t *)pArgs->sourceLimit) {
126 pArgs->source=(const char *)(source+1);
127 return *source;
128 }
129
130 /* no output because of empty input */
131 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
132 return 0xffff;
133}
134
135/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
136static void U_CALLCONV
137_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
138 UErrorCode *pErrorCode) {
139 UConverter *cnv;
140 const UChar *source, *sourceLimit;
141 uint8_t *target, *oldTarget;
142 int32_t targetCapacity, length;
143 int32_t *offsets;
144
145 UChar32 cp;
146 UChar c, max;
147
148 int32_t sourceIndex;
149
150 /* set up the local pointers */
151 cnv=pArgs->converter;
152 source=pArgs->source;
153 sourceLimit=pArgs->sourceLimit;
154 target=oldTarget=(uint8_t *)pArgs->target;
155 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
156 offsets=pArgs->offsets;
157
158 if(cnv->sharedData==&_Latin1Data_Latin1Data_71) {
159 max=0xff; /* Latin-1 */
160 } else {
161 max=0x7f; /* US-ASCII */
162 }
163
164 /* get the converter state from UConverter */
165 cp=cnv->fromUChar32;
166
167 /* sourceIndex=-1 if the current character began in the previous buffer */
168 sourceIndex= cp==0 ? 0 : -1;
169
170 /*
171 * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
172 * for the minimum of the sourceLength and targetCapacity
173 */
174 length=(int32_t)(sourceLimit-source);
175 if(length<targetCapacity) {
176 targetCapacity=length;
177 }
178
179 /* conversion loop */
180 if(cp!=0 && targetCapacity>0) {
181 goto getTrail;
182 }
183
184#if LATIN1_UNROLL_FROM_UNICODE1
185 /* unroll the loop with the most common case */
186 if(targetCapacity>=16) {
187 int32_t count, loops;
188 UChar u, oredChars;
189
190 loops=count=targetCapacity>>4;
191 do {
192 oredChars=u=*source++;
193 *target++=(uint8_t)u;
194 oredChars|=u=*source++;
195 *target++=(uint8_t)u;
196 oredChars|=u=*source++;
197 *target++=(uint8_t)u;
198 oredChars|=u=*source++;
199 *target++=(uint8_t)u;
200 oredChars|=u=*source++;
201 *target++=(uint8_t)u;
202 oredChars|=u=*source++;
203 *target++=(uint8_t)u;
204 oredChars|=u=*source++;
205 *target++=(uint8_t)u;
206 oredChars|=u=*source++;
207 *target++=(uint8_t)u;
208 oredChars|=u=*source++;
209 *target++=(uint8_t)u;
210 oredChars|=u=*source++;
211 *target++=(uint8_t)u;
212 oredChars|=u=*source++;
213 *target++=(uint8_t)u;
214 oredChars|=u=*source++;
215 *target++=(uint8_t)u;
216 oredChars|=u=*source++;
217 *target++=(uint8_t)u;
218 oredChars|=u=*source++;
219 *target++=(uint8_t)u;
220 oredChars|=u=*source++;
221 *target++=(uint8_t)u;
222 oredChars|=u=*source++;
223 *target++=(uint8_t)u;
224
225 /* were all 16 entries really valid? */
226 if(oredChars>max) {
227 /* no, return to the first of these 16 */
228 source-=16;
229 target-=16;
230 break;
231 }
232 } while(--count>0);
233 count=loops-count;
234 targetCapacity-=16*count;
235
236 if(offsets!=NULL__null) {
237 oldTarget+=16*count;
238 while(count>0) {
239 *offsets++=sourceIndex++;
240 *offsets++=sourceIndex++;
241 *offsets++=sourceIndex++;
242 *offsets++=sourceIndex++;
243 *offsets++=sourceIndex++;
244 *offsets++=sourceIndex++;
245 *offsets++=sourceIndex++;
246 *offsets++=sourceIndex++;
247 *offsets++=sourceIndex++;
248 *offsets++=sourceIndex++;
249 *offsets++=sourceIndex++;
250 *offsets++=sourceIndex++;
251 *offsets++=sourceIndex++;
252 *offsets++=sourceIndex++;
253 *offsets++=sourceIndex++;
254 *offsets++=sourceIndex++;
255 --count;
256 }
257 }
258 }
259#endif
260
261 /* conversion loop */
262 c=0;
263 while(targetCapacity>0 && (c=*source++)<=max) {
264 /* convert the Unicode code point */
265 *target++=(uint8_t)c;
266 --targetCapacity;
267 }
268
269 if(c>max) {
270 cp=c;
271 if(!U_IS_SURROGATE(cp)(((cp)&0xfffff800)==0xd800)) {
272 /* callback(unassigned) */
273 } else if(U_IS_SURROGATE_LEAD(cp)(((cp)&0x400)==0)) {
274getTrail:
275 if(source<sourceLimit) {
276 /* test the following code unit */
277 UChar trail=*source;
278 if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
279 ++source;
280 cp=U16_GET_SUPPLEMENTARY(cp, trail)(((UChar32)(cp)<<10UL)+(UChar32)(trail)-((0xd800<<
10UL)+0xdc00-0x10000))
;
281 /* this codepage does not map supplementary code points */
282 /* callback(unassigned) */
283 } else {
284 /* this is an unmatched lead code unit (1st surrogate) */
285 /* callback(illegal) */
286 }
287 } else {
288 /* no more input */
289 cnv->fromUChar32=cp;
290 goto noMoreInput;
291 }
292 } else {
293 /* this is an unmatched trail code unit (2nd surrogate) */
294 /* callback(illegal) */
295 }
296
297 *pErrorCode= U_IS_SURROGATE(cp)(((cp)&0xfffff800)==0xd800) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
298 cnv->fromUChar32=cp;
299 }
300noMoreInput:
301
302 /* set offsets since the start */
303 if(offsets!=NULL__null) {
304 size_t count=target-oldTarget;
305 while(count>0) {
306 *offsets++=sourceIndex++;
307 --count;
308 }
309 }
310
311 if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
312 /* target is full */
313 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
314 }
315
316 /* write back the updated pointers */
317 pArgs->source=source;
318 pArgs->target=(char *)target;
319 pArgs->offsets=offsets;
320}
321
322/* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */
323static void U_CALLCONV
324ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
325 UConverterToUnicodeArgs *pToUArgs,
326 UErrorCode *pErrorCode) {
327 UConverter *utf8;
328 const uint8_t *source, *sourceLimit;
329 uint8_t *target;
330 int32_t targetCapacity;
331
332 UChar32 c;
333 uint8_t b, t1;
334
335 /* set up the local pointers */
336 utf8=pToUArgs->converter;
337 source=(uint8_t *)pToUArgs->source;
338 sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
339 target=(uint8_t *)pFromUArgs->target;
340 targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
341
342 /* get the converter state from the UTF-8 UConverter */
343 if (utf8->toULength > 0) {
344 c=(UChar32)utf8->toUnicodeStatus;
345 } else {
346 c = 0;
347 }
348 if(c!=0 && source<sourceLimit) {
349 if(targetCapacity==0) {
350 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
351 return;
352 } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
353 ++source;
354 *target++=(uint8_t)(((c&3)<<6)|t1);
355 --targetCapacity;
356
357 utf8->toUnicodeStatus=0;
358 utf8->toULength=0;
359 } else {
360 /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
361 *pErrorCode=U_USING_DEFAULT_WARNING;
362 return;
363 }
364 }
365
366 /*
367 * Make sure that the last byte sequence before sourceLimit is complete
368 * or runs into a lead byte.
369 * In the conversion loop compare source with sourceLimit only once
370 * per multi-byte character.
371 * For Latin-1, adjust sourceLimit only for 1 trail byte because
372 * the conversion loop handles at most 2-byte sequences.
373 */
374 if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))((uint8_t)((*(sourceLimit-1))-0xc2)<=0x32)) {
375 --sourceLimit;
376 }
377
378 /* conversion loop */
379 while(source<sourceLimit) {
380 if(targetCapacity>0) {
381 b=*source++;
382 if(U8_IS_SINGLE(b)(((b)&0x80)==0)) {
383 /* convert ASCII */
384 *target++=(uint8_t)b;
385 --targetCapacity;
386 } else if( /* handle U+0080..U+00FF inline */
387 b>=0xc2 && b<=0xc3 &&
388 (t1=(uint8_t)(*source-0x80)) <= 0x3f
389 ) {
390 ++source;
391 *target++=(uint8_t)(((b&3)<<6)|t1);
392 --targetCapacity;
393 } else {
394 /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
395 pToUArgs->source=(char *)(source-1);
396 pFromUArgs->target=(char *)target;
397 *pErrorCode=U_USING_DEFAULT_WARNING;
398 return;
399 }
400 } else {
401 /* target is full */
402 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
403 break;
404 }
405 }
406
407 /*
408 * The sourceLimit may have been adjusted before the conversion loop
409 * to stop before a truncated sequence.
410 * If so, then collect the truncated sequence now.
411 * For Latin-1, there is at most exactly one lead byte because of the
412 * smaller sourceLimit adjustment logic.
413 */
414 if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
Although the value stored to 'sourceLimit' is used in the enclosing expression, the value is never actually read from 'sourceLimit'
415 utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
416 utf8->toULength=1;
417 utf8->mode=U8_COUNT_BYTES(b)((((b)&0x80)==0) ? 1 : (((uint8_t)((b)-0xc2)<=0x32) ? (
(uint8_t)(b)>=0xe0)+((uint8_t)(b)>=0xf0)+2 : 0))
;
418 }
419
420 /* write back the updated pointers */
421 pToUArgs->source=(char *)source;
422 pFromUArgs->target=(char *)target;
423}
424
425static void U_CALLCONV
426_Latin1GetUnicodeSet(const UConverter *cnv,
427 const USetAdder *sa,
428 UConverterUnicodeSet which,
429 UErrorCode *pErrorCode) {
430 (void)cnv;
431 (void)which;
432 (void)pErrorCode;
433 sa->addRange(sa->set, 0, 0xff);
434}
435U_CDECL_END}
436
437
438static const UConverterImpl _Latin1Impl={
439 UCNV_LATIN_1,
440
441 NULL__null,
442 NULL__null,
443
444 NULL__null,
445 NULL__null,
446 NULL__null,
447
448 _Latin1ToUnicodeWithOffsets,
449 _Latin1ToUnicodeWithOffsets,
450 _Latin1FromUnicodeWithOffsets,
451 _Latin1FromUnicodeWithOffsets,
452 _Latin1GetNextUChar,
453
454 NULL__null,
455 NULL__null,
456 NULL__null,
457 NULL__null,
458 _Latin1GetUnicodeSet,
459
460 NULL__null,
461 ucnv_Latin1FromUTF8
462};
463
464static const UConverterStaticData _Latin1StaticData={
465 sizeof(UConverterStaticData),
466 "ISO-8859-1",
467 819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
468 { 0x1a, 0, 0, 0 }, 1, FALSE0, FALSE0,
469 0,
470 0,
471 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
472};
473
474const UConverterSharedData _Latin1Data_Latin1Data_71=
475 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl){ sizeof(UConverterSharedData), ~((uint32_t)0), __null, &
_Latin1StaticData, false, false, &_Latin1Impl, 0, { 0, 0,
0, 0, __null, __null, __null, __null, __null, __null, { 0 },
__null, __null, 0, 0, 0, false, 0, 0, __null, __null, __null
, __null } }
;
476
477/* US-ASCII ----------------------------------------------------------------- */
478
479U_CDECL_BEGINextern "C" {
480/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
481static void U_CALLCONV
482_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
483 UErrorCode *pErrorCode) {
484 const uint8_t *source, *sourceLimit;
485 UChar *target, *oldTarget;
486 int32_t targetCapacity, length;
487 int32_t *offsets;
488
489 int32_t sourceIndex;
490
491 uint8_t c;
492
493 /* set up the local pointers */
494 source=(const uint8_t *)pArgs->source;
495 sourceLimit=(const uint8_t *)pArgs->sourceLimit;
496 target=oldTarget=pArgs->target;
497 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
498 offsets=pArgs->offsets;
499
500 /* sourceIndex=-1 if the current character began in the previous buffer */
501 sourceIndex=0;
502
503 /*
504 * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
505 * for the minimum of the sourceLength and targetCapacity
506 */
507 length=(int32_t)(sourceLimit-source);
508 if(length<targetCapacity) {
509 targetCapacity=length;
510 }
511
512 if(targetCapacity>=8) {
513 /* This loop is unrolled for speed and improved pipelining. */
514 int32_t count, loops;
515 UChar oredChars;
516
517 loops=count=targetCapacity>>3;
518 do {
519 oredChars=target[0]=source[0];
520 oredChars|=target[1]=source[1];
521 oredChars|=target[2]=source[2];
522 oredChars|=target[3]=source[3];
523 oredChars|=target[4]=source[4];
524 oredChars|=target[5]=source[5];
525 oredChars|=target[6]=source[6];
526 oredChars|=target[7]=source[7];
527
528 /* were all 16 entries really valid? */
529 if(oredChars>0x7f) {
530 /* no, return to the first of these 16 */
531 break;
532 }
533 source+=8;
534 target+=8;
535 } while(--count>0);
536 count=loops-count;
537 targetCapacity-=count*8;
538
539 if(offsets!=NULL__null) {
540 oldTarget+=count*8;
541 while(count>0) {
542 offsets[0]=sourceIndex++;
543 offsets[1]=sourceIndex++;
544 offsets[2]=sourceIndex++;
545 offsets[3]=sourceIndex++;
546 offsets[4]=sourceIndex++;
547 offsets[5]=sourceIndex++;
548 offsets[6]=sourceIndex++;
549 offsets[7]=sourceIndex++;
550 offsets+=8;
551 --count;
552 }
553 }
554 }
555
556 /* conversion loop */
557 c=0;
558 while(targetCapacity>0 && (c=*source++)<=0x7f) {
559 *target++=c;
560 --targetCapacity;
561 }
562
563 if(c>0x7f) {
564 /* callback(illegal); copy the current bytes to toUBytes[] */
565 UConverter *cnv=pArgs->converter;
566 cnv->toUBytes[0]=c;
567 cnv->toULength=1;
568 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
569 } else if(source<sourceLimit && target>=pArgs->targetLimit) {
570 /* target is full */
571 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
572 }
573
574 /* set offsets since the start */
575 if(offsets!=NULL__null) {
576 size_t count=target-oldTarget;
577 while(count>0) {
578 *offsets++=sourceIndex++;
579 --count;
580 }
581 }
582
583 /* write back the updated pointers */
584 pArgs->source=(const char *)source;
585 pArgs->target=target;
586 pArgs->offsets=offsets;
587}
588
589/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
590static UChar32 U_CALLCONV
591_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
592 UErrorCode *pErrorCode) {
593 const uint8_t *source;
594 uint8_t b;
595
596 source=(const uint8_t *)pArgs->source;
597 if(source<(const uint8_t *)pArgs->sourceLimit) {
598 b=*source++;
599 pArgs->source=(const char *)source;
600 if(b<=0x7f) {
601 return b;
602 } else {
603 UConverter *cnv=pArgs->converter;
604 cnv->toUBytes[0]=b;
605 cnv->toULength=1;
606 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
607 return 0xffff;
608 }
609 }
610
611 /* no output because of empty input */
612 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
613 return 0xffff;
614}
615
616/* "Convert" UTF-8 to US-ASCII: Validate and copy. */
617static void U_CALLCONV
618ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
619 UConverterToUnicodeArgs *pToUArgs,
620 UErrorCode *pErrorCode) {
621 const uint8_t *source, *sourceLimit;
622 uint8_t *target;
623 int32_t targetCapacity, length;
624
625 uint8_t c;
626
627 if(pToUArgs->converter->toULength > 0) {
628 /* no handling of partial UTF-8 characters here, fall back to pivoting */
629 *pErrorCode=U_USING_DEFAULT_WARNING;
630 return;
631 }
632
633 /* set up the local pointers */
634 source=(const uint8_t *)pToUArgs->source;
635 sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
636 target=(uint8_t *)pFromUArgs->target;
637 targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
638
639 /*
640 * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
641 * for the minimum of the sourceLength and targetCapacity
642 */
643 length=(int32_t)(sourceLimit-source);
644 if(length<targetCapacity) {
645 targetCapacity=length;
646 }
647
648 /* unroll the loop with the most common case */
649 if(targetCapacity>=16) {
650 int32_t count, loops;
651 uint8_t oredChars;
652
653 loops=count=targetCapacity>>4;
654 do {
655 oredChars=*target++=*source++;
656 oredChars|=*target++=*source++;
657 oredChars|=*target++=*source++;
658 oredChars|=*target++=*source++;
659 oredChars|=*target++=*source++;
660 oredChars|=*target++=*source++;
661 oredChars|=*target++=*source++;
662 oredChars|=*target++=*source++;
663 oredChars|=*target++=*source++;
664 oredChars|=*target++=*source++;
665 oredChars|=*target++=*source++;
666 oredChars|=*target++=*source++;
667 oredChars|=*target++=*source++;
668 oredChars|=*target++=*source++;
669 oredChars|=*target++=*source++;
670 oredChars|=*target++=*source++;
671
672 /* were all 16 entries really valid? */
673 if(oredChars>0x7f) {
674 /* no, return to the first of these 16 */
675 source-=16;
676 target-=16;
677 break;
678 }
679 } while(--count>0);
680 count=loops-count;
681 targetCapacity-=16*count;
682 }
683
684 /* conversion loop */
685 c=0;
686 while(targetCapacity>0 && (c=*source)<=0x7f) {
687 ++source;
688 *target++=c;
689 --targetCapacity;
690 }
691
692 if(c>0x7f) {
693 /* non-ASCII character, handle in standard converter */
694 *pErrorCode=U_USING_DEFAULT_WARNING;
695 } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
696 /* target is full */
697 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
698 }
699
700 /* write back the updated pointers */
701 pToUArgs->source=(const char *)source;
702 pFromUArgs->target=(char *)target;
703}
704
705static void U_CALLCONV
706_ASCIIGetUnicodeSet(const UConverter *cnv,
707 const USetAdder *sa,
708 UConverterUnicodeSet which,
709 UErrorCode *pErrorCode) {
710 (void)cnv;
711 (void)which;
712 (void)pErrorCode;
713 sa->addRange(sa->set, 0, 0x7f);
714}
715U_CDECL_END}
716
717static const UConverterImpl _ASCIIImpl={
718 UCNV_US_ASCII,
719
720 NULL__null,
721 NULL__null,
722
723 NULL__null,
724 NULL__null,
725 NULL__null,
726
727 _ASCIIToUnicodeWithOffsets,
728 _ASCIIToUnicodeWithOffsets,
729 _Latin1FromUnicodeWithOffsets,
730 _Latin1FromUnicodeWithOffsets,
731 _ASCIIGetNextUChar,
732
733 NULL__null,
734 NULL__null,
735 NULL__null,
736 NULL__null,
737 _ASCIIGetUnicodeSet,
738
739 NULL__null,
740 ucnv_ASCIIFromUTF8
741};
742
743static const UConverterStaticData _ASCIIStaticData={
744 sizeof(UConverterStaticData),
745 "US-ASCII",
746 367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
747 { 0x1a, 0, 0, 0 }, 1, FALSE0, FALSE0,
748 0,
749 0,
750 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
751};
752
753const UConverterSharedData _ASCIIData_ASCIIData_71=
754 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl){ sizeof(UConverterSharedData), ~((uint32_t)0), __null, &
_ASCIIStaticData, false, false, &_ASCIIImpl, 0, { 0, 0, 0
, 0, __null, __null, __null, __null, __null, __null, { 0 }, __null
, __null, 0, 0, 0, false, 0, 0, __null, __null, __null, __null
} }
;
755
756#endif