../deps/icu-small/source/common/ucnvmbcs.cpp

Bug Summary

File:	out/../deps/icu-small/source/common/ucnvmbcs.cpp
Warning:	line 3609, column 21 Value stored to 'targetCapacity' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ucnvmbcs.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D U_COMMON_IMPLEMENTATION=1 -D U_ATTRIBUTE_DEPRECATED= -D _CRT_SECURE_NO_DEPRECATE= -D U_STATIC_IMPLEMENTATION=1 -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -I ../deps/icu-small/source/common -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem /usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include -internal-externc-isystem 
/include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-deprecated-declarations -Wno-strict-aliasing -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../deps/icu-small/source/common/ucnvmbcs.cpp

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	******************************************************************************
5	*
6	* Copyright (C) 2000-2016, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	******************************************************************************
10	* file name: ucnvmbcs.cpp
11	* encoding: UTF-8
12	* tab size: 8 (not used)
13	* indentation:4
14	*
15	* created on: 2000jul03
16	* created by: Markus W. Scherer
17	*
18	* The current code in this file replaces the previous implementation
19	* of conversion code from multi-byte codepages to Unicode and back.
20	* This implementation supports the following:
21	* - legacy variable-length codepages with up to 4 bytes per character
22	* - all Unicode code points (up to 0x10ffff)
23	* - efficient distinction of unassigned vs. illegal byte sequences
24	* - it is possible in fromUnicode() to directly deal with simple
25	* stateful encodings (used for EBCDIC_STATEFUL)
26	* - it is possible to convert Unicode code points
27	* to a single zero byte (but not as a fallback except for SBCS)
28	*
29	* Remaining limitations in fromUnicode:
30	* - byte sequences must not have leading zero bytes
31	* - except for SBCS codepages: no fallback mapping from Unicode to a zero byte
32	* - limitation to up to 4 bytes per character
33	*
34	* ICU 2.8 (late 2003) adds a secondary data structure which lifts some of these
35	* limitations and adds m:n character mappings and other features.
36	* See ucnv_ext.h for details.
37	*
38	* Change history:
39	*
40	* 5/6/2001 Ram Moved MBCS_SINGLE_RESULT_FROM_U,MBCS_STAGE_2_FROM_U,
41	* MBCS_VALUE_2_FROM_STAGE_2, MBCS_VALUE_4_FROM_STAGE_2
42	* macros to ucnvmbcs.h file
43	*/
44
45	#include "unicode/utypes.h"
46
47	#if !UCONFIG_NO_CONVERSION0 && !UCONFIG_NO_LEGACY_CONVERSION0
48
49	#include "unicode/ucnv.h"
50	#include "unicode/ucnv_cb.h"
51	#include "unicode/udata.h"
52	#include "unicode/uset.h"
53	#include "unicode/utf8.h"
54	#include "unicode/utf16.h"
55	#include "ucnv_bld.h"
56	#include "ucnvmbcs.h"
57	#include "ucnv_ext.h"
58	#include "ucnv_cnv.h"
59	#include "cmemory.h"
60	#include "cstring.h"
61	#include "umutex.h"
62	#include "ustr_imp.h"
63
64	/* control optimizations according to the platform */
65	#define MBCS_UNROLL_SINGLE_TO_BMP1 1
66	#define MBCS_UNROLL_SINGLE_FROM_BMP0 0
67
68	/*
69	* _MBCSHeader versions 5.3 & 4.3
70	* (Note that the _MBCSHeader version is in addition to the converter formatVersion.)
71	*
72	* This version is optional. Version 5 is used for incompatible data format changes.
73	* makeconv will continue to generate version 4 files if possible.
74	*
75	* Changes from version 4:
76	*
77	* The main difference is an additional _MBCSHeader field with
78	* - the length (number of uint32_t) of the _MBCSHeader
79	* - flags for further incompatible data format changes
80	* - flags for further, backward compatible data format changes
81	*
82	* The MBCS_OPT_FROM_U flag indicates that most of the fromUnicode data is omitted from
83	* the file and needs to be reconstituted at load time.
84	* This requires a utf8Friendly format with an additional mbcsIndex table for fast
85	* (and UTF-8-friendly) fromUnicode conversion for Unicode code points up to maxFastUChar.
86	* (For details about these structures see below, and see ucnvmbcs.h.)
87	*
88	* utf8Friendly also implies that the fromUnicode mappings are stored in ascending order
89	* of the Unicode code points. (This requires that the .ucm file has the \|0 etc.
90	* precision markers for all mappings.)
91	*
92	* All fallbacks have been moved to the extension table, leaving only roundtrips in the
93	* omitted data that can be reconstituted from the toUnicode data.
94	*
95	* Of the stage 2 table, the part corresponding to maxFastUChar and below is omitted.
96	* With only roundtrip mappings in the base fromUnicode data, this part is fully
97	* redundant with the mbcsIndex and will be reconstituted from that (also using the
98	* stage 1 table which contains the information about how stage 2 was compacted).
99	*
100	* The rest of the stage 2 table, the part for code points above maxFastUChar,
101	* is stored in the file and will be appended to the reconstituted part.
102	*
103	* The entire fromUBytes array is omitted from the file and will be reconstituted.
104	* This is done by enumerating all toUnicode roundtrip mappings, performing
105	* each mapping (using the stage 1 and reconstituted stage 2 tables) and
106	* writing instead of reading the byte values.
107	*
108	* _MBCSHeader version 4.3
109	*
110	* Change from version 4.2:
111	* - Optional utf8Friendly data structures, with 64-entry stage 3 block
112	* allocation for parts of the BMP, and an additional mbcsIndex in non-SBCS
113	* files which can be used instead of stages 1 & 2.
114	* Faster lookups for roundtrips from most commonly used characters,
115	* and lookups from UTF-8 byte sequences with a natural bit distribution.
116	* See ucnvmbcs.h for more details.
117	*
118	* Change from version 4.1:
119	* - Added an optional extension table structure at the end of the .cnv file.
120	* It is present if the upper bits of the header flags field contains a non-zero
121	* byte offset to it.
122	* Files that contain only a conversion table and no base table
123	* use the special outputType MBCS_OUTPUT_EXT_ONLY.
124	* These contain the base table name between the MBCS header and the extension
125	* data.
126	*
127	* Change from version 4.0:
128	* - Replace header.reserved with header.fromUBytesLength so that all
129	* fields in the data have length.
130	*
131	* Changes from version 3 (for performance improvements):
132	* - new bit distribution for state table entries
133	* - reordered action codes
134	* - new data structure for single-byte fromUnicode
135	* + stage 2 only contains indexes
136	* + stage 3 stores 16 bits per character with classification bits 15..8
137	* - no multiplier for stage 1 entries
138	* - stage 2 for non-single-byte codepages contains the index and the flags in
139	* one 32-bit value
140	* - 2-byte and 4-byte fromUnicode results are stored directly as 16/32-bit integers
141	*
142	* For more details about old versions of the MBCS data structure, see
143	* the corresponding versions of this file.
144	*
145	* Converting stateless codepage data ---------------------------------------***
146	* (or codepage data with simple states) to Unicode.
147	*
148	* Data structure and algorithm for converting from complex legacy codepages
149	* to Unicode. (Designed before 2000-may-22.)
150	*
151	* The basic idea is that the structure of legacy codepages can be described
152	* with state tables.
153	* When reading a byte stream, each input byte causes a state transition.
154	* Some transitions result in the output of a code point, some result in
155	* "unassigned" or "illegal" output.
156	* This is used here for character conversion.
157	*
158	* The data structure begins with a state table consisting of a row
159	* per state, with 256 entries (columns) per row for each possible input
160	* byte value.
161	* Each entry is 32 bits wide, with two formats distinguished by
162	* the sign bit (bit 31):
163	*
164	* One format for transitional entries (bit 31 not set) for non-final bytes, and
165	* one format for final entries (bit 31 set).
166	* Both formats contain the number of the next state in the same bit
167	* positions.
168	* State 0 is the initial state.
169	*
170	* Most of the time, the offset values of subsequent states are added
171	* up to a scalar value. This value will eventually be the index of
172	* the Unicode code point in a table that follows the state table.
173	* The effect is that the code points for final state table rows
174	* are contiguous. The code points of final state rows follow each other
175	* in the order of the references to those final states by previous
176	* states, etc.
177	*
178	* For some terminal states, the offset is itself the output Unicode
179	* code point (16 bits for a BMP code point or 20 bits for a supplementary
180	* code point (stored as code point minus 0x10000 so that 20 bits are enough)).
181	* For others, the code point in the Unicode table is stored with either
182	* one or two code units: one for BMP code points, two for a pair of
183	* surrogates.
184	* All code points for a final state entry take up the same number of code
185	* units, regardless of whether they all actually _use_ the same number
186	* of code units. This is necessary for simple array access.
187	*
188	* An additional feature comes in with what in ICU is called "fallback"
189	* mappings:
190	*
191	* In addition to round-trippable, precise, 1:1 mappings, there are often
192	* mappings defined between similar, though not the same, characters.
193	* Typically, such mappings occur only in fromUnicode mapping tables because
194	* Unicode has a superset repertoire of most other codepages. However, it
195	* is possible to provide such mappings in the toUnicode tables, too.
196	* In this case, the fallback mappings are partly integrated into the
197	* general state tables because the structure of the encoding includes their
198	* byte sequences.
199	* For final entries in an initial state, fallback mappings are stored in
200	* the entry itself like with roundtrip mappings.
201	* For other final entries, they are stored in the code units table if
202	* the entry is for a pair of code units.
203	* For single-unit results in the code units table, there is no space to
204	* alternatively hold a fallback mapping; in this case, the code unit
205	* is stored as U+fffe (unassigned), and the fallback mapping needs to
206	* be looked up by the scalar offset value in a separate table.
207	*
208	* "Unassigned" state entries really mean "structurally unassigned",
209	* i.e., such a byte sequence will never have a mapping result.
210	*
211	* The interpretation of the bits in each entry is as follows:
212	*
213	* Bit 31 not set, not a terminal entry ("transitional"):
214	* 30..24 next state
215	* 23..0 offset delta, to be added up
216	*
217	* Bit 31 set, terminal ("final") entry:
218	* 30..24 next state (regardless of action code)
219	* 23..20 action code:
220	* action codes 0 and 1 result in precise-mapping Unicode code points
221	* 0 valid byte sequence
222	* 19..16 not used, 0
223	* 15..0 16-bit Unicode BMP code point
224	* never U+fffe or U+ffff
225	* 1 valid byte sequence
226	* 19..0 20-bit Unicode supplementary code point
227	* never U+fffe or U+ffff
228	*
229	* action codes 2 and 3 result in fallback (unidirectional-mapping) Unicode code points
230	* 2 valid byte sequence (fallback)
231	* 19..16 not used, 0
232	* 15..0 16-bit Unicode BMP code point as fallback result
233	* 3 valid byte sequence (fallback)
234	* 19..0 20-bit Unicode supplementary code point as fallback result
235	*
236	* action codes 4 and 5 may result in roundtrip/fallback/unassigned/illegal results
237	* depending on the code units they result in
238	* 4 valid byte sequence
239	* 19..9 not used, 0
240	* 8..0 final offset delta
241	* pointing to one 16-bit code unit which may be
242	* fffe unassigned -- look for a fallback for this offset
243	* ffff illegal
244	* 5 valid byte sequence
245	* 19..9 not used, 0
246	* 8..0 final offset delta
247	* pointing to two 16-bit code units
248	* (typically UTF-16 surrogates)
249	* the result depends on the first code unit as follows:
250	* 0000..d7ff roundtrip BMP code point (1st alone)
251	* d800..dbff roundtrip surrogate pair (1st, 2nd)
252	* dc00..dfff fallback surrogate pair (1st-400, 2nd)
253	* e000 roundtrip BMP code point (2nd alone)
254	* e001 fallback BMP code point (2nd alone)
255	* fffe unassigned
256	* ffff illegal
257	* (the final offset deltas are at most 255 * 2,
258	* times 2 because of storing code unit pairs)
259	*
260	* 6 unassigned byte sequence
261	* 19..16 not used, 0
262	* 15..0 16-bit Unicode BMP code point U+fffe (new with version 2)
263	* this does not contain a final offset delta because the main
264	* purpose of this action code is to save scalar offset values;
265	* therefore, fallback values cannot be assigned to byte
266	* sequences that result in this action code
267	* 7 illegal byte sequence
268	* 19..16 not used, 0
269	* 15..0 16-bit Unicode BMP code point U+ffff (new with version 2)
270	* 8 state change only
271	* 19..0 not used, 0
272	* useful for state changes in simple stateful encodings,
273	* at Shift-In/Shift-Out codes
274	*
275	*
276	* 9..15 reserved for future use
277	* current implementations will only perform a state change
278	* and ignore bits 19..0
279	*
280	* An encoding with contiguous ranges of unassigned byte sequences, like
281	* Shift-JIS and especially EUC-TW, can be stored efficiently by having
282	* at least two states for the trail bytes:
283	* One trail byte state that results in code points, and one that only
284	* has "unassigned" and "illegal" terminal states.
285	*
286	* Note: partly by accident, this data structure supports simple stateful
287	* encodings without any additional logic.
288	* Currently, only simple Shift-In/Shift-Out schemes are handled with
289	* appropriate state tables (especially EBCDIC_STATEFUL!).
290	*
291	* MBCS version 2 added:
292	* unassigned and illegal action codes have U+fffe and U+ffff
293	* instead of unused bits; this is useful for _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP()
294	*
295	* Converting from Unicode to codepage bytes --------------------------------***
296	*
297	* The conversion data structure for fromUnicode is designed for the known
298	* structure of Unicode. It maps from 21-bit code points (0..0x10ffff) to
299	* a sequence of 1..4 bytes, in addition to a flag that indicates if there is
300	* a roundtrip mapping.
301	*
302	* The lookup is done with a 3-stage trie, using 11/6/4 bits for stage 1/2/3
303	* like in the character properties table.
304	* The beginning of the trie is at offsetFromUTable, the beginning of stage 3
305	* with the resulting bytes is at offsetFromUBytes.
306	*
307	* Beginning with version 4, single-byte codepages have a significantly different
308	* trie compared to other codepages.
309	* In all cases, the entry in stage 1 is directly the index of the block of
310	* 64 entries in stage 2.
311	*
312	* Single-byte lookup:
313	*
314	* Stage 2 only contains 16-bit indexes directly to the 16-blocks in stage 3.
315	* Stage 3 contains one 16-bit word per result:
316	* Bits 15..8 indicate the kind of result:
317	* f roundtrip result
318	* c fallback result from private-use code point
319	* 8 fallback result from other code points
320	* 0 unassigned
321	* Bits 7..0 contain the codepage byte. A zero byte is always possible.
322	*
323	* In version 4.3, the runtime code can build an sbcsIndex for a utf8Friendly
324	* file. For 2-byte UTF-8 byte sequences and some 3-byte sequences the lookup
325	* becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
326	* ASCII code points can be looked up with a linear array access into stage 3.
327	* See maxFastUChar and other details in ucnvmbcs.h.
328	*
329	* Multi-byte lookup:
330	*
331	* Stage 2 contains a 32-bit word for each 16-block in stage 3:
332	* Bits 31..16 contain flags for which stage 3 entries contain roundtrip results
333	* test: MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)
334	* If this test is false, then a non-zero result will be interpreted as
335	* a fallback mapping.
336	* Bits 15..0 contain the index to stage 3, which must be multiplied by 16*(bytes per char)
337	*
338	* Stage 3 contains 2, 3, or 4 bytes per result.
339	* 2 or 4 bytes are stored as uint16_t/uint32_t in platform endianness,
340	* while 3 bytes are stored as bytes in big-endian order.
341	* Leading zero bytes are ignored, and the number of bytes is counted.
342	* A zero byte mapping result is possible as a roundtrip result.
343	* For some output types, the actual result is processed from this;
344	* see ucnv_MBCSFromUnicodeWithOffsets().
345	*
346	* Note that stage 1 always contains 0x440=1088 entries (0x440==0x110000>>10),
347	* or (version 3 and up) for BMP-only codepages, it contains 64 entries.
348	*
349	* In version 4.3, a utf8Friendly file contains an mbcsIndex table.
350	* For 2-byte UTF-8 byte sequences and most 3-byte sequences the lookup
351	* becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
352	* ASCII code points can be looked up with a linear array access into stage 3.
353	* See maxFastUChar, mbcsIndex and other details in ucnvmbcs.h.
354	*
355	* In version 3, stage 2 blocks may overlap by multiples of the multiplier
356	* for compaction.
357	* In version 4, stage 2 blocks (and for single-byte codepages, stage 3 blocks)
358	* may overlap by any number of entries.
359	*
360	* MBCS version 2 added:
361	* the converter checks for known output types, which allows
362	* adding new ones without crashing an unaware converter
363	*/
364
365	/**
366	* Callback from ucnv_MBCSEnumToUnicode(), takes 32 mappings from
367	* consecutive sequences of bytes, starting from the one encoded in value,
368	* to Unicode code points. (Multiple mappings to reduce per-function call overhead.)
369	* Does not currently support m:n mappings or reverse fallbacks.
370	* This function will not be called for sequences of bytes with leading zeros.
371	*
372	* @param context an opaque pointer, as passed into ucnv_MBCSEnumToUnicode()
373	* @param value contains 1..4 bytes of the first byte sequence, right-aligned
374	* @param codePoints resulting Unicode code points, or negative if a byte sequence does
375	* not map to anything
376	* @return TRUE to continue enumeration, FALSE to stop
377	*/
378	typedef UBool U_CALLCONV
379	UConverterEnumToUCallback(const void *context, uint32_t value, UChar32 codePoints[32]);
380
381	static void U_CALLCONV
382	ucnv_MBCSLoad(UConverterSharedData *sharedData,
383	UConverterLoadArgs *pArgs,
384	const uint8_t *raw,
385	UErrorCode *pErrorCode);
386
387	static void U_CALLCONV
388	ucnv_MBCSUnload(UConverterSharedData *sharedData);
389
390	static void U_CALLCONV
391	ucnv_MBCSOpen(UConverter *cnv,
392	UConverterLoadArgs *pArgs,
393	UErrorCode *pErrorCode);
394
395	static UChar32 U_CALLCONV
396	ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
397	UErrorCode *pErrorCode);
398
399	static void U_CALLCONV
400	ucnv_MBCSGetStarters(const UConverter* cnv,
401	UBool starters[256],
402	UErrorCode *pErrorCode);
403
404	U_CDECL_BEGINextern "C" {
405	static const char* U_CALLCONV
406	ucnv_MBCSGetName(const UConverter *cnv);
407	U_CDECL_END}
408
409	static void U_CALLCONV
410	ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
411	int32_t offsetIndex,
412	UErrorCode *pErrorCode);
413
/*
 * NOTE(review): this repeats the ucnv_MBCSGetNextUChar prototype that is
 * already declared earlier in this list of forward declarations; the
 * repetition is redundant but harmless.
 */
414	static UChar32 U_CALLCONV
415	ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
416	UErrorCode *pErrorCode);
417
418	static void U_CALLCONV
419	ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
420	UConverterToUnicodeArgs *pToUArgs,
421	UErrorCode *pErrorCode);
422
423	static void U_CALLCONV
424	ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
425	const USetAdder *sa,
426	UConverterUnicodeSet which,
427	UErrorCode *pErrorCode);
428
429	static void U_CALLCONV
430	ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
431	UConverterToUnicodeArgs *pToUArgs,
432	UErrorCode *pErrorCode);
433
/*
 * UConverterImpl table that lists the same entries as _MBCSImpl except for
 * the last one, which is ucnv_SBCSFromUTF8 here (NULL in _MBCSImpl).
 * NOTE(review): presumably installed for single-byte tables so that UTF-8
 * input can be converted directly -- confirm where this table is selected.
 */
434	static const UConverterImpl _SBCSUTF8Impl={
435	UCNV_MBCS,
436
437	ucnv_MBCSLoad,
438	ucnv_MBCSUnload,
439
440	ucnv_MBCSOpen,
441	NULL__null,
442	NULL__null,
443
444	ucnv_MBCSToUnicodeWithOffsetsucnv_MBCSToUnicodeWithOffsets_71,
445	ucnv_MBCSToUnicodeWithOffsetsucnv_MBCSToUnicodeWithOffsets_71,
446	ucnv_MBCSFromUnicodeWithOffsetsucnv_MBCSFromUnicodeWithOffsets_71,
447	ucnv_MBCSFromUnicodeWithOffsetsucnv_MBCSFromUnicodeWithOffsets_71,
448	ucnv_MBCSGetNextUChar,
449
450	ucnv_MBCSGetStarters,
451	ucnv_MBCSGetName,
452	ucnv_MBCSWriteSub,
453	NULL__null,
454	ucnv_MBCSGetUnicodeSet,
455
456	NULL__null,
457	ucnv_SBCSFromUTF8
458	};
459
/*
 * UConverterImpl table that lists the same entries as _MBCSImpl except for
 * the last one, which is ucnv_DBCSFromUTF8 here (NULL in _MBCSImpl).
 * NOTE(review): presumably installed for double-byte tables so that UTF-8
 * input can be converted directly -- confirm where this table is selected.
 */
460	static const UConverterImpl _DBCSUTF8Impl={
461	UCNV_MBCS,
462
463	ucnv_MBCSLoad,
464	ucnv_MBCSUnload,
465
466	ucnv_MBCSOpen,
467	NULL__null,
468	NULL__null,
469
470	ucnv_MBCSToUnicodeWithOffsetsucnv_MBCSToUnicodeWithOffsets_71,
471	ucnv_MBCSToUnicodeWithOffsetsucnv_MBCSToUnicodeWithOffsets_71,
472	ucnv_MBCSFromUnicodeWithOffsetsucnv_MBCSFromUnicodeWithOffsets_71,
473	ucnv_MBCSFromUnicodeWithOffsetsucnv_MBCSFromUnicodeWithOffsets_71,
474	ucnv_MBCSGetNextUChar,
475
476	ucnv_MBCSGetStarters,
477	ucnv_MBCSGetName,
478	ucnv_MBCSWriteSub,
479	NULL__null,
480	ucnv_MBCSGetUnicodeSet,
481
482	NULL__null,
483	ucnv_DBCSFromUTF8
484	};
485
/*
 * UConverterImpl table referenced by the _MBCSData shared-data record.
 * Its two trailing entries are both NULL; the _SBCSUTF8Impl and
 * _DBCSUTF8Impl variants differ only by supplying a ...FromUTF8 function
 * in the last entry.
 */
486	static const UConverterImpl _MBCSImpl={
487	UCNV_MBCS,
488
489	ucnv_MBCSLoad,
490	ucnv_MBCSUnload,
491
492	ucnv_MBCSOpen,
493	NULL__null,
494	NULL__null,
495
496	ucnv_MBCSToUnicodeWithOffsetsucnv_MBCSToUnicodeWithOffsets_71,
497	ucnv_MBCSToUnicodeWithOffsetsucnv_MBCSToUnicodeWithOffsets_71,
498	ucnv_MBCSFromUnicodeWithOffsetsucnv_MBCSFromUnicodeWithOffsets_71,
499	ucnv_MBCSFromUnicodeWithOffsetsucnv_MBCSFromUnicodeWithOffsets_71,
500	ucnv_MBCSGetNextUChar,
501
502	ucnv_MBCSGetStarters,
503	ucnv_MBCSGetName,
504	ucnv_MBCSWriteSub,
505	NULL__null,
506	ucnv_MBCSGetUnicodeSet,
507	NULL__null,
508	NULL__null
509	};
510
511	/* Static data is in tools/makeconv/ucnvstat.c for data-based
512	* converters. Be sure to update it as well.
513	*/
514
/*
 * Statically initialized UConverterSharedData record named _MBCSData.
 * It starts with sizeof(UConverterSharedData), references _MBCSImpl, and
 * ends with UCNV_MBCS_TABLE_INITIALIZER, which (per the expansion shown in
 * this listing) sets every field of the embedded table to 0/null/false.
 */
515	const UConverterSharedData _MBCSData_MBCSData_71={
516	sizeof(UConverterSharedData), 1,
517	NULL__null, NULL__null, FALSE0, TRUE1, &_MBCSImpl,
518	0, UCNV_MBCS_TABLE_INITIALIZER{ 0, 0, 0, 0, __null, __null, __null, __null, __null, __null, { 0 }, __null, __null, 0, 0, 0, false, 0, 0, __null, __null, __null, __null }
519	};
520
521
522	/* GB 18030 data ------------------------------------------------------------ */
523
524	/* helper macros for linear values for GB 18030 four-byte sequences */
/*
 * LINEAR_18030(a, b, c, d): combines the four bytes of a GB 18030 four-byte
 * sequence into a single linear value.
 * NOTE(review): the multiplication operators appear to have been lost in
 * this listing; presumably the expression is
 * (((a)*10+(b))*126L+(c))*10L+(d) -- confirm against the upstream source.
 */
525	#define LINEAR_18030(a, b, c, d)((((a)10+(b))126L+(c))10L+(d)) ((((a)10+(b))126L+(c))10L+(d))
526
/* LINEAR_18030_BASE: linear value of the four-byte sequence 0x81 0x30 0x81 0x30. */
527	#define LINEAR_18030_BASE((((0x81)10+(0x30))126L+(0x81))10L+(0x30)) LINEAR_18030(0x81, 0x30, 0x81, 0x30)((((0x81)10+(0x30))126L+(0x81))10L+(0x30))
528
/*
 * LINEAR(x): splits a four-byte sequence packed into one 32-bit value
 * (most significant byte first) and passes the bytes to LINEAR_18030().
 * x is not parenthesized in the expansion, so pass only a simple operand
 * such as a literal or a variable name.
 */
529	#define LINEAR(x)((((x>>24)10+((x>>16)&0xff))126L+((x>> 8)&0xff))10L+(x&0xff)) LINEAR_18030(x>>24, (x>>16)&0xff, (x>>8)&0xff, x&0xff)((((x>>24)10+((x>>16)&0xff))126L+((x>> 8)&0xff))10L+(x&0xff))
530
531	/*
532	* Some ranges of GB 18030 where both the Unicode code points and the
533	* GB four-byte sequences are contiguous and are handled algorithmically by
534	* the special callback functions below.
535	* The values are start & end of Unicode & GB codes.
536	*
537	* Note that single surrogates are not mapped by GB 18030
538	* as of the re-released mapping tables from 2000-nov-30.
539	*/
/*
 * Row layout: { first Unicode code point, last Unicode code point,
 *               LINEAR() value of the first GB four-byte sequence,
 *               LINEAR() value of the last GB four-byte sequence }.
 * The table has 14 rows of 4 values each.
 */
540	static const uint32_t
541	gb18030Ranges[14][4]={
542	{0x10000, 0x10FFFF, LINEAR(0x90308130)((((0x90308130>>24)10+((0x90308130>>16)&0xff ))126L+((0x90308130>>8)&0xff))10L+(0x90308130& 0xff)), LINEAR(0xE3329A35)((((0xE3329A35>>24)10+((0xE3329A35>>16)&0xff ))126L+((0xE3329A35>>8)&0xff))10L+(0xE3329A35& 0xff))},
543	{0x9FA6, 0xD7FF, LINEAR(0x82358F33)((((0x82358F33>>24)10+((0x82358F33>>16)&0xff ))126L+((0x82358F33>>8)&0xff))10L+(0x82358F33& 0xff)), LINEAR(0x8336C738)((((0x8336C738>>24)10+((0x8336C738>>16)&0xff ))126L+((0x8336C738>>8)&0xff))10L+(0x8336C738& 0xff))},
544	{0x0452, 0x1E3E, LINEAR(0x8130D330)((((0x8130D330>>24)10+((0x8130D330>>16)&0xff ))126L+((0x8130D330>>8)&0xff))10L+(0x8130D330& 0xff)), LINEAR(0x8135F436)((((0x8135F436>>24)10+((0x8135F436>>16)&0xff ))126L+((0x8135F436>>8)&0xff))10L+(0x8135F436& 0xff))},
545	{0x1E40, 0x200F, LINEAR(0x8135F438)((((0x8135F438>>24)10+((0x8135F438>>16)&0xff ))126L+((0x8135F438>>8)&0xff))10L+(0x8135F438& 0xff)), LINEAR(0x8136A531)((((0x8136A531>>24)10+((0x8136A531>>16)&0xff ))126L+((0x8136A531>>8)&0xff))10L+(0x8136A531& 0xff))},
546	{0xE865, 0xF92B, LINEAR(0x8336D030)((((0x8336D030>>24)10+((0x8336D030>>16)&0xff ))126L+((0x8336D030>>8)&0xff))10L+(0x8336D030& 0xff)), LINEAR(0x84308534)((((0x84308534>>24)10+((0x84308534>>16)&0xff ))126L+((0x84308534>>8)&0xff))10L+(0x84308534& 0xff))},
547	{0x2643, 0x2E80, LINEAR(0x8137A839)((((0x8137A839>>24)10+((0x8137A839>>16)&0xff ))126L+((0x8137A839>>8)&0xff))10L+(0x8137A839& 0xff)), LINEAR(0x8138FD38)((((0x8138FD38>>24)10+((0x8138FD38>>16)&0xff ))126L+((0x8138FD38>>8)&0xff))10L+(0x8138FD38& 0xff))},
548	{0xFA2A, 0xFE2F, LINEAR(0x84309C38)((((0x84309C38>>24)10+((0x84309C38>>16)&0xff ))126L+((0x84309C38>>8)&0xff))10L+(0x84309C38& 0xff)), LINEAR(0x84318537)((((0x84318537>>24)10+((0x84318537>>16)&0xff ))126L+((0x84318537>>8)&0xff))10L+(0x84318537& 0xff))},
549	{0x3CE1, 0x4055, LINEAR(0x8231D438)((((0x8231D438>>24)10+((0x8231D438>>16)&0xff ))126L+((0x8231D438>>8)&0xff))10L+(0x8231D438& 0xff)), LINEAR(0x8232AF32)((((0x8232AF32>>24)10+((0x8232AF32>>16)&0xff ))126L+((0x8232AF32>>8)&0xff))10L+(0x8232AF32& 0xff))},
550	{0x361B, 0x3917, LINEAR(0x8230A633)((((0x8230A633>>24)10+((0x8230A633>>16)&0xff ))126L+((0x8230A633>>8)&0xff))10L+(0x8230A633& 0xff)), LINEAR(0x8230F237)((((0x8230F237>>24)10+((0x8230F237>>16)&0xff ))126L+((0x8230F237>>8)&0xff))10L+(0x8230F237& 0xff))},
551	{0x49B8, 0x4C76, LINEAR(0x8234A131)((((0x8234A131>>24)10+((0x8234A131>>16)&0xff ))126L+((0x8234A131>>8)&0xff))10L+(0x8234A131& 0xff)), LINEAR(0x8234E733)((((0x8234E733>>24)10+((0x8234E733>>16)&0xff ))126L+((0x8234E733>>8)&0xff))10L+(0x8234E733& 0xff))},
552	{0x4160, 0x4336, LINEAR(0x8232C937)((((0x8232C937>>24)10+((0x8232C937>>16)&0xff ))126L+((0x8232C937>>8)&0xff))10L+(0x8232C937& 0xff)), LINEAR(0x8232F837)((((0x8232F837>>24)10+((0x8232F837>>16)&0xff ))126L+((0x8232F837>>8)&0xff))10L+(0x8232F837& 0xff))},
553	{0x478E, 0x4946, LINEAR(0x8233E838)((((0x8233E838>>24)10+((0x8233E838>>16)&0xff ))126L+((0x8233E838>>8)&0xff))10L+(0x8233E838& 0xff)), LINEAR(0x82349638)((((0x82349638>>24)10+((0x82349638>>16)&0xff ))126L+((0x82349638>>8)&0xff))10L+(0x82349638& 0xff))},
554	{0x44D7, 0x464B, LINEAR(0x8233A339)((((0x8233A339>>24)10+((0x8233A339>>16)&0xff ))126L+((0x8233A339>>8)&0xff))10L+(0x8233A339& 0xff)), LINEAR(0x8233C931)((((0x8233C931>>24)10+((0x8233C931>>16)&0xff ))126L+((0x8233C931>>8)&0xff))10L+(0x8233C931& 0xff))},
555	{0xFFE6, 0xFFFF, LINEAR(0x8431A234)((((0x8431A234>>24)10+((0x8431A234>>16)&0xff ))126L+((0x8431A234>>8)&0xff))10L+(0x8431A234& 0xff)), LINEAR(0x8431A439)((((0x8431A439>>24)10+((0x8431A439>>16)&0xff ))126L+((0x8431A439>>8)&0xff))10L+(0x8431A439& 0xff))}
556	};
557
558	/* bit flag for UConverter.options indicating GB 18030 special handling */
559	#define _MBCS_OPTION_GB180300x8000 0x8000
560
561	/* bit flags for UConverter.options indicating KEIS, JEF, JIPS special handling */
562	#define _MBCS_OPTION_KEIS0x01000 0x01000
563	#define _MBCS_OPTION_JEF0x02000 0x02000
564	#define _MBCS_OPTION_JIPS0x04000 0x04000
565
/* KEIS shift-out / shift-in sequences: two bytes each, stored by getSISOBytes() */
566	#define KEIS_SO_CHAR_10x0A 0x0A
567	#define KEIS_SO_CHAR_20x42 0x42
568	#define KEIS_SI_CHAR_10x0A 0x0A
569	#define KEIS_SI_CHAR_20x41 0x41
570
/* JEF shift-out / shift-in: a single byte each */
571	#define JEF_SO_CHAR0x28 0x28
572	#define JEF_SI_CHAR0x29 0x29
573
/* JIPS shift-out / shift-in sequences: two bytes each */
574	#define JIPS_SO_CHAR_10x1A 0x1A
575	#define JIPS_SO_CHAR_20x70 0x70
576	#define JIPS_SI_CHAR_10x1A 0x1A
577	#define JIPS_SI_CHAR_20x71 0x71
578
/* Selects which shift sequence getSISOBytes() produces: shift-in (SI) or shift-out (SO). */
579	enum SISO_Option {
580	SI,
581	SO
582	};
583	typedef enum SISO_Option SISO_Option;
584
/*
 * Stores the shift-in or shift-out byte sequence for a converter into value
 * and returns the number of bytes stored.
 *
 * option    SI or SO: which sequence to produce.
 * cnvOption converter option bits. The KEIS, JEF and JIPS flags are tested
 *           in that order and the first one that is set decides the bytes;
 *           when none is set, the single byte UCNV_SI or UCNV_SO is used.
 * value     output buffer; up to two bytes are written, so it must have
 *           room for at least two.
 *
 * Returns 1 or 2, or 0 (with value left untouched) when option is neither
 * SI nor SO.
 */
585	static int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) {
586	int32_t SISOLength = 0;
587
588	switch (option) {
589	case SI:
590	if ((cnvOption&_MBCS_OPTION_KEIS0x01000)!=0) {
591	value[0] = KEIS_SI_CHAR_10x0A;
592	value[1] = KEIS_SI_CHAR_20x41;
593	SISOLength = 2;
594	} else if ((cnvOption&_MBCS_OPTION_JEF0x02000)!=0) {
595	value[0] = JEF_SI_CHAR0x29;
596	SISOLength = 1;
597	} else if ((cnvOption&_MBCS_OPTION_JIPS0x04000)!=0) {
598	value[0] = JIPS_SI_CHAR_10x1A;
599	value[1] = JIPS_SI_CHAR_20x71;
600	SISOLength = 2;
601	} else {
602	value[0] = UCNV_SI0x0F;
603	SISOLength = 1;
604	}
605	break;
606	case SO:
607	if ((cnvOption&_MBCS_OPTION_KEIS0x01000)!=0) {
608	value[0] = KEIS_SO_CHAR_10x0A;
609	value[1] = KEIS_SO_CHAR_20x42;
610	SISOLength = 2;
611	} else if ((cnvOption&_MBCS_OPTION_JEF0x02000)!=0) {
612	value[0] = JEF_SO_CHAR0x28;
613	SISOLength = 1;
614	} else if ((cnvOption&_MBCS_OPTION_JIPS0x04000)!=0) {
615	value[0] = JIPS_SO_CHAR_10x1A;
616	value[1] = JIPS_SO_CHAR_20x70;
617	SISOLength = 2;
618	} else {
619	value[0] = UCNV_SO0x0E;
620	SISOLength = 1;
621	}
622	break;
623	default:
624	/* Should never happen. */
625	break;
626	}
627
628	return SISOLength;
629	}
630
631	/* Miscellaneous ------------------------------------------------------------ */
632
633	/* similar to ucnv_MBCSGetNextUChar() but recursive */
/*
 * Enumerates the toUnicode mappings reachable from one state-table row and
 * reports them to the callback in blocks of 32 consecutive byte values.
 *
 * mbcsTable   provides stateTable and unicodeCodeUnits
 * stateProps  per-state properties; bits 5..3 and 2..0 bound the byte range
 *             visited here, and a negative value makes a target state be skipped
 * state       index of the state-table row to walk
 * offset      offset accumulated from the transitions taken so far; added to the
 *             16-bit final value to index unicodeCodeUnits
 * value       bytes consumed so far; shifted left by 8 here and combined with
 *             the current byte for recursion and for the callback
 * callback    called as callback(context, value-of-first-byte-in-block, codePoints)
 *             after each complete block of 32 bytes that holds at least one
 *             non-negative code point
 * pErrorCode  only handed on to the recursive calls in this function
 *
 * Returns FALSE as soon as a recursive call or the callback returns false,
 * otherwise TRUE.
 */
634	static UBool
635	enumToU(UConverterMBCSTable *mbcsTable, int8_t stateProps[],
636	int32_t state, uint32_t offset,
637	uint32_t value,
638	UConverterEnumToUCallback callback, const void context,
639	UErrorCode *pErrorCode) {
640	UChar32 codePoints[32];
641	const int32_t *row;
642	const uint16_t *unicodeCodeUnits;
643	UChar32 anyCodePoints;
644	int32_t b, limit;
645
646	row=mbcsTable->stateTable[state];
647	unicodeCodeUnits=mbcsTable->unicodeCodeUnits;
648
649	value<<=8;
650	anyCodePoints=-1; /* becomes non-negative if there is a mapping */
651
	/* first byte to visit: bits 5..3 of the property give the index of the first 32-byte block */
652	b=(stateProps[state]&0x38)<<2;
653	if(b==0 && stateProps[state]>=0x40) {
654	/* skip byte sequences with leading zeros because they are not stored in the fromUnicode table */
655	codePoints[0]=U_SENTINEL(-1);
656	b=1;
657	}
	/* one past the last byte to visit: bits 2..0 give the index of the last 32-byte block */
658	limit=((stateProps[state]&7)+1)<<5;
659	while(b<limit) {
660	int32_t entry=row[b];
661	if(MBCS_ENTRY_IS_TRANSITION(entry)((entry)>=0)) {
662	int32_t nextState=MBCS_ENTRY_TRANSITION_STATE(entry)(((uint32_t)entry)>>24);
663	if(stateProps[nextState]>=0) {
664	/* recurse to a state with non-ignorable actions */
665	if(!enumToU(
666	mbcsTable, stateProps, nextState,
667	offset+MBCS_ENTRY_TRANSITION_OFFSET(entry)((entry)&0xffffff),
668	value\|(uint32_t)b,
669	callback, context,
670	pErrorCode)) {
671	return FALSE0;
672	}
673	}
	/* a transition entry yields no code point for this byte in the current block */
674	codePoints[b&0x1f]=U_SENTINEL(-1);
675	} else {
676	UChar32 c;
677	int32_t action;
678
679	/*
680	* An if-else-if chain provides more reliable performance for
681	* the most common cases compared to a switch.
682	*/
683	action=MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf);
684	if(action==MBCS_STATE_VALID_DIRECT_16) {
685	/* output BMP code point */
686	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
687	} else if(action==MBCS_STATE_VALID_16) {
688	int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
689	c=unicodeCodeUnits[finalOffset];
690	if(c<0xfffe) {
691	/* output BMP code point */
692	} else {
693	c=U_SENTINEL(-1);
694	}
695	} else if(action==MBCS_STATE_VALID_16_PAIR) {
696	int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
697	c=unicodeCodeUnits[finalOffset++];
698	if(c<0xd800) {
699	/* output BMP code point below 0xd800 */
700	} else if(c<=0xdbff) {
701	/* output roundtrip or fallback supplementary code point */
702	c=((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00);
703	} else if(c==0xe000) {
704	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
705	c=unicodeCodeUnits[finalOffset];
706	} else {
707	c=U_SENTINEL(-1);
708	}
709	} else if(action==MBCS_STATE_VALID_DIRECT_20) {
710	/* output supplementary code point */
711	c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)((entry)&0xfffff)+0x10000);
712	} else {
	/* every other action produces no mapping here */
713	c=U_SENTINEL(-1);
714	}
715
716	codePoints[b&0x1f]=c;
	/* AND-ing with any non-negative c clears the sign bit of anyCodePoints */
717	anyCodePoints&=c;
718	}
	/* after each complete block of 32 bytes: report it if it held a mapping, then reset the flag */
719	if(((++b)&0x1f)==0) {
720	if(anyCodePoints>=0) {
721	if(!callback(context, value\|(uint32_t)(b-0x20), codePoints)) {
722	return FALSE0;
723	}
724	anyCodePoints=-1;
725	}
726	}
727	}
728	return TRUE1;
729	}
730
731	/*
732	* Only called if stateProps[state]==-1.
733	* A recursive call may do stateProps[state]\|=0x40 if this state is the target of an
734	* MBCS_STATE_CHANGE_ONLY.
735	*/
/*
 * Computes stateProps[state] and, recursively, the property of every target
 * state whose property is still -1.
 *
 * An entry counts as non-ignorable here if it is a transition to a state with
 * a non-negative property, or a final entry with an action below
 * MBCS_STATE_UNASSIGNED.
 *
 * Value stored for the state:
 *   -0x40      none of the 256 entries is non-ignorable
 *   bits 5..3  min>>5, the 32-byte block of the first non-ignorable entry
 *   bits 2..0  max>>5, the 32-byte block of the last non-ignorable entry
 *   0x40       set if the row has a final entry with an action
 *              <=MBCS_STATE_FALLBACK_DIRECT_20; also set on the next state
 *              named by any final entry between min and max
 *
 * stateTable  the converter's state table rows
 * stateProps  in/out array of properties, indexed by state
 * state       the state to analyze
 * Returns stateProps[state].
 */
736	static int8_t
737	getStateProp(const int32_t (*stateTable)[256], int8_t stateProps[], int state) {
738	const int32_t *row;
739	int32_t min, max, entry, nextState;
740
741	row=stateTable[state];
	/* no longer -1, so the ==-1 checks below will not recurse into this state again */
742	stateProps[state]=0;
743
744	/* find first non-ignorable state */
745	for(min=0;; ++min) {
746	entry=row[min];
747	nextState=MBCS_ENTRY_STATE(entry)((((uint32_t)entry)>>24)&0x7f);
748	if(stateProps[nextState]==-1) {
749	getStateProp(stateTable, stateProps, nextState);
750	}
751	if(MBCS_ENTRY_IS_TRANSITION(entry)((entry)>=0)) {
752	if(stateProps[nextState]>=0) {
753	break;
754	}
755	} else if(MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf)<MBCS_STATE_UNASSIGNED) {
756	break;
757	}
	/* all 256 entries were checked without finding a non-ignorable one */
758	if(min==0xff) {
759	stateProps[state]=-0x40; /* (int8_t)0xc0 */
760	return stateProps[state];
761	}
762	}
	/* record the 32-byte block index of min in bits 5..3 */
763	stateProps[state]\|=(int8_t)((min>>5)<<3);
764
765	/* find last non-ignorable state */
766	for(max=0xff; min<max; --max) {
767	entry=row[max];
768	nextState=MBCS_ENTRY_STATE(entry)((((uint32_t)entry)>>24)&0x7f);
769	if(stateProps[nextState]==-1) {
770	getStateProp(stateTable, stateProps, nextState);
771	}
772	if(MBCS_ENTRY_IS_TRANSITION(entry)((entry)>=0)) {
773	if(stateProps[nextState]>=0) {
774	break;
775	}
776	} else if(MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf)<MBCS_STATE_UNASSIGNED) {
777	break;
778	}
779	}
	/* record the 32-byte block index of max in bits 2..0 */
780	stateProps[state]\|=(int8_t)(max>>5);
781
782	/* recurse further and collect direct-state information */
783	while(min<=max) {
784	entry=row[min];
785	nextState=MBCS_ENTRY_STATE(entry)((((uint32_t)entry)>>24)&0x7f);
786	if(stateProps[nextState]==-1) {
787	getStateProp(stateTable, stateProps, nextState);
788	}
789	if(MBCS_ENTRY_IS_FINAL(entry)((entry)<0)) {
	/* the state that a final entry continues with gets the 0x40 flag */
790	stateProps[nextState]\|=0x40;
791	if(MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf)<=MBCS_STATE_FALLBACK_DIRECT_20) {
	/* this row itself has a final entry with such an action: flag this state too */
792	stateProps[state]\|=0x40;
793	}
794	}
795	++min;
796	}
797	return stateProps[state];
798	}
799
800	/*
801	* Internal function enumerating the toUnicode data of an MBCS converter.
802	* Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U
803	* table, but could also be used for a future ucnv_getUnicodeSet() option
804	* that includes reverse fallbacks (after updating this function's implementation).
805	* Currently only handles roundtrip mappings.
806	* Does not currently handle extensions.
807	*/
/*
 * mbcsTable   table whose stateTable rows are enumerated
 * callback    receives the blocks of code points found by enumToU()
 * context     passed through unchanged to the callback
 * pErrorCode  passed through to enumToU(); not otherwise read or written here
 */
808	static void
809	ucnv_MBCSEnumToUnicode(UConverterMBCSTable *mbcsTable,
810	UConverterEnumToUCallback callback, const void context,
811	UErrorCode *pErrorCode) {
812	/*
813	* Properties for each state, to speed up the enumeration.
814	* Ignorable actions are unassigned/illegal/state-change-only:
815	* They do not lead to mappings.
816	*
817	* Bits 7..6:
818	* 1 direct/initial state (stateful converters have multiple)
819	* 0 non-initial state with transitions or with non-ignorable result actions
820	* -1 final state with only ignorable actions
821	*
822	* Bits 5..3:
823	* The lowest byte value with non-ignorable actions is
824	* value<<5 (rounded down).
825	*
826	* Bits 2..0:
827	* The highest byte value with non-ignorable actions is
828	* (value<<5)+0x1f (rounded up).
829	*/
830	int8_t stateProps[MBCS_MAX_STATE_COUNT];
831	int32_t state;
832
	/* -1 marks a state whose property has not been computed yet */
833	uprv_memset(stateProps, -1, sizeof(stateProps)):: memset(stateProps, -1, sizeof(stateProps));
834
835	/* recurse from state 0 and set all stateProps */
836	getStateProp(mbcsTable->stateTable, stateProps, 0);
837
838	for(state=0; state<mbcsTable->countStates; ++state) {
839	/*if(stateProps[state]==-1) {
840	printf("unused/unreachable <icu:state> %d\n", state);
841	}*/
842	if(stateProps[state]>=0x40) {
843	/* start from each direct state */
	/*
	 * NOTE(review): the result of enumToU() is discarded, so a false return
	 * from the callback ends only the walk of the current direct state;
	 * the loop continues with the next one.
	 */
844	enumToU(
845	mbcsTable, stateProps, state, 0, 0,
846	callback, context,
847	pErrorCode);
848	}
849	}
850	}
851
/*
 * Walks the from-Unicode trie of sharedData->mbcs and calls sa->add() for
 * every code point whose stored result passes the selection, then calls
 * ucnv_extGetUnicodeSet() with the same arguments.
 *
 * sharedData  converter data; only its mbcs member is read here
 * sa          set adder; sa->add(sa->set, c) is called per selected code point
 * which       UCNV_ROUNDTRIP_SET selects roundtrip mappings only; any other
 *             value is treated as UCNV_ROUNDTRIP_AND_FALLBACK_SET
 * filter      restricts the multi-byte results that are accepted; it is not
 *             consulted in the MBCS_OUTPUT_1 branch
 * pErrorCode  set to U_INTERNAL_PROGRAM_ERROR for an unknown filter value, in
 *             which case the function returns without the extension call
 */
852	U_CFUNCextern "C" void
853	ucnv_MBCSGetFilteredUnicodeSetForUnicodeucnv_MBCSGetFilteredUnicodeSetForUnicode_71(const UConverterSharedData *sharedData,
854	const USetAdder *sa,
855	UConverterUnicodeSet which,
856	UConverterSetFilter filter,
857	UErrorCode *pErrorCode) {
858	const UConverterMBCSTable *mbcsTable;
859	const uint16_t *table;
860
861	uint32_t st3;
862	uint16_t st1, maxStage1, st2;
863
864	UChar32 c;
865
866	/* enumerate the from-Unicode trie table */
867	mbcsTable=&sharedData->mbcs;
868	table=mbcsTable->fromUnicodeTable;
	/* number of stage 1 entries to visit; each one accounts for 1024 code points (see c+=1024 below) */
869	if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY1) {
870	maxStage1=0x440;
871	} else {
872	maxStage1=0x40;
873	}
874
875	c=0; /* keep track of the current code point while enumerating */
876
877	if(mbcsTable->outputType==MBCS_OUTPUT_1) {
878	const uint16_t stage2, stage3, *results;
879	uint16_t minValue;
880
881	results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
882
883	/*
884	* Set a threshold variable for selecting which mappings to use.
885	* See ucnv_MBCSSingleFromBMPWithOffsets() and
886	* MBCS_SINGLE_RESULT_FROM_U() for details.
887	*/
888	if(which==UCNV_ROUNDTRIP_SET) {
889	/* use only roundtrips */
890	minValue=0xf00;
891	} else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ {
892	/* use all roundtrip and fallback results */
893	minValue=0x800;
894	}
895
896	for(st1=0; st1<maxStage1; ++st1) {
897	st2=table[st1];
	/* a stage 1 value not above maxStage1 is handled as an empty stage 2 block (else branch) */
898	if(st2>maxStage1) {
899	stage2=table+st2;
900	for(st2=0; st2<64; ++st2) {
901	if((st3=stage2[st2])!=0) {
902	/* read the stage 3 block */
903	stage3=results+st3;
904
	/* 16 results per stage 3 block; c advances by one per result */
905	do {
906	if(*stage3++>=minValue) {
907	sa->add(sa->set, c);
908	}
909	} while((++c&0xf)!=0);
910	} else {
911	c+=16; /* empty stage 3 block */
912	}
913	}
914	} else {
915	c+=1024; /* empty stage 2 block */
916	}
917	}
918	} else {
919	const uint32_t *stage2;
920	const uint8_t stage3, bytes;
921	uint32_t st3Multiplier;
922	uint32_t value;
923	UBool useFallback;
924
925	bytes=mbcsTable->fromUnicodeBytes;
926
927	useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
928
	/* number of bytes stored per stage 3 entry for this output type */
929	switch(mbcsTable->outputType) {
930	case MBCS_OUTPUT_3:
931	case MBCS_OUTPUT_4_EUC:
932	st3Multiplier=3;
933	break;
934	case MBCS_OUTPUT_4:
935	st3Multiplier=4;
936	break;
937	default:
938	st3Multiplier=2;
939	break;
940	}
941
942	for(st1=0; st1<maxStage1; ++st1) {
943	st2=table[st1];
	/* NOTE(review): presumably halved because stage 2 is indexed in 32-bit units here (see the cast below) - confirm */
944	if(st2>(maxStage1>>1)) {
945	stage2=(const uint32_t *)table+st2;
946	for(st2=0; st2<64; ++st2) {
947	if((st3=stage2[st2])!=0) {
948	/* read the stage 3 block */
949	stage3=bytes+st3Multiplier16(uint32_t)(uint16_t)st3;
950
951	/* get the roundtrip flags for the stage 3 block */
952	st3>>=16;
953
954	/*
955	* Add code points for which the roundtrip flag is set,
956	* or which map to non-zero bytes if we use fallbacks.
957	* See ucnv_MBCSFromUnicodeWithOffsets() for details.
958	*/
959	switch(filter) {
960	case UCNV_SET_FILTER_NONE:
961	do {
962	if(st3&1) {
963	sa->add(sa->set, c);
964	stage3+=st3Multiplier;
965	} else if(useFallback) {
	/* OR all bytes of the entry together; a non-zero result means a fallback mapping is stored */
966	uint8_t b=0;
967	switch(st3Multiplier) {
968	case 4:
969	b\|=*stage3++;
970	U_FALLTHROUGH[[clang::fallthrough]];
971	case 3:
972	b\|=*stage3++;
973	U_FALLTHROUGH[[clang::fallthrough]];
974	case 2:
975	b\|=stage3[0]\|stage3[1];
976	stage3+=2;
977	U_FALLTHROUGH[[clang::fallthrough]];
978	default:
979	break;
980	}
981	if(b!=0) {
982	sa->add(sa->set, c);
983	}
984	}
	/* move on to the roundtrip flag of the next code point */
985	st3>>=1;
986	} while((++c&0xf)!=0);
987	break;
988	case UCNV_SET_FILTER_DBCS_ONLY:
989	/* Ignore single-byte results (<0x100). */
990	do {
991	if(((st3&1)!=0 \|\| useFallback) && ((const uint16_t )stage3)>=0x100) {
992	sa->add(sa->set, c);
993	}
994	st3>>=1;
995	stage3+=2; /* +=st3Multiplier */
996	} while((++c&0xf)!=0);
997	break;
998	case UCNV_SET_FILTER_2022_CN:
999	/* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */
1000	do {
1001	if(((st3&1)!=0 \|\| useFallback) && ((value=*stage3)==0x81 \|\| value==0x82)) {
1002	sa->add(sa->set, c);
1003	}
1004	st3>>=1;
1005	stage3+=3; /* +=st3Multiplier */
1006	} while((++c&0xf)!=0);
1007	break;
1008	case UCNV_SET_FILTER_SJIS:
1009	/* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
1010	do {
1011	if(((st3&1)!=0 \|\| useFallback) && (value=((const uint16_t )stage3))>=0x8140 && value<=0xeffc) {
1012	sa->add(sa->set, c);
1013	}
1014	st3>>=1;
1015	stage3+=2; /* +=st3Multiplier */
1016	} while((++c&0xf)!=0);
1017	break;
1018	case UCNV_SET_FILTER_GR94DBCS:
1019	/* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). */
1020	do {
1021	if( ((st3&1)!=0 \|\| useFallback) &&
1022	(uint16_t)((value=((const uint16_t )stage3)) - 0xa1a1)<=(0xfefe - 0xa1a1) &&
1023	(uint8_t)(value-0xa1)<=(0xfe - 0xa1)
1024	) {
1025	sa->add(sa->set, c);
1026	}
1027	st3>>=1;
1028	stage3+=2; /* +=st3Multiplier */
1029	} while((++c&0xf)!=0);
1030	break;
1031	case UCNV_SET_FILTER_HZ:
1032	/* Only add code points that are suitable for HZ DBCS (lead byte A1..FD). */
1033	do {
1034	if( ((st3&1)!=0 \|\| useFallback) &&
1035	(uint16_t)((value=((const uint16_t )stage3))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
1036	(uint8_t)(value-0xa1)<=(0xfe - 0xa1)
1037	) {
1038	sa->add(sa->set, c);
1039	}
1040	st3>>=1;
1041	stage3+=2; /* +=st3Multiplier */
1042	} while((++c&0xf)!=0);
1043	break;
1044	default:
	/* unknown filter: report the error and skip the extension enumeration below */
1045	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1046	return;
1047	}
1048	} else {
1049	c+=16; /* empty stage 3 block */
1050	}
1051	}
1052	} else {
1053	c+=1024; /* empty stage 2 block */
1054	}
1055	}
1056	}
1057
	/* add the code points covered by extension mappings */
1058	ucnv_extGetUnicodeSetucnv_extGetUnicodeSet_71(sharedData, sa, which, filter, pErrorCode);
1059	}
1060
/*
 * Convenience wrapper around ucnv_MBCSGetFilteredUnicodeSetForUnicode():
 * chooses UCNV_SET_FILTER_DBCS_ONLY when the table's outputType is
 * MBCS_OUTPUT_DBCS_ONLY and UCNV_SET_FILTER_NONE otherwise; all other
 * arguments are passed through unchanged.
 */
1061	U_CFUNCextern "C" void
1062	ucnv_MBCSGetUnicodeSetForUnicodeucnv_MBCSGetUnicodeSetForUnicode_71(const UConverterSharedData *sharedData,
1063	const USetAdder *sa,
1064	UConverterUnicodeSet which,
1065	UErrorCode *pErrorCode) {
1066	ucnv_MBCSGetFilteredUnicodeSetForUnicodeucnv_MBCSGetFilteredUnicodeSetForUnicode_71(
1067	sharedData, sa, which,
1068	sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ?
1069	UCNV_SET_FILTER_DBCS_ONLY :
1070	UCNV_SET_FILTER_NONE,
1071	pErrorCode);
1072	}
1073
/*
 * Adds the code points that the converter can handle to the set behind sa.
 *
 * With the GB 18030 option set, the ranges 0..0xd7ff and 0xe000..0x10ffff
 * are added directly (everything except 0xd800..0xdfff); `which` and
 * pErrorCode are not used on that path. Otherwise the work is delegated to
 * ucnv_MBCSGetUnicodeSetForUnicode() with cnv->sharedData.
 */
1074	static void U_CALLCONV
1075	ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
1076	const USetAdder *sa,
1077	UConverterUnicodeSet which,
1078	UErrorCode *pErrorCode) {
1079	if(cnv->options&_MBCS_OPTION_GB180300x8000) {
1080	sa->addRange(sa->set, 0, 0xd7ff);
1081	sa->addRange(sa->set, 0xe000, 0x10ffff);
1082	} else {
1083	ucnv_MBCSGetUnicodeSetForUnicodeucnv_MBCSGetUnicodeSetForUnicode_71(cnv->sharedData, sa, which, pErrorCode);
1084	}
1085	}
1086
1087	/* conversion extensions for input not in the main table -------------------- */
1088
1089	/*
1090	* Hardcoded extension handling for GB 18030.
1091	* Definition of LINEAR macros and gb18030Ranges see near the beginning of the file.
1092	*
1093	* In the future, conversion extensions may handle m:n mappings and delta tables,
1094	* see https://htmlpreview.github.io/?https://github.com/unicode-org/icu-docs/blob/main/design/conversion/conversion_extensions.html
1095	*
1096	* If an input character cannot be mapped, then these functions set an error
1097	* code. The framework will then call the callback function.
1098	*/
1099
1100	/*
1101	* @return if(U_FAILURE) return the code point for cnv->fromUChar32
1102	* else return 0 after output has been written to the target
1103	*/
/*
 * Handles a code point that the caller passes in as cp, in this order:
 *  1. the extension table (sharedData->mbcs.extIndexes), if there is one,
 *     via ucnv_extInitialMatchFromU();
 *  2. the hardcoded GB 18030 ranges, if the converter has that option;
 *  3. otherwise *pErrorCode is set to U_INVALID_CHAR_FOUND and cp is returned.
 *
 * cnv->useSubChar1 is always reset to FALSE first.
 * source/sourceLimit, target/targetLimit, offsets, sourceIndex and flush are
 * only handed on to the extension and byte-writing helpers.
 */
1104	static UChar32
1105	_extFromU(UConverter cnv, const UConverterSharedData sharedData,
1106	UChar32 cp,
1107	const UChar *source, const UChar sourceLimit,
1108	uint8_t *target, const uint8_t targetLimit,
1109	int32_t **offsets, int32_t sourceIndex,
1110	UBool flush,
1111	UErrorCode *pErrorCode) {
1112	const int32_t *cx;
1113
1114	cnv->useSubChar1=FALSE0;
1115
1116	if( (cx=sharedData->mbcs.extIndexes)!=NULL__null &&
1117	ucnv_extInitialMatchFromUucnv_extInitialMatchFromU_71(
1118	cnv, cx,
1119	cp, source, sourceLimit,
1120	(char *)target, (char )targetLimit,
1121	offsets, sourceIndex,
1122	flush,
1123	pErrorCode)
1124	) {
1125	return 0; /* an extension mapping handled the input */
1126	}
1127
1128	/* GB 18030 */
1129	if((cnv->options&_MBCS_OPTION_GB180300x8000)!=0) {
1130	const uint32_t *range;
1131	int32_t i;
1132
	/* each range uses 4 values: [0]..[1] bound the code points, [2] is the first byte sequence of the range */
1133	range=gb18030Ranges[0];
1134	for(i=0; i<UPRV_LENGTHOF(gb18030Ranges)(int32_t)(sizeof(gb18030Ranges)/sizeof((gb18030Ranges)[0])); range+=4, ++i) {
1135	if(range[0]<=(uint32_t)cp && (uint32_t)cp<=range[1]) {
1136	/* found the Unicode code point, output the four-byte sequence for it */
1137	uint32_t linear;
1138	char bytes[4];
1139
1140	/* get the linear value of the first GB 18030 code in this range */
1141	linear=range[2]-LINEAR_18030_BASE((((0x81)10+(0x30))126L+(0x81))*10L+(0x30));
1142
1143	/* add the offset from the beginning of the range */
1144	linear+=((uint32_t)cp-range[0]);
1145
1146	/* turn this into a four-byte sequence */
	/* mixed radix, last byte first: digits 0x30.. (base 10) alternate with bytes 0x81.. (base 126) */
1147	bytes[3]=(char)(0x30+linear%10); linear/=10;
1148	bytes[2]=(char)(0x81+linear%126); linear/=126;
1149	bytes[1]=(char)(0x30+linear%10); linear/=10;
1150	bytes[0]=(char)(0x81+linear);
1151
1152	/* output this sequence */
1153	ucnv_fromUWriteBytesucnv_fromUWriteBytes_71(cnv,
1154	bytes, 4, (char *)target, (char )targetLimit,
1155	offsets, sourceIndex, pErrorCode);
1156	return 0;
1157	}
1158	}
1159	}
1160
1161	/* no mapping */
1162	*pErrorCode=U_INVALID_CHAR_FOUND;
1163	return cp;
1164	}
1165
1166	/*
1167	* Input sequence: cnv->toUBytes[0..length[
1168	* @return if(U_FAILURE) return the length (toULength, byteIndex) for the input
1169	* else return 0 after output has been written to the target
1170	*/
/*
 * Handles an input byte sequence of the given length, in this order:
 *  1. the extension table (sharedData->mbcs.extIndexes), if there is one,
 *     via ucnv_extInitialMatchToU();
 *  2. the hardcoded GB 18030 ranges, only for four-byte input on a converter
 *     with that option; the bytes are read from cnv->toUBytes[0..3];
 *  3. otherwise *pErrorCode is set to U_INVALID_CHAR_FOUND and length is
 *     returned.
 *
 * source/sourceLimit, target/targetLimit, offsets, sourceIndex and flush are
 * only handed on to the extension and code point writing helpers.
 */
1171	static int8_t
1172	_extToU(UConverter cnv, const UConverterSharedData sharedData,
1173	int8_t length,
1174	const uint8_t *source, const uint8_t sourceLimit,
1175	UChar *target, const UChar targetLimit,
1176	int32_t **offsets, int32_t sourceIndex,
1177	UBool flush,
1178	UErrorCode *pErrorCode) {
1179	const int32_t *cx;
1180
1181	if( (cx=sharedData->mbcs.extIndexes)!=NULL__null &&
1182	ucnv_extInitialMatchToUucnv_extInitialMatchToU_71(
1183	cnv, cx,
1184	length, (const char *)source, (const char )sourceLimit,
1185	target, targetLimit,
1186	offsets, sourceIndex,
1187	flush,
1188	pErrorCode)
1189	) {
1190	return 0; /* an extension mapping handled the input */
1191	}
1192
1193	/* GB 18030 */
1194	if(length==4 && (cnv->options&_MBCS_OPTION_GB180300x8000)!=0) {
1195	const uint32_t *range;
1196	uint32_t linear;
1197	int32_t i;
1198
	/* combine the four input bytes into a single value that is comparable with range[2]..range[3] */
1199	linear=LINEAR_18030(cnv->toUBytes[0], cnv->toUBytes[1], cnv->toUBytes[2], cnv->toUBytes[3])((((cnv->toUBytes[0])10+(cnv->toUBytes[1]))126L+(cnv-> toUBytes[2]))*10L+(cnv->toUBytes[3]));
1200	range=gb18030Ranges[0];
1201	for(i=0; i<UPRV_LENGTHOF(gb18030Ranges)(int32_t)(sizeof(gb18030Ranges)/sizeof((gb18030Ranges)[0])); range+=4, ++i) {
1202	if(range[2]<=linear && linear<=range[3]) {
1203	/* found the sequence, output the Unicode code point for it */
1204	*pErrorCode=U_ZERO_ERROR;
1205
1206	/* add the linear difference between the input and start sequences to the start code point */
1207	linear=range[0]+(linear-range[2]);
1208
1209	/* output this code point */
1210	ucnv_toUWriteCodePointucnv_toUWriteCodePoint_71(cnv, linear, target, targetLimit, offsets, sourceIndex, pErrorCode);
1211
1212	return 0;
1213	}
1214	}
1215	}
1216
1217	/* no mapping */
1218	*pErrorCode=U_INVALID_CHAR_FOUND;
1219	return length;
1220	}
1221
1222	/* EBCDIC swap LF<->NL ------------------------------------------------------ */
1223
1224	/*
1225	* This code modifies a standard EBCDIC<->Unicode mapping table for
1226	* OS/390 (z/OS) Unix System Services (Open Edition).
1227	* The difference is in the mapping of Line Feed and New Line control codes:
1228	* Standard EBCDIC maps
1229	*
1230	* <U000A> \x25 \|0
1231	* <U0085> \x15 \|0
1232	*
1233	* but OS/390 USS EBCDIC swaps the control codes for LF and NL,
1234	* mapping
1235	*
1236	* <U000A> \x15 \|0
1237	* <U0085> \x25 \|0
1238	*
1239	* This code modifies a loaded standard EBCDIC<->Unicode mapping table
1240	* by copying it into allocated memory and swapping the LF and NL values.
1241	* It allows supporting the same EBCDIC charset in both versions without
1242	* duplicating the entire installed table.
1243	*/
1244
/* Constants used by _EBCDICSwapLFNL() to recognize and swap the LF and NL mappings. */
1245	/* standard EBCDIC codes */
1246	#define EBCDIC_LF0x25 0x25
1247	#define EBCDIC_NL0x15 0x15
1248
1249	/* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */
1250	#define EBCDIC_RT_LF0xf25 0xf25
1251	#define EBCDIC_RT_NL0xf15 0xf15
1252
1253	/* Unicode code points */
1254	#define U_LF0x0a 0x0a
1255	#define U_NL0x85 0x85
1256
/*
 * Builds the LF/NL-swapped variant of the table in sharedData->mbcs and
 * publishes it in the swapLFNLStateTable, swapLFNLFromUnicodeBytes and
 * swapLFNLName members.
 *
 * Returns FALSE without touching *pErrorCode if the table is not
 * MBCS_OUTPUT_1 or MBCS_OUTPUT_2_SISO, or does not have the standard LF and
 * NL mappings in both directions.
 * Returns FALSE with U_INVALID_FORMAT_ERROR if fromUBytesLength is not
 * greater than 0, and FALSE with U_MEMORY_ALLOCATION_ERROR if the allocation
 * fails.
 * Returns TRUE otherwise, including the case where the pointers had already
 * been set and the newly built copy is released again.
 *
 * One allocation holds, in this order: the copied state table, the copied
 * from-Unicode bytes, and the converter name with the swap option appended.
 */
1257	static UBool
1258	_EBCDICSwapLFNL(UConverterSharedData sharedData, UErrorCode pErrorCode) {
1259	UConverterMBCSTable *mbcsTable;
1260
1261	const uint16_t table, results;
1262	const uint8_t *bytes;
1263
1264	int32_t (*newStateTable)[256];
1265	uint16_t *newResults;
1266	uint8_t *p;
1267	char *name;
1268
1269	uint32_t stage2Entry;
1270	uint32_t size, sizeofFromUBytes;
1271
1272	mbcsTable=&sharedData->mbcs;
1273
1274	table=mbcsTable->fromUnicodeTable;
1275	bytes=mbcsTable->fromUnicodeBytes;
1276	results=(const uint16_t *)bytes;
1277
1278	/*
1279	* Check that this is an EBCDIC table with SBCS portion -
1280	* SBCS or EBCDIC_STATEFUL with standard EBCDIC LF and NL mappings.
1281	*
1282	* If not, ignore the option. Options are always ignored if they do not apply.
1283	*/
1284	if(!(
1285	(mbcsTable->outputType==MBCS_OUTPUT_1 \|\| mbcsTable->outputType==MBCS_OUTPUT_2_SISO) &&
1286	mbcsTable->stateTable[0][EBCDIC_LF0x25]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF)(int32_t)(0x80000000\|((int32_t)(0)<<24L)\|((MBCS_STATE_VALID_DIRECT_16 )<<20L)\|(0x0a)) &&
1287	mbcsTable->stateTable[0][EBCDIC_NL0x15]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL)(int32_t)(0x80000000\|((int32_t)(0)<<24L)\|((MBCS_STATE_VALID_DIRECT_16 )<<20L)\|(0x85))
1288	)) {
1289	return FALSE0;
1290	}
1291
	/* the from-Unicode direction must hold the matching roundtrip results as well */
1292	if(mbcsTable->outputType==MBCS_OUTPUT_1) {
1293	if(!(
1294	EBCDIC_RT_LF0xf25==MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF)(results)[ (table)[ (table)[(0x0a)>>10] +(((0x0a)>> 4)&0x3f) ] +((0x0a)&0xf) ] &&
1295	EBCDIC_RT_NL0xf15==MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL)(results)[ (table)[ (table)[(0x85)>>10] +(((0x85)>> 4)&0x3f) ] +((0x85)&0xf) ]
1296	)) {
1297	return FALSE0;
1298	}
1299	} else /* MBCS_OUTPUT_2_SISO */ {
1300	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF)((const uint32_t *)(table))[ (table)[(0x0a)>>10] +(((0x0a )>>4)&0x3f) ];
1301	if(!(
1302	MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF)( ((stage2Entry) & ((uint32_t)1<< (16+((0x0a)&0xf )) )) !=0)!=0 &&
1303	EBCDIC_LF0x25==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF)((uint16_t )(bytes))[16(uint32_t)(uint16_t)(stage2Entry)+(( 0x0a)&0xf)]
1304	)) {
1305	return FALSE0;
1306	}
1307
1308	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL)((const uint32_t *)(table))[ (table)[(0x85)>>10] +(((0x85 )>>4)&0x3f) ];
1309	if(!(
1310	MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL)( ((stage2Entry) & ((uint32_t)1<< (16+((0x85)&0xf )) )) !=0)!=0 &&
1311	EBCDIC_NL0x15==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL)((uint16_t )(bytes))[16(uint32_t)(uint16_t)(stage2Entry)+(( 0x85)&0xf)]
1312	)) {
1313	return FALSE0;
1314	}
1315	}
1316
1317	if(mbcsTable->fromUBytesLength>0) {
1318	/*
1319	* We _know_ the number of bytes in the fromUnicodeBytes array
1320	* starting with header.version 4.1.
1321	*/
1322	sizeofFromUBytes=mbcsTable->fromUBytesLength;
1323	} else {
1324	/*
1325	* Otherwise:
1326	* There used to be code to enumerate the fromUnicode
1327	* trie and find the highest entry, but it was removed in ICU 3.2
1328	* because it was not tested and caused a low code coverage number.
1329	* See Jitterbug 3674.
1330	* This affects only some .cnv file formats with a header.version
1331	* below 4.1, and only when swaplfnl is requested.
1332	*
1333	* ucnvmbcs.c revision 1.99 is the last one with the
1334	* ucnv_MBCSSizeofFromUBytes() function.
1335	*/
1336	*pErrorCode=U_INVALID_FORMAT_ERROR;
1337	return FALSE0;
1338	}
1339
1340	/*
1341	* The table has an appropriate format.
1342	* Allocate and build
1343	* - a modified to-Unicode state table
1344	* - a modified from-Unicode output array
1345	* - a converter name string with the swap option appended
1346	*/
	/* 1024 bytes per state: one row of 256 int32_t entries */
1347	size=
1348	mbcsTable->countStates*1024+
1349	sizeofFromUBytes+
1350	UCNV_MAX_CONVERTER_NAME_LENGTH60+20;
1351	p=(uint8_t *)uprv_mallocuprv_malloc_71(size);
1352	if(p==NULL__null) {
1353	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1354	return FALSE0;
1355	}
1356
1357	/* copy and modify the to-Unicode state table */
1358	newStateTable=(int32_t (*)[256])p;
1359	uprv_memcpy(newStateTable, mbcsTable->stateTable, mbcsTable->countStates1024)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(newStateTable , mbcsTable->stateTable, mbcsTable->countStates1024); } while (false);
1360
	/* in state 0, byte 0x25 now yields U+0085 and byte 0x15 yields U+000A */
1361	newStateTable[0][EBCDIC_LF0x25]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL)(int32_t)(0x80000000\|((int32_t)(0)<<24L)\|((MBCS_STATE_VALID_DIRECT_16 )<<20L)\|(0x85));
1362	newStateTable[0][EBCDIC_NL0x15]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF)(int32_t)(0x80000000\|((int32_t)(0)<<24L)\|((MBCS_STATE_VALID_DIRECT_16 )<<20L)\|(0x0a));
1363
1364	/* copy and modify the from-Unicode result table */
	/* the result copy starts directly behind the last copied state table row */
1365	newResults=(uint16_t *)newStateTable[mbcsTable->countStates];
1366	uprv_memcpy(newResults, bytes, sizeofFromUBytes)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(newResults , bytes, sizeofFromUBytes); } while (false);
1367
1368	/* conveniently, the table access macros work on the left side of expressions */
1369	if(mbcsTable->outputType==MBCS_OUTPUT_1) {
1370	MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_LF)(newResults)[ (table)[ (table)[(0x0a)>>10] +(((0x0a)>> 4)&0x3f) ] +((0x0a)&0xf) ]=EBCDIC_RT_NL0xf15;
1371	MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_NL)(newResults)[ (table)[ (table)[(0x85)>>10] +(((0x85)>> 4)&0x3f) ] +((0x85)&0xf) ]=EBCDIC_RT_LF0xf25;
1372	} else /* MBCS_OUTPUT_2_SISO */ {
1373	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF)((const uint32_t *)(table))[ (table)[(0x0a)>>10] +(((0x0a )>>4)&0x3f) ];
1374	MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_LF)((uint16_t )(newResults))[16(uint32_t)(uint16_t)(stage2Entry )+((0x0a)&0xf)]=EBCDIC_NL0x15;
1375
1376	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL)((const uint32_t *)(table))[ (table)[(0x85)>>10] +(((0x85 )>>4)&0x3f) ];
1377	MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_NL)((uint16_t )(newResults))[16(uint32_t)(uint16_t)(stage2Entry )+((0x85)&0xf)]=EBCDIC_LF0x25;
1378	}
1379
1380	/* set the canonical converter name */
	/*
	 * NOTE(review): strcpy/strcat are unbounded; they rely on the name plus
	 * the option suffix fitting into the name area reserved in `size`
	 * above - confirm that staticData->name is bounded accordingly.
	 */
1381	name=(char *)newResults+sizeofFromUBytes;
1382	uprv_strcpy(name, sharedData->staticData->name):: strcpy(name, sharedData->staticData->name);
1383	uprv_strcat(name, UCNV_SWAP_LFNL_OPTION_STRING):: strcat(name, ",swaplfnl");
1384
1385	/* set the pointers */
	/* publish under the lock, and only if the pointers have not been set yet */
1386	icu::umtx_lockumtx_lock_71(NULL__null);
1387	if(mbcsTable->swapLFNLStateTable==NULL__null) {
1388	mbcsTable->swapLFNLStateTable=newStateTable;
1389	mbcsTable->swapLFNLFromUnicodeBytes=(uint8_t *)newResults;
1390	mbcsTable->swapLFNLName=name;
1391
	/* the table now owns the allocation; NULL keeps it from being freed below */
1392	newStateTable=NULL__null;
1393	}
1394	icu::umtx_unlockumtx_unlock_71(NULL__null);
1395
1396	/* release the allocated memory if another thread beat us to it */
1397	if(newStateTable!=NULL__null) {
1398	uprv_freeuprv_free_71(newStateTable);
1399	}
1400	return TRUE1;
1401	}
1402
1403	/* reconstitute omitted fromUnicode data ------------------------------------ */
1404
1405	/* for details, compare with genmbcs.c MBCSAddFromUnicode() and transformEUC() */
/*
 * Enumeration callback that reconstituteData() passes to
 * ucnv_MBCSEnumToUnicode(). For each of the 32 entries in codePoints that is
 * not negative, it stores the corresponding byte value in stage 3 of the
 * from-Unicode data and sets the roundtrip flag in the stage 2 entry.
 *
 * context     the UConverterMBCSTable being filled
 * value       byte sequence for codePoints[0]; entry i corresponds to value+i
 *             (after the EUC adjustment below)
 * codePoints  32 code points, negative where there is no mapping
 *
 * Always returns TRUE.
 */
1406	static UBool U_CALLCONV
1407	writeStage3Roundtrip(const void *context, uint32_t value, UChar32 codePoints[32]) {
1408	UConverterMBCSTable mbcsTable=(UConverterMBCSTable )context;
1409	const uint16_t *table;
1410	uint32_t *stage2;
1411	uint8_t bytes, p;
1412	UChar32 c;
1413	int32_t i, st3;
1414
1415	table=mbcsTable->fromUnicodeTable;
1416	bytes=(uint8_t *)mbcsTable->fromUnicodeBytes;
1417
1418	/* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */
1419	switch(mbcsTable->outputType) {
1420	case MBCS_OUTPUT_3_EUC:
1421	if(value<=0xffff) {
1422	/* short sequences are stored directly */
1423	/* code set 0 or 1 */
1424	} else if(value<=0x8effff) {
1425	/* code set 2 */
1426	value&=0x7fff;
1427	} else /* first byte is 0x8f */ {
1428	/* code set 3 */
1429	value&=0xff7f;
1430	}
1431	break;
1432	case MBCS_OUTPUT_4_EUC:
1433	if(value<=0xffffff) {
1434	/* short sequences are stored directly */
1435	/* code set 0 or 1 */
1436	} else if(value<=0x8effffff) {
1437	/* code set 2 */
1438	value&=0x7fffff;
1439	} else /* first byte is 0x8f */ {
1440	/* code set 3 */
1441	value&=0xff7fff;
1442	}
1443	break;
1444	default:
1445	break;
1446	}
1447
	/* value advances together with i, including for entries that are skipped */
1448	for(i=0; i<=0x1f; ++value, ++i) {
1449	c=codePoints[i];
1450	if(c<0) {
1451	continue;
1452	}
1453
1454	/* locate the stage 2 & 3 data */
1455	stage2=((uint32_t *)table)+table[c>>10]+((c>>4)&0x3f);
1456	p=bytes;
1457	st3=(int32_t)(uint16_t)stage216+(c&0xf);
1458
1459	/* write the codepage bytes into stage 3 */
1460	switch(mbcsTable->outputType) {
1461	case MBCS_OUTPUT_3:
1462	case MBCS_OUTPUT_4_EUC:
	/* 3 bytes per character, most significant byte first */
1463	p+=st3*3;
1464	p[0]=(uint8_t)(value>>16);
1465	p[1]=(uint8_t)(value>>8);
1466	p[2]=(uint8_t)value;
1467	break;
1468	case MBCS_OUTPUT_4:
	/* 4 bytes per character, stored as one uint32_t */
1469	((uint32_t *)p)[st3]=value;
1470	break;
1471	default:
1472	/* 2 bytes per character */
1473	((uint16_t *)p)[st3]=(uint16_t)value;
1474	break;
1475	}
1476
1477	/* set the roundtrip flag */
1478	*stage2\|=(1UL<<(16+(c&0xf)));
1479	}
1480	return TRUE1;
1481	}
1482
/*
 * Rebuilds the from-Unicode data of mbcsTable in one newly allocated,
 * zero-filled buffer (mbcsTable->reconstitutedData) laid out as
 * stage 1 (uint16_t units), full stage 2 (uint32_t units), from-Unicode bytes.
 *
 * The existing stage 1 is copied, the existing stage 2 part is copied to the
 * end of the full stage 2 area, and mbcsTable->fromUnicodeTable and
 * mbcsTable->fromUnicodeBytes are redirected into the new buffer. The initial
 * part of stage 2 is then filled in from mbcsTable->mbcsIndex, and the stage 3
 * bytes and roundtrip flags are written by enumerating the toUnicode data with
 * writeStage3Roundtrip().
 *
 * stage1Length      number of stage 1 units
 * stage2Length      number of stage 2 units present in the existing table
 * fullStage2Length  number of stage 2 units in the rebuilt table
 * pErrorCode        set to U_MEMORY_ALLOCATION_ERROR if the allocation fails,
 *                   in which case nothing else is changed; also passed on to
 *                   ucnv_MBCSEnumToUnicode()
 */
1483	static void
1484	reconstituteData(UConverterMBCSTable *mbcsTable,
1485	uint32_t stage1Length, uint32_t stage2Length,
1486	uint32_t fullStage2Length, /* lengths are numbers of units, not bytes */
1487	UErrorCode *pErrorCode) {
1488	uint16_t *stage1;
1489	uint32_t *stage2;
1490	uint32_t dataLength=stage1Length2+fullStage2Length4+mbcsTable->fromUBytesLength;
1491	mbcsTable->reconstitutedData=(uint8_t *)uprv_mallocuprv_malloc_71(dataLength);
1492	if(mbcsTable->reconstitutedData==NULL__null) {
1493	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1494	return;
1495	}
	/* start from all zeros; only the copied and reconstituted parts are written below */
1496	uprv_memset(mbcsTable->reconstitutedData, 0, dataLength):: memset(mbcsTable->reconstitutedData, 0, dataLength);
1497
1498	/* copy existing data and reroute the pointers */
1499	stage1=(uint16_t *)mbcsTable->reconstitutedData;
1500	uprv_memcpy(stage1, mbcsTable->fromUnicodeTable, stage1Length2)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(stage1, mbcsTable ->fromUnicodeTable, stage1Length2); } while (false);
1501
	/* the existing stage 2 data goes to the end of the full stage 2 area */
1502	stage2=(uint32_t *)(stage1+stage1Length);
1503	uprv_memcpy(stage2+(fullStage2Length-stage2Length),do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(stage2+(fullStage2Length -stage2Length), mbcsTable->fromUnicodeTable+stage1Length, stage2Length *4); } while (false)
1504	mbcsTable->fromUnicodeTable+stage1Length,do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(stage2+(fullStage2Length -stage2Length), mbcsTable->fromUnicodeTable+stage1Length, stage2Length *4); } while (false)
1505	stage2Length4)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(stage2+(fullStage2Length -stage2Length), mbcsTable->fromUnicodeTable+stage1Length, stage2Length 4); } while (false);
1506
1507	mbcsTable->fromUnicodeTable=stage1;
1508	mbcsTable->fromUnicodeBytes=(uint8_t *)(stage2+fullStage2Length);
1509
1510	/* indexes into stage 2 count from the bottom of the fromUnicodeTable */
1511	stage2=(uint32_t *)stage1;
1512
1513	/* reconstitute the initial part of stage 2 from the mbcsIndex */
1514	{
	/* one mbcsIndex entry per 64 code points up to maxFastUChar */
1515	int32_t stageUTF8Length=((int32_t)mbcsTable->maxFastUChar+1)>>6;
1516	int32_t stageUTF8Index=0;
1517	int32_t st1, st2, st3, i;
1518
1519	for(st1=0; stageUTF8Index<stageUTF8Length; ++st1) {
1520	st2=stage1[st1];
	/* a stage 1 value of stage1Length/2 is handled as "no stage 2 block" (else branch) */
1521	if(st2!=(int32_t)stage1Length/2) {
1522	/* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */
1523	for(i=0; i<16; ++i) {
1524	st3=mbcsTable->mbcsIndex[stageUTF8Index++];
1525	if(st3!=0) {
1526	/* a stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */
1527	st3>>=4;
1528	/*
1529	* 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are
1530	* allocated together as a single 64-block for access from the mbcsIndex
1531	*/
1532	stage2[st2++]=st3++;
1533	stage2[st2++]=st3++;
1534	stage2[st2++]=st3++;
1535	stage2[st2++]=st3;
1536	} else {
1537	/* no stage 3 block, skip */
1538	st2+=4;
1539	}
1540	}
1541	} else {
1542	/* no stage 2 block, skip */
1543	stageUTF8Index+=16;
1544	}
1545	}
1546	}
1547
1548	/* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */
1549	ucnv_MBCSEnumToUnicode(mbcsTable, writeStage3Roundtrip, mbcsTable, pErrorCode);
1550	}
1551
1552	/* MBCS setup functions ----------------------------------------------------- */
1553
1554	static void U_CALLCONV
1555	ucnv_MBCSLoad(UConverterSharedData *sharedData,
1556	UConverterLoadArgs *pArgs,
1557	const uint8_t *raw,
1558	UErrorCode *pErrorCode) {
1559	UDataInfo info;
1560	UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
1561	_MBCSHeader header=(_MBCSHeader )raw;
1562	uint32_t offset;
1563	uint32_t headerLength;
1564	UBool noFromU=FALSE0;
1565
1566	if(header->version[0]==4) {
1567	headerLength=MBCS_HEADER_V4_LENGTH;
1568	} else if(header->version[0]==5 && header->version[1]>=3 &&
1569	(header->options&MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0) {
1570	headerLength=header->options&MBCS_OPT_LENGTH_MASK;
1571	noFromU=(UBool)((header->options&MBCS_OPT_NO_FROM_U)!=0);
1572	} else {
1573	*pErrorCode=U_INVALID_TABLE_FORMAT;
1574	return;
1575	}
1576
1577	mbcsTable->outputType=(uint8_t)header->flags;
1578	if(noFromU && mbcsTable->outputType==MBCS_OUTPUT_1) {
1579	*pErrorCode=U_INVALID_TABLE_FORMAT;
1580	return;
1581	}
1582
1583	/* extension data, header version 4.2 and higher */
1584	offset=header->flags>>8;
1585	if(offset!=0) {
1586	mbcsTable->extIndexes=(const int32_t *)(raw+offset);
1587	}
1588
1589	if(mbcsTable->outputType==MBCS_OUTPUT_EXT_ONLY) {
1590	UConverterLoadArgs args=UCNV_LOAD_ARGS_INITIALIZER{ (int32_t)sizeof(UConverterLoadArgs), 0, false, false, 0, 0, __null, __null, __null };
1591	UConverterSharedData *baseSharedData;
1592	const int32_t *extIndexes;
1593	const char *baseName;
1594
1595	/* extension-only file, load the base table and set values appropriately */
1596	if((extIndexes=mbcsTable->extIndexes)==NULL__null) {
1597	/* extension-only file without extension */
1598	*pErrorCode=U_INVALID_TABLE_FORMAT;
1599	return;
1600	}
1601
1602	if(pArgs->nestedLoads!=1) {
1603	/* an extension table must not be loaded as a base table */
1604	*pErrorCode=U_INVALID_TABLE_FILE;
1605	return;
1606	}
1607
1608	/* load the base table */
1609	baseName=(const char )header+headerLength4;
1610	if(0==uprv_strcmp(baseName, sharedData->staticData->name):: strcmp(baseName, sharedData->staticData->name)) {
1611	/* forbid loading this same extension-only file */
1612	*pErrorCode=U_INVALID_TABLE_FORMAT;
1613	return;
1614	}
1615
1616	/* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
1617	args.size=sizeof(UConverterLoadArgs);
1618	args.nestedLoads=2;
1619	args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable;
1620	args.reserved=pArgs->reserved;
1621	args.options=pArgs->options;
1622	args.pkg=pArgs->pkg;
1623	args.name=baseName;
1624	baseSharedData=ucnv_loaducnv_load_71(&args, pErrorCode);
1625	if(U_FAILURE(*pErrorCode)) {
1626	return;
1627	}
1628	if( baseSharedData->staticData->conversionType!=UCNV_MBCS \|\|
1629	baseSharedData->mbcs.baseSharedData!=NULL__null
1630	) {
1631	ucnv_unloaducnv_unload_71(baseSharedData);
1632	*pErrorCode=U_INVALID_TABLE_FORMAT;
1633	return;
1634	}
1635	if(pArgs->onlyTestIsLoadable) {
1636	/*
1637	* Exit as soon as we know that we can load the converter
1638	* and the format is valid and supported.
1639	* The worst that can happen in the following code is a memory
1640	* allocation error.
1641	*/
1642	ucnv_unloaducnv_unload_71(baseSharedData);
1643	return;
1644	}
1645
1646	/* copy the base table data */
1647	uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable))do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(mbcsTable , &baseSharedData->mbcs, sizeof(UConverterMBCSTable)); } while (false);
1648
1649	/* overwrite values with relevant ones for the extension converter */
1650	mbcsTable->baseSharedData=baseSharedData;
1651	mbcsTable->extIndexes=extIndexes;
1652
1653	/*
1654	* It would be possible to share the swapLFNL data with a base converter,
1655	* but the generated name would have to be different, and the memory
1656	* would have to be free'd only once.
1657	* It is easier to just create the data for the extension converter
1658	* separately when it is requested.
1659	*/
1660	mbcsTable->swapLFNLStateTable=NULL__null;
1661	mbcsTable->swapLFNLFromUnicodeBytes=NULL__null;
1662	mbcsTable->swapLFNLName=NULL__null;
1663
1664	/*
1665	* The reconstitutedData must be deleted only when the base converter
1666	* is unloaded.
1667	*/
1668	mbcsTable->reconstitutedData=NULL__null;
1669
1670	/*
1671	* Set a special, runtime-only outputType if the extension converter
1672	* is a DBCS version of a base converter that also maps single bytes.
1673	*/
1674	if( sharedData->staticData->conversionType==UCNV_DBCS \|\|
1675	(sharedData->staticData->conversionType==UCNV_MBCS &&
1676	sharedData->staticData->minBytesPerChar>=2)
1677	) {
1678	if(baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO) {
1679	/* the base converter is SI/SO-stateful */
1680	int32_t entry;
1681
1682	/* get the dbcs state from the state table entry for SO=0x0e */
1683	entry=mbcsTable->stateTable[0][0xe];
1684	if( MBCS_ENTRY_IS_FINAL(entry)((entry)<0) &&
1685	MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf)==MBCS_STATE_CHANGE_ONLY &&
1686	MBCS_ENTRY_FINAL_STATE(entry)((((uint32_t)entry)>>24)&0x7f)!=0
1687	) {
1688	mbcsTable->dbcsOnlyState=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry)((((uint32_t)entry)>>24)&0x7f);
1689
1690	mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
1691	}
1692	} else if(
1693	baseSharedData->staticData->conversionType==UCNV_MBCS &&
1694	baseSharedData->staticData->minBytesPerChar==1 &&
1695	baseSharedData->staticData->maxBytesPerChar==2 &&
1696	mbcsTable->countStates<=127
1697	) {
1698	/* non-stateful base converter, need to modify the state table */
1699	int32_t (*newStateTable)[256];
1700	int32_t *state;
1701	int32_t i, count;
1702
1703	/* allocate a new state table and copy the base state table contents */
1704	count=mbcsTable->countStates;
1705	newStateTable=(int32_t ()[256])uprv_mallocuprv_malloc_71((count+1)1024);
1706	if(newStateTable==NULL__null) {
1707	ucnv_unloaducnv_unload_71(baseSharedData);
1708	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1709	return;
1710	}
1711
1712	uprv_memcpy(newStateTable, mbcsTable->stateTable, count1024)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(newStateTable , mbcsTable->stateTable, count1024); } while (false);
1713
1714	/* change all final single-byte entries to go to a new all-illegal state */
1715	state=newStateTable[0];
1716	for(i=0; i<256; ++i) {
1717	if(MBCS_ENTRY_IS_FINAL(state[i])((state[i])<0)) {
1718	state[i]=MBCS_ENTRY_TRANSITION(count, 0)(int32_t)(((int32_t)(count)<<24L)\|(0));
1719	}
1720	}
1721
1722	/* build the new all-illegal state */
1723	state=newStateTable[count];
1724	for(i=0; i<256; ++i) {
1725	state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0)(int32_t)(0x80000000\|((int32_t)(0)<<24L)\|((MBCS_STATE_ILLEGAL )<<20L)\|(0));
1726	}
1727	mbcsTable->stateTable=(const int32_t (*)[256])newStateTable;
1728	mbcsTable->countStates=(uint8_t)(count+1);
1729	mbcsTable->stateTableOwned=TRUE1;
1730
1731	mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
1732	}
1733	}
1734
1735	/*
1736	* unlike below for files with base tables, do not get the unicodeMask
1737	* from the sharedData; instead, use the base table's unicodeMask,
1738	* which we copied in the memcpy above;
1739	* this is necessary because the static data unicodeMask, especially
1740	* the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data
1741	*/
1742	} else {
1743	/* conversion file with a base table; an additional extension table is optional */
1744	/* make sure that the output type is known */
1745	switch(mbcsTable->outputType) {
1746	case MBCS_OUTPUT_1:
1747	case MBCS_OUTPUT_2:
1748	case MBCS_OUTPUT_3:
1749	case MBCS_OUTPUT_4:
1750	case MBCS_OUTPUT_3_EUC:
1751	case MBCS_OUTPUT_4_EUC:
1752	case MBCS_OUTPUT_2_SISO:
1753	/* OK */
1754	break;
1755	default:
1756	*pErrorCode=U_INVALID_TABLE_FORMAT;
1757	return;
1758	}
1759	if(pArgs->onlyTestIsLoadable) {
1760	/*
1761	* Exit as soon as we know that we can load the converter
1762	* and the format is valid and supported.
1763	* The worst that can happen in the following code is a memory
1764	* allocation error.
1765	*/
1766	return;
1767	}
1768
1769	mbcsTable->countStates=(uint8_t)header->countStates;
1770	mbcsTable->countToUFallbacks=header->countToUFallbacks;
1771	mbcsTable->stateTable=(const int32_t ()[256])(raw+headerLength4);
1772	mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates);
1773	mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits);
1774
1775	mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable);
1776	mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes);
1777	mbcsTable->fromUBytesLength=header->fromUBytesLength;
1778
1779	/*
1780	* converter versions 6.1 and up contain a unicodeMask that is
1781	* used here to select the most efficient function implementations
1782	*/
1783	info.size=sizeof(UDataInfo);
1784	udata_getInfoudata_getInfo_71((UDataMemory *)sharedData->dataMemory, &info);
1785	if(info.formatVersion[0]>6 \|\| (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) {
1786	/* mask off possible future extensions to be safe */
1787	mbcsTable->unicodeMask=(uint8_t)(sharedData->staticData->unicodeMask&3);
1788	} else {
1789	/* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
1790	mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY1\|UCNV_HAS_SURROGATES2;
1791	}
1792
1793	/*
1794	* _MBCSHeader.version 4.3 adds utf8Friendly data structures.
1795	* Check for the header version, SBCS vs. MBCS, and for whether the
1796	* data structures are optimized for code points as high as what the
1797	* runtime code is designed for.
1798	* The implementation does not handle mapping tables with entries for
1799	* unpaired surrogates.
1800	*/
1801	if( header->version[1]>=3 &&
1802	(mbcsTable->unicodeMask&UCNV_HAS_SURROGATES2)==0 &&
1803	(mbcsTable->countStates==1 ?
1804	(header->version[2]>=(SBCS_FAST_MAX>>8)) :
1805	(header->version[2]>=(MBCS_FAST_MAX>>8))
1806	)
1807	) {
1808	mbcsTable->utf8Friendly=TRUE1;
1809
1810	if(mbcsTable->countStates==1) {
1811	/*
1812	* SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.
1813	* Build a table with indexes to each block, to be used instead of
1814	* the regular stage 1/2 table.
1815	*/
1816	int32_t i;
1817	for(i=0; i<(SBCS_FAST_LIMIT>>6); ++i) {
1818	mbcsTable->sbcsIndex[i]=mbcsTable->fromUnicodeTable[mbcsTable->fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
1819	}
1820	/* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header->version[2]>(SBCS_FAST_MAX>>8) */
1821	mbcsTable->maxFastUChar=SBCS_FAST_MAX;
1822	} else {
1823	/*
1824	* MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
1825	* The .cnv file is prebuilt with an additional stage table with indexes
1826	* to each block.
1827	*/
1828	mbcsTable->mbcsIndex=(const uint16_t *)
1829	(mbcsTable->fromUnicodeBytes+
1830	(noFromU ? 0 : mbcsTable->fromUBytesLength));
1831	mbcsTable->maxFastUChar=(((UChar)header->version[2])<<8)\|0xff;
1832	}
1833	}
1834
1835	/* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */
1836	{
1837	uint32_t asciiRoundtrips=0xffffffff;
1838	int32_t i;
1839
1840	for(i=0; i<0x80; ++i) {
1841	if(mbcsTable->stateTable[0][i]!=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)(int32_t)(0x80000000\|((int32_t)(0)<<24L)\|((MBCS_STATE_VALID_DIRECT_16 )<<20L)\|(i))) {
1842	asciiRoundtrips&=~((uint32_t)1<<(i>>2));
1843	}
1844	}
1845	mbcsTable->asciiRoundtrips=asciiRoundtrips;
1846	}
1847
1848	if(noFromU) {
1849	uint32_t stage1Length=
1850	mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY1 ?
1851	0x440 : 0x40;
1852	uint32_t stage2Length=
1853	(header->offsetFromUBytes-header->offsetFromUTable)/4-
1854	stage1Length/2;
1855	reconstituteData(mbcsTable, stage1Length, stage2Length, header->fullStage2Length, pErrorCode);
1856	}
1857	}
1858
1859	/* Set the impl pointer here so that it is set for both extension-only and base tables. */
1860	if(mbcsTable->utf8Friendly) {
1861	if(mbcsTable->countStates==1) {
1862	sharedData->impl=&_SBCSUTF8Impl;
1863	} else {
1864	if(mbcsTable->outputType==MBCS_OUTPUT_2) {
1865	sharedData->impl=&_DBCSUTF8Impl;
1866	}
1867	}
1868	}
1869
1870	if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY \|\| mbcsTable->outputType==MBCS_OUTPUT_2_SISO) {
1871	/*
1872	* MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip.
1873	* MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly.
1874	*/
1875	mbcsTable->asciiRoundtrips=0;
1876	}
1877	}
1878
1879	static void U_CALLCONV
1880	ucnv_MBCSUnload(UConverterSharedData *sharedData) {
1881	UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
1882
1883	if(mbcsTable->swapLFNLStateTable!=NULL__null) {
1884	uprv_freeuprv_free_71(mbcsTable->swapLFNLStateTable);
1885	}
1886	if(mbcsTable->stateTableOwned) {
1887	uprv_freeuprv_free_71((void *)mbcsTable->stateTable);
1888	}
1889	if(mbcsTable->baseSharedData!=NULL__null) {
1890	ucnv_unloaducnv_unload_71(mbcsTable->baseSharedData);
1891	}
1892	if(mbcsTable->reconstitutedData!=NULL__null) {
1893	uprv_freeuprv_free_71(mbcsTable->reconstitutedData);
1894	}
1895	}
1896
1897	static void U_CALLCONV
1898	ucnv_MBCSOpen(UConverter *cnv,
1899	UConverterLoadArgs *pArgs,
1900	UErrorCode *pErrorCode) {
1901	UConverterMBCSTable *mbcsTable;
1902	const int32_t *extIndexes;
1903	uint8_t outputType;
1904	int8_t maxBytesPerUChar;
1905
1906	if(pArgs->onlyTestIsLoadable) {
1907	return;
1908	}
1909
1910	mbcsTable=&cnv->sharedData->mbcs;
1911	outputType=mbcsTable->outputType;
1912
1913	if(outputType==MBCS_OUTPUT_DBCS_ONLY) {
1914	/* the swaplfnl option does not apply, remove it */
1915	cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL0x10;
1916	}
1917
1918	if((pArgs->options&UCNV_OPTION_SWAP_LFNL0x10)!=0) {
1919	/* do this because double-checked locking is broken */
1920	UBool isCached;
1921
1922	icu::umtx_lockumtx_lock_71(NULL__null);
1923	isCached=mbcsTable->swapLFNLStateTable!=NULL__null;
1924	icu::umtx_unlockumtx_unlock_71(NULL__null);
1925
1926	if(!isCached) {
1927	if(!_EBCDICSwapLFNL(cnv->sharedData, pErrorCode)) {
1928	if(U_FAILURE(*pErrorCode)) {
1929	return; /* something went wrong */
1930	}
1931
1932	/* the option does not apply, remove it */
1933	cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL0x10;
1934	}
1935	}
1936	}
1937
1938	if(uprv_strstr(pArgs->name, "18030"):: strstr(pArgs->name, "18030")!=NULL__null) {
1939	if(uprv_strstr(pArgs->name, "gb18030"):: strstr(pArgs->name, "gb18030")!=NULL__null \|\| uprv_strstr(pArgs->name, "GB18030"):: strstr(pArgs->name, "GB18030")!=NULL__null) {
1940	/* set a flag for GB 18030 mode, which changes the callback behavior */
1941	cnv->options\|=_MBCS_OPTION_GB180300x8000;
1942	}
1943	} else if((uprv_strstr(pArgs->name, "KEIS"):: strstr(pArgs->name, "KEIS")!=NULL__null) \|\| (uprv_strstr(pArgs->name, "keis"):: strstr(pArgs->name, "keis")!=NULL__null)) {
1944	/* set a flag for KEIS converter, which changes the SI/SO character sequence */
1945	cnv->options\|=_MBCS_OPTION_KEIS0x01000;
1946	} else if((uprv_strstr(pArgs->name, "JEF"):: strstr(pArgs->name, "JEF")!=NULL__null) \|\| (uprv_strstr(pArgs->name, "jef"):: strstr(pArgs->name, "jef")!=NULL__null)) {
1947	/* set a flag for JEF converter, which changes the SI/SO character sequence */
1948	cnv->options\|=_MBCS_OPTION_JEF0x02000;
1949	} else if((uprv_strstr(pArgs->name, "JIPS"):: strstr(pArgs->name, "JIPS")!=NULL__null) \|\| (uprv_strstr(pArgs->name, "jips"):: strstr(pArgs->name, "jips")!=NULL__null)) {
1950	/* set a flag for JIPS converter, which changes the SI/SO character sequence */
1951	cnv->options\|=_MBCS_OPTION_JIPS0x04000;
1952	}
1953
1954	/* fix maxBytesPerUChar depending on outputType and options etc. */
1955	if(outputType==MBCS_OUTPUT_2_SISO) {
1956	cnv->maxBytesPerUChar=3; /* SO+DBCS */
1957	}
1958
1959	extIndexes=mbcsTable->extIndexes;
1960	if(extIndexes!=NULL__null) {
1961	maxBytesPerUChar=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(extIndexes)((extIndexes)[UCNV_EXT_COUNT_BYTES]&0xff);
1962	if(outputType==MBCS_OUTPUT_2_SISO) {
1963	++maxBytesPerUChar; /* SO + multiple DBCS */
1964	}
1965
1966	if(maxBytesPerUChar>cnv->maxBytesPerUChar) {
1967	cnv->maxBytesPerUChar=maxBytesPerUChar;
1968	}
1969	}
1970
1971	#if 0
1972	/*
1973	* documentation of UConverter fields used for status
1974	* all of these fields are (re)set to 0 by ucnv_bld.c and ucnv_reset()
1975	*/
1976
1977	/* toUnicode */
1978	cnv->toUnicodeStatus=0; /* offset */
1979	cnv->mode=0; /* state */
1980	cnv->toULength=0; /* byteIndex */
1981
1982	/* fromUnicode */
1983	cnv->fromUChar32=0;
1984	cnv->fromUnicodeStatus=1; /* prevLength */
1985	#endif
1986	}
1987
1988	U_CDECL_BEGINextern "C" {
1989
1990	static const char* U_CALLCONV
1991	ucnv_MBCSGetName(const UConverter *cnv) {
1992	if((cnv->options&UCNV_OPTION_SWAP_LFNL0x10)!=0 && cnv->sharedData->mbcs.swapLFNLName!=NULL__null) {
1993	return cnv->sharedData->mbcs.swapLFNLName;
1994	} else {
1995	return cnv->sharedData->staticData->name;
1996	}
1997	}
1998	U_CDECL_END}
1999
2000
2001	/* MBCS-to-Unicode conversion functions ------------------------------------- */
2002
2003	static UChar32 U_CALLCONV
2004	ucnv_MBCSGetFallback(UConverterMBCSTable *mbcsTable, uint32_t offset) {
2005	const _MBCSToUFallback *toUFallbacks;
2006	uint32_t i, start, limit;
2007
2008	limit=mbcsTable->countToUFallbacks;
2009	if(limit>0) {
2010	/* do a binary search for the fallback mapping */
2011	toUFallbacks=mbcsTable->toUFallbacks;
2012	start=0;
2013	while(start<limit-1) {
2014	i=(start+limit)/2;
2015	if(offset<toUFallbacks[i].offset) {
2016	limit=i;
2017	} else {
2018	start=i;
2019	}
2020	}
2021
2022	/* did we really find it? */
2023	if(offset==toUFallbacks[start].offset) {
2024	return toUFallbacks[start].codePoint;
2025	}
2026	}
2027
2028	return 0xfffe;
2029	}
2030
2031	/* This version of ucnv_MBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages. */
2032	static void
2033	ucnv_MBCSSingleToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
2034	UErrorCode *pErrorCode) {
2035	UConverter *cnv;
2036	const uint8_t source, sourceLimit;
2037	UChar *target;
2038	const UChar *targetLimit;
2039	int32_t *offsets;
2040
2041	const int32_t (*stateTable)[256];
2042
2043	int32_t sourceIndex;
2044
2045	int32_t entry;
2046	UChar c;
2047	uint8_t action;
2048
2049	/* set up the local pointers */
2050	cnv=pArgs->converter;
2051	source=(const uint8_t *)pArgs->source;
2052	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
2053	target=pArgs->target;
2054	targetLimit=pArgs->targetLimit;
2055	offsets=pArgs->offsets;
2056
2057	if((cnv->options&UCNV_OPTION_SWAP_LFNL0x10)!=0) {
2058	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
2059	} else {
2060	stateTable=cnv->sharedData->mbcs.stateTable;
2061	}
2062
2063	/* sourceIndex=-1 if the current character began in the previous buffer */
2064	sourceIndex=0;
2065
2066	/* conversion loop */
2067	while(source<sourceLimit) {
2068	/*
2069	* This following test is to see if available input would overflow the output.
2070	* It does not catch output of more than one code unit that
2071	* overflows as a result of a surrogate pair or callback output
2072	* from the last source byte.
2073	* Therefore, those situations also test for overflows and will
2074	* then break the loop, too.
2075	*/
2076	if(target>=targetLimit) {
2077	/* target is full */
2078	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
2079	break;
2080	}
2081
2082	entry=stateTable[0][*source++];
2083	/* MBCS_ENTRY_IS_FINAL(entry) */
2084
2085	/* test the most common case first */
2086	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)((entry)<(int32_t)0x80100000)) {
2087	/* output BMP code point */
2088	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2089	if(offsets!=NULL__null) {
2090	*offsets++=sourceIndex;
2091	}
2092
2093	/* normal end of action codes: prepare for a new character */
2094	++sourceIndex;
2095	continue;
2096	}
2097
2098	/*
2099	* An if-else-if chain provides more reliable performance for
2100	* the most common cases compared to a switch.
2101	*/
2102	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf));
2103	if(action==MBCS_STATE_VALID_DIRECT_20 \|\|
2104	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)true)
2105	) {
2106	entry=MBCS_ENTRY_FINAL_VALUE(entry)((entry)&0xfffff);
2107	/* output surrogate pair */
2108	*target++=(UChar)(0xd800\|(UChar)(entry>>10));
2109	if(offsets!=NULL__null) {
2110	*offsets++=sourceIndex;
2111	}
2112	c=(UChar)(0xdc00\|(UChar)(entry&0x3ff));
2113	if(target<targetLimit) {
2114	*target++=c;
2115	if(offsets!=NULL__null) {
2116	*offsets++=sourceIndex;
2117	}
2118	} else {
2119	/* target overflow */
2120	cnv->UCharErrorBuffer[0]=c;
2121	cnv->UCharErrorBufferLength=1;
2122	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
2123	break;
2124	}
2125
2126	++sourceIndex;
2127	continue;
2128	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
2129	if(UCNV_TO_U_USE_FALLBACK(cnv)true) {
2130	/* output BMP code point */
2131	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2132	if(offsets!=NULL__null) {
2133	*offsets++=sourceIndex;
2134	}
2135
2136	++sourceIndex;
2137	continue;
2138	}
2139	} else if(action==MBCS_STATE_UNASSIGNED) {
2140	/* just fall through */
2141	} else if(action==MBCS_STATE_ILLEGAL) {
2142	/* callback(illegal) */
2143	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
2144	} else {
2145	/* reserved, must never occur */
2146	++sourceIndex;
2147	continue;
2148	}
2149
2150	if(U_FAILURE(*pErrorCode)) {
2151	/* callback(illegal) */
2152	break;
2153	} else /* unassigned sequences indicated with byteIndex>0 */ {
2154	/* try an extension mapping */
2155	pArgs->source=(const char *)source;
2156	cnv->toUBytes[0]=*(source-1);
2157	cnv->toULength=_extToU(cnv, cnv->sharedData,
2158	1, &source, sourceLimit,
2159	&target, targetLimit,
2160	&offsets, sourceIndex,
2161	pArgs->flush,
2162	pErrorCode);
2163	sourceIndex+=1+(int32_t)(source-(const uint8_t *)pArgs->source);
2164
2165	if(U_FAILURE(*pErrorCode)) {
2166	/* not mappable or buffer overflow */
2167	break;
2168	}
2169	}
2170	}
2171
2172	/* write back the updated pointers */
2173	pArgs->source=(const char *)source;
2174	pArgs->target=target;
2175	pArgs->offsets=offsets;
2176	}
2177
2178	/*
2179	* This version of ucnv_MBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages
2180	* that only map to and from the BMP.
2181	* In addition to single-byte optimizations, the offset calculations
2182	* become much easier.
2183	*/
2184	static void
2185	ucnv_MBCSSingleToBMPWithOffsets(UConverterToUnicodeArgs *pArgs,
2186	UErrorCode *pErrorCode) {
2187	UConverter *cnv;
2188	const uint8_t source, sourceLimit, *lastSource;
2189	UChar *target;
2190	int32_t targetCapacity, length;
2191	int32_t *offsets;
2192
2193	const int32_t (*stateTable)[256];
2194
2195	int32_t sourceIndex;
2196
2197	int32_t entry;
2198	uint8_t action;
2199
2200	/* set up the local pointers */
2201	cnv=pArgs->converter;
2202	source=(const uint8_t *)pArgs->source;
2203	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
2204	target=pArgs->target;
2205	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
2206	offsets=pArgs->offsets;
2207
2208	if((cnv->options&UCNV_OPTION_SWAP_LFNL0x10)!=0) {
2209	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
2210	} else {
2211	stateTable=cnv->sharedData->mbcs.stateTable;
2212	}
2213
2214	/* sourceIndex=-1 if the current character began in the previous buffer */
2215	sourceIndex=0;
2216	lastSource=source;
2217
2218	/*
2219	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
2220	* for the minimum of the sourceLength and targetCapacity
2221	*/
2222	length=(int32_t)(sourceLimit-source);
2223	if(length<targetCapacity) {
2224	targetCapacity=length;
2225	}
2226
2227	#if MBCS_UNROLL_SINGLE_TO_BMP1
2228	/* unrolling makes it faster on Pentium III/Windows 2000 */
2229	/* unroll the loop with the most common case */
2230	unrolled:
2231	if(targetCapacity>=16) {
2232	int32_t count, loops, oredEntries;
2233
2234	loops=count=targetCapacity>>4;
2235	do {
2236	oredEntries=entry=stateTable[0][*source++];
2237	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2238	oredEntries\|=entry=stateTable[0][*source++];
2239	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2240	oredEntries\|=entry=stateTable[0][*source++];
2241	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2242	oredEntries\|=entry=stateTable[0][*source++];
2243	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2244	oredEntries\|=entry=stateTable[0][*source++];
2245	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2246	oredEntries\|=entry=stateTable[0][*source++];
2247	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2248	oredEntries\|=entry=stateTable[0][*source++];
2249	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2250	oredEntries\|=entry=stateTable[0][*source++];
2251	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2252	oredEntries\|=entry=stateTable[0][*source++];
2253	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2254	oredEntries\|=entry=stateTable[0][*source++];
2255	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2256	oredEntries\|=entry=stateTable[0][*source++];
2257	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2258	oredEntries\|=entry=stateTable[0][*source++];
2259	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2260	oredEntries\|=entry=stateTable[0][*source++];
2261	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2262	oredEntries\|=entry=stateTable[0][*source++];
2263	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2264	oredEntries\|=entry=stateTable[0][*source++];
2265	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2266	oredEntries\|=entry=stateTable[0][*source++];
2267	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2268
2269	/* were all 16 entries really valid? */
2270	if(!MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(oredEntries)((oredEntries)<(int32_t)0x80100000)) {
2271	/* no, return to the first of these 16 */
2272	source-=16;
2273	target-=16;
2274	break;
2275	}
2276	} while(--count>0);
2277	count=loops-count;
2278	targetCapacity-=16*count;
2279
2280	if(offsets!=NULL__null) {
2281	lastSource+=16*count;
2282	while(count>0) {
2283	*offsets++=sourceIndex++;
2284	*offsets++=sourceIndex++;
2285	*offsets++=sourceIndex++;
2286	*offsets++=sourceIndex++;
2287	*offsets++=sourceIndex++;
2288	*offsets++=sourceIndex++;
2289	*offsets++=sourceIndex++;
2290	*offsets++=sourceIndex++;
2291	*offsets++=sourceIndex++;
2292	*offsets++=sourceIndex++;
2293	*offsets++=sourceIndex++;
2294	*offsets++=sourceIndex++;
2295	*offsets++=sourceIndex++;
2296	*offsets++=sourceIndex++;
2297	*offsets++=sourceIndex++;
2298	*offsets++=sourceIndex++;
2299	--count;
2300	}
2301	}
2302	}
2303	#endif
2304
2305	/* conversion loop */
2306	while(targetCapacity > 0 && source < sourceLimit) {
2307	entry=stateTable[0][*source++];
2308	/* MBCS_ENTRY_IS_FINAL(entry) */
2309
2310	/* test the most common case first */
2311	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)((entry)<(int32_t)0x80100000)) {
2312	/* output BMP code point */
2313	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2314	--targetCapacity;
2315	continue;
2316	}
2317
2318	/*
2319	* An if-else-if chain provides more reliable performance for
2320	* the most common cases compared to a switch.
2321	*/
2322	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf));
2323	if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
2324	if(UCNV_TO_U_USE_FALLBACK(cnv)true) {
2325	/* output BMP code point */
2326	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2327	--targetCapacity;
2328	continue;
2329	}
2330	} else if(action==MBCS_STATE_UNASSIGNED) {
2331	/* just fall through */
2332	} else if(action==MBCS_STATE_ILLEGAL) {
2333	/* callback(illegal) */
2334	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
2335	} else {
2336	/* reserved, must never occur */
2337	continue;
2338	}
2339
2340	/* set offsets since the start or the last extension */
2341	if(offsets!=NULL__null) {
2342	int32_t count=(int32_t)(source-lastSource);
2343
2344	/* predecrement: do not set the offset for the callback-causing character */
2345	while(--count>0) {
2346	*offsets++=sourceIndex++;
2347	}
2348	/* offset and sourceIndex are now set for the current character */
2349	}
2350
2351	if(U_FAILURE(*pErrorCode)) {
2352	/* callback(illegal) */
2353	break;
2354	} else /* unassigned sequences indicated with byteIndex>0 */ {
2355	/* try an extension mapping */
2356	lastSource=source;
2357	cnv->toUBytes[0]=*(source-1);
2358	cnv->toULength=_extToU(cnv, cnv->sharedData,
2359	1, &source, sourceLimit,
2360	&target, pArgs->targetLimit,
2361	&offsets, sourceIndex,
2362	pArgs->flush,
2363	pErrorCode);
2364	sourceIndex+=1+(int32_t)(source-lastSource);
2365
2366	if(U_FAILURE(*pErrorCode)) {
2367	/* not mappable or buffer overflow */
2368	break;
2369	}
2370
2371	/* recalculate the targetCapacity after an extension mapping */
2372	targetCapacity=(int32_t)(pArgs->targetLimit-target);
2373	length=(int32_t)(sourceLimit-source);
2374	if(length<targetCapacity) {
2375	targetCapacity=length;
2376	}
2377	}
2378
2379	#if MBCS_UNROLL_SINGLE_TO_BMP1
2380	/* unrolling makes it faster on Pentium III/Windows 2000 */
2381	goto unrolled;
2382	#endif
2383	}
2384
2385	if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) {
2386	/* target is full */
2387	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
2388	}
2389
2390	/* set offsets since the start or the last callback */
2391	if(offsets!=NULL__null) {
2392	size_t count=source-lastSource;
2393	while(count>0) {
2394	*offsets++=sourceIndex++;
2395	--count;
2396	}
2397	}
2398
2399	/* write back the updated pointers */
2400	pArgs->source=(const char *)source;
2401	pArgs->target=target;
2402	pArgs->offsets=offsets;
2403	}
2404
2405	static UBool
2406	hasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) {
2407	const int32_t *row=stateTable[state];
2408	int32_t b, entry;
2409	/* First test for final entries in this state for some commonly valid byte values. */
2410	entry=row[0xa1];
2411	if( !MBCS_ENTRY_IS_TRANSITION(entry)((entry)>=0) &&
2412	MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf)!=MBCS_STATE_ILLEGAL
2413	) {
2414	return TRUE1;
2415	}
2416	entry=row[0x41];
2417	if( !MBCS_ENTRY_IS_TRANSITION(entry)((entry)>=0) &&
2418	MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf)!=MBCS_STATE_ILLEGAL
2419	) {
2420	return TRUE1;
2421	}
2422	/* Then test for final entries in this state. */
2423	for(b=0; b<=0xff; ++b) {
2424	entry=row[b];
2425	if( !MBCS_ENTRY_IS_TRANSITION(entry)((entry)>=0) &&
2426	MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf)!=MBCS_STATE_ILLEGAL
2427	) {
2428	return TRUE1;
2429	}
2430	}
2431	/* Then recurse for transition entries. */
2432	for(b=0; b<=0xff; ++b) {
2433	entry=row[b];
2434	if( MBCS_ENTRY_IS_TRANSITION(entry)((entry)>=0) &&
2435	hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)(((uint32_t)entry)>>24))
2436	) {
2437	return TRUE1;
2438	}
2439	}
2440	return FALSE0;
2441	}
2442
2443	/*
2444	* Is byte b a single/lead byte in this state?
2445	* Recurse for transition states, because here we don't want to say that
2446	* b is a lead byte if all byte sequences that start with b are illegal.
2447	*/
2448	static UBool
2449	isSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) {
2450	const int32_t *row=stateTable[state];
2451	int32_t entry=row[b];
2452	if(MBCS_ENTRY_IS_TRANSITION(entry)((entry)>=0)) { /* lead byte */
2453	return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)(((uint32_t)entry)>>24));
2454	} else {
2455	uint8_t action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf));
2456	if(action==MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
2457	return FALSE0; /* SI/SO are illegal for DBCS-only conversion */
2458	} else {
2459	return action!=MBCS_STATE_ILLEGAL;
2460	}
2461	}
2462	}
2463
/*
 * Converts bytes from pArgs->source into UChars at pArgs->target by walking
 * the converter's state table; when pArgs->offsets is not NULL, one source
 * index is written per output UChar.
 *
 * Early exits:
 *   - If cnv->preToULength>0, ucnv_extContinueMatchToU() is called first; the
 *     function returns if that call failed or left preToULength negative.
 *   - If countStates==1, the work is delegated to one of the two
 *     single-byte functions, chosen by the UCNV_HAS_SUPPLEMENTARY bit.
 *
 * Error codes set through pErrorCode in this function:
 *   - U_BUFFER_OVERFLOW_ERROR when target reaches targetLimit; if only the
 *     second unit of a surrogate pair does not fit, that unit is stored in
 *     cnv->UCharErrorBuffer.
 *   - U_ILLEGAL_CHAR_FOUND for illegal entries; the reported byte sequence
 *     may be shortened (see the Ticket 5691 comment below).
 * Sequences that are left without output and without error are handed to
 * _extToU().
 *
 * On exit the local offset/state/byteIndex are written back to
 * cnv->toUnicodeStatus, cnv->mode and cnv->toULength, and the advanced
 * source/target/offsets pointers are written back to pArgs.
 */
2464	U_CFUNCextern "C" void
2465	ucnv_MBCSToUnicodeWithOffsetsucnv_MBCSToUnicodeWithOffsets_71(UConverterToUnicodeArgs *pArgs,
2466	UErrorCode *pErrorCode) {
2467	UConverter *cnv;
2468	const uint8_t source, sourceLimit;
2469	UChar *target;
2470	const UChar *targetLimit;
2471	int32_t *offsets;
2472
2473	const int32_t (*stateTable)[256];
2474	const uint16_t *unicodeCodeUnits;
2475
2476	uint32_t offset;
2477	uint8_t state;
2478	int8_t byteIndex;
2479	uint8_t *bytes;
2480
2481	int32_t sourceIndex, nextSourceIndex;
2482
2483	int32_t entry;
2484	UChar c;
2485	uint8_t action;
2486
2487	/* use optimized function if possible */
2488	cnv=pArgs->converter;
2489
2490	if(cnv->preToULength>0) {
2491	/*
2492	* pass sourceIndex=-1 because we continue from an earlier buffer
2493	* in the future, this may change with continuous offsets
2494	*/
2495	ucnv_extContinueMatchToUucnv_extContinueMatchToU_71(cnv, pArgs, -1, pErrorCode);
2496
2497	if(U_FAILURE(*pErrorCode) \|\| cnv->preToULength<0) {
2498	return;
2499	}
2500	}
2501
2502	if(cnv->sharedData->mbcs.countStates==1) {
2503	if(!(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY1)) {
2504	ucnv_MBCSSingleToBMPWithOffsets(pArgs, pErrorCode);
2505	} else {
2506	ucnv_MBCSSingleToUnicodeWithOffsets(pArgs, pErrorCode);
2507	}
2508	return;
2509	}
2510
2511	/* set up the local pointers */
2512	source=(const uint8_t *)pArgs->source;
2513	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
2514	target=pArgs->target;
2515	targetLimit=pArgs->targetLimit;
2516	offsets=pArgs->offsets;
2517
2518	if((cnv->options&UCNV_OPTION_SWAP_LFNL0x10)!=0) {
2519	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
2520	} else {
2521	stateTable=cnv->sharedData->mbcs.stateTable;
2522	}
2523	unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
2524
2525	/* get the converter state from UConverter */
2526	offset=cnv->toUnicodeStatus;
2527	byteIndex=cnv->toULength;
2528	bytes=cnv->toUBytes;
2529
2530	/*
2531	* if we are in the SBCS state for a DBCS-only converter,
2532	* then load the DBCS state from the MBCS data
2533	* (dbcsOnlyState==0 if it is not a DBCS-only converter)
2534	*/
2535	if((state=(uint8_t)(cnv->mode))==0) {
2536	state=cnv->sharedData->mbcs.dbcsOnlyState;
2537	}
2538
2539	/* sourceIndex=-1 if the current character began in the previous buffer */
2540	sourceIndex=byteIndex==0 ? 0 : -1;
2541	nextSourceIndex=0;
2542
2543	/* conversion loop */
2544	while(source<sourceLimit) {
2545	/*
2546	* This following test is to see if available input would overflow the output.
2547	* It does not catch output of more than one code unit that
2548	* overflows as a result of a surrogate pair or callback output
2549	* from the last source byte.
2550	* Therefore, those situations also test for overflows and will
2551	* then break the loop, too.
2552	*/
2553	if(target>=targetLimit) {
2554	/* target is full */
2555	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
2556	break;
2557	}
2558
2559	if(byteIndex==0) {
2560	/* optimized loop for 1/2-byte input and BMP output */
2561	if(offsets==NULL__null) {
2562	do {
2563	entry=stateTable[state][*source];
2564	if(MBCS_ENTRY_IS_TRANSITION(entry)((entry)>=0)) {
2565	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)(((uint32_t)entry)>>24);
2566	offset=MBCS_ENTRY_TRANSITION_OFFSET(entry)((entry)&0xffffff);
2567
2568	++source;
2569	if( source<sourceLimit &&
2570	MBCS_ENTRY_IS_FINAL(entry=stateTable[state][source])((entry=stateTable[state][source])<0) &&
2571	MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf)==MBCS_STATE_VALID_16 &&
2572	(c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry)])<0xfffe
2573	) {
2574	++source;
2575	*target++=c;
2576	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry)((((uint32_t)entry)>>24)&0x7f); /* typically 0 */
2577	offset=0;
2578	} else {
2579	/* set the state and leave the optimized loop */
2580	bytes[0]=*(source-1);
2581	byteIndex=1;
2582	break;
2583	}
2584	} else {
2585	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)((entry)<(int32_t)0x80100000)) {
2586	/* output BMP code point */
2587	++source;
2588	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2589	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry)((((uint32_t)entry)>>24)&0x7f); /* typically 0 */
2590	} else {
2591	/* leave the optimized loop */
2592	break;
2593	}
2594	}
2595	} while(source<sourceLimit && target<targetLimit);
2596	} else /* offsets!=NULL */ {
2597	do {
2598	entry=stateTable[state][*source];
2599	if(MBCS_ENTRY_IS_TRANSITION(entry)((entry)>=0)) {
2600	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)(((uint32_t)entry)>>24);
2601	offset=MBCS_ENTRY_TRANSITION_OFFSET(entry)((entry)&0xffffff);
2602
2603	++source;
2604	if( source<sourceLimit &&
2605	MBCS_ENTRY_IS_FINAL(entry=stateTable[state][source])((entry=stateTable[state][source])<0) &&
2606	MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf)==MBCS_STATE_VALID_16 &&
2607	(c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry)])<0xfffe
2608	) {
2609	++source;
2610	*target++=c;
/* offsets is non-NULL in this branch (see the enclosing else), so this test always succeeds */
2611	if(offsets!=NULL__null) {
2612	*offsets++=sourceIndex;
2613	sourceIndex=(nextSourceIndex+=2);
2614	}
2615	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry)((((uint32_t)entry)>>24)&0x7f); /* typically 0 */
2616	offset=0;
2617	} else {
2618	/* set the state and leave the optimized loop */
2619	++nextSourceIndex;
2620	bytes[0]=*(source-1);
2621	byteIndex=1;
2622	break;
2623	}
2624	} else {
2625	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)((entry)<(int32_t)0x80100000)) {
2626	/* output BMP code point */
2627	++source;
2628	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
/* offsets is non-NULL in this branch (see the enclosing else), so this test always succeeds */
2629	if(offsets!=NULL__null) {
2630	*offsets++=sourceIndex;
2631	sourceIndex=++nextSourceIndex;
2632	}
2633	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry)((((uint32_t)entry)>>24)&0x7f); /* typically 0 */
2634	} else {
2635	/* leave the optimized loop */
2636	break;
2637	}
2638	}
2639	} while(source<sourceLimit && target<targetLimit);
2640	}
2641
2642	/*
2643	* these tests and break statements could be put inside the loop
2644	* if C had "break outerLoop" like Java
2645	*/
2646	if(source>=sourceLimit) {
2647	break;
2648	}
2649	if(target>=targetLimit) {
2650	/* target is full */
2651	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
2652	break;
2653	}
2654
2655	++nextSourceIndex;
2656	bytes[byteIndex++]=*source++;
2657	} else /* byteIndex>0 */ {
2658	++nextSourceIndex;
2659	entry=stateTable[state][bytes[byteIndex++]=*source++];
2660	}
2661
2662	if(MBCS_ENTRY_IS_TRANSITION(entry)((entry)>=0)) {
2663	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)(((uint32_t)entry)>>24);
2664	offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry)((entry)&0xffffff);
2665	continue;
2666	}
2667
2668	/* save the previous state for proper extension mapping with SI/SO-stateful converters */
2669	cnv->mode=state;
2670
2671	/* set the next state early so that we can reuse the entry variable */
2672	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry)((((uint32_t)entry)>>24)&0x7f); /* typically 0 */
2673
2674	/*
2675	* An if-else-if chain provides more reliable performance for
2676	* the most common cases compared to a switch.
2677	*/
2678	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf));
2679	if(action==MBCS_STATE_VALID_16) {
2680	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2681	c=unicodeCodeUnits[offset];
2682	if(c<0xfffe) {
2683	/* output BMP code point */
2684	*target++=c;
2685	if(offsets!=NULL__null) {
2686	*offsets++=sourceIndex;
2687	}
2688	byteIndex=0;
2689	} else if(c==0xfffe) {
2690	if(UCNV_TO_U_USE_FALLBACK(cnv)true && (entry=(int32_t)ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
2691	/* output fallback BMP code point */
2692	*target++=(UChar)entry;
2693	if(offsets!=NULL__null) {
2694	*offsets++=sourceIndex;
2695	}
2696	byteIndex=0;
2697	}
2698	} else {
2699	/* callback(illegal) */
2700	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
2701	}
2702	} else if(action==MBCS_STATE_VALID_DIRECT_16) {
2703	/* output BMP code point */
2704	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2705	if(offsets!=NULL__null) {
2706	*offsets++=sourceIndex;
2707	}
2708	byteIndex=0;
2709	} else if(action==MBCS_STATE_VALID_16_PAIR) {
2710	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2711	c=unicodeCodeUnits[offset++];
2712	if(c<0xd800) {
2713	/* output BMP code point below 0xd800 */
2714	*target++=c;
2715	if(offsets!=NULL__null) {
2716	*offsets++=sourceIndex;
2717	}
2718	byteIndex=0;
2719	} else if(UCNV_TO_U_USE_FALLBACK(cnv)true ? c<=0xdfff : c<=0xdbff) {
2720	/* output roundtrip or fallback surrogate pair */
2721	*target++=(UChar)(c&0xdbff);
2722	if(offsets!=NULL__null) {
2723	*offsets++=sourceIndex;
2724	}
2725	byteIndex=0;
2726	if(target<targetLimit) {
2727	*target++=unicodeCodeUnits[offset];
2728	if(offsets!=NULL__null) {
2729	*offsets++=sourceIndex;
2730	}
2731	} else {
2732	/* target overflow */
2733	cnv->UCharErrorBuffer[0]=unicodeCodeUnits[offset];
2734	cnv->UCharErrorBufferLength=1;
2735	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
2736
2737	offset=0;
2738	break;
2739	}
2740	} else if(UCNV_TO_U_USE_FALLBACK(cnv)true ? (c&0xfffe)==0xe000 : c==0xe000) {
2741	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
2742	*target++=unicodeCodeUnits[offset];
2743	if(offsets!=NULL__null) {
2744	*offsets++=sourceIndex;
2745	}
2746	byteIndex=0;
2747	} else if(c==0xffff) {
2748	/* callback(illegal) */
2749	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
2750	}
2751	} else if(action==MBCS_STATE_VALID_DIRECT_20 \|\|
2752	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)true)
2753	) {
2754	entry=MBCS_ENTRY_FINAL_VALUE(entry)((entry)&0xfffff);
2755	/* output surrogate pair */
2756	*target++=(UChar)(0xd800\|(UChar)(entry>>10));
2757	if(offsets!=NULL__null) {
2758	*offsets++=sourceIndex;
2759	}
2760	byteIndex=0;
2761	c=(UChar)(0xdc00\|(UChar)(entry&0x3ff));
2762	if(target<targetLimit) {
2763	*target++=c;
2764	if(offsets!=NULL__null) {
2765	*offsets++=sourceIndex;
2766	}
2767	} else {
2768	/* target overflow */
2769	cnv->UCharErrorBuffer[0]=c;
2770	cnv->UCharErrorBufferLength=1;
2771	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
2772
2773	offset=0;
2774	break;
2775	}
2776	} else if(action==MBCS_STATE_CHANGE_ONLY) {
2777	/*
2778	* This serves as a state change without any output.
2779	* It is useful for reading simple stateful encodings,
2780	* for example using just Shift-In/Shift-Out codes.
2781	* The 21 unused bits may later be used for more sophisticated
2782	* state transitions.
2783	*/
2784	if(cnv->sharedData->mbcs.dbcsOnlyState==0) {
2785	byteIndex=0;
2786	} else {
2787	/* SI/SO are illegal for DBCS-only conversion */
2788	state=(uint8_t)(cnv->mode); /* restore the previous state */
2789
2790	/* callback(illegal) */
2791	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
2792	}
2793	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
2794	if(UCNV_TO_U_USE_FALLBACK(cnv)true) {
2795	/* output BMP code point */
2796	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2797	if(offsets!=NULL__null) {
2798	*offsets++=sourceIndex;
2799	}
2800	byteIndex=0;
2801	}
2802	} else if(action==MBCS_STATE_UNASSIGNED) {
2803	/* just fall through */
2804	} else if(action==MBCS_STATE_ILLEGAL) {
2805	/* callback(illegal) */
2806	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
2807	} else {
2808	/* reserved, must never occur */
2809	byteIndex=0;
2810	}
2811
2812	/* end of action codes: prepare for a new character */
2813	offset=0;
2814
/*
 * byteIndex==0 means the bytes collected so far were fully handled above;
 * byteIndex>0 with a failure code is an illegal sequence, and
 * byteIndex>0 without a failure code is handed to _extToU().
 */
2815	if(byteIndex==0) {
2816	sourceIndex=nextSourceIndex;
2817	} else if(U_FAILURE(*pErrorCode)) {
2818	/* callback(illegal) */
2819	if(byteIndex>1) {
2820	/*
2821	* Ticket 5691: consistent illegal sequences:
2822	* - We include at least the first byte in the illegal sequence.
2823	* - If any of the non-initial bytes could be the start of a character,
2824	* we stop the illegal sequence before the first one of those.
2825	*/
2826	UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
2827	int8_t i;
2828	for(i=1;
2829	i<byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, bytes[i]);
2830	++i) {}
2831	if(i<byteIndex) {
2832	/* Back out some bytes. */
2833	int8_t backOutDistance=byteIndex-i;
2834	int32_t bytesFromThisBuffer=(int32_t)(source-(const uint8_t *)pArgs->source);
2835	byteIndex=i; /* length of reported illegal byte sequence */
2836	if(backOutDistance<=bytesFromThisBuffer) {
2837	source-=backOutDistance;
2838	} else {
2839	/* Back out bytes from the previous buffer: Need to replay them. */
2840	cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
2841	/* preToULength is negative! */
2842	uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(cnv->preToU , bytes+i, -cnv->preToULength); } while (false);
2843	source=(const uint8_t *)pArgs->source;
2844	}
2845	}
2846	}
2847	break;
2848	} else /* unassigned sequences indicated with byteIndex>0 */ {
2849	/* try an extension mapping */
2850	pArgs->source=(const char *)source;
2851	byteIndex=_extToU(cnv, cnv->sharedData,
2852	byteIndex, &source, sourceLimit,
2853	&target, targetLimit,
2854	&offsets, sourceIndex,
2855	pArgs->flush,
2856	pErrorCode);
2857	sourceIndex=nextSourceIndex+=(int32_t)(source-(const uint8_t *)pArgs->source);
2858
2859	if(U_FAILURE(*pErrorCode)) {
2860	/* not mappable or buffer overflow */
2861	break;
2862	}
2863	}
2864	}
2865
2866	/* set the converter state back into UConverter */
2867	cnv->toUnicodeStatus=offset;
2868	cnv->mode=state;
2869	cnv->toULength=byteIndex;
2870
2871	/* write back the updated pointers */
2872	pArgs->source=(const char *)source;
2873	pArgs->target=target;
2874	pArgs->offsets=offsets;
2875	}
2876
2877	/*
2878	* This version of ucnv_MBCSGetNextUChar() is optimized for single-byte, single-state codepages.
2879	* We still need a conversion loop in case we find reserved action codes, which are to be ignored.
2880	*/
/*
 * Only row 0 of the state table is consulted, and pArgs->source is written
 * back after every byte that is read.
 *
 * Returns:
 *   - the BMP or supplementary code point for a byte with a direct mapping
 *   - UCNV_GET_NEXT_UCHAR_USE_TO_U for a byte that produced neither output
 *     nor an error; pArgs->source is moved back by one byte first so that
 *     the caller sees that byte again
 *   - 0xffff with *pErrorCode set to U_INDEX_OUTOFBOUNDS_ERROR once the
 *     loop ends without a return (see the review note at the end)
 */
2881	static UChar32
2882	ucnv_MBCSSingleGetNextUChar(UConverterToUnicodeArgs *pArgs,
2883	UErrorCode *pErrorCode) {
2884	UConverter *cnv;
2885	const int32_t (*stateTable)[256];
2886	const uint8_t source, sourceLimit;
2887
2888	int32_t entry;
2889	uint8_t action;
2890
2891	/* set up the local pointers */
2892	cnv=pArgs->converter;
2893	source=(const uint8_t *)pArgs->source;
2894	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
2895	if((cnv->options&UCNV_OPTION_SWAP_LFNL0x10)!=0) {
2896	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
2897	} else {
2898	stateTable=cnv->sharedData->mbcs.stateTable;
2899	}
2900
2901	/* conversion loop */
2902	while(source<sourceLimit) {
2903	entry=stateTable[0][*source++];
2904	/* MBCS_ENTRY_IS_FINAL(entry) */
2905
2906	/* write back the updated pointer early so that we can return directly */
2907	pArgs->source=(const char *)source;
2908
2909	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)((entry)<(int32_t)0x80100000)) {
2910	/* output BMP code point */
2911	return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2912	}
2913
2914	/*
2915	* An if-else-if chain provides more reliable performance for
2916	* the most common cases compared to a switch.
2917	*/
2918	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf));
2919	if( action==MBCS_STATE_VALID_DIRECT_20 \|\|
2920	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)true)
2921	) {
2922	/* output supplementary code point */
2923	return (UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)((entry)&0xfffff)+0x10000);
2924	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
2925	if(UCNV_TO_U_USE_FALLBACK(cnv)true) {
2926	/* output BMP code point */
2927	return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
2928	}
2929	} else if(action==MBCS_STATE_UNASSIGNED) {
2930	/* just fall through */
2931	} else if(action==MBCS_STATE_ILLEGAL) {
2932	/* callback(illegal) */
2933	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
2934	} else {
2935	/* reserved, must never occur */
2936	continue;
2937	}
2938
2939	if(U_FAILURE(*pErrorCode)) {
2940	/* callback(illegal) */
2941	break;
2942	} else /* unassigned sequence */ {
2943	/* defer to the generic implementation */
2944	pArgs->source=(const char *)source-1;
2945	return UCNV_GET_NEXT_UCHAR_USE_TO_U-9;
2946	}
2947	}
2948
/*
 * NOTE(review): the break taken after U_ILLEGAL_CHAR_FOUND above also lands
 * here, so that error code is replaced by U_INDEX_OUTOFBOUNDS_ERROR before
 * returning. Confirm against the caller whether this is intended.
 */
2949	/* no output because of empty input or only state changes */
2950	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
2951	return 0xffff;
2952	}
2953
2954	/*
2955	* Version of _MBCSToUnicodeWithOffsets() optimized for single-character
2956	* conversion without offset handling.
2957	*
2958	* When a character does not have a mapping to Unicode, then we return to the
2959	* generic ucnv_getNextUChar() code for extension/GB 18030 and error/callback
2960	* handling.
2961	* We also defer to the generic code in other complicated cases and have them
2962	* ultimately handled by _MBCSToUnicodeWithOffsets() itself.
2963	*
2964	* All normal mappings and errors are handled here.
2965	*/
/*
 * Returns:
 *   - the code point (c>=0) when a mapping was found; pArgs->source is
 *     advanced past the bytes that were read
 *   - UCNV_GET_NEXT_UCHAR_USE_TO_U when cnv->preToULength>0, when the
 *     UCNV_HAS_SURROGATES bit is set in unicodeMask, or when a sequence
 *     reaches the "unassigned sequence" path (pArgs->source is then set to
 *     lastSource, the start of the current character)
 *   - the result of ucnv_MBCSSingleGetNextUChar() when countStates==1
 *   - 0xffff together with one of these codes in *pErrorCode:
 *       U_TRUNCATED_CHAR_FOUND    input ended inside a character; the pending
 *                                 bytes go to cnv->toUBytes/toULength
 *       U_ILLEGAL_CHAR_FOUND      illegal sequence; the reported bytes go to
 *                                 cnv->toUBytes/toULength
 *       U_INDEX_OUTOFBOUNDS_ERROR no output was produced
 *
 * Before a normal return, cnv->toUnicodeStatus is reset to 0 and cnv->mode
 * is set to the current state.
 */
2966	static UChar32 U_CALLCONV
2967	ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
2968	UErrorCode *pErrorCode) {
2969	UConverter *cnv;
2970	const uint8_t source, sourceLimit, *lastSource;
2971
2972	const int32_t (*stateTable)[256];
2973	const uint16_t *unicodeCodeUnits;
2974
2975	uint32_t offset;
2976	uint8_t state;
2977
2978	int32_t entry;
2979	UChar32 c;
2980	uint8_t action;
2981
2982	/* use optimized function if possible */
2983	cnv=pArgs->converter;
2984
2985	if(cnv->preToULength>0) {
2986	/* use the generic code in ucnv_getNextUChar() to continue with a partial match */
2987	return UCNV_GET_NEXT_UCHAR_USE_TO_U-9;
2988	}
2989
2990	if(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SURROGATES2) {
2991	/*
2992	* Using the generic ucnv_getNextUChar() code lets us deal correctly
2993	* with the rare case of a codepage that maps single surrogates
2994	* without adding the complexity to this already complicated function here.
2995	*/
2996	return UCNV_GET_NEXT_UCHAR_USE_TO_U-9;
2997	} else if(cnv->sharedData->mbcs.countStates==1) {
2998	return ucnv_MBCSSingleGetNextUChar(pArgs, pErrorCode);
2999	}
3000
3001	/* set up the local pointers */
3002	source=lastSource=(const uint8_t *)pArgs->source;
3003	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
3004
3005	if((cnv->options&UCNV_OPTION_SWAP_LFNL0x10)!=0) {
3006	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
3007	} else {
3008	stateTable=cnv->sharedData->mbcs.stateTable;
3009	}
3010	unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
3011
3012	/* get the converter state from UConverter */
3013	offset=cnv->toUnicodeStatus;
3014
3015	/*
3016	* if we are in the SBCS state for a DBCS-only converter,
3017	* then load the DBCS state from the MBCS data
3018	* (dbcsOnlyState==0 if it is not a DBCS-only converter)
3019	*/
3020	if((state=(uint8_t)(cnv->mode))==0) {
3021	state=cnv->sharedData->mbcs.dbcsOnlyState;
3022	}
3023
3024	/* conversion loop */
/* c stays negative until a mapping is found; the code after the loop tests c<0 */
3025	c=U_SENTINEL(-1);
3026	while(source<sourceLimit) {
3027	entry=stateTable[state][*source++];
3028	if(MBCS_ENTRY_IS_TRANSITION(entry)((entry)>=0)) {
3029	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)(((uint32_t)entry)>>24);
3030	offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry)((entry)&0xffffff);
3031
3032	/* optimization for 1/2-byte input and BMP output */
3033	if( source<sourceLimit &&
3034	MBCS_ENTRY_IS_FINAL(entry=stateTable[state][source])((entry=stateTable[state][source])<0) &&
3035	MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf)==MBCS_STATE_VALID_16 &&
3036	(c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry)])<0xfffe
3037	) {
3038	++source;
3039	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry)((((uint32_t)entry)>>24)&0x7f); /* typically 0 */
3040	/* output BMP code point */
3041	break;
3042	}
3043	} else {
3044	/* save the previous state for proper extension mapping with SI/SO-stateful converters */
3045	cnv->mode=state;
3046
3047	/* set the next state early so that we can reuse the entry variable */
3048	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry)((((uint32_t)entry)>>24)&0x7f); /* typically 0 */
3049
3050	/*
3051	* An if-else-if chain provides more reliable performance for
3052	* the most common cases compared to a switch.
3053	*/
3054	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf));
3055	if(action==MBCS_STATE_VALID_DIRECT_16) {
3056	/* output BMP code point */
3057	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
3058	break;
3059	} else if(action==MBCS_STATE_VALID_16) {
3060	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
3061	c=unicodeCodeUnits[offset];
3062	if(c<0xfffe) {
3063	/* output BMP code point */
3064	break;
3065	} else if(c==0xfffe) {
3066	if(UCNV_TO_U_USE_FALLBACK(cnv)true && (c=ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
3067	break;
3068	}
3069	} else {
3070	/* callback(illegal) */
3071	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
3072	}
3073	} else if(action==MBCS_STATE_VALID_16_PAIR) {
3074	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
3075	c=unicodeCodeUnits[offset++];
3076	if(c<0xd800) {
3077	/* output BMP code point below 0xd800 */
3078	break;
3079	} else if(UCNV_TO_U_USE_FALLBACK(cnv)true ? c<=0xdfff : c<=0xdbff) {
3080	/* output roundtrip or fallback supplementary code point */
3081	c=((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00);
3082	break;
3083	} else if(UCNV_TO_U_USE_FALLBACK(cnv)true ? (c&0xfffe)==0xe000 : c==0xe000) {
3084	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
3085	c=unicodeCodeUnits[offset];
3086	break;
3087	} else if(c==0xffff) {
3088	/* callback(illegal) */
3089	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
3090	}
3091	} else if(action==MBCS_STATE_VALID_DIRECT_20 \|\|
3092	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)true)
3093	) {
3094	/* output supplementary code point */
3095	c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)((entry)&0xfffff)+0x10000);
3096	break;
3097	} else if(action==MBCS_STATE_CHANGE_ONLY) {
3098	/*
3099	* This serves as a state change without any output.
3100	* It is useful for reading simple stateful encodings,
3101	* for example using just Shift-In/Shift-Out codes.
3102	* The 21 unused bits may later be used for more sophisticated
3103	* state transitions.
3104	*/
/*
 * NOTE(review): when dbcsOnlyState==0 nothing is done in this branch, so
 * control continues below the if-else-if chain and takes the
 * "unassigned sequence" path that defers to the generic implementation;
 * it does not reach the final else whose comment mentions "only state
 * change". Confirm that this is the intended handling.
 */
3105	if(cnv->sharedData->mbcs.dbcsOnlyState!=0) {
3106	/* SI/SO are illegal for DBCS-only conversion */
3107	state=(uint8_t)(cnv->mode); /* restore the previous state */
3108
3109	/* callback(illegal) */
3110	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
3111	}
3112	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
3113	if(UCNV_TO_U_USE_FALLBACK(cnv)true) {
3114	/* output BMP code point */
3115	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
3116	break;
3117	}
3118	} else if(action==MBCS_STATE_UNASSIGNED) {
3119	/* just fall through */
3120	} else if(action==MBCS_STATE_ILLEGAL) {
3121	/* callback(illegal) */
3122	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
3123	} else {
3124	/* reserved (must never occur), or only state change */
3125	offset=0;
3126	lastSource=source;
3127	continue;
3128	}
3129
3130	/* end of action codes: prepare for a new character */
3131	offset=0;
3132
3133	if(U_FAILURE(*pErrorCode)) {
3134	/* callback(illegal) */
3135	break;
3136	} else /* unassigned sequence */ {
3137	/* defer to the generic implementation */
3138	cnv->toUnicodeStatus=0;
3139	cnv->mode=state;
3140	pArgs->source=(const char *)lastSource;
3141	return UCNV_GET_NEXT_UCHAR_USE_TO_U-9;
3142	}
3143	}
3144	}
3145
/* c<0 here means the loop ended without producing a code point */
3146	if(c<0) {
3147	if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) {
3148	/* incomplete character byte sequence */
3149	uint8_t *bytes=cnv->toUBytes;
3150	cnv->toULength=(int8_t)(source-lastSource);
3151	do {
3152	bytes++=lastSource++;
3153	} while(lastSource<source);
3154	*pErrorCode=U_TRUNCATED_CHAR_FOUND;
3155	} else if(U_FAILURE(*pErrorCode)) {
3156	/* callback(illegal) */
3157	/*
3158	* Ticket 5691: consistent illegal sequences:
3159	* - We include at least the first byte in the illegal sequence.
3160	* - If any of the non-initial bytes could be the start of a character,
3161	* we stop the illegal sequence before the first one of those.
3162	*/
3163	UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
3164	uint8_t *bytes=cnv->toUBytes;
3165	bytes++=lastSource++; /* first byte */
3166	if(lastSource==source) {
3167	cnv->toULength=1;
3168	} else /* lastSource<source: multi-byte character */ {
3169	int8_t i;
3170	for(i=1;
3171	lastSource<source && !isSingleOrLead(stateTable, state, isDBCSOnly, *lastSource);
3172	++i
3173	) {
3174	bytes++=lastSource++;
3175	}
3176	cnv->toULength=i;
/* resume after the reported bytes; any later bytes are left unconsumed */
3177	source=lastSource;
3178	}
3179	} else {
3180	/* no output because of empty input or only state changes */
3181	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
3182	}
3183	c=0xffff;
3184	}
3185
3186	/* set the converter state back into UConverter, ready for a new character */
3187	cnv->toUnicodeStatus=0;
3188	cnv->mode=state;
3189
3190	/* write back the updated pointer */
3191	pArgs->source=(const char *)source;
3192	return c;
3193	}
3194
3195	#if 0
3196	/*
3197	* Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
3198	* Removal improves code coverage.
3199	*/
3200	/**
3201	* This version of ucnv_MBCSSimpleGetNextUChar() is optimized for single-byte, single-state codepages.
3202	* It does not handle the EBCDIC swaplfnl option (set in UConverter).
3203	* It does not handle conversion extensions (_extToU()).
3204	*/
/*
 * Parameters:
 *   sharedData   supplies mbcs.stateTable; only row 0 is read
 *   b            the single input byte
 *   useFallback  passed to TO_U_USE_FALLBACK() for the two fallback actions
 *
 * Returns:
 *   - the BMP or supplementary code point for a valid or accepted fallback entry
 *   - 0xfffe for MBCS_STATE_UNASSIGNED, and for a fallback entry when
 *     TO_U_USE_FALLBACK(useFallback) is false
 *   - 0xffff for MBCS_STATE_ILLEGAL and for every other action code
 */
3205	U_CFUNCextern "C" UChar32
3206	ucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
3207	uint8_t b, UBool useFallback) {
3208	int32_t entry;
3209	uint8_t action;
3210
3211	entry=sharedData->mbcs.stateTable[0][b];
3212	/* MBCS_ENTRY_IS_FINAL(entry) */
3213
3214	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)((entry)<(int32_t)0x80100000)) {
3215	/* output BMP code point */
3216	return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
3217	}
3218
3219	/*
3220	* An if-else-if chain provides more reliable performance for
3221	* the most common cases compared to a switch.
3222	*/
3223	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf));
3224	if(action==MBCS_STATE_VALID_DIRECT_20) {
3225	/* output supplementary code point */
3226	return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry)((entry)&0xfffff);
3227	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
3228	if(!TO_U_USE_FALLBACK(useFallback)true) {
3229	return 0xfffe;
3230	}
3231	/* output BMP code point */
3232	return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
3233	} else if(action==MBCS_STATE_FALLBACK_DIRECT_20) {
3234	if(!TO_U_USE_FALLBACK(useFallback)true) {
3235	return 0xfffe;
3236	}
3237	/* output supplementary code point */
3238	return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry)((entry)&0xfffff);
3239	} else if(action==MBCS_STATE_UNASSIGNED) {
3240	return 0xfffe;
3241	} else if(action==MBCS_STATE_ILLEGAL) {
3242	return 0xffff;
3243	} else {
3244	/* reserved, must never occur */
3245	return 0xffff;
3246	}
3247	}
3248	#endif
3249
3250	/*
3251	* This is a simple version of _MBCSGetNextUChar() that is used
3252	* by other converter implementations.
3253	* It only returns an "assigned" result if it consumes the entire input.
3254	* It does not use state from the converter, nor error codes.
3255	* It does not handle the EBCDIC swaplfnl option (set in UConverter).
3256	* It handles conversion extensions but not GB 18030.
3257	*
3258	* Return value:
3259	* U+fffe unassigned
3260	* U+ffff illegal
3261	* otherwise the Unicode code point
3262	*/
/*
 * Parameters:
 *   sharedData   supplies mbcs.stateTable, mbcs.unicodeCodeUnits,
 *                mbcs.dbcsOnlyState (used as the initial state) and
 *                mbcs.extIndexes (consulted only for an unassigned result)
 *   source       input bytes
 *   length       number of input bytes; length<=0 returns 0xffff
 *   useFallback  passed to TO_U_USE_FALLBACK() and to
 *                ucnv_extSimpleMatchToU()
 *
 * 0xffff is also returned for truncated input, for input with bytes left
 * over after the first complete character, and for MBCS_STATE_CHANGE_ONLY,
 * MBCS_STATE_ILLEGAL and reserved action codes.
 *
 * NOTE(review): the MBCS_STATE_VALID_16 and MBCS_STATE_VALID_16_PAIR branches
 * test UCNV_TO_U_USE_FALLBACK(cnv) although no `cnv` is declared in this
 * function, while the other branches test TO_U_USE_FALLBACK(useFallback).
 * Confirm which one is intended.
 */
3263	U_CFUNCextern "C" UChar32
3264	ucnv_MBCSSimpleGetNextUCharucnv_MBCSSimpleGetNextUChar_71(UConverterSharedData *sharedData,
3265	const char *source, int32_t length,
3266	UBool useFallback) {
3267	const int32_t (*stateTable)[256];
3268	const uint16_t *unicodeCodeUnits;
3269
3270	uint32_t offset;
3271	uint8_t state, action;
3272
3273	UChar32 c;
3274	int32_t i, entry;
3275
3276	if(length<=0) {
3277	/* no input at all: "illegal" */
3278	return 0xffff;
3279	}
3280
3281	#if 0
3282	/*
3283	* Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
3284	* TODO In future releases, verify that this function is never called for SBCS
3285	* conversions, i.e., that sharedData->mbcs.countStates==1 is still true.
3286	* Removal improves code coverage.
3287	*/
3288	/* use optimized function if possible */
3289	if(sharedData->mbcs.countStates==1) {
3290	if(length==1) {
3291	return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback);
3292	} else {
3293	return 0xffff; /* illegal: more than a single byte for an SBCS converter */
3294	}
3295	}
3296	#endif
3297
3298	/* set up the local pointers */
3299	stateTable=sharedData->mbcs.stateTable;
3300	unicodeCodeUnits=sharedData->mbcs.unicodeCodeUnits;
3301
3302	/* converter state */
3303	offset=0;
3304	state=sharedData->mbcs.dbcsOnlyState;
3305
3306	/* conversion loop */
/* the loop has no condition: it is left by break on a result or by return 0xffff */
3307	for(i=0;;) {
3308	entry=stateTable[state][(uint8_t)source[i++]];
3309	if(MBCS_ENTRY_IS_TRANSITION(entry)((entry)>=0)) {
3310	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)(((uint32_t)entry)>>24);
3311	offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry)((entry)&0xffffff);
3312
3313	if(i==length) {
3314	return 0xffff; /* truncated character */
3315	}
3316	} else {
3317	/*
3318	* An if-else-if chain provides more reliable performance for
3319	* the most common cases compared to a switch.
3320	*/
3321	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)((((uint32_t)entry)>>20)&0xf));
3322	if(action==MBCS_STATE_VALID_16) {
3323	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
3324	c=unicodeCodeUnits[offset];
3325	if(c!=0xfffe) {
3326	/* done */
3327	} else if(UCNV_TO_U_USE_FALLBACK(cnv)true) {
3328	c=ucnv_MBCSGetFallback(&sharedData->mbcs, offset);
3329	/* else done with 0xfffe */
3330	}
3331	break;
3332	} else if(action==MBCS_STATE_VALID_DIRECT_16) {
3333	/* output BMP code point */
3334	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
3335	break;
3336	} else if(action==MBCS_STATE_VALID_16_PAIR) {
3337	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
3338	c=unicodeCodeUnits[offset++];
3339	if(c<0xd800) {
3340	/* output BMP code point below 0xd800 */
3341	} else if(UCNV_TO_U_USE_FALLBACK(cnv)true ? c<=0xdfff : c<=0xdbff) {
3342	/* output roundtrip or fallback supplementary code point */
3343	c=(UChar32)(((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00));
3344	} else if(UCNV_TO_U_USE_FALLBACK(cnv)true ? (c&0xfffe)==0xe000 : c==0xe000) {
3345	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
3346	c=unicodeCodeUnits[offset];
3347	} else if(c==0xffff) {
3348	return 0xffff;
3349	} else {
3350	c=0xfffe;
3351	}
3352	break;
3353	} else if(action==MBCS_STATE_VALID_DIRECT_20) {
3354	/* output supplementary code point */
3355	c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry)((entry)&0xfffff);
3356	break;
3357	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
3358	if(!TO_U_USE_FALLBACK(useFallback)true) {
3359	c=0xfffe;
3360	break;
3361	}
3362	/* output BMP code point */
3363	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry)(uint16_t)(entry);
3364	break;
3365	} else if(action==MBCS_STATE_FALLBACK_DIRECT_20) {
3366	if(!TO_U_USE_FALLBACK(useFallback)true) {
3367	c=0xfffe;
3368	break;
3369	}
3370	/* output supplementary code point */
3371	c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry)((entry)&0xfffff);
3372	break;
3373	} else if(action==MBCS_STATE_UNASSIGNED) {
3374	c=0xfffe;
3375	break;
3376	}
3377
3378	/*
3379	* forbid MBCS_STATE_CHANGE_ONLY for this function,
3380	* and MBCS_STATE_ILLEGAL and reserved action codes
3381	*/
3382	return 0xffff;
3383	}
3384	}
3385
3386	if(i!=length) {
3387	/* illegal for this function: not all input consumed */
3388	return 0xffff;
3389	}
3390
3391	if(c==0xfffe) {
3392	/* try an extension mapping */
3393	const int32_t *cx=sharedData->mbcs.extIndexes;
3394	if(cx!=NULL__null) {
3395	return ucnv_extSimpleMatchToUucnv_extSimpleMatchToU_71(cx, source, length, useFallback);
3396	}
3397	}
3398
3399	return c;
3400	}
3401
3402	/* MBCS-from-Unicode conversion functions ----------------------------------- */
3403
3404	/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages. */
/*
 * Converts the UTF-16 code units in [pArgs->source, pArgs->sourceLimit) into
 * codepage bytes written at pArgs->target, emitting one or two bytes per
 * code point.
 *
 * pArgs      supplies the converter, the source and target ranges, the
 *            optional offsets array and the flush flag; source, target and
 *            offsets are written back before returning.
 * pErrorCode is set to U_ILLEGAL_CHAR_FOUND for an unmatched surrogate and to
 *            U_BUFFER_OVERFLOW_ERROR when the target runs out of room; any
 *            failure reported by _extFromU() also ends the loop.
 *
 * A code unit saved in cnv->fromUChar32 by an earlier call is resumed first.
 * On return cnv->fromUChar32 receives the current value of c, which is reset
 * to 0 after each character this function writes directly.
 */
3405	static void
3406	ucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
3407	UErrorCode *pErrorCode) {
3408	UConverter *cnv;
3409	const UChar source, sourceLimit;
3410	uint8_t *target;
3411	int32_t targetCapacity;
3412	int32_t *offsets;
3413
3414	const uint16_t *table;
3415	const uint16_t *mbcsIndex;
3416	const uint8_t *bytes;
3417
3418	UChar32 c;
3419
3420	int32_t sourceIndex, nextSourceIndex;
3421
3422	uint32_t stage2Entry;
3423	uint32_t asciiRoundtrips;
3424	uint32_t value;
3425	uint8_t unicodeMask;
3426
3427	/* use optimized function if possible */
3428	cnv=pArgs->converter;
3429	unicodeMask=cnv->sharedData->mbcs.unicodeMask;
3430
3431	/* set up the local pointers */
3432	source=pArgs->source;
3433	sourceLimit=pArgs->sourceLimit;
3434	target=(uint8_t *)pArgs->target;
3435	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
3436	offsets=pArgs->offsets;
3437
3438	table=cnv->sharedData->mbcs.fromUnicodeTable;
3439	mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
	/* pick the result table: the LF/NL-swapped one when that converter option is set */
3440	if((cnv->options&UCNV_OPTION_SWAP_LFNL0x10)!=0) {
3441	bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
3442	} else {
3443	bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
3444	}
3445	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
3446
3447	/* get the converter state from UConverter */
3448	c=cnv->fromUChar32;
3449
3450	/* sourceIndex=-1 if the current character began in the previous buffer */
3451	sourceIndex= c==0 ? 0 : -1;
3452	nextSourceIndex=0;
3453
3454	/* conversion loop */
	/* a non-zero c was carried over from a previous call: jump straight to the trail-unit check */
3455	if(c!=0 && targetCapacity>0) {
3456	goto getTrail;
3457	}
3458
3459	while(source<sourceLimit) {
3460	/*
3461	* This following test is to see if available input would overflow the output.
3462	* It does not catch output of more than one byte that
3463	* overflows as a result of a multi-byte character or callback output
3464	* from the last source character.
3465	* Therefore, those situations also test for overflows and will
3466	* then break the loop, too.
3467	*/
3468	if(targetCapacity>0) {
3469	/*
3470	* Get a correct Unicode code point:
3471	* a single UChar for a BMP code point or
3472	* a matched surrogate pair for a "supplementary code point".
3473	*/
3474	c=*source++;
3475	++nextSourceIndex;
	/* ASCII fast path: c is copied unchanged when its bit (indexed by c>>2) is set in asciiRoundtrips */
3476	if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)(((asciiRoundtrips) & (1<<((c)>>2)))!=0)) {
3477	*target++=(uint8_t)c;
3478	if(offsets!=NULL__null) {
3479	*offsets++=sourceIndex;
3480	sourceIndex=nextSourceIndex;
3481	}
3482	--targetCapacity;
3483	c=0;
3484	continue;
3485	}
3486	/*
3487	* utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
3488	* to avoid dealing with surrogates.
3489	* MBCS_FAST_MAX must be >=0xd7ff.
3490	*/
3491	if(c<=0xd7ff) {
3492	value=DBCS_RESULT_FROM_MOST_BMP(mbcsIndex, (const uint16_t )bytes, c)((const uint16_t )bytes)[ (mbcsIndex)[(c)>>6] +((c)& 0x3f) ];
3493	/* There are only roundtrips (!=0) and no-mapping (==0) entries. */
3494	if(value==0) {
3495	goto unassigned;
3496	}
3497	/* output the value */
3498	} else {
3499	/*
3500	* This also tests if the codepage maps single surrogates.
3501	* If it does, then surrogates are not paired but mapped separately.
3502	* Note that in this case unmatched surrogates are not detected.
3503	*/
3504	if(U16_IS_SURROGATE(c)(((c)&0xfffff800)==0xd800) && !(unicodeMask&UCNV_HAS_SURROGATES2)) {
3505	if(U16_IS_SURROGATE_LEAD(c)(((c)&0x400)==0)) {
3506	getTrail:
3507	if(source<sourceLimit) {
3508	/* test the following code unit */
3509	UChar trail=*source;
3510	if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
3511	++source;
3512	++nextSourceIndex;
3513	c=U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<< 10UL)+0xdc00-0x10000));
3514	if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY1)) {
3515	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
3516	/* callback(unassigned) */
3517	goto unassigned;
3518	}
3519	/* convert this supplementary code point */
3520	/* exit this condition tree */
3521	} else {
3522	/* this is an unmatched lead code unit (1st surrogate) */
3523	/* callback(illegal) */
3524	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
3525	break;
3526	}
3527	} else {
3528	/* no more input */
	/* c still holds the lead surrogate here and is saved to cnv->fromUChar32 after the loop */
3529	break;
3530	}
3531	} else {
3532	/* this is an unmatched trail code unit (2nd surrogate) */
3533	/* callback(illegal) */
3534	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
3535	break;
3536	}
3537	}
3538
3539	/* convert the Unicode code point in c into codepage bytes */
3540	stage2Entry=MBCS_STAGE_2_FROM_U(table, c)((const uint32_t *)(table))[ (table)[(c)>>10] +(((c)>> 4)&0x3f) ];
3541
3542	/* get the bytes and the length for the output */
3543	/* MBCS_OUTPUT_2 */
3544	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c)((uint16_t )(bytes))[16(uint32_t)(uint16_t)(stage2Entry)+(( c)&0xf)];
3545
3546	/* is this code point assigned, or do we use fallbacks? */
3547	if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)( ((stage2Entry) & ((uint32_t)1<< (16+((c)&0xf) ) )) !=0) \|\|
3548	(UCNV_FROM_U_USE_FALLBACK(cnv, c)(((cnv)->useFallback) \|\| ((uint32_t)((c)-0xe000)<0x1900 \|\| (uint32_t)((c)-0xf0000)<0x20000)) && value!=0))
3549	) {
3550	/*
3551	* We allow a 0 byte output if the "assigned" bit is set for this entry.
3552	* There is no way with this data structure for fallback output
3553	* to be a zero byte.
3554	*/
3555
3556	unassigned:
3557	/* try an extension mapping */
	/* pArgs->source records where source stood before the call so the consumed units can be counted afterwards */
3558	pArgs->source=source;
3559	c=_extFromU(cnv, cnv->sharedData,
3560	c, &source, sourceLimit,
3561	&target, target+targetCapacity,
3562	&offsets, sourceIndex,
3563	pArgs->flush,
3564	pErrorCode);
3565	nextSourceIndex+=(int32_t)(source-pArgs->source);
3566
3567	if(U_FAILURE(*pErrorCode)) {
3568	/* not mappable or buffer overflow */
3569	break;
3570	} else {
3571	/* a mapping was written to the target, continue */
3572
3573	/* recalculate the targetCapacity after an extension mapping */
3574	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
3575
3576	/* normal end of conversion: prepare for a new character */
3577	sourceIndex=nextSourceIndex;
3578	continue;
3579	}
3580	}
3581	}
3582
3583	/* write the output character bytes from value and length */
3584	/* from the first if in the loop we know that targetCapacity>0 */
3585	if(value<=0xff) {
3586	/* this is easy because we know that there is enough space */
3587	*target++=(uint8_t)value;
3588	if(offsets!=NULL__null) {
3589	*offsets++=sourceIndex;
3590	}
3591	--targetCapacity;
3592	} else /* length==2 */ {
	/* the lead byte always fits; the trail byte goes to the target if there is room, otherwise into cnv->charErrorBuffer */
3593	*target++=(uint8_t)(value>>8);
3594	if(2<=targetCapacity) {
3595	*target++=(uint8_t)value;
3596	if(offsets!=NULL__null) {
3597	*offsets++=sourceIndex;
3598	*offsets++=sourceIndex;
3599	}
3600	targetCapacity-=2;
3601	} else {
3602	if(offsets!=NULL__null) {
3603	*offsets++=sourceIndex;
3604	}
3605	cnv->charErrorBuffer[0]=(char)value;
3606	cnv->charErrorBufferLength=1;
3607
3608	/* target overflow */
	/* NOTE(review): this is the store the analyzer reports; the loop is left by the break below and targetCapacity is not read after the loop */
3609	targetCapacity=0;
	Value stored to 'targetCapacity' is never read
3610	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
3611	c=0;
3612	break;
3613	}
3614	}
3615
3616	/* normal end of conversion: prepare for a new character */
3617	c=0;
3618	sourceIndex=nextSourceIndex;
3619	continue;
3620	} else {
3621	/* target is full */
3622	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
3623	break;
3624	}
3625	}
3626
3627	/* set the converter state back into UConverter */
3628	cnv->fromUChar32=c;
3629
3630	/* write back the updated pointers */
3631	pArgs->source=source;
3632	pArgs->target=(char *)target;
3633	pArgs->offsets=offsets;
3634	}
3635
3636	/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages. */
/*
 * Converts the UTF-16 code units in [pArgs->source, pArgs->sourceLimit) into
 * codepage bytes written at pArgs->target, emitting one byte for every code
 * point that has a direct table result.
 *
 * pArgs      supplies the converter, the source and target ranges, the
 *            optional offsets array and the flush flag; source, target and
 *            offsets are written back before returning.
 * pErrorCode is set to U_ILLEGAL_CHAR_FOUND for an unmatched surrogate and to
 *            U_BUFFER_OVERFLOW_ERROR when the target is full; any failure
 *            reported by _extFromU() also ends the loop.
 *
 * A table result is accepted when it is >= minValue; everything else is
 * handed to _extFromU(). A code unit saved in cnv->fromUChar32 by an earlier
 * call is resumed first, and the final value of c is stored back on return.
 */
3637	static void
3638	ucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
3639	UErrorCode *pErrorCode) {
3640	UConverter *cnv;
3641	const UChar source, sourceLimit;
3642	uint8_t *target;
3643	int32_t targetCapacity;
3644	int32_t *offsets;
3645
3646	const uint16_t *table;
3647	const uint16_t *results;
3648
3649	UChar32 c;
3650
3651	int32_t sourceIndex, nextSourceIndex;
3652
3653	uint16_t value, minValue;
3654	UBool hasSupplementary;
3655
3656	/* set up the local pointers */
3657	cnv=pArgs->converter;
3658	source=pArgs->source;
3659	sourceLimit=pArgs->sourceLimit;
3660	target=(uint8_t *)pArgs->target;
3661	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
3662	offsets=pArgs->offsets;
3663
3664	table=cnv->sharedData->mbcs.fromUnicodeTable;
	/* pick the result table: the LF/NL-swapped one when that converter option is set */
3665	if((cnv->options&UCNV_OPTION_SWAP_LFNL0x10)!=0) {
3666	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
3667	} else {
3668	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
3669	}
3670
	/* minValue is the threshold a 16-bit table result must reach to be written to the target */
3671	if(cnv->useFallback) {
3672	/* use all roundtrip and fallback results */
3673	minValue=0x800;
3674	} else {
3675	/* use only roundtrips and fallbacks from private-use characters */
3676	minValue=0xc00;
3677	}
3678	hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY1);
3679
3680	/* get the converter state from UConverter */
3681	c=cnv->fromUChar32;
3682
3683	/* sourceIndex=-1 if the current character began in the previous buffer */
3684	sourceIndex= c==0 ? 0 : -1;
3685	nextSourceIndex=0;
3686
3687	/* conversion loop */
	/* a non-zero c was carried over from a previous call: jump straight to the trail-unit check */
3688	if(c!=0 && targetCapacity>0) {
3689	goto getTrail;
3690	}
3691
3692	while(source<sourceLimit) {
3693	/*
3694	* This following test is to see if available input would overflow the output.
3695	* It does not catch output of more than one byte that
3696	* overflows as a result of a multi-byte character or callback output
3697	* from the last source character.
3698	* Therefore, those situations also test for overflows and will
3699	* then break the loop, too.
3700	*/
3701	if(targetCapacity>0) {
3702	/*
3703	* Get a correct Unicode code point:
3704	* a single UChar for a BMP code point or
3705	* a matched surrogate pair for a "supplementary code point".
3706	*/
3707	c=*source++;
3708	++nextSourceIndex;
3709	if(U16_IS_SURROGATE(c)(((c)&0xfffff800)==0xd800)) {
3710	if(U16_IS_SURROGATE_LEAD(c)(((c)&0x400)==0)) {
3711	getTrail:
3712	if(source<sourceLimit) {
3713	/* test the following code unit */
3714	UChar trail=*source;
3715	if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
3716	++source;
3717	++nextSourceIndex;
3718	c=U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<< 10UL)+0xdc00-0x10000));
3719	if(!hasSupplementary) {
3720	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
3721	/* callback(unassigned) */
3722	goto unassigned;
3723	}
3724	/* convert this supplementary code point */
3725	/* exit this condition tree */
3726	} else {
3727	/* this is an unmatched lead code unit (1st surrogate) */
3728	/* callback(illegal) */
3729	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
3730	break;
3731	}
3732	} else {
3733	/* no more input */
	/* c still holds the lead surrogate here and is saved to cnv->fromUChar32 after the loop */
3734	break;
3735	}
3736	} else {
3737	/* this is an unmatched trail code unit (2nd surrogate) */
3738	/* callback(illegal) */
3739	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
3740	break;
3741	}
3742	}
3743
3744	/* convert the Unicode code point in c into codepage bytes */
3745	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c)(results)[ (table)[ (table)[(c)>>10] +(((c)>>4)& 0x3f) ] +((c)&0xf) ];
3746
3747	/* is this code point assigned, or do we use fallbacks? */
3748	if(value>=minValue) {
3749	/* assigned, write the output character bytes from value and length */
3750	/* length==1 */
3751	/* this is easy because we know that there is enough space */
	/* only the low 8 bits of the 16-bit table result are written */
3752	*target++=(uint8_t)value;
3753	if(offsets!=NULL__null) {
3754	*offsets++=sourceIndex;
3755	}
3756	--targetCapacity;
3757
3758	/* normal end of conversion: prepare for a new character */
3759	c=0;
3760	sourceIndex=nextSourceIndex;
3761	} else { /* unassigned */
3762	unassigned:
3763	/* try an extension mapping */
	/* pArgs->source records where source stood before the call so the consumed units can be counted afterwards */
3764	pArgs->source=source;
3765	c=_extFromU(cnv, cnv->sharedData,
3766	c, &source, sourceLimit,
3767	&target, target+targetCapacity,
3768	&offsets, sourceIndex,
3769	pArgs->flush,
3770	pErrorCode);
3771	nextSourceIndex+=(int32_t)(source-pArgs->source);
3772
3773	if(U_FAILURE(*pErrorCode)) {
3774	/* not mappable or buffer overflow */
3775	break;
3776	} else {
3777	/* a mapping was written to the target, continue */
3778
3779	/* recalculate the targetCapacity after an extension mapping */
3780	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
3781
3782	/* normal end of conversion: prepare for a new character */
3783	sourceIndex=nextSourceIndex;
3784	}
3785	}
3786	} else {
3787	/* target is full */
3788	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
3789	break;
3790	}
3791	}
3792
3793	/* set the converter state back into UConverter */
3794	cnv->fromUChar32=c;
3795
3796	/* write back the updated pointers */
3797	pArgs->source=source;
3798	pArgs->target=(char *)target;
3799	pArgs->offsets=offsets;
3800	}
3801
3802	/*
3803	* This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages
3804	* that map only to and from the BMP.
3805	* In addition to single-byte/state optimizations, the offset calculations
3806	* become much easier.
3807	* It would be possible to use the sbcsIndex for UTF-8-friendly tables,
3808	* but measurements have shown that this diminishes performance
3809	* in more cases than it improves it.
3810	* See SVN revision 21013 (2007-feb-06) for the last version with #if switches
3811	* for various MBCS and SBCS optimizations.
3812	*/
/*
 * Converts the UTF-16 code units in [pArgs->source, pArgs->sourceLimit) into
 * codepage bytes written at pArgs->target, one byte per code unit for every
 * unit that has a direct table result.
 *
 * pArgs      supplies the converter, the source and target ranges, the
 *            optional offsets array and the flush flag; source, target and
 *            offsets are written back before returning.
 * pErrorCode is set to U_ILLEGAL_CHAR_FOUND for an unmatched surrogate, to
 *            U_TRUNCATED_CHAR_FOUND when the input ends after a lead
 *            surrogate and pArgs->flush is set, and to
 *            U_BUFFER_OVERFLOW_ERROR when input remains but the target is
 *            full; any failure reported by _extFromU() also ends the loop.
 *
 * Offsets are not written per character inside the fast path. They are
 * filled in afterwards, in runs, for the code units consumed since
 * lastSource (the start of the buffer or the end of the last extension call).
 */
3813	static void
3814	ucnv_MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,
3815	UErrorCode *pErrorCode) {
3816	UConverter *cnv;
3817	const UChar source, sourceLimit, *lastSource;
3818	uint8_t *target;
3819	int32_t targetCapacity, length;
3820	int32_t *offsets;
3821
3822	const uint16_t *table;
3823	const uint16_t *results;
3824
3825	UChar32 c;
3826
3827	int32_t sourceIndex;
3828
3829	uint32_t asciiRoundtrips;
3830	uint16_t value, minValue;
3831
3832	/* set up the local pointers */
3833	cnv=pArgs->converter;
3834	source=pArgs->source;
3835	sourceLimit=pArgs->sourceLimit;
3836	target=(uint8_t *)pArgs->target;
3837	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
3838	offsets=pArgs->offsets;
3839
3840	table=cnv->sharedData->mbcs.fromUnicodeTable;
	/* pick the result table: the LF/NL-swapped one when that converter option is set */
3841	if((cnv->options&UCNV_OPTION_SWAP_LFNL0x10)!=0) {
3842	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
3843	} else {
3844	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
3845	}
3846	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
3847
	/* minValue is the threshold a 16-bit table result must reach to be written to the target */
3848	if(cnv->useFallback) {
3849	/* use all roundtrip and fallback results */
3850	minValue=0x800;
3851	} else {
3852	/* use only roundtrips and fallbacks from private-use characters */
3853	minValue=0xc00;
3854	}
3855
3856	/* get the converter state from UConverter */
3857	c=cnv->fromUChar32;
3858
3859	/* sourceIndex=-1 if the current character began in the previous buffer */
3860	sourceIndex= c==0 ? 0 : -1;
3861	lastSource=source;
3862
3863	/*
3864	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
3865	* for the minimum of the sourceLength and targetCapacity
3866	*/
3867	length=(int32_t)(sourceLimit-source);
3868	if(length<targetCapacity) {
3869	targetCapacity=length;
3870	}
3871
3872	/* conversion loop */
	/* a non-zero c was carried over from a previous call: jump straight to the trail-unit check */
3873	if(c!=0 && targetCapacity>0) {
3874	goto getTrail;
3875	}
3876
	/* NOTE(review): the listing shows the macro fused with its expansion 0, so this unrolled section looks compiled out - confirm */
3877	#if MBCS_UNROLL_SINGLE_FROM_BMP0
3878	/* unrolling makes it slower on Pentium III/Windows 2000?! */
3879	/* unroll the loop with the most common case */
3880	unrolled:
3881	if(targetCapacity>=4) {
3882	int32_t count, loops;
3883	uint16_t andedValues;
3884
3885	loops=count=targetCapacity>>2;
3886	do {
3887	c=*source++;
3888	andedValues=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c)(results)[ (table)[ (table)[(c)>>10] +(((c)>>4)& 0x3f) ] +((c)&0xf) ];
3889	*target++=(uint8_t)value;
3890	c=*source++;
3891	andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c)(results)[ (table)[ (table)[(c)>>10] +(((c)>>4)& 0x3f) ] +((c)&0xf) ];
3892	*target++=(uint8_t)value;
3893	c=*source++;
3894	andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c)(results)[ (table)[ (table)[(c)>>10] +(((c)>>4)& 0x3f) ] +((c)&0xf) ];
3895	*target++=(uint8_t)value;
3896	c=*source++;
3897	andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c)(results)[ (table)[ (table)[(c)>>10] +(((c)>>4)& 0x3f) ] +((c)&0xf) ];
3898	*target++=(uint8_t)value;
3899
3900	/* were all 4 entries really valid? */
3901	if(andedValues<minValue) {
3902	/* no, return to the first of these 4 */
3903	source-=4;
3904	target-=4;
3905	break;
3906	}
3907	} while(--count>0);
3908	count=loops-count;
3909	targetCapacity-=4*count;
3910
3911	if(offsets!=NULL__null) {
3912	lastSource+=4*count;
3913	while(count>0) {
3914	*offsets++=sourceIndex++;
3915	*offsets++=sourceIndex++;
3916	*offsets++=sourceIndex++;
3917	*offsets++=sourceIndex++;
3918	--count;
3919	}
3920	}
3921
3922	c=0;
3923	}
3924	#endif
3925
	/* targetCapacity was clamped to the remaining source length above, so this single counter bounds both the reads and the writes */
3926	while(targetCapacity>0) {
3927	/*
3928	* Get a correct Unicode code point:
3929	* a single UChar for a BMP code point or
3930	* a matched surrogate pair for a "supplementary code point".
3931	*/
3932	c=*source++;
3933	/*
3934	* Do not immediately check for single surrogates:
3935	* Assume that they are unassigned and check for them in that case.
3936	* This speeds up the conversion of assigned characters.
3937	*/
3938	/* convert the Unicode code point in c into codepage bytes */
3939	if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)(((asciiRoundtrips) & (1<<((c)>>2)))!=0)) {
3940	*target++=(uint8_t)c;
3941	--targetCapacity;
3942	c=0;
3943	continue;
3944	}
3945	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c)(results)[ (table)[ (table)[(c)>>10] +(((c)>>4)& 0x3f) ] +((c)&0xf) ];
3946	/* is this code point assigned, or do we use fallbacks? */
3947	if(value>=minValue) {
3948	/* assigned, write the output character bytes from value and length */
3949	/* length==1 */
3950	/* this is easy because we know that there is enough space */
3951	*target++=(uint8_t)value;
3952	--targetCapacity;
3953
3954	/* normal end of conversion: prepare for a new character */
3955	c=0;
3956	continue;
3957	} else if(!U16_IS_SURROGATE(c)(((c)&0xfffff800)==0xd800)) {
3958	/* normal, unassigned BMP character */
3959	} else if(U16_IS_SURROGATE_LEAD(c)(((c)&0x400)==0)) {
3960	getTrail:
3961	if(source<sourceLimit) {
3962	/* test the following code unit */
3963	UChar trail=*source;
3964	if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
3965	++source;
3966	c=U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<< 10UL)+0xdc00-0x10000));
3967	/* this codepage does not map supplementary code points */
3968	/* callback(unassigned) */
3969	} else {
3970	/* this is an unmatched lead code unit (1st surrogate) */
3971	/* callback(illegal) */
3972	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
3973	break;
3974	}
3975	} else {
3976	/* no more input */
	/* with flush set there will be no later buffer to supply the trail unit, so report truncation */
3977	if (pArgs->flush) {
3978	*pErrorCode=U_TRUNCATED_CHAR_FOUND;
3979	}
3980	break;
3981	}
3982	} else {
3983	/* this is an unmatched trail code unit (2nd surrogate) */
3984	/* callback(illegal) */
3985	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
3986	break;
3987	}
3988
3989	/* c does not have a mapping */
3990
3991	/* get the number of code units for c to correctly advance sourceIndex */
3992	length=U16_LENGTH(c)((uint32_t)(c)<=0xffff ? 1 : 2);
3993
3994	/* set offsets since the start or the last extension */
3995	if(offsets!=NULL__null) {
3996	int32_t count=(int32_t)(source-lastSource);
3997
3998	/* do not set the offset for this character */
3999	count-=length;
4000
4001	while(count>0) {
4002	*offsets++=sourceIndex++;
4003	--count;
4004	}
4005	/* offsets and sourceIndex are now set for the current character */
4006	}
4007
4008	/* try an extension mapping */
	/* unlike the fast path, the extension call is given the real target limit, not the clamped targetCapacity */
4009	lastSource=source;
4010	c=_extFromU(cnv, cnv->sharedData,
4011	c, &source, sourceLimit,
4012	&target, (const uint8_t *)(pArgs->targetLimit),
4013	&offsets, sourceIndex,
4014	pArgs->flush,
4015	pErrorCode);
4016	sourceIndex+=length+(int32_t)(source-lastSource);
4017	lastSource=source;
4018
4019	if(U_FAILURE(*pErrorCode)) {
4020	/* not mappable or buffer overflow */
4021	break;
4022	} else {
4023	/* a mapping was written to the target, continue */
4024
4025	/* recalculate the targetCapacity after an extension mapping */
4026	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
4027	length=(int32_t)(sourceLimit-source);
4028	if(length<targetCapacity) {
4029	targetCapacity=length;
4030	}
4031	}
4032
4033	#if MBCS_UNROLL_SINGLE_FROM_BMP0
4034	/* unrolling makes it slower on Pentium III/Windows 2000?! */
4035	goto unrolled;
4036	#endif
4037	}
4038
	/*
	 * NOTE(review): as rendered here U_SUCCESS() receives pErrorCode itself rather
	 * than the dereferenced value, and the cast reads (uint8_t ). Pointer declarators
	 * elsewhere in this listing also lack their '*', so this is presumably an
	 * extraction artifact - confirm against the real source file.
	 */
4039	if(U_SUCCESS(pErrorCode) && source<sourceLimit && target>=(uint8_t )pArgs->targetLimit) {
4040	/* target is full */
4041	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
4042	}
4043
4044	/* set offsets since the start or the last callback */
4045	if(offsets!=NULL__null) {
4046	size_t count=source-lastSource;
4047	if (count > 0 && *pErrorCode == U_TRUNCATED_CHAR_FOUND) {
4048	/*
4049	Caller gave us a partial supplementary character,
4050	which this function couldn't convert in any case.
4051	The callback will handle the offset.
4052	*/
4053	count--;
4054	}
4055	while(count>0) {
4056	*offsets++=sourceIndex++;
4057	--count;
4058	}
4059	}
4060
4061	/* set the converter state back into UConverter */
4062	cnv->fromUChar32=c;
4063
4064	/* write back the updated pointers */
4065	pArgs->source=source;
4066	pArgs->target=(char *)target;
4067	pArgs->offsets=offsets;
4068	}
4069
4070	U_CFUNCextern "C" void
4071	ucnv_MBCSFromUnicodeWithOffsetsucnv_MBCSFromUnicodeWithOffsets_71(UConverterFromUnicodeArgs *pArgs,
4072	UErrorCode *pErrorCode) {
4073	UConverter *cnv;
4074	const UChar source, sourceLimit;
4075	uint8_t *target;
4076	int32_t targetCapacity;
4077	int32_t *offsets;
4078
4079	const uint16_t *table;
4080	const uint16_t *mbcsIndex;
4081	const uint8_t p, bytes;
4082	uint8_t outputType;
4083
4084	UChar32 c;
4085
4086	int32_t prevSourceIndex, sourceIndex, nextSourceIndex;
4087
4088	uint32_t stage2Entry;
4089	uint32_t asciiRoundtrips;
4090	uint32_t value;
4091	/* Shift-In and Shift-Out byte sequences differ by encoding scheme. */
4092	uint8_t siBytes[2] = {0, 0};
4093	uint8_t soBytes[2] = {0, 0};
4094	uint8_t siLength, soLength;
4095	int32_t length = 0, prevLength;
4096	uint8_t unicodeMask;
4097
4098	cnv=pArgs->converter;
4099
4100	if(cnv->preFromUFirstCP>=0) {
4101	/*
4102	* pass sourceIndex=-1 because we continue from an earlier buffer
4103	* in the future, this may change with continuous offsets
4104	*/
4105	ucnv_extContinueMatchFromUucnv_extContinueMatchFromU_71(cnv, pArgs, -1, pErrorCode);
4106
4107	if(U_FAILURE(*pErrorCode) \|\| cnv->preFromULength<0) {
4108	return;
4109	}
4110	}
4111
4112	/* use optimized function if possible */
4113	outputType=cnv->sharedData->mbcs.outputType;
4114	unicodeMask=cnv->sharedData->mbcs.unicodeMask;
4115	if(outputType==MBCS_OUTPUT_1 && !(unicodeMask&UCNV_HAS_SURROGATES2)) {
4116	if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY1)) {
4117	ucnv_MBCSSingleFromBMPWithOffsets(pArgs, pErrorCode);
4118	} else {
4119	ucnv_MBCSSingleFromUnicodeWithOffsets(pArgs, pErrorCode);
4120	}
4121	return;
4122	} else if(outputType==MBCS_OUTPUT_2 && cnv->sharedData->mbcs.utf8Friendly) {
4123	ucnv_MBCSDoubleFromUnicodeWithOffsets(pArgs, pErrorCode);
4124	return;
4125	}
4126
4127	/* set up the local pointers */
4128	source=pArgs->source;
4129	sourceLimit=pArgs->sourceLimit;
4130	target=(uint8_t *)pArgs->target;
4131	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
4132	offsets=pArgs->offsets;
4133
4134	table=cnv->sharedData->mbcs.fromUnicodeTable;
4135	if(cnv->sharedData->mbcs.utf8Friendly) {
4136	mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
4137	} else {
4138	mbcsIndex=NULL__null;
4139	}
4140	if((cnv->options&UCNV_OPTION_SWAP_LFNL0x10)!=0) {
4141	bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
4142	} else {
4143	bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
4144	}
4145	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
4146
4147	/* get the converter state from UConverter */
4148	c=cnv->fromUChar32;
4149
4150	if(outputType==MBCS_OUTPUT_2_SISO) {
4151	prevLength=cnv->fromUnicodeStatus;
4152	if(prevLength==0) {
4153	/* set the real value */
4154	prevLength=1;
4155	}
4156	} else {
4157	/* prevent fromUnicodeStatus from being set to something non-0 */
4158	prevLength=0;
4159	}
4160
4161	/* sourceIndex=-1 if the current character began in the previous buffer */
4162	prevSourceIndex=-1;
4163	sourceIndex= c==0 ? 0 : -1;
4164	nextSourceIndex=0;
4165
4166	/* Get the SI/SO character for the converter */
4167	siLength = static_cast<uint8_t>(getSISOBytes(SI, cnv->options, siBytes));
4168	soLength = static_cast<uint8_t>(getSISOBytes(SO, cnv->options, soBytes));
4169
4170	/* conversion loop */
4171	/*
4172	* This is another piece of ugly code:
4173	* A goto into the loop if the converter state contains a first surrogate
4174	* from the previous function call.
4175	* It saves me to check in each loop iteration a check of if(c==0)
4176	* and duplicating the trail-surrogate-handling code in the else
4177	* branch of that check.
4178	* I could not find any other way to get around this other than
4179	* using a function call for the conversion and callback, which would
4180	* be even more inefficient.
4181	*
4182	* Markus Scherer 2000-jul-19
4183	*/
4184	if(c!=0 && targetCapacity>0) {
4185	goto getTrail;
4186	}
4187
4188	while(source<sourceLimit) {
4189	/*
4190	* This following test is to see if available input would overflow the output.
4191	* It does not catch output of more than one byte that
4192	* overflows as a result of a multi-byte character or callback output
4193	* from the last source character.
4194	* Therefore, those situations also test for overflows and will
4195	* then break the loop, too.
4196	*/
4197	if(targetCapacity>0) {
4198	/*
4199	* Get a correct Unicode code point:
4200	* a single UChar for a BMP code point or
4201	* a matched surrogate pair for a "supplementary code point".
4202	*/
4203	c=*source++;
4204	++nextSourceIndex;
4205	if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)(((asciiRoundtrips) & (1<<((c)>>2)))!=0)) {
4206	*target++=(uint8_t)c;
4207	if(offsets!=NULL__null) {
4208	*offsets++=sourceIndex;
4209	prevSourceIndex=sourceIndex;
4210	sourceIndex=nextSourceIndex;
4211	}
4212	--targetCapacity;
4213	c=0;
4214	continue;
4215	}
4216	/*
4217	* utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
4218	* to avoid dealing with surrogates.
4219	* MBCS_FAST_MAX must be >=0xd7ff.
4220	*/
4221	if(c<=0xd7ff && mbcsIndex!=NULL__null) {
4222	value=mbcsIndex[c>>6];
4223
4224	/* get the bytes and the length for the output (copied from below and adapted for utf8Friendly data) */
4225	/* There are only roundtrips (!=0) and no-mapping (==0) entries. */
4226	switch(outputType) {
4227	case MBCS_OUTPUT_2:
4228	value=((const uint16_t *)bytes)[value +(c&0x3f)];
4229	if(value<=0xff) {
4230	if(value==0) {
4231	goto unassigned;
4232	} else {
4233	length=1;
4234	}
4235	} else {
4236	length=2;
4237	}
4238	break;
4239	case MBCS_OUTPUT_2_SISO:
4240	/* 1/2-byte stateful with Shift-In/Shift-Out */
4241	/*
4242	* Save the old state in the converter object
4243	* right here, then change the local prevLength state variable if necessary.
4244	* Then, if this character turns out to be unassigned or a fallback that
4245	* is not taken, the callback code must not save the new state in the converter
4246	* because the new state is for a character that is not output.
4247	* However, the callback must still restore the state from the converter
4248	* in case the callback function changed it for its output.
4249	*/
4250	cnv->fromUnicodeStatus=prevLength; /* save the old state */
4251	value=((const uint16_t *)bytes)[value +(c&0x3f)];
4252	if(value<=0xff) {
4253	if(value==0) {
4254	goto unassigned;
4255	} else if(prevLength<=1) {
4256	length=1;
4257	} else {
4258	/* change from double-byte mode to single-byte */
4259	if (siLength == 1) {
4260	value\|=(uint32_t)siBytes[0]<<8;
4261	length = 2;
4262	} else if (siLength == 2) {
4263	value\|=(uint32_t)siBytes[1]<<8;
4264	value\|=(uint32_t)siBytes[0]<<16;
4265	length = 3;
4266	}
4267	prevLength=1;
4268	}
4269	} else {
4270	if(prevLength==2) {
4271	length=2;
4272	} else {
4273	/* change from single-byte mode to double-byte */
4274	if (soLength == 1) {
4275	value\|=(uint32_t)soBytes[0]<<16;
4276	length = 3;
4277	} else if (soLength == 2) {
4278	value\|=(uint32_t)soBytes[1]<<16;
4279	value\|=(uint32_t)soBytes[0]<<24;
4280	length = 4;
4281	}
4282	prevLength=2;
4283	}
4284	}
4285	break;
4286	case MBCS_OUTPUT_DBCS_ONLY:
4287	/* table with single-byte results, but only DBCS mappings used */
4288	value=((const uint16_t *)bytes)[value +(c&0x3f)];
4289	if(value<=0xff) {
4290	/* no mapping or SBCS result, not taken for DBCS-only */
4291	goto unassigned;
4292	} else {
4293	length=2;
4294	}
4295	break;
4296	case MBCS_OUTPUT_3:
4297	p=bytes+(value+(c&0x3f))*3;
4298	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
4299	if(value<=0xff) {
4300	if(value==0) {
4301	goto unassigned;
4302	} else {
4303	length=1;
4304	}
4305	} else if(value<=0xffff) {
4306	length=2;
4307	} else {
4308	length=3;
4309	}
4310	break;
4311	case MBCS_OUTPUT_4:
4312	value=((const uint32_t *)bytes)[value +(c&0x3f)];
4313	if(value<=0xff) {
4314	if(value==0) {
4315	goto unassigned;
4316	} else {
4317	length=1;
4318	}
4319	} else if(value<=0xffff) {
4320	length=2;
4321	} else if(value<=0xffffff) {
4322	length=3;
4323	} else {
4324	length=4;
4325	}
4326	break;
4327	case MBCS_OUTPUT_3_EUC:
4328	value=((const uint16_t *)bytes)[value +(c&0x3f)];
4329	/* EUC 16-bit fixed-length representation */
4330	if(value<=0xff) {
4331	if(value==0) {
4332	goto unassigned;
4333	} else {
4334	length=1;
4335	}
4336	} else if((value&0x8000)==0) {
4337	value\|=0x8e8000;
4338	length=3;
4339	} else if((value&0x80)==0) {
4340	value\|=0x8f0080;
4341	length=3;
4342	} else {
4343	length=2;
4344	}
4345	break;
4346	case MBCS_OUTPUT_4_EUC:
4347	p=bytes+(value+(c&0x3f))*3;
4348	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
4349	/* EUC 16-bit fixed-length representation applied to the first two bytes */
4350	if(value<=0xff) {
4351	if(value==0) {
4352	goto unassigned;
4353	} else {
4354	length=1;
4355	}
4356	} else if(value<=0xffff) {
4357	length=2;
4358	} else if((value&0x800000)==0) {
4359	value\|=0x8e800000;
4360	length=4;
4361	} else if((value&0x8000)==0) {
4362	value\|=0x8f008000;
4363	length=4;
4364	} else {
4365	length=3;
4366	}
4367	break;
4368	default:
4369	/* must not occur */
4370	/*
4371	* To avoid compiler warnings that value & length may be
4372	* used without having been initialized, we set them here.
4373	* In reality, this is unreachable code.
4374	* Not having a default branch also causes warnings with
4375	* some compilers.
4376	*/
4377	value=0;
4378	length=0;
4379	break;
4380	}
4381	/* output the value */
4382	} else {
4383	/*
4384	* This also tests if the codepage maps single surrogates.
4385	* If it does, then surrogates are not paired but mapped separately.
4386	* Note that in this case unmatched surrogates are not detected.
4387	*/
4388	if(U16_IS_SURROGATE(c)(((c)&0xfffff800)==0xd800) && !(unicodeMask&UCNV_HAS_SURROGATES2)) {
4389	if(U16_IS_SURROGATE_LEAD(c)(((c)&0x400)==0)) {
4390	getTrail:
4391	if(source<sourceLimit) {
4392	/* test the following code unit */
4393	UChar trail=*source;
4394	if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
4395	++source;
4396	++nextSourceIndex;
4397	c=U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<< 10UL)+0xdc00-0x10000));
4398	if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY1)) {
4399	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
4400	cnv->fromUnicodeStatus=prevLength; /* save the old state */
4401	/* callback(unassigned) */
4402	goto unassigned;
4403	}
4404	/* convert this supplementary code point */
4405	/* exit this condition tree */
4406	} else {
4407	/* this is an unmatched lead code unit (1st surrogate) */
4408	/* callback(illegal) */
4409	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
4410	break;
4411	}
4412	} else {
4413	/* no more input */
4414	break;
4415	}
4416	} else {
4417	/* this is an unmatched trail code unit (2nd surrogate) */
4418	/* callback(illegal) */
4419	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
4420	break;
4421	}
4422	}
4423
4424	/* convert the Unicode code point in c into codepage bytes */
4425
4426	/*
4427	* The basic lookup is a triple-stage compact array (trie) lookup.
4428	* For details see the beginning of this file.
4429	*
4430	* Single-byte codepages are handled with a different data structure
4431	* by _MBCSSingle... functions.
4432	*
4433	* The result consists of a 32-bit value from stage 2 and
4434	* a pointer to as many bytes as are stored per character.
4435	* The pointer points to the character's bytes in stage 3.
4436	* Bits 15..0 of the stage 2 entry contain the stage 3 index
4437	* for that pointer, while bits 31..16 are flags for which of
4438	* the 16 characters in the block are roundtrip-assigned.
4439	*
4440	* For 2-byte and 4-byte codepages, the bytes are stored as uint16_t
4441	* respectively as uint32_t, in the platform encoding.
4442	* For 3-byte codepages, the bytes are always stored in big-endian order.
4443	*
4444	* For EUC encodings that use only either 0x8e or 0x8f as the first
4445	* byte of their longest byte sequences, the first two bytes in
4446	* this third stage indicate with their 7th bits whether these bytes
4447	* are to be written directly or actually need to be preceded by
4448	* one of the two Single-Shift codes. With this, the third stage
4449	* stores one byte fewer per character than the actual maximum length of
4450	* EUC byte sequences.
4451	*
4452	* Other than that, leading zero bytes are removed and the other
4453	* bytes output. A single zero byte may be output if the "assigned"
4454	* bit in stage 2 was on.
4455	* The data structure does not support zero byte output as a fallback,
4456	* and also does not allow output of leading zeros.
4457	*/
4458	stage2Entry=MBCS_STAGE_2_FROM_U(table, c)((const uint32_t *)(table))[ (table)[(c)>>10] +(((c)>> 4)&0x3f) ];
4459
4460	/* get the bytes and the length for the output */
4461	switch(outputType) {
4462	case MBCS_OUTPUT_2:
4463	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c)((uint16_t )(bytes))[16(uint32_t)(uint16_t)(stage2Entry)+(( c)&0xf)];
4464	if(value<=0xff) {
4465	length=1;
4466	} else {
4467	length=2;
4468	}
4469	break;
4470	case MBCS_OUTPUT_2_SISO:
4471	/* 1/2-byte stateful with Shift-In/Shift-Out */
4472	/*
4473	* Save the old state in the converter object
4474	* right here, then change the local prevLength state variable if necessary.
4475	* Then, if this character turns out to be unassigned or a fallback that
4476	* is not taken, the callback code must not save the new state in the converter
4477	* because the new state is for a character that is not output.
4478	* However, the callback must still restore the state from the converter
4479	* in case the callback function changed it for its output.
4480	*/
4481	cnv->fromUnicodeStatus=prevLength; /* save the old state */
4482	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c)((uint16_t )(bytes))[16(uint32_t)(uint16_t)(stage2Entry)+(( c)&0xf)];
4483	if(value<=0xff) {
4484	if(value==0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)( ((stage2Entry) & ((uint32_t)1<< (16+((c)&0xf) ) )) !=0)==0) {
4485	/* no mapping, leave value==0 */
4486	length=0;
4487	} else if(prevLength<=1) {
4488	length=1;
4489	} else {
4490	/* change from double-byte mode to single-byte */
4491	if (siLength == 1) {
4492	value\|=(uint32_t)siBytes[0]<<8;
4493	length = 2;
4494	} else if (siLength == 2) {
4495	value\|=(uint32_t)siBytes[1]<<8;
4496	value\|=(uint32_t)siBytes[0]<<16;
4497	length = 3;
4498	}
4499	prevLength=1;
4500	}
4501	} else {
4502	if(prevLength==2) {
4503	length=2;
4504	} else {
4505	/* change from single-byte mode to double-byte */
4506	if (soLength == 1) {
4507	value\|=(uint32_t)soBytes[0]<<16;
4508	length = 3;
4509	} else if (soLength == 2) {
4510	value\|=(uint32_t)soBytes[1]<<16;
4511	value\|=(uint32_t)soBytes[0]<<24;
4512	length = 4;
4513	}
4514	prevLength=2;
4515	}
4516	}
4517	break;
4518	case MBCS_OUTPUT_DBCS_ONLY:
4519	/* table with single-byte results, but only DBCS mappings used */
4520	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c)((uint16_t )(bytes))[16(uint32_t)(uint16_t)(stage2Entry)+(( c)&0xf)];
4521	if(value<=0xff) {
4522	/* no mapping or SBCS result, not taken for DBCS-only */
4523	value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
4524	length=0;
4525	} else {
4526	length=2;
4527	}
4528	break;
4529	case MBCS_OUTPUT_3:
4530	p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c)((bytes)+(16(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf)) 3);
4531	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
4532	if(value<=0xff) {
4533	length=1;
4534	} else if(value<=0xffff) {
4535	length=2;
4536	} else {
4537	length=3;
4538	}
4539	break;
4540	case MBCS_OUTPUT_4:
4541	value=MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c)((uint32_t )(bytes))[16(uint32_t)(uint16_t)(stage2Entry)+(( c)&0xf)];
4542	if(value<=0xff) {
4543	length=1;
4544	} else if(value<=0xffff) {
4545	length=2;
4546	} else if(value<=0xffffff) {
4547	length=3;
4548	} else {
4549	length=4;
4550	}
4551	break;
4552	case MBCS_OUTPUT_3_EUC:
4553	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c)((uint16_t )(bytes))[16(uint32_t)(uint16_t)(stage2Entry)+(( c)&0xf)];
4554	/* EUC 16-bit fixed-length representation */
4555	if(value<=0xff) {
4556	length=1;
4557	} else if((value&0x8000)==0) {
4558	value\|=0x8e8000;
4559	length=3;
4560	} else if((value&0x80)==0) {
4561	value\|=0x8f0080;
4562	length=3;
4563	} else {
4564	length=2;
4565	}
4566	break;
4567	case MBCS_OUTPUT_4_EUC:
4568	p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c)((bytes)+(16(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf)) 3);
4569	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
4570	/* EUC 16-bit fixed-length representation applied to the first two bytes */
4571	if(value<=0xff) {
4572	length=1;
4573	} else if(value<=0xffff) {
4574	length=2;
4575	} else if((value&0x800000)==0) {
4576	value\|=0x8e800000;
4577	length=4;
4578	} else if((value&0x8000)==0) {
4579	value\|=0x8f008000;
4580	length=4;
4581	} else {
4582	length=3;
4583	}
4584	break;
4585	default:
4586	/* must not occur */
4587	/*
4588	* To avoid compiler warnings that value & length may be
4589	* used without having been initialized, we set them here.
4590	* In reality, this is unreachable code.
4591	* Not having a default branch also causes warnings with
4592	* some compilers.
4593	*/
4594	value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
4595	length=0;
4596	break;
4597	}
4598
4599	/* is this code point assigned, or do we use fallbacks? */
4600	if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)( ((stage2Entry) & ((uint32_t)1<< (16+((c)&0xf) ) )) !=0)!=0 \|\|
4601	(UCNV_FROM_U_USE_FALLBACK(cnv, c)(((cnv)->useFallback) \|\| ((uint32_t)((c)-0xe000)<0x1900 \|\| (uint32_t)((c)-0xf0000)<0x20000)) && value!=0))
4602	) {
4603	/*
4604	* We allow a 0 byte output if the "assigned" bit is set for this entry.
4605	* There is no way with this data structure for fallback output
4606	* to be a zero byte.
4607	*/
4608
4609	unassigned:
4610	/* try an extension mapping */
4611	pArgs->source=source;
4612	c=_extFromU(cnv, cnv->sharedData,
4613	c, &source, sourceLimit,
4614	&target, target+targetCapacity,
4615	&offsets, sourceIndex,
4616	pArgs->flush,
4617	pErrorCode);
4618	nextSourceIndex+=(int32_t)(source-pArgs->source);
4619	prevLength=cnv->fromUnicodeStatus; /* restore SISO state */
4620
4621	if(U_FAILURE(*pErrorCode)) {
4622	/* not mappable or buffer overflow */
4623	break;
4624	} else {
4625	/* a mapping was written to the target, continue */
4626
4627	/* recalculate the targetCapacity after an extension mapping */
4628	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
4629
4630	/* normal end of conversion: prepare for a new character */
4631	if(offsets!=NULL__null) {
4632	prevSourceIndex=sourceIndex;
4633	sourceIndex=nextSourceIndex;
4634	}
4635	continue;
4636	}
4637	}
4638	}
4639
4640	/* write the output character bytes from value and length */
4641	/* from the first if in the loop we know that targetCapacity>0 */
4642	if(length<=targetCapacity) {
4643	if(offsets==NULL__null) {
4644	switch(length) {
4645	/* each branch falls through to the next one */
4646	case 4:
4647	*target++=(uint8_t)(value>>24);
4648	U_FALLTHROUGH[[clang::fallthrough]];
4649	case 3:
4650	*target++=(uint8_t)(value>>16);
4651	U_FALLTHROUGH[[clang::fallthrough]];
4652	case 2:
4653	*target++=(uint8_t)(value>>8);
4654	U_FALLTHROUGH[[clang::fallthrough]];
4655	case 1:
4656	*target++=(uint8_t)value;
4657	U_FALLTHROUGH[[clang::fallthrough]];
4658	default:
4659	/* will never occur */
4660	break;
4661	}
4662	} else {
4663	switch(length) {
4664	/* each branch falls through to the next one */
4665	case 4:
4666	*target++=(uint8_t)(value>>24);
4667	*offsets++=sourceIndex;
4668	U_FALLTHROUGH[[clang::fallthrough]];
4669	case 3:
4670	*target++=(uint8_t)(value>>16);
4671	*offsets++=sourceIndex;
4672	U_FALLTHROUGH[[clang::fallthrough]];
4673	case 2:
4674	*target++=(uint8_t)(value>>8);
4675	*offsets++=sourceIndex;
4676	U_FALLTHROUGH[[clang::fallthrough]];
4677	case 1:
4678	*target++=(uint8_t)value;
4679	*offsets++=sourceIndex;
4680	U_FALLTHROUGH[[clang::fallthrough]];
4681	default:
4682	/* will never occur */
4683	break;
4684	}
4685	}
4686	targetCapacity-=length;
4687	} else {
4688	uint8_t *charErrorBuffer;
4689
4690	/*
4691	* We actually do this backwards here:
4692	* In order to save an intermediate variable, we output
4693	* first to the overflow buffer what does not fit into the
4694	* regular target.
4695	*/
4696	/* we know that 1<=targetCapacity<length<=4 */
4697	length-=targetCapacity;
4698	charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
4699	switch(length) {
4700	/* each branch falls through to the next one */
4701	case 3:
4702	*charErrorBuffer++=(uint8_t)(value>>16);
4703	U_FALLTHROUGH[[clang::fallthrough]];
4704	case 2:
4705	*charErrorBuffer++=(uint8_t)(value>>8);
4706	U_FALLTHROUGH[[clang::fallthrough]];
4707	case 1:
4708	*charErrorBuffer=(uint8_t)value;
4709	U_FALLTHROUGH[[clang::fallthrough]];
4710	default:
4711	/* will never occur */
4712	break;
4713	}
4714	cnv->charErrorBufferLength=(int8_t)length;
4715
4716	/* now output what fits into the regular target */
4717	value>>=8length; / length was reduced by targetCapacity */
4718	switch(targetCapacity) {
4719	/* each branch falls through to the next one */
4720	case 3:
4721	*target++=(uint8_t)(value>>16);
4722	if(offsets!=NULL__null) {
4723	*offsets++=sourceIndex;
4724	}
4725	U_FALLTHROUGH[[clang::fallthrough]];
4726	case 2:
4727	*target++=(uint8_t)(value>>8);
4728	if(offsets!=NULL__null) {
4729	*offsets++=sourceIndex;
4730	}
4731	U_FALLTHROUGH[[clang::fallthrough]];
4732	case 1:
4733	*target++=(uint8_t)value;
4734	if(offsets!=NULL__null) {
4735	*offsets++=sourceIndex;
4736	}
4737	U_FALLTHROUGH[[clang::fallthrough]];
4738	default:
4739	/* will never occur */
4740	break;
4741	}
4742
4743	/* target overflow */
4744	targetCapacity=0;
4745	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
4746	c=0;
4747	break;
4748	}
4749
4750	/* normal end of conversion: prepare for a new character */
4751	c=0;
4752	if(offsets!=NULL__null) {
4753	prevSourceIndex=sourceIndex;
4754	sourceIndex=nextSourceIndex;
4755	}
4756	continue;
4757	} else {
4758	/* target is full */
4759	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
4760	break;
4761	}
4762	}
4763
4764	/*
4765	* the end of the input stream and detection of truncated input
4766	* are handled by the framework, but for EBCDIC_STATEFUL conversion
4767	* we need to emit an SI at the very end
4768	*
4769	* conditions:
4770	* successful
4771	* EBCDIC_STATEFUL in DBCS mode
4772	* end of input and no truncated input
4773	*/
4774	if( U_SUCCESS(*pErrorCode) &&
4775	outputType==MBCS_OUTPUT_2_SISO && prevLength==2 &&
4776	pArgs->flush && source>=sourceLimit && c==0
4777	) {
4778	/* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
4779	if(targetCapacity>0) {
4780	*target++=(uint8_t)siBytes[0];
4781	if (siLength == 2) {
4782	if (targetCapacity<2) {
4783	cnv->charErrorBuffer[0]=(uint8_t)siBytes[1];
4784	cnv->charErrorBufferLength=1;
4785	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
4786	} else {
4787	*target++=(uint8_t)siBytes[1];
4788	}
4789	}
4790	if(offsets!=NULL__null) {
4791	/* set the last source character's index (sourceIndex points at sourceLimit now) */
4792	*offsets++=prevSourceIndex;
4793	}
4794	} else {
4795	/* target is full */
4796	cnv->charErrorBuffer[0]=(uint8_t)siBytes[0];
4797	if (siLength == 2) {
4798	cnv->charErrorBuffer[1]=(uint8_t)siBytes[1];
4799	}
4800	cnv->charErrorBufferLength=siLength;
4801	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
4802	}
4803	prevLength=1; /* we switched into SBCS */
4804	}
4805
4806	/* set the converter state back into UConverter */
4807	cnv->fromUChar32=c;
4808	cnv->fromUnicodeStatus=prevLength;
4809
4810	/* write back the updated pointers */
4811	pArgs->source=source;
4812	pArgs->target=(char *)target;
4813	pArgs->offsets=offsets;
4814	}
4815
4816	/*
4817	* This is another simple conversion function for internal use by other
4818	* conversion implementations.
4819	* It does not use the converter state nor call callbacks.
4820	* It does not handle the EBCDIC swaplfnl option (set in UConverter).
4821	* It handles conversion extensions but not GB 18030.
4822	*
4823	* It converts one single Unicode code point into codepage bytes, encoded
4824	* as one 32-bit value. The function returns the number of bytes in *pValue:
4825	* 1..4 the number of bytes in *pValue
4826	* 0 unassigned (*pValue undefined)
4827	* -1 illegal (currently not used, *pValue undefined)
4828	*
4829	* *pValue will contain the resulting bytes with the last byte in bits 7..0,
4830	* the second to last byte in bits 15..8, etc.
4831	* Currently, the function assumes but does not check that 0<=c<=0x10ffff.
*
* Parameters:
* sharedData  converter data; the function reads mbcs.unicodeMask,
*             mbcs.fromUnicodeTable, mbcs.outputType, mbcs.fromUnicodeBytes
*             and mbcs.extIndexes from it
* c           the code point to convert
* pValue      receives the result bytes when the return value is positive
* useFallback whether fallback mappings are accepted in addition to
*             roundtrip mappings
*
* Note on -1: with the #if 0 sections below compiled out, only
* MBCS_OUTPUT_1 and MBCS_OUTPUT_2 are handled by the table lookup; any other
* outputType reaches the default branch of the switch and returns -1.
*
* If the table lookup does not produce an accepted mapping, the function
* falls through to the extension lookup (when mbcs.extIndexes is not NULL)
* and returns the absolute value of its result.
4832	*/
4833	U_CFUNCextern "C" int32_t
4834	ucnv_MBCSFromUChar32ucnv_MBCSFromUChar32_71(UConverterSharedData *sharedData,
4835	UChar32 c, uint32_t *pValue,
4836	UBool useFallback) {
4837	const int32_t *cx; /* extension indexes (mbcs.extIndexes), NULL if there are none */
4838	const uint16_t *table; /* from-Unicode lookup table (mbcs.fromUnicodeTable) */
4839	#if 0
4840	/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
4841	const uint8_t *p;
4842	#endif
4843	uint32_t stage2Entry;
4844	uint32_t value;
4845	int32_t length;
4846
4847	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
/* (a supplementary c for such a codepage skips the table and goes straight to the extension lookup) */
4848	if(c<=0xffff \|\| (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY1)) {
4849	table=sharedData->mbcs.fromUnicodeTable;
4850
4851	/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
4852	if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) {
4853	value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t )sharedData->mbcs.fromUnicodeBytes, c)((uint16_t )sharedData->mbcs.fromUnicodeBytes)[ (table)[ ( table)[(c)>>10] +(((c)>>4)&0x3f) ] +((c)& 0xf) ];
4854	/* is this code point assigned, or do we use fallbacks? */
/* the threshold is lower (0x800) when fallbacks are accepted than when they are not (0xc00) */
4855	if(useFallback ? value>=0x800 : value>=0xc00) {
4856	*pValue=value&0xff; /* the low 8 bits are the single output byte */
4857	return 1;
4858	}
4859	} else /* outputType!=MBCS_OUTPUT_1 */ {
4860	stage2Entry=MBCS_STAGE_2_FROM_U(table, c)((const uint32_t *)(table))[ (table)[(c)>>10] +(((c)>> 4)&0x3f) ];
4861
4862	/* get the bytes and the length for the output */
4863	switch(sharedData->mbcs.outputType) {
4864	case MBCS_OUTPUT_2:
4865	value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c)((uint16_t )(sharedData->mbcs.fromUnicodeBytes))[16(uint32_t )(uint16_t)(stage2Entry)+((c)&0xf)];
4866	if(value<=0xff) {
4867	length=1;
4868	} else {
4869	length=2;
4870	}
4871	break;
4872	#if 0
4873	/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
4874	case MBCS_OUTPUT_DBCS_ONLY:
4875	/* table with single-byte results, but only DBCS mappings used */
4876	value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c)((uint16_t )(sharedData->mbcs.fromUnicodeBytes))[16(uint32_t )(uint16_t)(stage2Entry)+((c)&0xf)];
4877	if(value<=0xff) {
4878	/* no mapping or SBCS result, not taken for DBCS-only */
4879	value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
4880	length=0;
4881	} else {
4882	length=2;
4883	}
4884	break;
4885	case MBCS_OUTPUT_3:
4886	p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c)((sharedData->mbcs.fromUnicodeBytes)+(16(uint32_t)(uint16_t )(stage2Entry)+((c)&0xf))3);
4887	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
4888	if(value<=0xff) {
4889	length=1;
4890	} else if(value<=0xffff) {
4891	length=2;
4892	} else {
4893	length=3;
4894	}
4895	break;
4896	case MBCS_OUTPUT_4:
4897	value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c)((uint32_t )(sharedData->mbcs.fromUnicodeBytes))[16(uint32_t )(uint16_t)(stage2Entry)+((c)&0xf)];
4898	if(value<=0xff) {
4899	length=1;
4900	} else if(value<=0xffff) {
4901	length=2;
4902	} else if(value<=0xffffff) {
4903	length=3;
4904	} else {
4905	length=4;
4906	}
4907	break;
4908	case MBCS_OUTPUT_3_EUC:
4909	value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c)((uint16_t )(sharedData->mbcs.fromUnicodeBytes))[16(uint32_t )(uint16_t)(stage2Entry)+((c)&0xf)];
4910	/* EUC 16-bit fixed-length representation */
4911	if(value<=0xff) {
4912	length=1;
4913	} else if((value&0x8000)==0) {
4914	value\|=0x8e8000;
4915	length=3;
4916	} else if((value&0x80)==0) {
4917	value\|=0x8f0080;
4918	length=3;
4919	} else {
4920	length=2;
4921	}
4922	break;
4923	case MBCS_OUTPUT_4_EUC:
4924	p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c)((sharedData->mbcs.fromUnicodeBytes)+(16(uint32_t)(uint16_t )(stage2Entry)+((c)&0xf))3);
4925	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
4926	/* EUC 16-bit fixed-length representation applied to the first two bytes */
4927	if(value<=0xff) {
4928	length=1;
4929	} else if(value<=0xffff) {
4930	length=2;
4931	} else if((value&0x800000)==0) {
4932	value\|=0x8e800000;
4933	length=4;
4934	} else if((value&0x8000)==0) {
4935	value\|=0x8f008000;
4936	length=4;
4937	} else {
4938	length=3;
4939	}
4940	break;
4941	#endif
4942	default:
4943	/* must not occur */
/* (reached for every outputType other than MBCS_OUTPUT_2 while the cases above are compiled out) */
4944	return -1;
4945	}
4946
4947	/* is this code point assigned, or do we use fallbacks? */
4948	if( MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)( ((stage2Entry) & ((uint32_t)1<< (16+((c)&0xf) ) )) !=0) \|\|
4949	(FROM_U_USE_FALLBACK(useFallback, c)((useFallback) \|\| ((uint32_t)((c)-0xe000)<0x1900 \|\| (uint32_t )((c)-0xf0000)<0x20000)) && value!=0)
4950	) {
4951	/*
4952	* We allow a 0 byte output if the "assigned" bit is set for this entry.
4953	* There is no way with this data structure for fallback output
4954	* to be a zero byte.
4955	*/
4956	/* assigned */
4957	*pValue=value;
4958	return length;
4959	}
4960	}
4961	}
4962
/* no accepted mapping from the base table: try the extension table, if there is one */
4963	cx=sharedData->mbcs.extIndexes;
4964	if(cx!=NULL__null) {
4965	length=ucnv_extSimpleMatchFromUucnv_extSimpleMatchFromU_71(cx, c, pValue, useFallback);
4966	return length>=0 ? length : -length; /* return abs(length); */
4967	}
4968
4969	/* unassigned */
4970	return 0;
4971	}
4972
4973
4974	#if 0
4975	/*
4976	* This function has been moved to ucnv2022.c for inlining.
4977	* This implementation is here only for documentation purposes
4978	*/
4979
4980	/**
4981	* This version of ucnv_MBCSFromUChar32() is optimized for single-byte codepages.
4982	* It does not handle the EBCDIC swaplfnl option (set in UConverter).
4983	* It does not handle conversion extensions (_extFromU()).
4984	*
4985	* It returns the codepage byte for the code point, or -1 if it is unassigned.
*
* The lookup reads sharedData->mbcs.fromUnicodeTable and
* sharedData->mbcs.fromUnicodeBytes. The useFallback flag selects the
* acceptance threshold for the looked-up result (0x800 with fallbacks,
* 0xc00 without). The returned byte is the low 8 bits of that result.
4986	*/
4987	U_CFUNCextern "C" int32_t
4988	ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData,
4989	UChar32 c,
4990	UBool useFallback) {
4991	const uint16_t *table;
4992	int32_t value;
4993
4994	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
4995	if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY1)) {
4996	return -1; /* no table entries to look at for this code point */
4997	}
4998
4999	/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
5000	table=sharedData->mbcs.fromUnicodeTable;
5001
5002	/* get the byte for the output */
5003	value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t )sharedData->mbcs.fromUnicodeBytes, c)((uint16_t )sharedData->mbcs.fromUnicodeBytes)[ (table)[ ( table)[(c)>>10] +(((c)>>4)&0x3f) ] +((c)& 0xf) ];
5004	/* is this code point assigned, or do we use fallbacks? */
5005	if(useFallback ? value>=0x800 : value>=0xc00) {
5006	return value&0xff; /* strip the flag bits, keep the output byte */
5007	} else {
5008	return -1;
5009	}
5010	}
5011	#endif
5012
5013	/* MBCS-from-UTF-8 conversion functions ------------------------------------- */
5014
5015	/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
/*
 * The table is indexed by the number of bytes in the sequence.
 * ucnv_SBCSFromUTF8() accumulates the raw bytes with c=(c<<6)+b and then
 * subtracts utf8_offsets[toULength] to obtain the code point.
 * Each entry is the value that the lead-byte and trail-byte marker bits
 * contribute to that raw sum, for example 0x3080 == (0xC0<<6)+0x80 and
 * 0xE2080 == (((0xE0<<6)+0x80)<<6)+0x80.
 */
5016	static const UChar32
5017	utf8_offsets[5]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
5018
/*
 * Converts UTF-8 input (pToUArgs->source up to pToUArgs->sourceLimit)
 * directly into single-byte codepage output (pFromUArgs->target up to
 * pFromUArgs->targetLimit). Characters mapped through the table produce
 * exactly one output byte each; unmapped characters are passed to _extFromU().
 *
 * A partial UTF-8 sequence left over from a previous call is picked up from
 * the UTF-8 converter (toULength, mode, toUnicodeStatus). A partial sequence
 * at the end of this input is stored back there (toUBytes, toUnicodeStatus,
 * toULength, mode).
 *
 * Results reported through *pErrorCode:
 *   U_BUFFER_OVERFLOW_ERROR  the target is full while input remains
 *   U_ILLEGAL_CHAR_FOUND     illegal UTF-8 byte sequence; its bytes are
 *                            stored in utf8->toUBytes
 *   U_USING_DEFAULT_WARNING  partial extension match; the function returns
 *                            so that conversion can revert to pivoting
 *   any failure set by _extFromU() (not mappable or buffer overflow)
 *
 * On every return path pToUArgs->source and pFromUArgs->target are updated.
 */
5019	static void U_CALLCONV
5020	ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
5021	UConverterToUnicodeArgs *pToUArgs,
5022	UErrorCode *pErrorCode) {
/*
 * NOTE(review): utf8, cnv, source, sourceLimit, table and sbcsIndex are all
 * used as pointers below (->, unary * and [] are applied to them), so the
 * pointer declarators appear to have been lost from the three declaration
 * lines in this listing. Confirm against the original source file.
 */
5023	UConverter utf8, cnv;
5024	const uint8_t source, sourceLimit;
5025	uint8_t *target;
5026	int32_t targetCapacity;
5027
5028	const uint16_t table, sbcsIndex;
5029	const uint16_t *results;
5030
5031	int8_t oldToULength, toULength, toULimit;
5032
5033	UChar32 c;
5034	uint8_t b, t1, t2;
5035
5036	uint32_t asciiRoundtrips;
5037	uint16_t value, minValue = 0;
5038	UBool hasSupplementary;
5039
5040	/* set up the local pointers */
5041	utf8=pToUArgs->converter;
5042	cnv=pFromUArgs->converter;
5043	source=(uint8_t *)pToUArgs->source;
5044	sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
5045	target=(uint8_t *)pFromUArgs->target;
5046	targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
5047
5048	table=cnv->sharedData->mbcs.fromUnicodeTable;
5049	sbcsIndex=cnv->sharedData->mbcs.sbcsIndex;
/* pick the result table that matches the swap-LF/NL option of the converter */
5050	if((cnv->options&UCNV_OPTION_SWAP_LFNL0x10)!=0) {
5051	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
5052	} else {
5053	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
5054	}
5055	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
5056
5057	if(cnv->useFallback) {
5058	/* use all roundtrip and fallback results */
5059	minValue=0x800;
5060	} else {
5061	/* use only roundtrips and fallbacks from private-use characters */
5062	minValue=0xc00;
5063	}
5064	hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY1);
5065
5066	/* get the converter state from the UTF-8 UConverter */
5067	if(utf8->toULength > 0) {
5068	toULength=oldToULength=utf8->toULength;
5069	toULimit=(int8_t)utf8->mode;
5070	c=(UChar32)utf8->toUnicodeStatus;
5071	} else {
5072	toULength=oldToULength=toULimit=0;
5073	c = 0;
5074	}
5075
5076	// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
5077	// If the buffer ends with a truncated 2- or 3-byte sequence,
5078	// then we reduce the sourceLimit to before that,
5079	// and collect the remaining bytes after the conversion loop.
5080	{
5081	// Do not go back into the bytes that will be read for finishing a partial
5082	// sequence from the previous buffer.
5083	int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
5084	if(length>0) {
5085	uint8_t b1=*(sourceLimit-1);
5086	if(U8_IS_SINGLE(b1)(((b1)&0x80)==0)) {
5087	// common ASCII character
5088	} else if(U8_IS_TRAIL(b1)((int8_t)(b1)<-0x40) && length>=2) {
5089	uint8_t b2=*(sourceLimit-2);
5090	if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)("\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" [(b2)&0xf]&(1<<((uint8_t)(b1)>>5)))) {
5091	// truncated 3-byte sequence
5092	sourceLimit-=2;
5093	}
5094	} else if(0xc2<=b1 && b1<0xf0) {
5095	// truncated 2- or 3-byte sequence
5096	--sourceLimit;
5097	}
5098	}
5099	}
5100
/* c!=0 here means state was carried over from utf8->toUnicodeStatus: resume collecting that sequence */
5101	if(c!=0 && targetCapacity>0) {
5102	utf8->toUnicodeStatus=0;
5103	utf8->toULength=0;
5104	goto moreBytes;
5105	/*
5106	* Note: We could avoid the goto by duplicating some of the moreBytes
5107	* code, but only up to the point of collecting a complete UTF-8
5108	* sequence; then recurse for the toUBytes[toULength]
5109	* and then continue with normal conversion.
5110	*
5111	* If so, move this code to just after initializing the minimum
5112	* set of local variables for reading the UTF-8 input
5113	* (utf8, source, target, limits but not cnv, table, minValue, etc.).
5114	*
5115	* Potential advantages:
5116	* - avoid the goto
5117	* - oldToULength could become a local variable in just those code blocks
5118	* that deal with buffer boundaries
5119	* - possibly faster if the goto prevents some compiler optimizations
5120	* (this would need measuring to confirm)
5121	* Disadvantage:
5122	* - code duplication
5123	*/
5124	}
5125
5126	/* conversion loop */
5127	while(source<sourceLimit) {
5128	if(targetCapacity>0) {
5129	b=*source++;
5130	if(U8_IS_SINGLE(b)(((b)&0x80)==0)) {
5131	/* convert ASCII */
5132	if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)(((asciiRoundtrips) & (1<<((b)>>2)))!=0)) {
5133	*target++=(uint8_t)b;
5134	--targetCapacity;
5135	continue;
5136	} else {
5137	c=b;
5138	value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, 0, c)(results)[ (sbcsIndex)[0] +(c) ];
5139	}
5140	} else {
5141	if(b<0xe0) {
5142	if( /* handle U+0080..U+07FF inline */
5143	b>=0xc2 &&
5144	(t1=(uint8_t)(*source-0x80)) <= 0x3f
5145	) {
5146	c=b&0x1f;
5147	++source;
5148	value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t1)(results)[ (sbcsIndex)[c] +(t1) ];
5149	if(value>=minValue) {
5150	*target++=(uint8_t)value;
5151	--targetCapacity;
5152	continue;
5153	} else {
/* not accepted: assemble the full code point for the extension lookup below */
5154	c=(c<<6)\|t1;
5155	}
5156	} else {
5157	c=-1; /* not handled inline; taken by the c<0 path below */
5158	}
5159	} else if(b==0xe0) {
5160	if( /* handle U+0800..U+0FFF inline */
5161	(t1=(uint8_t)(source[0]-0x80)) <= 0x3f && t1 >= 0x20 &&
5162	(t2=(uint8_t)(source[1]-0x80)) <= 0x3f
5163	) {
5164	c=t1;
5165	source+=2;
5166	value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t2)(results)[ (sbcsIndex)[c] +(t2) ];
5167	if(value>=minValue) {
5168	*target++=(uint8_t)value;
5169	--targetCapacity;
5170	continue;
5171	} else {
5172	c=(c<<6)\|t2;
5173	}
5174	} else {
5175	c=-1;
5176	}
5177	} else {
5178	c=-1;
5179	}
5180
5181	if(c<0) {
5182	/* handle "complicated" and error cases, and continuing partial characters */
5183	oldToULength=0;
5184	toULength=1;
5185	toULimit=U8_COUNT_BYTES_NON_ASCII(b)(((uint8_t)((b)-0xc2)<=0x32) ? ((uint8_t)(b)>=0xe0)+((uint8_t )(b)>=0xf0)+2 : 0);
5186	c=b;
/* also entered by the goto before the loop, with c, toULength and toULimit restored from the UTF-8 converter */
5187	moreBytes:
5188	while(toULength<toULimit) {
5189	/*
5190	* The sourceLimit may have been adjusted before the conversion loop
5191	* to stop before a truncated sequence.
5192	* Here we need to use the real limit in case we have two truncated
5193	* sequences at the end.
5194	* See ticket #7492.
5195	*/
5196	if(source<(uint8_t *)pToUArgs->sourceLimit) {
5197	b=*source;
5198	if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
5199	++source;
5200	++toULength;
5201	c=(c<<6)+b; /* raw accumulation; the marker bits are removed via utf8_offsets below */
5202	} else {
5203	break; /* sequence too short, stop with toULength<toULimit */
5204	}
5205	} else {
5206	/* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
5207	source-=(toULength-oldToULength);
5208	while(oldToULength<toULength) {
5209	utf8->toUBytes[oldToULength++]=*source++;
5210	}
5211	utf8->toUnicodeStatus=c;
5212	utf8->toULength=toULength;
5213	utf8->mode=toULimit;
5214	pToUArgs->source=(char *)source;
5215	pFromUArgs->target=(char *)target;
5216	return;
5217	}
5218	}
5219
5220	if(toULength==toULimit) {
5221	c-=utf8_offsets[toULength];
5222	if(toULength<=3) { /* BMP */
5223	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c)(results)[ (table)[ (table)[(c)>>10] +(((c)>>4)& 0x3f) ] +((c)&0xf) ];
5224	} else {
5225	/* supplementary code point */
5226	if(!hasSupplementary) {
5227	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
5228	value=0;
5229	} else {
5230	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c)(results)[ (table)[ (table)[(c)>>10] +(((c)>>4)& 0x3f) ] +((c)&0xf) ];
5231	}
5232	}
5233	} else {
5234	/* error handling: illegal UTF-8 byte sequence */
5235	source-=(toULength-oldToULength);
5236	while(oldToULength<toULength) {
5237	utf8->toUBytes[oldToULength++]=*source++;
5238	}
5239	utf8->toULength=toULength;
5240	pToUArgs->source=(char *)source;
5241	pFromUArgs->target=(char *)target;
5242	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
5243	return;
5244	}
5245	}
5246	}
5247
5248	if(value>=minValue) {
5249	/* output the mapping for c */
5250	*target++=(uint8_t)value;
5251	--targetCapacity;
5252	} else {
5253	/* value<minValue means c is unassigned (unmappable) */
5254	/*
5255	* Try an extension mapping.
5256	* Pass in no source because we don't have UTF-16 input.
5257	* If we have a partial match on c, we will return and revert
5258	* to UTF-8->UTF-16->charset conversion.
5259	*/
5260	static const UChar nul=0;
5261	const UChar *noSource=&nul;
5262	c=_extFromU(cnv, cnv->sharedData,
5263	c, &noSource, noSource,
5264	&target, target+targetCapacity,
5265	NULL__null, -1,
5266	pFromUArgs->flush,
5267	pErrorCode);
5268
5269	if(U_FAILURE(*pErrorCode)) {
5270	/* not mappable or buffer overflow */
5271	cnv->fromUChar32=c;
5272	break;
5273	} else if(cnv->preFromUFirstCP>=0) {
5274	/*
5275	* Partial match, return and revert to pivoting.
5276	* In normal from-UTF-16 conversion, we would just continue
5277	* but then exit the loop because the extension match would
5278	* have consumed the source.
5279	*/
5280	*pErrorCode=U_USING_DEFAULT_WARNING;
5281	break;
5282	} else {
5283	/* a mapping was written to the target, continue */
5284
5285	/* recalculate the targetCapacity after an extension mapping */
5286	targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
5287	}
5288	}
5289	} else {
5290	/* target is full */
5291	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
5292	break;
5293	}
5294	}
5295
5296	/*
5297	* The sourceLimit may have been adjusted before the conversion loop
5298	* to stop before a truncated sequence.
5299	* If so, then collect the truncated sequence now.
5300	*/
/* note: the condition below also resets sourceLimit to the real limit from pToUArgs */
5301	if(U_SUCCESS(*pErrorCode) &&
5302	cnv->preFromUFirstCP<0 &&
5303	source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
5304	c=utf8->toUBytes[0]=b=*source++;
5305	toULength=1;
5306	toULimit=U8_COUNT_BYTES(b)((((b)&0x80)==0) ? 1 : (((uint8_t)((b)-0xc2)<=0x32) ? ( (uint8_t)(b)>=0xe0)+((uint8_t)(b)>=0xf0)+2 : 0));
5307	while(source<sourceLimit) {
5308	utf8->toUBytes[toULength++]=b=*source++;
5309	c=(c<<6)+b;
5310	}
5311	utf8->toUnicodeStatus=c;
5312	utf8->toULength=toULength;
5313	utf8->mode=toULimit;
5314	}
5315
5316	/* write back the updated pointers */
5317	pToUArgs->source=(char *)source;
5318	pFromUArgs->target=(char *)target;
5319	}
5320
/*
 * Converts UTF-8 input bytes directly into DBCS output (one or two bytes per
 * character), operating on the UTF-8 bytes instead of UTF-16 code units.
 *
 * Input is read from pToUArgs->source up to pToUArgs->sourceLimit; output is
 * written at pFromUArgs->target up to pFromUArgs->targetLimit. Both pointers
 * are written back on every exit path of this function.
 *
 * State carried between calls is kept in the UTF-8 converter:
 *   toUBytes[]/toULength  - bytes of an incomplete sequence collected so far
 *   toUnicodeStatus       - the accumulator c for that incomplete sequence
 *   mode                  - the expected total length (toULimit) of the sequence
 *
 * *pErrorCode is set by this function to:
 *   U_BUFFER_OVERFLOW_ERROR  - the target is full; when only the second byte of
 *                              a two-byte result does not fit, that byte is
 *                              stored in cnv->charErrorBuffer
 *   U_ILLEGAL_CHAR_FOUND     - ill-formed UTF-8; the offending bytes are copied
 *                              into utf8->toUBytes
 *   U_USING_DEFAULT_WARNING  - the extension lookup reported a partial match
 *                              (cnv->preFromUFirstCP>=0)
 * A failure code left in *pErrorCode by _extFromU() stops the loop with
 * cnv->fromUChar32 set to the code point returned by that call.
 *
 * NOTE(review): this listing appears to come from an annotated analyzer
 * report: lines carry embedded line numbers, macro invocations are followed
 * directly by their expansions, and some '*' / '|' characters look lost or
 * escaped. Confirm every statement against the upstream file before reuse.
 */
5321	static void U_CALLCONV
5322	ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
5323	UConverterToUnicodeArgs *pToUArgs,
5324	UErrorCode *pErrorCode) {
// NOTE(review): utf8/cnv, source/sourceLimit and table/mbcsIndex are assigned
// pointer values and dereferenced below, so their declarators presumably
// carried '*' in the real source - confirm against upstream.
5325	UConverter utf8, cnv;
5326	const uint8_t source, sourceLimit;
5327	uint8_t *target;
5328	int32_t targetCapacity;
5329
5330	const uint16_t table, mbcsIndex;
5331	const uint16_t *results;
5332
// oldToULength: bytes of the current sequence that came from a previous buffer
// toULength:    bytes of the current sequence consumed so far
// toULimit:     expected total number of bytes of the current sequence
5333	int8_t oldToULength, toULength, toULimit;
5334
5335	UChar32 c;
5336	uint8_t b, t1, t2;
5337
5338	uint32_t stage2Entry;
5339	uint32_t asciiRoundtrips;
5340	uint16_t value = 0;
5341	UBool hasSupplementary;
5342
5343	/* set up the local pointers */
5344	utf8=pToUArgs->converter;
5345	cnv=pFromUArgs->converter;
5346	source=(uint8_t *)pToUArgs->source;
5347	sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
5348	target=(uint8_t *)pFromUArgs->target;
5349	targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
5350
5351	table=cnv->sharedData->mbcs.fromUnicodeTable;
5352	mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
// Pick the result table: the LF/NL-swapped variant when the converter option
// bit UCNV_OPTION_SWAP_LFNL is set, otherwise the regular one.
5353	if((cnv->options&UCNV_OPTION_SWAP_LFNL0x10)!=0) {
5354	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
5355	} else {
5356	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
5357	}
// Bit set consulted by IS_ASCII_ROUNDTRIP below; bit (b>>2) covers byte b.
5358	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
5359
5360	hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY1);
5361
5362	/* get the converter state from the UTF-8 UConverter */
5363	if(utf8->toULength > 0) {
5364	toULength=oldToULength=utf8->toULength;
5365	toULimit=(int8_t)utf8->mode;
5366	c=(UChar32)utf8->toUnicodeStatus;
5367	} else {
5368	toULength=oldToULength=toULimit=0;
5369	c = 0;
5370	}
5371
5372	// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
5373	// If the buffer ends with a truncated 2- or 3-byte sequence,
5374	// then we reduce the sourceLimit to before that,
5375	// and collect the remaining bytes after the conversion loop.
5376	{
5377	// Do not go back into the bytes that will be read for finishing a partial
5378	// sequence from the previous buffer.
5379	int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
5380	if(length>0) {
5381	uint8_t b1=*(sourceLimit-1);
5382	if(U8_IS_SINGLE(b1)(((b1)&0x80)==0)) {
5383	// common ASCII character
5384	} else if(U8_IS_TRAIL(b1)((int8_t)(b1)<-0x40) && length>=2) {
// Last byte is a trail byte: only a 3-byte lead (0xe0..0xef) directly before
// it, forming a valid lead/first-trail pair, marks a truncated sequence.
5385	uint8_t b2=*(sourceLimit-2);
5386	if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)("\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" [(b2)&0xf]&(1<<((uint8_t)(b1)>>5)))) {
5387	// truncated 3-byte sequence
5388	sourceLimit-=2;
5389	}
5390	} else if(0xc2<=b1 && b1<0xf0) {
5391	// truncated 2- or 3-byte sequence
5392	--sourceLimit;
5393	}
5394	}
5395	}
5396
// Resume a sequence left incomplete by the previous call: clear the saved
// state in the UTF-8 converter and jump into the trail-byte collection loop.
5397	if(c!=0 && targetCapacity>0) {
5398	utf8->toUnicodeStatus=0;
5399	utf8->toULength=0;
5400	goto moreBytes;
5401	/* See note in ucnv_SBCSFromUTF8() about this goto. */
5402	}
5403
5404	/* conversion loop */
5405	while(source<sourceLimit) {
5406	if(targetCapacity>0) {
5407	b=*source++;
5408	if(U8_IS_SINGLE(b)(((b)&0x80)==0)) {
5409	/* convert ASCII */
5410	if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)(((asciiRoundtrips) & (1<<((b)>>2)))!=0)) {
// Round-trip ASCII byte: copied to the output unchanged.
5411	*target++=b;
5412	--targetCapacity;
5413	continue;
5414	} else {
// Other single bytes are looked up via mbcsIndex[0]; a result of 0 is
// treated as unassigned and handed to the extension lookup.
5415	value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, 0, b)(results)[ (mbcsIndex)[0] +(b) ];
5416	if(value==0) {
5417	c=b;
5418	goto unassigned;
5419	}
5420	}
5421	} else {
5422	if(b>=0xe0) {
5423	if( /* handle U+0800..U+D7FF inline */
5424	b<=0xed && // do not assume maxFastUChar>0xd7ff
5425	U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0])("\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" [(b)&0xf]&(1<<((uint8_t)(t1=source[0])>>5 ))) &&
5426	(t2=(uint8_t)(source[1]-0x80)) <= 0x3f
5427	) {
// c holds the code point bits above the low 6 bits; it indexes mbcsIndex,
// and the last trail byte's 6 bits (t2) select the entry in that block.
5428	c=((b&0xf)<<6)\|(t1&0x3f);
5429	source+=2;
5430	value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2)(results)[ (mbcsIndex)[c] +(t2) ];
5431	if(value==0) {
// Rebuild the full code point before trying the extension lookup.
5432	c=(c<<6)\|t2;
5433	goto unassigned;
5434	}
5435	} else {
// Not handled inline: c<0 routes the byte to the general path below.
5436	c=-1;
5437	}
5438	} else {
5439	if( /* handle U+0080..U+07FF inline */
5440	b>=0xc2 &&
5441	(t1=(uint8_t)(*source-0x80)) <= 0x3f
5442	) {
5443	c=b&0x1f;
5444	++source;
5445	value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t1)(results)[ (mbcsIndex)[c] +(t1) ];
5446	if(value==0) {
5447	c=(c<<6)\|t1;
5448	goto unassigned;
5449	}
5450	} else {
5451	c=-1;
5452	}
5453	}
5454
5455	if(c<0) {
5456	/* handle "complicated" and error cases, and continuing partial characters */
5457	oldToULength=0;
5458	toULength=1;
// Expected sequence length from the lead byte: 2..4 for 0xc2..0xf4, else 0.
5459	toULimit=U8_COUNT_BYTES_NON_ASCII(b)(((uint8_t)((b)-0xc2)<=0x32) ? ((uint8_t)(b)>=0xe0)+((uint8_t )(b)>=0xf0)+2 : 0);
5460	c=b;
// Also entered by goto from before the loop to finish a sequence that was
// started in a previous buffer.
5461	moreBytes:
5462	while(toULength<toULimit) {
5463	/*
5464	* The sourceLimit may have been adjusted before the conversion loop
5465	* to stop before a truncated sequence.
5466	* Here we need to use the real limit in case we have two truncated
5467	* sequences at the end.
5468	* See ticket #7492.
5469	*/
5470	if(source<(uint8_t *)pToUArgs->sourceLimit) {
5471	b=*source;
5472	if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
5473	++source;
5474	++toULength;
// Raw byte values are accumulated; the lead/trail marker bits are removed
// later by subtracting utf8_offsets[toULength].
5475	c=(c<<6)+b;
5476	} else {
5477	break; /* sequence too short, stop with toULength<toULimit */
5478	}
5479	} else {
5480	/* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
// Rewind to the first byte read from this buffer and copy those bytes after
// the ones already saved from earlier buffers.
5481	source-=(toULength-oldToULength);
5482	while(oldToULength<toULength) {
5483	utf8->toUBytes[oldToULength++]=*source++;
5484	}
5485	utf8->toUnicodeStatus=c;
5486	utf8->toULength=toULength;
5487	utf8->mode=toULimit;
5488	pToUArgs->source=(char *)source;
5489	pFromUArgs->target=(char *)target;
5490	return;
5491	}
5492	}
5493
5494	if(toULength==toULimit) {
// Complete sequence: turn the accumulated value into the code point.
5495	c-=utf8_offsets[toULength];
5496	if(toULength<=3) { /* BMP */
5497	stage2Entry=MBCS_STAGE_2_FROM_U(table, c)((const uint32_t *)(table))[ (table)[(c)>>10] +(((c)>> 4)&0x3f) ];
5498	} else {
5499	/* supplementary code point */
5500	if(!hasSupplementary) {
5501	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
5502	stage2Entry=0;
5503	} else {
5504	stage2Entry=MBCS_STAGE_2_FROM_U(table, c)((const uint32_t *)(table))[ (table)[(c)>>10] +(((c)>> 4)&0x3f) ];
5505	}
5506	}
5507	} else {
5508	/* error handling: illegal UTF-8 byte sequence */
// Same rewind-and-copy as for a partial character, but the call ends with
// U_ILLEGAL_CHAR_FOUND and toUnicodeStatus/mode are not updated here.
5509	source-=(toULength-oldToULength);
5510	while(oldToULength<toULength) {
5511	utf8->toUBytes[oldToULength++]=*source++;
5512	}
5513	utf8->toULength=toULength;
5514	pToUArgs->source=(char *)source;
5515	pFromUArgs->target=(char *)target;
5516	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
5517	return;
5518	}
5519
5520	/* get the bytes and the length for the output */
5521	/* MBCS_OUTPUT_2 */
// NOTE(review): the expansion text on the next line looks like it lost its
// '*' characters (pointer cast and the multiplication by 16) - confirm.
5522	value=MBCS_VALUE_2_FROM_STAGE_2(results, stage2Entry, c)((uint16_t )(results))[16(uint32_t)(uint16_t)(stage2Entry)+ ((c)&0xf)];
5523
5524	/* is this code point assigned, or do we use fallbacks? */
5525	if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)( ((stage2Entry) & ((uint32_t)1<< (16+((c)&0xf) ) )) !=0) \|\|
5526	(UCNV_FROM_U_USE_FALLBACK(cnv, c)(((cnv)->useFallback) \|\| ((uint32_t)((c)-0xe000)<0x1900 \|\| (uint32_t)((c)-0xf0000)<0x20000)) && value!=0))
5527	) {
5528	goto unassigned;
5529	}
5530	}
5531	}
5532
5533	/* write the output character bytes from value and length */
5534	/* from the first if in the loop we know that targetCapacity>0 */
5535	if(value<=0xff) {
5536	/* this is easy because we know that there is enough space */
5537	*target++=(uint8_t)value;
5538	--targetCapacity;
5539	} else /* length==2 */ {
// The high byte always fits (targetCapacity>0); the low byte is either
// written too or parked in charErrorBuffer with a buffer overflow error.
5540	*target++=(uint8_t)(value>>8);
5541	if(2<=targetCapacity) {
5542	*target++=(uint8_t)value;
5543	targetCapacity-=2;
5544	} else {
5545	cnv->charErrorBuffer[0]=(char)value;
5546	cnv->charErrorBufferLength=1;
5547
5548	/* target overflow */
5549	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
5550	break;
5551	}
5552	}
5553	continue;
5554
// Reached only by goto, with c holding the complete code point.
5555	unassigned:
5556	{
5557	/*
5558	* Try an extension mapping.
5559	* Pass in no source because we don't have UTF-16 input.
5560	* If we have a partial match on c, we will return and revert
5561	* to UTF-8->UTF-16->charset conversion.
5562	*/
5563	static const UChar nul=0;
5564	const UChar *noSource=&nul;
5565	c=_extFromU(cnv, cnv->sharedData,
5566	c, &noSource, noSource,
5567	&target, target+targetCapacity,
5568	NULL__null, -1,
5569	pFromUArgs->flush,
5570	pErrorCode);
5571
5572	if(U_FAILURE(*pErrorCode)) {
5573	/* not mappable or buffer overflow */
5574	cnv->fromUChar32=c;
5575	break;
5576	} else if(cnv->preFromUFirstCP>=0) {
5577	/*
5578	* Partial match, return and revert to pivoting.
5579	* In normal from-UTF-16 conversion, we would just continue
5580	* but then exit the loop because the extension match would
5581	* have consumed the source.
5582	*/
5583	*pErrorCode=U_USING_DEFAULT_WARNING;
5584	break;
5585	} else {
5586	/* a mapping was written to the target, continue */
5587
5588	/* recalculate the targetCapacity after an extension mapping */
5589	targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
5590	continue;
5591	}
5592	}
5593	} else {
5594	/* target is full */
5595	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
5596	break;
5597	}
5598	}
5599
5600	/*
5601	* The sourceLimit may have been adjusted before the conversion loop
5602	* to stop before a truncated sequence.
5603	* If so, then collect the truncated sequence now.
5604	*/
// Skipped when an error/warning is pending or an extension partial match is
// recorded. The condition also restores sourceLimit to the caller's limit.
5605	if(U_SUCCESS(*pErrorCode) &&
5606	cnv->preFromUFirstCP<0 &&
5607	source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
5608	c=utf8->toUBytes[0]=b=*source++;
5609	toULength=1;
5610	toULimit=U8_COUNT_BYTES(b)((((b)&0x80)==0) ? 1 : (((uint8_t)((b)-0xc2)<=0x32) ? ( (uint8_t)(b)>=0xe0)+((uint8_t)(b)>=0xf0)+2 : 0));
// Save every remaining byte and keep accumulating c the same way as in the
// moreBytes loop, so the next call can resume from this state.
5611	while(source<sourceLimit) {
5612	utf8->toUBytes[toULength++]=b=*source++;
5613	c=(c<<6)+b;
5614	}
5615	utf8->toUnicodeStatus=c;
5616	utf8->toULength=toULength;
5617	utf8->mode=toULimit;
5618	}
5619
5620	/* write back the updated pointers */
5621	pToUArgs->source=(char *)source;
5622	pFromUArgs->target=(char *)target;
5623	}
5624
5625	/* miscellaneous ------------------------------------------------------------ */
5626
5627	static void U_CALLCONV
5628	ucnv_MBCSGetStarters(const UConverter* cnv,
5629	UBool starters[256],
5630	UErrorCode *) {
5631	const int32_t *state0;
5632	int i;
5633
5634	state0=cnv->sharedData->mbcs.stateTable[cnv->sharedData->mbcs.dbcsOnlyState];
5635	for(i=0; i<256; ++i) {
5636	/* all bytes that cause a state transition from state 0 are lead bytes */
5637	starters[i]= (UBool)MBCS_ENTRY_IS_TRANSITION(state0[i])((state0[i])>=0);
5638	}
5639	}
5640
5641	/*
5642	* This is an internal function that allows other converter implementations
5643	* to check whether a byte is a lead byte.
5644	*/
5645	U_CFUNCextern "C" UBool
5646	ucnv_MBCSIsLeadByteucnv_MBCSIsLeadByte_71(UConverterSharedData *sharedData, char byte) {
5647	return (UBool)MBCS_ENTRY_IS_TRANSITION(sharedData->mbcs.stateTable[0][(uint8_t)byte])((sharedData->mbcs.stateTable[0][(uint8_t)byte])>=0);
5648	}
5649
5650	static void U_CALLCONV
5651	ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
5652	int32_t offsetIndex,
5653	UErrorCode *pErrorCode) {
5654	UConverter *cnv=pArgs->converter;
5655	char p, subchar;
5656	char buffer[4];
5657	int32_t length;
5658
5659	/* first, select between subChar and subChar1 */
5660	if( cnv->subChar1!=0 &&
5661	(cnv->sharedData->mbcs.extIndexes!=NULL__null ?
5662	cnv->useSubChar1 :
5663	(cnv->invalidUCharBuffer[0]<=0xff))
5664	) {
5665	/* select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS behavior) */
5666	subchar=(char *)&cnv->subChar1;
5667	length=1;
5668	} else {
5669	/* select subChar in all other cases */
5670	subchar=(char *)cnv->subChars;
5671	length=cnv->subCharLen;
5672	}
5673
5674	/* reset the selector for the next code point */
5675	cnv->useSubChar1=FALSE0;
5676
5677	if (cnv->sharedData->mbcs.outputType == MBCS_OUTPUT_2_SISO) {
5678	p=buffer;
5679
5680	/* fromUnicodeStatus contains prevLength */
5681	switch(length) {
5682	case 1:
5683	if(cnv->fromUnicodeStatus==2) {
5684	/* DBCS mode and SBCS sub char: change to SBCS */
5685	cnv->fromUnicodeStatus=1;
5686	*p++=UCNV_SI0x0F;
5687	}
5688	*p++=subchar[0];
5689	break;
5690	case 2:
5691	if(cnv->fromUnicodeStatus<=1) {
5692	/* SBCS mode and DBCS sub char: change to DBCS */
5693	cnv->fromUnicodeStatus=2;
5694	*p++=UCNV_SO0x0E;
5695	}
5696	*p++=subchar[0];
5697	*p++=subchar[1];
5698	break;
5699	default:
5700	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
5701	return;
5702	}
5703	subchar=buffer;
5704	length=(int32_t)(p-buffer);
5705	}
5706
5707	ucnv_cbFromUWriteBytesucnv_cbFromUWriteBytes_71(pArgs, subchar, length, offsetIndex, pErrorCode);
5708	}
5709
5710	U_CFUNCextern "C" UConverterType
5711	ucnv_MBCSGetTypeucnv_MBCSGetType_71(const UConverter* converter) {
5712	/* SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but here we cheat a little */
5713	if(converter->sharedData->mbcs.countStates==1) {
5714	return (UConverterType)UCNV_SBCS;
5715	} else if((converter->sharedData->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) {
5716	return (UConverterType)UCNV_EBCDIC_STATEFUL;
5717	} else if(converter->sharedData->staticData->minBytesPerChar==2 && converter->sharedData->staticData->maxBytesPerChar==2) {
5718	return (UConverterType)UCNV_DBCS;
5719	}
5720	return (UConverterType)UCNV_MBCS;
5721	}
5722
5723	#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */