Bug Summary

File:out/../deps/icu-small/source/common/loclikelysubtags.cpp
Warning:line 297, column 5
Value stored to 'result' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name loclikelysubtags.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D U_COMMON_IMPLEMENTATION=1 -D U_ATTRIBUTE_DEPRECATED= -D _CRT_SECURE_NO_DEPRECATE= -D U_STATIC_IMPLEMENTATION=1 -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -I ../deps/icu-small/source/common -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem /usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include 
-internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-deprecated-declarations -Wno-strict-aliasing -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../deps/icu-small/source/common/loclikelysubtags.cpp
1// © 2019 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4// loclikelysubtags.cpp
5// created: 2019may08 Markus W. Scherer
6
7#include <utility>
8#include "unicode/utypes.h"
9#include "unicode/bytestrie.h"
10#include "unicode/localpointer.h"
11#include "unicode/locid.h"
12#include "unicode/uobject.h"
13#include "unicode/ures.h"
14#include "charstr.h"
15#include "cstring.h"
16#include "loclikelysubtags.h"
17#include "lsr.h"
18#include "uassert.h"
19#include "ucln_cmn.h"
20#include "uhash.h"
21#include "uinvchar.h"
22#include "umutex.h"
23#include "uniquecharstr.h"
24#include "uresdata.h"
25#include "uresimp.h"
26
27U_NAMESPACE_BEGINnamespace icu_71 {
28
namespace {

// Prefix characters handed to the LSR constructor when a pseudolocale is
// recognized; the trailing notes list the region / variant that selects each.
constexpr char PSEUDO_ACCENTS_PREFIX = '\'';  // -XA, -PSACCENT
constexpr char PSEUDO_BIDI_PREFIX = '+';      // -XB, -PSBIDI
constexpr char PSEUDO_CRACKED_PREFIX = ',';   // -XC, -PSCRACK

}  // namespace
36
37LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) :
38 distanceTrieBytes(data.distanceTrieBytes),
39 regionToPartitions(data.regionToPartitions),
40 partitions(data.partitions),
41 paradigms(data.paradigms), paradigmsLength(data.paradigmsLength),
42 distances(data.distances) {
43 data.partitions = nullptr;
44 data.paradigms = nullptr;
45}
46
47LocaleDistanceData::~LocaleDistanceData() {
48 uprv_freeuprv_free_71(partitions);
49 delete[] paradigms;
50}
51
52// TODO(ICU-20777): Rename to just LikelySubtagsData.
53struct XLikelySubtagsData {
54 UResourceBundle *langInfoBundle = nullptr;
55 UniqueCharStrings strings;
56 CharStringMap languageAliases;
57 CharStringMap regionAliases;
58 const uint8_t *trieBytes = nullptr;
59 LSR *lsrs = nullptr;
60 int32_t lsrsLength = 0;
61
62 LocaleDistanceData distanceData;
63
64 XLikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {}
65
66 ~XLikelySubtagsData() {
67 ures_closeures_close_71(langInfoBundle);
68 delete[] lsrs;
69 }
70
71 void load(UErrorCode &errorCode) {
72 langInfoBundle = ures_openDirectures_openDirect_71(nullptr, "langInfo", &errorCode);
73 if (U_FAILURE(errorCode)) { return; }
74 StackUResourceBundle stackTempBundle;
75 ResourceDataValue value;
76 ures_getValueWithFallbackures_getValueWithFallback_71(langInfoBundle, "likely", stackTempBundle.getAlias(),
77 value, errorCode);
78 ResourceTable likelyTable = value.getTable(errorCode);
79 if (U_FAILURE(errorCode)) { return; }
80
81 // Read all strings in the resource bundle and convert them to invariant char *.
82 LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes;
83 int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0;
84 if (!readStrings(likelyTable, "languageAliases", value,
85 languageIndexes, languagesLength, errorCode) ||
86 !readStrings(likelyTable, "regionAliases", value,
87 regionIndexes, regionsLength, errorCode) ||
88 !readStrings(likelyTable, "lsrs", value,
89 lsrSubtagIndexes,lsrSubtagsLength, errorCode)) {
90 return;
91 }
92 if ((languagesLength & 1) != 0 ||
93 (regionsLength & 1) != 0 ||
94 (lsrSubtagsLength % 3) != 0) {
95 errorCode = U_INVALID_FORMAT_ERROR;
96 return;
97 }
98 if (lsrSubtagsLength == 0) {
99 errorCode = U_MISSING_RESOURCE_ERROR;
100 return;
101 }
102
103 if (!likelyTable.findValue("trie", value)) {
104 errorCode = U_MISSING_RESOURCE_ERROR;
105 return;
106 }
107 int32_t length;
108 trieBytes = value.getBinary(length, errorCode);
109 if (U_FAILURE(errorCode)) { return; }
110
111 // Also read distance/matcher data if available,
112 // to open & keep only one resource bundle pointer
113 // and to use one single UniqueCharStrings.
114 UErrorCode matchErrorCode = U_ZERO_ERROR;
115 ures_getValueWithFallbackures_getValueWithFallback_71(langInfoBundle, "match", stackTempBundle.getAlias(),
116 value, matchErrorCode);
117 LocalMemory<int32_t> partitionIndexes, paradigmSubtagIndexes;
118 int32_t partitionsLength = 0, paradigmSubtagsLength = 0;
119 if (U_SUCCESS(matchErrorCode)) {
120 ResourceTable matchTable = value.getTable(errorCode);
121 if (U_FAILURE(errorCode)) { return; }
122
123 if (matchTable.findValue("trie", value)) {
124 distanceData.distanceTrieBytes = value.getBinary(length, errorCode);
125 if (U_FAILURE(errorCode)) { return; }
126 }
127
128 if (matchTable.findValue("regionToPartitions", value)) {
129 distanceData.regionToPartitions = value.getBinary(length, errorCode);
130 if (U_FAILURE(errorCode)) { return; }
131 if (length < LSR::REGION_INDEX_LIMIT) {
132 errorCode = U_INVALID_FORMAT_ERROR;
133 return;
134 }
135 }
136
137 if (!readStrings(matchTable, "partitions", value,
138 partitionIndexes, partitionsLength, errorCode) ||
139 !readStrings(matchTable, "paradigms", value,
140 paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) {
141 return;
142 }
143 if ((paradigmSubtagsLength % 3) != 0) {
144 errorCode = U_INVALID_FORMAT_ERROR;
145 return;
146 }
147
148 if (matchTable.findValue("distances", value)) {
149 distanceData.distances = value.getIntVector(length, errorCode);
150 if (U_FAILURE(errorCode)) { return; }
151 if (length < 4) { // LocaleDistance IX_LIMIT
152 errorCode = U_INVALID_FORMAT_ERROR;
153 return;
154 }
155 }
156 } else if (matchErrorCode == U_MISSING_RESOURCE_ERROR) {
157 // ok for likely subtags
158 } else { // error other than missing resource
159 errorCode = matchErrorCode;
160 return;
161 }
162
163 // Fetch & store invariant-character versions of strings
164 // only after we have collected and de-duplicated all of them.
165 strings.freeze();
166
167 languageAliases = CharStringMap(languagesLength / 2, errorCode);
168 for (int32_t i = 0; i < languagesLength; i += 2) {
169 languageAliases.put(strings.get(languageIndexes[i]),
170 strings.get(languageIndexes[i + 1]), errorCode);
171 }
172
173 regionAliases = CharStringMap(regionsLength / 2, errorCode);
174 for (int32_t i = 0; i < regionsLength; i += 2) {
175 regionAliases.put(strings.get(regionIndexes[i]),
176 strings.get(regionIndexes[i + 1]), errorCode);
177 }
178 if (U_FAILURE(errorCode)) { return; }
179
180 lsrsLength = lsrSubtagsLength / 3;
181 lsrs = new LSR[lsrsLength];
182 if (lsrs == nullptr) {
183 errorCode = U_MEMORY_ALLOCATION_ERROR;
184 return;
185 }
186 for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) {
187 lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]),
188 strings.get(lsrSubtagIndexes[i + 1]),
189 strings.get(lsrSubtagIndexes[i + 2]),
190 LSR::IMPLICIT_LSR);
191 }
192
193 if (partitionsLength > 0) {
194 distanceData.partitions = static_cast<const char **>(
195 uprv_mallocuprv_malloc_71(partitionsLength * sizeof(const char *)));
196 if (distanceData.partitions == nullptr) {
197 errorCode = U_MEMORY_ALLOCATION_ERROR;
198 return;
199 }
200 for (int32_t i = 0; i < partitionsLength; ++i) {
201 distanceData.partitions[i] = strings.get(partitionIndexes[i]);
202 }
203 }
204
205 if (paradigmSubtagsLength > 0) {
206 distanceData.paradigmsLength = paradigmSubtagsLength / 3;
207 LSR *paradigms = new LSR[distanceData.paradigmsLength];
208 if (paradigms == nullptr) {
209 errorCode = U_MEMORY_ALLOCATION_ERROR;
210 return;
211 }
212 for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) {
213 paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]),
214 strings.get(paradigmSubtagIndexes[i + 1]),
215 strings.get(paradigmSubtagIndexes[i + 2]),
216 LSR::DONT_CARE_FLAGS);
217 }
218 distanceData.paradigms = paradigms;
219 }
220 }
221
222private:
223 bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value,
224 LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {
225 if (table.findValue(key, value)) {
226 ResourceArray stringArray = value.getArray(errorCode);
227 if (U_FAILURE(errorCode)) { return false; }
228 length = stringArray.getSize();
229 if (length == 0) { return true; }
230 int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length);
231 if (rawIndexes == nullptr) {
232 errorCode = U_MEMORY_ALLOCATION_ERROR;
233 return false;
234 }
235 for (int i = 0; i < length; ++i) {
236 stringArray.getValue(i, value); // returns TRUE because i < length
237 rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode);
238 if (U_FAILURE(errorCode)) { return false; }
239 }
240 }
241 return true;
242 }
243};
244
245namespace {
246
247XLikelySubtags *gLikelySubtags = nullptr;
248UInitOnce gInitOnce = U_INITONCE_INITIALIZER{{ 0 }, U_ZERO_ERROR};
249
250UBool U_CALLCONV cleanup() {
251 delete gLikelySubtags;
252 gLikelySubtags = nullptr;
253 gInitOnce.reset();
254 return TRUE1;
255}
256
257} // namespace
258
259void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
260 // This function is invoked only via umtx_initOnce().
261 U_ASSERT(gLikelySubtags == nullptr)(void)0;
262 XLikelySubtagsData data(errorCode);
263 data.load(errorCode);
264 if (U_FAILURE(errorCode)) { return; }
265 gLikelySubtags = new XLikelySubtags(data);
266 if (gLikelySubtags == nullptr) {
267 errorCode = U_MEMORY_ALLOCATION_ERROR;
268 return;
269 }
270 ucln_common_registerCleanupucln_common_registerCleanup_71(UCLN_COMMON_LIKELY_SUBTAGS, cleanup);
271}
272
273const XLikelySubtags *XLikelySubtags::getSingleton(UErrorCode &errorCode) {
274 if (U_FAILURE(errorCode)) { return nullptr; }
275 umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
276 return gLikelySubtags;
277}
278
279XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) :
280 langInfoBundle(data.langInfoBundle),
281 strings(data.strings.orphanCharStrings()),
282 languageAliases(std::move(data.languageAliases)),
283 regionAliases(std::move(data.regionAliases)),
284 trie(data.trieBytes),
285 lsrs(data.lsrs),
286#if U_DEBUG0
287 lsrsLength(data.lsrsLength),
288#endif
289 distanceData(std::move(data.distanceData)) {
290 data.langInfoBundle = nullptr;
291 data.lsrs = nullptr;
292
293 // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
294 UStringTrieResult result = trie.next(u'*');
295 U_ASSERT(USTRINGTRIE_HAS_NEXT(result))(void)0;
296 trieUndState = trie.getState64();
297 result = trie.next(u'*');
Value stored to 'result' is never read
298 U_ASSERT(USTRINGTRIE_HAS_NEXT(result))(void)0;
299 trieUndZzzzState = trie.getState64();
300 result = trie.next(u'*');
301 U_ASSERT(USTRINGTRIE_HAS_VALUE(result))(void)0;
302 defaultLsrIndex = trie.getValue();
303 trie.reset();
304
305 for (char16_t c = u'a'; c <= u'z'; ++c) {
306 result = trie.next(c);
307 if (result == USTRINGTRIE_NO_VALUE) {
308 trieFirstLetterStates[c - u'a'] = trie.getState64();
309 }
310 trie.reset();
311 }
312}
313
314XLikelySubtags::~XLikelySubtags() {
315 ures_closeures_close_71(langInfoBundle);
316 delete strings;
317 delete[] lsrs;
318}
319
320LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
321 const char *name = locale.getName();
322 if (uprv_isAtSign(name[0])((name[0])=='@') && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
323 // Private use language tag x-subtag-subtag... which CLDR changes to
324 // und-x-subtag-subtag...
325 return LSR(name, "", "", LSR::EXPLICIT_LSR);
326 }
327 return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
328 locale.getVariant(), errorCode);
329}
330
331namespace {
332
333const char *getCanonical(const CharStringMap &aliases, const char *alias) {
334 const char *canonical = aliases.get(alias);
335 return canonical == nullptr ? alias : canonical;
336}
337
338} // namespace
339
340LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
341 const char *variant, UErrorCode &errorCode) const {
342 // Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
343 // They should match only themselves,
344 // not other locales with what looks like the same language and script subtags.
345 char c1;
346 if (region[0] == 'X' && (c1 = region[1]) != 0 && region[2] == 0) {
347 switch (c1) {
348 case 'A':
349 return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region,
350 LSR::EXPLICIT_LSR, errorCode);
351 case 'B':
352 return LSR(PSEUDO_BIDI_PREFIX, language, script, region,
353 LSR::EXPLICIT_LSR, errorCode);
354 case 'C':
355 return LSR(PSEUDO_CRACKED_PREFIX, language, script, region,
356 LSR::EXPLICIT_LSR, errorCode);
357 default: // normal locale
358 break;
359 }
360 }
361
362 if (variant[0] == 'P' && variant[1] == 'S') {
363 int32_t lsrFlags = *region == 0 ?
364 LSR::EXPLICIT_LANGUAGE | LSR::EXPLICIT_SCRIPT : LSR::EXPLICIT_LSR;
365 if (uprv_strcmp(variant, "PSACCENT"):: strcmp(variant, "PSACCENT") == 0) {
366 return LSR(PSEUDO_ACCENTS_PREFIX, language, script,
367 *region == 0 ? "XA" : region, lsrFlags, errorCode);
368 } else if (uprv_strcmp(variant, "PSBIDI"):: strcmp(variant, "PSBIDI") == 0) {
369 return LSR(PSEUDO_BIDI_PREFIX, language, script,
370 *region == 0 ? "XB" : region, lsrFlags, errorCode);
371 } else if (uprv_strcmp(variant, "PSCRACK"):: strcmp(variant, "PSCRACK") == 0) {
372 return LSR(PSEUDO_CRACKED_PREFIX, language, script,
373 *region == 0 ? "XC" : region, lsrFlags, errorCode);
374 }
375 // else normal locale
376 }
377
378 language = getCanonical(languageAliases, language);
379 // (We have no script mappings.)
380 region = getCanonical(regionAliases, region);
381 return maximize(language, script, region);
382}
383
384LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const {
385 if (uprv_strcmp(language, "und"):: strcmp(language, "und") == 0) {
386 language = "";
387 }
388 if (uprv_strcmp(script, "Zzzz"):: strcmp(script, "Zzzz") == 0) {
389 script = "";
390 }
391 if (uprv_strcmp(region, "ZZ"):: strcmp(region, "ZZ") == 0) {
392 region = "";
393 }
394 if (*script != 0 && *region != 0 && *language != 0) {
395 return LSR(language, script, region, LSR::EXPLICIT_LSR); // already maximized
396 }
397
398 uint32_t retainOldMask = 0;
399 BytesTrie iter(trie);
400 uint64_t state;
401 int32_t value;
402 // Small optimization: Array lookup for first language letter.
403 int32_t c0;
404 if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
405 language[1] != 0 && // language.length() >= 2
406 (state = trieFirstLetterStates[c0]) != 0) {
407 value = trieNext(iter.resetToState64(state), language, 1);
408 } else {
409 value = trieNext(iter, language, 0);
410 }
411 if (value >= 0) {
412 if (*language != 0) {
413 retainOldMask |= 4;
414 }
415 state = iter.getState64();
416 } else {
417 retainOldMask |= 4;
418 iter.resetToState64(trieUndState); // "und" ("*")
419 state = 0;
420 }
421
422 if (value > 0) {
423 // Intermediate or final value from just language.
424 if (value == SKIP_SCRIPT) {
425 value = 0;
426 }
427 if (*script != 0) {
428 retainOldMask |= 2;
429 }
430 } else {
431 value = trieNext(iter, script, 0);
432 if (value >= 0) {
433 if (*script != 0) {
434 retainOldMask |= 2;
435 }
436 state = iter.getState64();
437 } else {
438 retainOldMask |= 2;
439 if (state == 0) {
440 iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
441 } else {
442 iter.resetToState64(state);
443 value = trieNext(iter, "", 0);
444 U_ASSERT(value >= 0)(void)0;
445 state = iter.getState64();
446 }
447 }
448 }
449
450 if (value > 0) {
451 // Final value from just language or language+script.
452 if (*region != 0) {
453 retainOldMask |= 1;
454 }
455 } else {
456 value = trieNext(iter, region, 0);
457 if (value >= 0) {
458 if (*region != 0) {
459 retainOldMask |= 1;
460 }
461 } else {
462 retainOldMask |= 1;
463 if (state == 0) {
464 value = defaultLsrIndex;
465 } else {
466 iter.resetToState64(state);
467 value = trieNext(iter, "", 0);
468 U_ASSERT(value > 0)(void)0;
469 }
470 }
471 }
472 U_ASSERT(value < lsrsLength)(void)0;
473 const LSR &result = lsrs[value];
474
475 if (*language == 0) {
476 language = "und";
477 }
478
479 if (retainOldMask == 0) {
480 // Quickly return a copy of the lookup-result LSR
481 // without new allocation of the subtags.
482 return LSR(result.language, result.script, result.region, result.flags);
483 }
484 if ((retainOldMask & 4) == 0) {
485 language = result.language;
486 }
487 if ((retainOldMask & 2) == 0) {
488 script = result.script;
489 }
490 if ((retainOldMask & 1) == 0) {
491 region = result.region;
492 }
493 // retainOldMask flags = LSR explicit-subtag flags
494 return LSR(language, script, region, retainOldMask);
495}
496
497int32_t XLikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const {
498 // If likelyInfo >= 0:
499 // likelyInfo bit 1 is set if the previous comparison with lsr
500 // was for equal language and script.
501 // Otherwise the scripts differed.
502 if (uprv_strcmp(lsr.language, other.language):: strcmp(lsr.language, other.language) != 0) {
503 return 0xfffffffc; // negative, lsr not better than other
504 }
505 if (uprv_strcmp(lsr.script, other.script):: strcmp(lsr.script, other.script) != 0) {
506 int32_t index;
507 if (likelyInfo >= 0 && (likelyInfo & 2) == 0) {
508 index = likelyInfo >> 2;
509 } else {
510 index = getLikelyIndex(lsr.language, "");
511 likelyInfo = index << 2;
512 }
513 const LSR &likely = lsrs[index];
514 if (uprv_strcmp(lsr.script, likely.script):: strcmp(lsr.script, likely.script) == 0) {
515 return likelyInfo | 1;
516 } else {
517 return likelyInfo & ~1;
518 }
519 }
520 if (uprv_strcmp(lsr.region, other.region):: strcmp(lsr.region, other.region) != 0) {
521 int32_t index;
522 if (likelyInfo >= 0 && (likelyInfo & 2) != 0) {
523 index = likelyInfo >> 2;
524 } else {
525 index = getLikelyIndex(lsr.language, lsr.region);
526 likelyInfo = (index << 2) | 2;
527 }
528 const LSR &likely = lsrs[index];
529 if (uprv_strcmp(lsr.region, likely.region):: strcmp(lsr.region, likely.region) == 0) {
530 return likelyInfo | 1;
531 } else {
532 return likelyInfo & ~1;
533 }
534 }
535 return likelyInfo & ~1; // lsr not better than other
536}
537
538// Subset of maximize().
539int32_t XLikelySubtags::getLikelyIndex(const char *language, const char *script) const {
540 if (uprv_strcmp(language, "und"):: strcmp(language, "und") == 0) {
541 language = "";
542 }
543 if (uprv_strcmp(script, "Zzzz"):: strcmp(script, "Zzzz") == 0) {
544 script = "";
545 }
546
547 BytesTrie iter(trie);
548 uint64_t state;
549 int32_t value;
550 // Small optimization: Array lookup for first language letter.
551 int32_t c0;
552 if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
553 language[1] != 0 && // language.length() >= 2
554 (state = trieFirstLetterStates[c0]) != 0) {
555 value = trieNext(iter.resetToState64(state), language, 1);
556 } else {
557 value = trieNext(iter, language, 0);
558 }
559 if (value >= 0) {
560 state = iter.getState64();
561 } else {
562 iter.resetToState64(trieUndState); // "und" ("*")
563 state = 0;
564 }
565
566 if (value > 0) {
567 // Intermediate or final value from just language.
568 if (value == SKIP_SCRIPT) {
569 value = 0;
570 }
571 } else {
572 value = trieNext(iter, script, 0);
573 if (value >= 0) {
574 state = iter.getState64();
575 } else {
576 if (state == 0) {
577 iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
578 } else {
579 iter.resetToState64(state);
580 value = trieNext(iter, "", 0);
581 U_ASSERT(value >= 0)(void)0;
582 state = iter.getState64();
583 }
584 }
585 }
586
587 if (value > 0) {
588 // Final value from just language or language+script.
589 } else {
590 value = trieNext(iter, "", 0);
591 U_ASSERT(value > 0)(void)0;
592 }
593 U_ASSERT(value < lsrsLength)(void)0;
594 return value;
595}
596
597int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
598 UStringTrieResult result;
599 uint8_t c;
600 if ((c = s[i]) == 0) {
601 result = iter.next(u'*');
602 } else {
603 for (;;) {
604 c = uprv_invCharToAscii(c)(c);
605 // EBCDIC: If s[i] is not an invariant character,
606 // then c is now 0 and will simply not match anything, which is harmless.
607 uint8_t next = s[++i];
608 if (next != 0) {
609 if (!USTRINGTRIE_HAS_NEXT(iter.next(c))((iter.next(c))&1)) {
610 return -1;
611 }
612 } else {
613 // last character of this subtag
614 result = iter.next(c | 0x80);
615 break;
616 }
617 c = next;
618 }
619 }
620 switch (result) {
621 case USTRINGTRIE_NO_MATCH: return -1;
622 case USTRINGTRIE_NO_VALUE: return 0;
623 case USTRINGTRIE_INTERMEDIATE_VALUE:
624 U_ASSERT(iter.getValue() == SKIP_SCRIPT)(void)0;
625 return SKIP_SCRIPT;
626 case USTRINGTRIE_FINAL_VALUE: return iter.getValue();
627 default: return -1;
628 }
629}
630
631// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
632// in loclikely.cpp to this new code, including activating this
633// minimizeSubtags() function. The LocaleMatcher does not minimize.
634#if 0
// Disabled: everything up to the matching #endif is excluded from compilation.
// The body below is still written with Java-style constructs
// (`ULocale.Minimize`, `new BytesTrie(trie)` assigned to a value, `boolean`,
// `.equals()`, `new LSR(...)` returned by value, `LSR.DONT_CARE_FLAGS`) and
// would have to be ported to C++ before this block can be enabled.
// Outline of the visible logic: maximize the input, look up the entry for the
// maximized language alone (value00), and drop the script and/or region when
// they equal that entry's script/region or when maximizing without the region
// gives the same result.
635LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn,
636 const char *regionIn, ULocale.Minimize fieldToFavor,
637 UErrorCode &errorCode) const {
638 LSR result = maximize(languageIn, scriptIn, regionIn);
639
640 // We could try just a series of checks, like:
641 // LSR result2 = addLikelySubtags(languageIn, "", "");
642 // if result.equals(result2) return result2;
643 // However, we can optimize 2 of the cases:
644 // (languageIn, "", "")
645 // (languageIn, "", regionIn)
646
647 // value00 = lookup(result.language, "", "")
648 BytesTrie iter = new BytesTrie(trie);
649 int value = trieNext(iter, result.language, 0);
650 U_ASSERT(value >= 0)(void)0;
651 if (value == 0) {
652 value = trieNext(iter, "", 0);
653 U_ASSERT(value >= 0)(void)0;
654 if (value == 0) {
655 value = trieNext(iter, "", 0);
656 }
657 }
658 U_ASSERT(value > 0)(void)0;
659 LSR value00 = lsrs[value];
660 boolean favorRegionOk = false;
661 if (result.script.equals(value00.script)) { //script is default
662 if (result.region.equals(value00.region)) {
663 return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS);
664 } else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
665 return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
666 } else {
667 favorRegionOk = true;
668 }
669 }
670
671 // The last case is not as easy to optimize.
672 // Maybe do later, but for now use the straightforward code.
673 LSR result2 = maximize(languageIn, scriptIn, "");
674 if (result2.equals(result)) {
675 return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS);
676 } else if (favorRegionOk) {
677 return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
678 }
679 return result;
680}
681#endif
682
683U_NAMESPACE_END}