../deps/icu-small/source/common/ucnvisci.cpp

1

2

// License & terms of use: http://www.unicode.org/copyright.html

3

/*

4

**********************************************************************

5

6

7

**********************************************************************

8

* file name: ucnvisci.c

9

* encoding: UTF-8

10

* tab size: 8 (not used)

11

* indentation:4

12

*

13

* created on: 2001JUN26

14

* created by: Ram Viswanadha

15

*

16

* Date Name Description

17

* 24/7/2001 Ram Added support for EXT character handling

18

*/

19

20

#include "unicode/utypes.h"

21

22

#if !UCONFIG_NO_CONVERSION0 && !UCONFIG_NO_LEGACY_CONVERSION0 && !UCONFIG_ONLY_HTML_CONVERSION0

23

24

#include "unicode/ucnv.h"

25

#include "unicode/ucnv_cb.h"

26

#include "unicode/utf16.h"

27

#include "cmemory.h"

28

#include "ucnv_bld.h"

29

#include "ucnv_cnv.h"

30

#include "cstring.h"

31

#include "uassert.h"

32

33

/*
 * Constants used by the ISCII converter: Unicode code points, ISCII byte
 * values and range limits.
 */
#define UCNV_OPTIONS_VERSION_MASK 0xf  /* low 4 bits of the open options select the ISCII version */
#define NUKTA               0x093c
#define HALANT              0x094d
#define ZWNJ                0x200c /* Zero Width Non Joiner */
#define ZWJ                 0x200d /* Zero width Joiner */
#define INVALID_CHAR        0xffff
#define ATR                 0xEF   /* Attribute code */
#define EXT                 0xF0   /* Extension code */
#define DANDA               0x0964
#define DOUBLE_DANDA        0x0965
#define ISCII_NUKTA         0xE9
#define ISCII_HALANT        0xE8
#define ISCII_DANDA         0xEA
#define ISCII_INV           0xD9
#define ISCII_VOWEL_SIGN_E  0xE0
#define INDIC_BLOCK_BEGIN   0x0900
#define INDIC_BLOCK_END     0x0D7F
#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
#define VOCALLIC_RR         0x0931
#define LF                  0x0A
#define ASCII_END           0xA0
#define NO_CHAR_MARKER      0xFFFE
/* Parenthesized so the product cannot be split by a neighbouring operator. */
#define TELUGU_DELTA        (DELTA * TELUGU)
#define DEV_ABBR_SIGN       0x0970
#define DEV_ANUDATTA        0x0952
#define EXT_RANGE_BEGIN     0xA1
#define EXT_RANGE_END       0xEE

/* Gurmukhi (PNJ) specific code points and delta. */
#define PNJ_DELTA           0x0100
#define PNJ_BINDI           0x0A02
#define PNJ_TIPPI           0x0A70
#define PNJ_SIGN_VIRAMA     0x0A4D
#define PNJ_ADHAK           0x0A71
#define PNJ_HA              0x0A39
#define PNJ_RRA             0x0A5C

68

69

/**
 * Script identifiers, numbered consecutively from 0.
 * DELTA is not a script: it is the multiplier (0x80) applied to a script
 * value to obtain that script's offset (see _ISCIIOpen and TELUGU_DELTA).
 */
typedef enum {
    DEVANAGARI = 0,
    BENGALI,
    GURMUKHI,
    GUJARATI,
    ORIYA,
    TAMIL,
    TELUGU,
    KANNADA,
    MALAYALAM,
    DELTA = 0x80
} UniLang;

81

82

/**
 * Enumeration for switching code pages if <ATR>+<one of the values below>
 * is encountered.
 */
typedef enum {
    DEF = 0x40,
    RMN = 0x41,
    DEV = 0x42,
    BNG = 0x43,
    TML = 0x44,
    TLG = 0x45,
    ASM = 0x46,
    ORI = 0x47,
    KND = 0x48,
    MLM = 0x49,
    GJR = 0x4A,
    PNJ = 0x4B,
    ARB = 0x71,
    PES = 0x72,
    URD = 0x73,
    SND = 0x74,
    KSM = 0x75,
    PST = 0x76
} ISCIILang;

106

107

/**
 * One bit per script (or script group); the bits are summed to build the
 * entries of validityTable. ZERO is the "bit not set" placeholder.
 */
typedef enum {
    DEV_MASK = 0x80,
    PNJ_MASK = 0x40,
    GJR_MASK = 0x20,
    ORI_MASK = 0x10,
    BNG_MASK = 0x08,
    KND_MASK = 0x04,
    MLM_MASK = 0x02,
    TML_MASK = 0x01,
    ZERO     = 0x00
} MaskEnum;

118

119

#define ISCII_CNV_PREFIX"ISCII,version=" "ISCII,version="

120

121

typedef struct {

122

UChar contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */

123

UChar contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */

124

uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */

125

uint16_t currentDeltaFromUnicode; /* current delta in Indic block */

126

uint16_t currentDeltaToUnicode; /* current delta in Indic block */

127

MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */

128

MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */

129

MaskEnum defMaskToUnicode; /* mask for default state in toUnicode */

130

UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */

131

UBool resetToDefaultToUnicode; /* boolean for resetting to default delta and mask when a newline is encountered*/

132

char name[sizeof(ISCII_CNV_PREFIX"ISCII,version=") + 1];

133

UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */

134

} UConverterDataISCII;

135

136

typedef struct LookupDataStruct {

137

UniLang uniLang;

138

MaskEnum maskEnum;

139

ISCIILang isciiLang;

140

} LookupDataStruct;

141

142

/*
 * Initial converter state per ISCII version. _ISCIIOpen indexes this table
 * with the version number (0..8) taken from the open options.
 * Telugu deliberately uses KND_MASK: Telugu and Kannada share one mask bit.
 */
static const LookupDataStruct lookupInitialData[] = {
    { DEVANAGARI, DEV_MASK, DEV },
    { BENGALI,    BNG_MASK, BNG },
    { GURMUKHI,   PNJ_MASK, PNJ },
    { GUJARATI,   GJR_MASK, GJR },
    { ORIYA,      ORI_MASK, ORI },
    { TAMIL,      TML_MASK, TML },
    { TELUGU,     KND_MASK, TLG },
    { KANNADA,    KND_MASK, KND },
    { MALAYALAM,  MLM_MASK, MLM }
};

153

154

/*
 * For special handling of certain Gurmukhi characters.
 * Indexed by (code point - 0x0A00) for U+0A00..U+0A4F.
 * Bit 0 (value 1): PNJ consonant
 * Bit 1 (value 2): PNJ Bindi Tippi
 */
static const uint8_t pnjMap[80] = {
    /* 0A00..0A0F */
    0, 0, 0, 0, 0, 2, 0, 2,  0, 0, 0, 0, 0, 0, 0, 0,
    /* 0A10..0A1F */
    0, 0, 0, 0, 0, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,
    /* 0A20..0A2F */
    3, 3, 3, 3, 3, 3, 3, 3,  3, 0, 3, 3, 3, 3, 3, 3,
    /* 0A30..0A3F */
    3, 0, 0, 0, 0, 3, 3, 0,  3, 3, 0, 0, 0, 0, 0, 2,
    /* 0A40..0A4F */
    0, 2, 2, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0
};

171

172

/**
 * Tests bit 0 of pnjMap for the given code point.
 *
 * @param c code point to test
 * @return FALSE for anything outside U+0A00..U+0A4F, otherwise the
 *         "PNJ consonant" bit of pnjMap[c - 0xa00].
 */
static UBool
isPNJConsonant(UChar32 c) {
    /* pnjMap only covers 0x0A00..0x0A4F; reject everything else first. */
    if (c < 0xa00 || 0xa50 <= c) {
        return FALSE;
    }
    return (UBool)(pnjMap[c - 0xa00] & 1);
}

180

181

/**
 * Tests bit 1 of pnjMap for the given code point.
 *
 * @param c code point to test
 * @return FALSE for anything outside U+0A00..U+0A4F, otherwise
 *         pnjMap[c - 0xa00] >> 1, which is 1 for the table values 2 and 3
 *         and 0 for the table value 0.
 */
static UBool
isPNJBindiTippi(UChar32 c) {
    /* pnjMap only covers 0x0A00..0x0A4F; reject everything else first. */
    if (c < 0xa00 || 0xa50 <= c) {
        return FALSE;
    }
    return (UBool)(pnjMap[c - 0xa00] >> 1);
}

189

U_CDECL_BEGINextern "C" {

190

static void U_CALLCONV

191

_ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {

192

if(pArgs->onlyTestIsLoadable) {

193

return;

194

}

195

196

cnv->extraInfo = uprv_mallocuprv_malloc_71(sizeof(UConverterDataISCII));

197

198

if (cnv->extraInfo != NULL__null) {

199

int32_t len=0;

200

UConverterDataISCII *converterData=

201

(UConverterDataISCII *) cnv->extraInfo;

202

converterData->contextCharToUnicode=NO_CHAR_MARKER0xFFFE;

203

cnv->toUnicodeStatus = missingCharMarker0xFFFF;

204

converterData->contextCharFromUnicode=0x0000;

205

converterData->resetToDefaultToUnicode=FALSE0;

206

/* check if the version requested is supported */

207

if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK0xf) < 9) {

208

/* initialize state variables */

209

converterData->currentDeltaFromUnicode

210

= converterData->currentDeltaToUnicode

211

= converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK0xf].uniLang * DELTA);

212

213

converterData->currentMaskFromUnicode

214

= converterData->currentMaskToUnicode

215

= converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK0xf].maskEnum;

216

217

converterData->isFirstBuffer=TRUE1;

218

(void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX):: strcpy(converterData->name, "ISCII,version=");

219

len = (int32_t)uprv_strlen(converterData->name):: strlen(converterData->name);

220

converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK0xf) + '0');

221

converterData->name[len+1]=0;

222

223

converterData->prevToUnicodeStatus = 0x0000;

224

} else {

225

uprv_freeuprv_free_71(cnv->extraInfo);

226

cnv->extraInfo = NULL__null;

227

*errorCode = U_ILLEGAL_ARGUMENT_ERROR;

228

}

229

230

} else {

231

*errorCode =U_MEMORY_ALLOCATION_ERROR;

232

}

233

}

234

235

static void U_CALLCONV

236

_ISCIIClose(UConverter *cnv) {

237

if (cnv->extraInfo!=NULL__null) {

238

if (!cnv->isExtraLocal) {

239

uprv_freeuprv_free_71(cnv->extraInfo);

240

}

241

cnv->extraInfo=NULL__null;

242

}

243

}

244

245

static const char* U_CALLCONV

246

_ISCIIgetName(const UConverter* cnv) {

247

if (cnv->extraInfo) {

248

UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;

249

return myData->name;

250

}

251

return NULL__null;

252

}

253

254

static void U_CALLCONV

255

_ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {

256

UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);

257

if (choice<=UCNV_RESET_TO_UNICODE) {

258

cnv->toUnicodeStatus = missingCharMarker0xFFFF;

259

cnv->mode=0;

260

data->currentDeltaToUnicode=data->defDeltaToUnicode;

261

data->currentMaskToUnicode = data->defMaskToUnicode;

262

data->contextCharToUnicode=NO_CHAR_MARKER0xFFFE;

263

data->prevToUnicodeStatus = 0x0000;

264

}

265

if (choice!=UCNV_RESET_TO_UNICODE) {

266

cnv->fromUChar32=0x0000;

267

data->contextCharFromUnicode=0x00;

268

data->currentMaskFromUnicode=data->defMaskToUnicode;

269

data->currentDeltaFromUnicode=data->defDeltaToUnicode;

270

data->isFirstBuffer=TRUE1;

271

data->resetToDefaultToUnicode=FALSE0;

272

}

273

}

274

275

/**

276

* The values in validity table are indexed by the lower bits of Unicode

277

* range 0x0900 - 0x09ff. The values have a structure like:

278

* ---------------------------------------------------------------

279

* | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML |

280

* | | | | | ASM | KND | | |

281

* ---------------------------------------------------------------

282

* If a code point is valid in a particular script

283

* then that bit is turned on

284

*

285

* Unicode does not distinguish between Bengali and Assamese so we use 1 bit for

286

* to represent these languages

287

*

288

* Telugu and Kannada have same codepoints except for Vocallic_RR which we special case

289

* and combine and use 1 bit to represent these languages.

290

*

291

* TODO: It is probably easier to understand and maintain to change this

292

* to use uint16_t and give each of the 9 Unicode/script blocks its own bit.

293

*/

294

295

static const uint8_t validityTable[128] = {

296

/* This state table is tool generated please do not edit unless you know exactly what you are doing */

297

/* Note: This table was edited to mirror the Windows XP implementation */

298

/*ISCII:Valid:Unicode */

299

/*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

300

/*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,

301

/*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

302

/*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

303

/*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

304

/*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

305

/*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

306

/*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

307

/*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

308

/*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

309

/*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

310

/*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

311

/*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

312

/*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

313

/*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

314

/*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

315

/*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

316

/*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

317

/*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

318

/*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

319

/*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

320

/*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

321

/*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

322

/*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

323

/*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

324

/*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

325

/*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

326

/*0xb9 : 0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

327

/*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

328

/*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

329

/*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

330

/*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

331

/*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

332

/*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

333

/*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

334

/*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

335

/*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

336

/*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

337

/*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

338

/*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

339

/*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

340

/*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK ,

341

/*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

342

/*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

343

/*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

344

/*0xcb : 0xfe: 0x92d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

345

/*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

346

/*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

347

/*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

348

/*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,

349

/*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

350

/*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

351

/*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,

352

/*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

353

/*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

354

/*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

355

/*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

356

/*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

357

/*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

358

/*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

359

/*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,

360

/*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

361

/*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

362

/*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

363

/*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

364

/*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

365

/*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

366

/*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

367

/*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO ,

368

/*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

369

/*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

370

/*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

371

/*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

372

/*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

373

/*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

374

/*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

375

/*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

376

/*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

377

/*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

378

/*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

379

/*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

380

/*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

381

/*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

382

/*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

383

/*0x00 : 0x00: 0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

384

/*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO ,

385

/*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO ,

386

/*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO ,

387

/*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

388

/*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

389

/*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

390

/*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

391

/*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,

392

/*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,

393

/*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

394

/*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,

395

/*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

396

/*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

397

/*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,

398

/*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,

399

/*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

400

/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

401

/*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

402

/*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

403

/*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

404

/*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

405

/*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

406

/*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

407

/*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

408

/*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

409

/*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

410

/*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

411

/*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

412

/*

413

* The length of the array is 128 to provide values for 0x900..0x97f.

414

* The last 15 entries for 0x971..0x97f of the validity table are all zero

415

* because no Indic script uses such Unicode code points.

416

*/

417

/*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO

418

};

419

420

/*
 * Unicode -> ISCII table, indexed by (code point - 0x0900) for
 * U+0900..U+097F. Values above 0xFF hold two ISCII bytes (high byte first);
 * 0xFFFF marks a code point with no mapping.
 * Each row below holds 8 consecutive entries; the comment gives the code
 * point of the first entry in the row.
 */
static const uint16_t fromUnicodeTable[128] = {
    /* 0x0900 */ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0xa4e0, 0x00a4, 0x00a5, 0x00a6,
    /* 0x0908 */ 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0xA6E9, 0x00ae, 0x00ab, 0x00ac,
    /* 0x0910 */ 0x00ad, 0x00b2, 0x00af, 0x00b0, 0x00b1, 0x00b3, 0x00b4, 0x00b5,
    /* 0x0918 */ 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd,
    /* 0x0920 */ 0x00be, 0x00bf, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5,
    /* 0x0928 */ 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd,
    /* 0x0930 */ 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6,
    /* 0x0938 */ 0x00d7, 0x00d8, 0xFFFF, 0xFFFF, 0x00e9, 0xEAE9, 0x00da, 0x00db,
    /* 0x0940 */ 0x00dc, 0x00dd, 0x00de, 0x00df, 0xDFE9, 0x00e3, 0x00e0, 0x00e1,
    /* 0x0948 */ 0x00e2, 0x00e7, 0x00e4, 0x00e5, 0x00e6, 0x00e8, 0x00ec, 0x00ed,
    /* 0x0950 (first entry: OM Symbol) */
                 0xA1E9, 0xFFFF, 0xF0B8, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0x0958 */ 0xb3e9, 0xb4e9, 0xb5e9, 0xbae9, 0xbfe9, 0xC0E9, 0xc9e9, 0x00ce,
    /* 0x0960 */ 0xAAe9, 0xA7E9, 0xDBE9, 0xDCE9, 0x00ea, 0xeaea, 0x00f1, 0x00f2,
    /* 0x0968 */ 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa,
    /* 0x0970 */ 0xF0BF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0x0978 */ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};

550

/*
 * ISCII byte -> Unicode table, indexed by the ISCII byte value.
 * Bytes 0x00..0xA0 map to themselves; 0xA1..0xFA map into the
 * U+0900 block (0xD9 maps to U+200D); 0xFFFF marks a byte with no
 * direct mapping.
 * Each row below holds 8 consecutive entries; the comment gives the byte
 * value of the first entry in the row.
 */
static const uint16_t toUnicodeTable[256] = {
    /* 0x00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
    /* 0x08 */ 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    /* 0x10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
    /* 0x18 */ 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
    /* 0x20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    /* 0x28 */ 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    /* 0x30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    /* 0x38 */ 0x0038, 0x0039, 0x003A, 0x003B, 0x003c, 0x003d, 0x003e, 0x003f,
    /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    /* 0x48 */ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
    /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    /* 0x58 */ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
    /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    /* 0x68 */ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    /* 0x78 */ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    /* 0xa0 */ 0x00A0, 0x0901, 0x0902, 0x0903, 0x0905, 0x0906, 0x0907, 0x0908,
    /* 0xa8 */ 0x0909, 0x090a, 0x090b, 0x090e, 0x090f, 0x0910, 0x090d, 0x0912,
    /* 0xb0 */ 0x0913, 0x0914, 0x0911, 0x0915, 0x0916, 0x0917, 0x0918, 0x0919,
    /* 0xb8 */ 0x091a, 0x091b, 0x091c, 0x091d, 0x091e, 0x091f, 0x0920, 0x0921,
    /* 0xc0 */ 0x0922, 0x0923, 0x0924, 0x0925, 0x0926, 0x0927, 0x0928, 0x0929,
    /* 0xc8 */ 0x092a, 0x092b, 0x092c, 0x092d, 0x092e, 0x092f, 0x095f, 0x0930,
    /* 0xd0 */ 0x0931, 0x0932, 0x0933, 0x0934, 0x0935, 0x0936, 0x0937, 0x0938,
    /* 0xd8 */ 0x0939, 0x200D, 0x093e, 0x093f, 0x0940, 0x0941, 0x0942, 0x0943,
    /* 0xe0 */ 0x0946, 0x0947, 0x0948, 0x0945, 0x094a, 0x094b, 0x094c, 0x0949,
    /* 0xe8 */ 0x094d, 0x093c, 0x0964, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0xf0 */ 0xFFFF, 0x0966, 0x0967, 0x0968, 0x0969, 0x096a, 0x096b, 0x096c,
    /* 0xf8 */ 0x096d, 0x096e, 0x096f, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};

808

809

/*
 * Special-case pairs associated with ISCII VOWEL SIGN E.
 *
 * Row 0 is a header: its first element is the total number of rows in the
 * table (header included) and its second element is unused (0).
 * Every following row is a pair { ISCII byte, Unicode code point }.
 *
 * NOTE(review): the code that searches this table is outside this part of
 * the file; presumably the ISCII-to-Unicode converter replaces the byte in
 * column 0, when it is followed by the vowel sign E byte, with the code
 * point in column 1 -- confirm against the caller.
 */
static const uint16_t vowelSignESpecialCases[][2]={
    { 2 /*length of array*/ , 0      },
    { 0xA4                  , 0x0904 },
};

813

814

/*
 * Special-case pairs associated with the ISCII nukta.
 *
 * Row 0 is a header: its first element is the total number of rows in the
 * table (header included) and its second element is unused (0).
 * Every following row is a pair { ISCII byte, Unicode code point }.
 *
 * NOTE(review): the code that searches this table is outside this part of
 * the file; presumably an ISCII byte from column 0 that is followed by a
 * nukta is converted to the precomposed code point in column 1 -- confirm
 * against the caller.
 */
static const uint16_t nuktaSpecialCases[][2]={
    { 16 /*length of array*/ , 0      },
    { 0xA6 , 0x090c },
    { 0xEA , 0x093D },
    { 0xDF , 0x0944 },
    { 0xA1 , 0x0950 },
    { 0xb3 , 0x0958 },
    { 0xb4 , 0x0959 },
    { 0xb5 , 0x095a },
    { 0xba , 0x095b },
    { 0xbf , 0x095c },
    { 0xC0 , 0x095d },
    { 0xc9 , 0x095e },
    { 0xAA , 0x0960 },
    { 0xA7 , 0x0961 },
    { 0xDB , 0x0962 },
    { 0xDC , 0x0963 },
};

832

833

834

/*
 * Writes the one, two or three bytes packed in targetByteUnit to the output.
 *
 *   targetByteUnit <= 0xFF    : one byte
 *   targetByteUnit <= 0xFFFF  : two bytes, high byte first
 *   otherwise                 : three bytes, bits 16-23 first
 *
 * Bytes are stored through `target` while target < targetLimit. Bytes that
 * no longer fit are appended to args->converter->charErrorBuffer and *err is
 * set to U_BUFFER_OVERFLOW_ERROR.
 *
 * When `offsets` is non-null, one offset is stored per byte written to
 * `target`: (source - args->source - 1), decremented once more when the
 * leading byte of a three-byte unit is written.
 *
 * Usage notes:
 *  - `target` and `offsets` are advanced, so they must be modifiable lvalues.
 *  - targetByteUnit is evaluated several times; pass a plain variable.
 *  - `args` and `source` are macro parameters, so the `args->source`
 *    expression in the body is built from the caller's arguments; pass the
 *    identifiers `args` and `source` as all callers in this file do.
 */
#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t offset = (int32_t)(source - args->source-1); \
    /* write the targetByteUnit to target */ \
    if(target < targetLimit){ \
        if(targetByteUnit <= 0xFF){ \
            *(target)++ = (uint8_t)(targetByteUnit); \
            if(offsets){ \
                *(offsets++) = offset; \
            } \
        }else{ \
            if (targetByteUnit > 0xFFFF) { \
                *(target)++ = (uint8_t)(targetByteUnit>>16); \
                if (offsets) { \
                    --offset; \
                    *(offsets++) = offset; \
                } \
            } \
            if (!(target < targetLimit)) { \
                /* only the leading byte of a three-byte unit fit */ \
                args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
                                (uint8_t)(targetByteUnit >> 8); \
                args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
                                (uint8_t)targetByteUnit; \
                *err = U_BUFFER_OVERFLOW_ERROR; \
            } else { \
                *(target)++ = (uint8_t)(targetByteUnit>>8); \
                if(offsets){ \
                    *(offsets++) = offset; \
                } \
                if(target < targetLimit){ \
                    *(target)++ = (uint8_t) targetByteUnit; \
                    if(offsets){ \
                        *(offsets++) = offset; \
                    } \
                }else{ \
                    args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
                                (uint8_t) (targetByteUnit); \
                    *err = U_BUFFER_OVERFLOW_ERROR; \
                } \
            } \
        } \
    }else{ \
        /* no room at all: queue every significant byte in the error buffer */ \
        if (targetByteUnit & 0xFF0000) { \
            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
                        (uint8_t) (targetByteUnit >>16); \
        } \
        if(targetByteUnit & 0xFF00){ \
            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
                        (uint8_t) (targetByteUnit >>8); \
        } \
        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
                        (uint8_t) (targetByteUnit); \
        *err = U_BUFFER_OVERFLOW_ERROR; \
    } \
} UPRV_BLOCK_MACRO_END

888

889

/* Rules:

890

* Explicit Halant :

891

* <HALANT> + <ZWNJ>

892

* Soft Halant :

893

* <HALANT> + <ZWJ>

894

*/

895

static void U_CALLCONV

896

UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(

897

UConverterFromUnicodeArgs * args, UErrorCode * err) {

898

const UChar *source = args->source;

899

const UChar *sourceLimit = args->sourceLimit;

900

unsigned char *target = (unsigned char *) args->target;

901

unsigned char *targetLimit = (unsigned char *) args->targetLimit;

902

int32_t* offsets = args->offsets;

903

uint32_t targetByteUnit = 0x0000;

904

UChar32 sourceChar = 0x0000;

905

UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */

906

UConverterDataISCII *converterData;

907

uint16_t newDelta=0;

908

uint16_t range = 0;

909

UBool deltaChanged = FALSE0;

910

911

if ((args->converter == NULL__null) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {

912

*err = U_ILLEGAL_ARGUMENT_ERROR;

913

return;

914

}

915

/* initialize data */

916

converterData=(UConverterDataISCII*)args->converter->extraInfo;

917

newDelta=converterData->currentDeltaFromUnicode;

918

range = (uint16_t)(newDelta/DELTA);

919

920

if ((sourceChar = args->converter->fromUChar32)!=0) {

921

goto getTrail;

922

}

923

924

/*writing the char to the output stream */

925

while (source < sourceLimit) {

926

/* Write the language code following LF only if LF is not the last character. */

927

if (args->converter->fromUnicodeStatus == LF0x0A) {

928

targetByteUnit = ATR0xEF<<8;

929

targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;

930

args->converter->fromUnicodeStatus = 0x0000;

931

/* now append ATR and language code */

932

WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err)do { int32_t offset = (int32_t)(source - args->source-1); if
(target < targetLimit){ if(targetByteUnit <= 0xFF){ *(target
)++ = (uint8_t)(targetByteUnit); if(offsets){ *(offsets++) = offset
; } }else{ if (targetByteUnit > 0xFFFF) { *(target)++ = (uint8_t
)(targetByteUnit>>16); if (offsets) { --offset; *(offsets
++) = offset; } } if (!(target < targetLimit)) { args->
converter->charErrorBuffer[args->converter->charErrorBufferLength
++] = (uint8_t)(targetByteUnit >> 8); args->converter
->charErrorBuffer[args->converter->charErrorBufferLength
++] = (uint8_t)targetByteUnit; *err = U_BUFFER_OVERFLOW_ERROR
; } else { *(target)++ = (uint8_t)(targetByteUnit>>8); if
(offsets){ *(offsets++) = offset; } if(target < targetLimit
){ *(target)++ = (uint8_t) targetByteUnit; if(offsets){ *(offsets
++) = offset ; } }else{ args->converter->charErrorBuffer
[args->converter->charErrorBufferLength++] = (uint8_t) (
targetByteUnit); *err = U_BUFFER_OVERFLOW_ERROR; } } } }else{
if (targetByteUnit & 0xFF0000) { args->converter->
charErrorBuffer[args->converter->charErrorBufferLength++
] = (uint8_t) (targetByteUnit >>16); } if(targetByteUnit
& 0xFF00){ args->converter->charErrorBuffer[args->
converter->charErrorBufferLength++] = (uint8_t) (targetByteUnit
>>8); } args->converter->charErrorBuffer[args->
converter->charErrorBufferLength++] = (uint8_t) (targetByteUnit
); *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

933

if (U_FAILURE(*err)) {

934

break;

935

}

936

}

937

938

sourceChar = *source++;

939

tempContextFromUnicode = converterData->contextCharFromUnicode;

940

941

targetByteUnit = missingCharMarker0xFFFF;

942

943

/*check if input is in ASCII and C0 control codes range*/

944

if (sourceChar <= ASCII_END0xA0) {

945

args->converter->fromUnicodeStatus = sourceChar;

946

WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err)do { int32_t offset = (int32_t)(source - args->source-1); if
(target < targetLimit){ if(sourceChar <= 0xFF){ *(target
)++ = (uint8_t)(sourceChar); if(offsets){ *(offsets++) = offset
; } }else{ if (sourceChar > 0xFFFF) { *(target)++ = (uint8_t
)(sourceChar>>16); if (offsets) { --offset; *(offsets++
) = offset; } } if (!(target < targetLimit)) { args->converter
->charErrorBuffer[args->converter->charErrorBufferLength
++] = (uint8_t)(sourceChar >> 8); args->converter->
charErrorBuffer[args->converter->charErrorBufferLength++
] = (uint8_t)sourceChar; *err = U_BUFFER_OVERFLOW_ERROR; } else
{ *(target)++ = (uint8_t)(sourceChar>>8); if(offsets){
*(offsets++) = offset; } if(target < targetLimit){ *(target
)++ = (uint8_t) sourceChar; if(offsets){ *(offsets++) = offset
; } }else{ args->converter->charErrorBuffer[args->converter
->charErrorBufferLength++] = (uint8_t) (sourceChar); *err =
U_BUFFER_OVERFLOW_ERROR; } } } }else{ if (sourceChar & 0xFF0000
) { args->converter->charErrorBuffer[args->converter
->charErrorBufferLength++] = (uint8_t) (sourceChar >>
16); } if(sourceChar & 0xFF00){ args->converter->charErrorBuffer
[args->converter->charErrorBufferLength++] = (uint8_t) (
sourceChar >>8); } args->converter->charErrorBuffer
[args->converter->charErrorBufferLength++] = (uint8_t) (
sourceChar); *err = U_BUFFER_OVERFLOW_ERROR; } } while (false
);

947

if (U_FAILURE(*err)) {

948

break;

949

}

950

continue;

951

}

952

switch (sourceChar) {

953

case ZWNJ0x200c:

954

/* contextChar has HALANT */

955

if (converterData->contextCharFromUnicode) {

956

converterData->contextCharFromUnicode = 0x00;

957

targetByteUnit = ISCII_HALANT0xE8;

958

} else {

959

/* consume ZWNJ and continue */

960

converterData->contextCharFromUnicode = 0x00;

961

continue;

962

}

963

break;

964

case ZWJ0x200d:

965

/* contextChar has HALANT */

966

if (converterData->contextCharFromUnicode) {

967

targetByteUnit = ISCII_NUKTA0xE9;

968

} else {

969

targetByteUnit =ISCII_INV0xD9;

970

}

971

converterData->contextCharFromUnicode = 0x00;

972

break;

973

default:

974

/* is the sourceChar in the INDIC_RANGE? */

975

if ((uint16_t)(INDIC_BLOCK_END0x0D7F-sourceChar) <= INDIC_RANGE(0x0D7F - 0x0900)) {

976

/* Danda and Double Danda are valid in Northern scripts.. since Unicode

977

* does not include these codepoints in all Northern scrips we need to

978

* filter them out

979

*/

980

if (sourceChar!= DANDA0x0964 && sourceChar != DOUBLE_DANDA0x0965) {

981

/* find out to which block the souceChar belongs*/

982

range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN0x0900)/DELTA);

983

newDelta =(uint16_t)(range*DELTA);

984

985

/* Now are we in the same block as the previous? */

986

if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {

987

converterData->currentDeltaFromUnicode = newDelta;

988

converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;

989

deltaChanged =TRUE1;

990

converterData->isFirstBuffer=FALSE0;

991

}

992

993

if (converterData->currentDeltaFromUnicode == PNJ_DELTA0x0100) {

994

if (sourceChar == PNJ_TIPPI0x0A70) {

995

/* Make sure Tippi is converted to Bindi. */

996

sourceChar = PNJ_BINDI0x0A02;

997

} else if (sourceChar == PNJ_ADHAK0x0A71) {

998

/* This is for consonant cluster handling. */

999

converterData->contextCharFromUnicode = PNJ_ADHAK0x0A71;

1000

}

1001

1002

}

1003

/* Normalize all Indic codepoints to Devanagari and map them to ISCII */

1004

/* now subtract the new delta from sourceChar*/

1005

sourceChar -= converterData->currentDeltaFromUnicode;

1006

}

1007

1008

/* get the target byte unit */

1009

targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];

1010

1011

/* is the code point valid in current script? */

1012

if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {

1013

/* Vocallic RR is assigned in ISCII Telugu and Unicode */

1014

if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTADELTA * TELUGU) || sourceChar!=VOCALLIC_RR0x0931) {

1015

targetByteUnit=missingCharMarker0xFFFF;

1016

}

1017

}

1018

1019

if (deltaChanged) {

1020

/* we are in a script block which is different than

1021

* previous sourceChar's script block write ATR and language codes

1022

*/

1023

uint32_t temp=0;

1024

temp =(uint16_t)(ATR0xEF<<8);

1025

temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);

1026

/* reset */

1027

deltaChanged=FALSE0;

1028

/* now append ATR and language code */

1029

WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err)do { int32_t offset = (int32_t)(source - args->source-1); if
(target < targetLimit){ if(temp <= 0xFF){ *(target)++ =
(uint8_t)(temp); if(offsets){ *(offsets++) = offset; } }else
{ if (temp > 0xFFFF) { *(target)++ = (uint8_t)(temp>>
16); if (offsets) { --offset; *(offsets++) = offset; } } if (
!(target < targetLimit)) { args->converter->charErrorBuffer
[args->converter->charErrorBufferLength++] = (uint8_t)(
temp >> 8); args->converter->charErrorBuffer[args
->converter->charErrorBufferLength++] = (uint8_t)temp; *
err = U_BUFFER_OVERFLOW_ERROR; } else { *(target)++ = (uint8_t
)(temp>>8); if(offsets){ *(offsets++) = offset; } if(target
< targetLimit){ *(target)++ = (uint8_t) temp; if(offsets)
{ *(offsets++) = offset ; } }else{ args->converter->charErrorBuffer
[args->converter->charErrorBufferLength++] = (uint8_t) (
temp); *err = U_BUFFER_OVERFLOW_ERROR; } } } }else{ if (temp &
0xFF0000) { args->converter->charErrorBuffer[args->
converter->charErrorBufferLength++] = (uint8_t) (temp >>
16); } if(temp & 0xFF00){ args->converter->charErrorBuffer
[args->converter->charErrorBufferLength++] = (uint8_t) (
temp >>8); } args->converter->charErrorBuffer[args
->converter->charErrorBufferLength++] = (uint8_t) (temp
); *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1030

if (U_FAILURE(*err)) {

1031

break;

1032

}

1033

}

1034

1035

if (converterData->currentDeltaFromUnicode == PNJ_DELTA0x0100 && (sourceChar + PNJ_DELTA0x0100) == PNJ_ADHAK0x0A71) {

1036

continue;

1037

}

1038

}

1039

/* reset context char */

1040

converterData->contextCharFromUnicode = 0x00;

1041

break;

1042

}

1043

if (converterData->currentDeltaFromUnicode == PNJ_DELTA0x0100 && tempContextFromUnicode == PNJ_ADHAK0x0A71 && isPNJConsonant((sourceChar + PNJ_DELTA0x0100))) {

1044

/* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */

1045

/* reset context char */

1046

converterData->contextCharFromUnicode = 0x0000;

1047

targetByteUnit = targetByteUnit << 16 | ISCII_HALANT0xE8 << 8 | targetByteUnit;

1048

/* write targetByteUnit to target */

1049

WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err)do { int32_t offset = (int32_t)(source - args->source-1); if
(target < targetLimit){ if(targetByteUnit <= 0xFF){ *(target
)++ = (uint8_t)(targetByteUnit); if(offsets){ *(offsets++) = offset
; } }else{ if (targetByteUnit > 0xFFFF) { *(target)++ = (uint8_t
)(targetByteUnit>>16); if (offsets) { --offset; *(offsets
++) = offset; } } if (!(target < targetLimit)) { args->
converter->charErrorBuffer[args->converter->charErrorBufferLength
++] = (uint8_t)(targetByteUnit >> 8); args->converter
->charErrorBuffer[args->converter->charErrorBufferLength
++] = (uint8_t)targetByteUnit; *err = U_BUFFER_OVERFLOW_ERROR
; } else { *(target)++ = (uint8_t)(targetByteUnit>>8); if
(offsets){ *(offsets++) = offset; } if(target < targetLimit
){ *(target)++ = (uint8_t) targetByteUnit; if(offsets){ *(offsets
++) = offset ; } }else{ args->converter->charErrorBuffer
[args->converter->charErrorBufferLength++] = (uint8_t) (
targetByteUnit); *err = U_BUFFER_OVERFLOW_ERROR; } } } }else{
if (targetByteUnit & 0xFF0000) { args->converter->
charErrorBuffer[args->converter->charErrorBufferLength++
] = (uint8_t) (targetByteUnit >>16); } if(targetByteUnit
& 0xFF00){ args->converter->charErrorBuffer[args->
converter->charErrorBufferLength++] = (uint8_t) (targetByteUnit
>>8); } args->converter->charErrorBuffer[args->
converter->charErrorBufferLength++] = (uint8_t) (targetByteUnit
); *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1050

if (U_FAILURE(*err)) {

1051

break;

1052

}

1053

} else if (targetByteUnit != missingCharMarker0xFFFF) {

1054

if (targetByteUnit==ISCII_HALANT0xE8) {

1055

converterData->contextCharFromUnicode = (UChar)targetByteUnit;

1056

}

1057

/* write targetByteUnit to target*/

1058

WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err)do { int32_t offset = (int32_t)(source - args->source-1); if
(target < targetLimit){ if(targetByteUnit <= 0xFF){ *(target
)++ = (uint8_t)(targetByteUnit); if(offsets){ *(offsets++) = offset
; } }else{ if (targetByteUnit > 0xFFFF) { *(target)++ = (uint8_t
)(targetByteUnit>>16); if (offsets) { --offset; *(offsets
++) = offset; } } if (!(target < targetLimit)) { args->
converter->charErrorBuffer[args->converter->charErrorBufferLength
++] = (uint8_t)(targetByteUnit >> 8); args->converter
->charErrorBuffer[args->converter->charErrorBufferLength
++] = (uint8_t)targetByteUnit; *err = U_BUFFER_OVERFLOW_ERROR
; } else { *(target)++ = (uint8_t)(targetByteUnit>>8); if
(offsets){ *(offsets++) = offset; } if(target < targetLimit
){ *(target)++ = (uint8_t) targetByteUnit; if(offsets){ *(offsets
++) = offset ; } }else{ args->converter->charErrorBuffer
[args->converter->charErrorBufferLength++] = (uint8_t) (
targetByteUnit); *err = U_BUFFER_OVERFLOW_ERROR; } } } }else{
if (targetByteUnit & 0xFF0000) { args->converter->
charErrorBuffer[args->converter->charErrorBufferLength++
] = (uint8_t) (targetByteUnit >>16); } if(targetByteUnit
& 0xFF00){ args->converter->charErrorBuffer[args->
converter->charErrorBufferLength++] = (uint8_t) (targetByteUnit
>>8); } args->converter->charErrorBuffer[args->
converter->charErrorBufferLength++] = (uint8_t) (targetByteUnit
); *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1059

if (U_FAILURE(*err)) {

1060

break;

1061

}

1062

} else {

1063

/* oops.. the code point is unassigned */

1064

/*check if the char is a First surrogate*/

1065

if (U16_IS_SURROGATE(sourceChar)(((sourceChar)&0xfffff800)==0xd800)) {

1066

if (U16_IS_SURROGATE_LEAD(sourceChar)(((sourceChar)&0x400)==0)) {

1067

getTrail:

1068

/*look ahead to find the trail surrogate*/

1069

if (source < sourceLimit) {

1070

/* test the following code unit */

1071

UChar trail= (*source);

1072

if (U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {

1073

source++;

1074

sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail)(((UChar32)(sourceChar)<<10UL)+(UChar32)(trail)-((0xd800
<<10UL)+0xdc00-0x10000));

1075

*err =U_INVALID_CHAR_FOUND;

1076

/* convert this surrogate code point */

1077

/* exit this condition tree */

1078

} else {

1079

/* this is an unmatched lead code unit (1st surrogate) */

1080

/* callback(illegal) */

1081

*err=U_ILLEGAL_CHAR_FOUND;

1082

}

1083

} else {

1084

/* no more input */

1085

*err = U_ZERO_ERROR;

1086

}

1087

} else {

1088

/* this is an unmatched trail code unit (2nd surrogate) */

1089

/* callback(illegal) */

1090

*err=U_ILLEGAL_CHAR_FOUND;

1091

}

1092

} else {

1093

/* callback(unassigned) for a BMP code point */

1094

*err = U_INVALID_CHAR_FOUND;

1095

}

1096

1097

args->converter->fromUChar32=sourceChar;

1098

break;

1099

}

1100

}/* end while(mySourceIndex<mySourceLength) */

1101

1102

/*save the state and return */

1103

args->source = source;

1104

args->target = (char*)target;

1105

}

1106

1107

static const uint16_t lookupTable[][2]={

1108

{ ZERO, ZERO }, /*DEFAULT*/

1109

{ ZERO, ZERO }, /*ROMAN*/

1110

{ DEVANAGARI, DEV_MASK },

1111

{ BENGALI, BNG_MASK },

1112

{ TAMIL, TML_MASK },

1113

{ TELUGU, KND_MASK },

1114

{ BENGALI, BNG_MASK },

1115

{ ORIYA, ORI_MASK },

1116

{ KANNADA, KND_MASK },

1117

{ MALAYALAM, MLM_MASK },

1118

{ GUJARATI, GJR_MASK },

1119

{ GURMUKHI, PNJ_MASK }

1120

};

1121

1122

/*
 * Writes one UTF-16 code unit produced by the ISCII-to-Unicode converter.
 *
 * targetUniChar is first moved into the current script block by adding
 * `delta`, unless it is at or below ASCII_END or is one of ZWJ, ZWNJ, DANDA
 * or DOUBLE_DANDA. The adjustment is stored back into targetUniChar, so the
 * argument must be a modifiable lvalue.
 *
 * The unit is then stored through `target` (and `offset` through `offsets`
 * when that pointer is non-null) if target < args->targetLimit; otherwise it
 * is appended to args->converter->UCharErrorBuffer and *err is set to
 * U_BUFFER_OVERFLOW_ERROR.
 *
 * `target` and `offsets` are advanced. The `source` parameter is not used by
 * the macro body.
 */
#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err) UPRV_BLOCK_MACRO_BEGIN { \
    /* add offset to current Indic Block */ \
    if(targetUniChar>ASCII_END && \
           targetUniChar != ZWJ && \
           targetUniChar != ZWNJ && \
           targetUniChar != DANDA && \
           targetUniChar != DOUBLE_DANDA){ \
        \
           targetUniChar+=(uint16_t)(delta); \
    } \
    /* now write the targetUniChar */ \
    if(target<args->targetLimit){ \
        *(target)++ = (UChar)targetUniChar; \
        if(offsets){ \
            *(offsets)++ = (int32_t)(offset); \
        } \
    }else{ \
        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \
            (UChar)targetUniChar; \
        *err = U_BUFFER_OVERFLOW_ERROR; \
    } \
} UPRV_BLOCK_MACRO_END

1144

1145

/*
 * Looks up the Unicode mapping of an ISCII byte.
 *
 * targetUniChar receives toUnicodeTable[sourceChar]. For bytes above
 * ASCII_END the result is then checked against the current script:
 * if validityTable[targetUniChar & 0x7F] has no bit in common with
 * data->currentMaskToUnicode, targetUniChar is replaced by
 * missingCharMarker. The single exception is VOCALLIC_RR while
 * data->currentDeltaToUnicode equals TELUGU_DELTA, which is kept.
 *
 * sourceChar and targetUniChar are evaluated more than once; pass plain
 * variables. targetUniChar must be a modifiable lvalue.
 */
#define GET_MAPPING(sourceChar,targetUniChar,data) UPRV_BLOCK_MACRO_BEGIN { \
    targetUniChar = toUnicodeTable[(sourceChar)] ; \
    /* is the code point valid in current script? */ \
    if(sourceChar> ASCII_END && \
            (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \
        /* Vocallic RR is assigned in ISCII Telugu and Unicode */ \
        if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \
                    targetUniChar!=VOCALLIC_RR){ \
            targetUniChar=missingCharMarker; \
        } \
    } \
} UPRV_BLOCK_MACRO_END

1157

1158

/***********
 *  Rules for ISCII to Unicode converter
 *  ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
 *  which has both precomposed and decomposed forms of characters,
 *  pre-context and post-context need to be considered.
 *
 *  Post context
 *  i)  ATR : Attribute code is used to declare the font and script switching.
 *      Currently we only switch scripts; font codes are consumed without generating an error.
 *  ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
 *      obsolete characters
 *  Pre context
 *  i)  Halant: if preceded by a halant then it is an explicit halant
 *  ii) Nukta :
 *       a) if preceded by a halant then it is a soft halant
 *       b) if preceded by specific consonants and the ligatures have pre-composed
 *          characters in Unicode then convert to pre-composed characters
 *  iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
 *
 */

1178

1179

static void U_CALLCONV

1180

UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {

1181

const char *source = ( char *) args->source;

1182

UChar *target = args->target;

1183

const char *sourceLimit = args->sourceLimit;

1184

const UChar* targetLimit = args->targetLimit;

1185

uint32_t targetUniChar = 0x0000;

1186

uint8_t sourceChar = 0x0000;

1187

UConverterDataISCII* data;

1188

UChar32* toUnicodeStatus=NULL__null;

1189

UChar32 tempTargetUniChar = 0x0000;

1190

UChar* contextCharToUnicode= NULL__null;

1191

UBool found;

1192

int i;

1193

int offset = 0;

1194

1195

if ((args->converter == NULL__null) || (target < args->target) || (source < args->source)) {

1196

*err = U_ILLEGAL_ARGUMENT_ERROR;

1197

return;

1198

}

1199

1200

data = (UConverterDataISCII*)(args->converter->extraInfo);

1201

contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */

1202

toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/

1203

1204

while (U_SUCCESS(*err) && source<sourceLimit) {

1205

1206

targetUniChar = missingCharMarker0xFFFF;

1207

1208

if (target < targetLimit) {

1209

sourceChar = (unsigned char)*(source)++;

1210

1211

/* look at the post-context perform special processing */

1212

if (*contextCharToUnicode==ATR0xEF) {

1213

1214

/* If we have ATR in *contextCharToUnicode then we need to change our

1215

* state to the Indic Script specified by sourceChar

1216

*/

1217

1218

/* check if the sourceChar is supported script range*/

1219

if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {

1220

data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);

1221

data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];

1222

} else if (sourceChar==DEF) {

1223

/* switch back to default */

1224

data->currentDeltaToUnicode = data->defDeltaToUnicode;

1225

data->currentMaskToUnicode = data->defMaskToUnicode;

1226

} else {

1227

if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {

1228

/* these are display codes consume and continue */

1229

} else {

1230

*err =U_ILLEGAL_CHAR_FOUND;

1231

/* reset */

1232

*contextCharToUnicode=NO_CHAR_MARKER0xFFFE;

1233

goto CALLBACK;

1234

}

1235

}

1236

1237

/* reset */

1238

*contextCharToUnicode=NO_CHAR_MARKER0xFFFE;

1239

1240

continue;

1241

1242

} else if (*contextCharToUnicode==EXT0xF0) {

1243

/* check if sourceChar is in 0xA1-0xEE range */

1244

if ((uint8_t) (EXT_RANGE_END0xEE - sourceChar) <= (EXT_RANGE_END0xEE - EXT_RANGE_BEGIN0xA1)) {

1245

/* We currently support only Anudatta and Devanagari abbreviation sign */

1246

if (sourceChar==0xBF || sourceChar == 0xB8) {

1247

targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN0x0970 : DEV_ANUDATTA0x0952;

1248

1249

/* find out if the mapping is valid in this state */

1250

if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {

1251

*contextCharToUnicode= NO_CHAR_MARKER0xFFFE;

1252

1253

/* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */

1254

if (data->prevToUnicodeStatus) {

1255

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err)do { if(data->prevToUnicodeStatus>0xA0 && data->
prevToUnicodeStatus != 0x200d && data->prevToUnicodeStatus
!= 0x200c && data->prevToUnicodeStatus != 0x0964 &&
data->prevToUnicodeStatus != 0x0965){ data->prevToUnicodeStatus
+=(uint16_t)(0); } if(target<args->targetLimit){ *(target
)++ = (UChar)data->prevToUnicodeStatus; if(args->offsets
){ *(args->offsets)++ = (int32_t)((source-args->source -
1)); } }else{ args->converter->UCharErrorBuffer[args->
converter->UCharErrorBufferLength++] = (UChar)data->prevToUnicodeStatus
; *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1256

data->prevToUnicodeStatus = 0x0000;

1257

}

1258

/* write to target */

1259

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err)do { if(targetUniChar>0xA0 && targetUniChar != 0x200d
&& targetUniChar != 0x200c && targetUniChar !=
0x0964 && targetUniChar != 0x0965){ targetUniChar+=(
uint16_t)(data->currentDeltaToUnicode); } if(target<args
->targetLimit){ *(target)++ = (UChar)targetUniChar; if(args
->offsets){ *(args->offsets)++ = (int32_t)((source-args
->source -2)); } }else{ args->converter->UCharErrorBuffer
[args->converter->UCharErrorBufferLength++] = (UChar)targetUniChar
; *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1260

1261

continue;

1262

}

1263

}

1264

/* byte unit is unassigned */

1265

targetUniChar = missingCharMarker0xFFFF;

1266

*err= U_INVALID_CHAR_FOUND;

1267

} else {

1268

/* only 0xA1 - 0xEE are legal after EXT char */

1269

*contextCharToUnicode= NO_CHAR_MARKER0xFFFE;

1270

*err = U_ILLEGAL_CHAR_FOUND;

1271

}

1272

goto CALLBACK;

1273

} else if (*contextCharToUnicode==ISCII_INV0xD9) {

1274

if (sourceChar==ISCII_HALANT0xE8) {

1275

targetUniChar = 0x0020; /* replace with space according to Indic FAQ */

1276

} else {

1277

targetUniChar = ZWJ0x200d;

1278

}

1279

1280

/* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */

1281

if (data->prevToUnicodeStatus) {

1282

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err)do { if(data->prevToUnicodeStatus>0xA0 && data->
prevToUnicodeStatus != 0x200d && data->prevToUnicodeStatus
!= 0x200c && data->prevToUnicodeStatus != 0x0964 &&
data->prevToUnicodeStatus != 0x0965){ data->prevToUnicodeStatus
+=(uint16_t)(0); } if(target<args->targetLimit){ *(target
)++ = (UChar)data->prevToUnicodeStatus; if(args->offsets
){ *(args->offsets)++ = (int32_t)((source-args->source -
1)); } }else{ args->converter->UCharErrorBuffer[args->
converter->UCharErrorBufferLength++] = (UChar)data->prevToUnicodeStatus
; *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1283

data->prevToUnicodeStatus = 0x0000;

1284

}

1285

/* write to target */

1286

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err)do { if(targetUniChar>0xA0 && targetUniChar != 0x200d
&& targetUniChar != 0x200c && targetUniChar !=
0x0964 && targetUniChar != 0x0965){ targetUniChar+=(
uint16_t)(data->currentDeltaToUnicode); } if(target<args
->targetLimit){ *(target)++ = (UChar)targetUniChar; if(args
->offsets){ *(args->offsets)++ = (int32_t)((source-args
->source -2)); } }else{ args->converter->UCharErrorBuffer
[args->converter->UCharErrorBufferLength++] = (UChar)targetUniChar
; *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1287

/* reset */

1288

*contextCharToUnicode=NO_CHAR_MARKER0xFFFE;

1289

}

1290

1291

/* look at the pre-context and perform special processing */

1292

switch (sourceChar) {

1293

case ISCII_INV0xD9:

1294

case EXT0xF0:

1295

case ATR0xEF:

1296

*contextCharToUnicode = (UChar)sourceChar;

1297

1298

if (*toUnicodeStatus != missingCharMarker0xFFFF) {

1299

/* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */

1300

if (data->prevToUnicodeStatus) {

1301

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err)do { if(data->prevToUnicodeStatus>0xA0 && data->
prevToUnicodeStatus != 0x200d && data->prevToUnicodeStatus
!= 0x200c && data->prevToUnicodeStatus != 0x0964 &&
data->prevToUnicodeStatus != 0x0965){ data->prevToUnicodeStatus
+=(uint16_t)(0); } if(target<args->targetLimit){ *(target
)++ = (UChar)data->prevToUnicodeStatus; if(args->offsets
){ *(args->offsets)++ = (int32_t)((source-args->source -
1)); } }else{ args->converter->UCharErrorBuffer[args->
converter->UCharErrorBufferLength++] = (UChar)data->prevToUnicodeStatus
; *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1302

data->prevToUnicodeStatus = 0x0000;

1303

}

1304

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err)do { if(*toUnicodeStatus>0xA0 && *toUnicodeStatus !=
0x200d && *toUnicodeStatus != 0x200c && *toUnicodeStatus
!= 0x0964 && *toUnicodeStatus != 0x0965){ *toUnicodeStatus
+=(uint16_t)(data->currentDeltaToUnicode); } if(target<
args->targetLimit){ *(target)++ = (UChar)*toUnicodeStatus;
if(args->offsets){ *(args->offsets)++ = (int32_t)((source
-args->source -2)); } }else{ args->converter->UCharErrorBuffer
[args->converter->UCharErrorBufferLength++] = (UChar)*toUnicodeStatus
; *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1305

*toUnicodeStatus = missingCharMarker0xFFFF;

1306

}

1307

continue;

1308

case ISCII_DANDA0xEA:

1309

/* handle double danda*/

1310

if (*contextCharToUnicode== ISCII_DANDA0xEA) {

1311

targetUniChar = DOUBLE_DANDA0x0965;

1312

/* clear the context */

1313

*contextCharToUnicode = NO_CHAR_MARKER0xFFFE;

1314

*toUnicodeStatus = missingCharMarker0xFFFF;

1315

} else {

1316

GET_MAPPING(sourceChar,targetUniChar,data)do { targetUniChar = toUnicodeTable[(sourceChar)] ; if(sourceChar
> 0xA0 && (validityTable[(targetUniChar & 0x7F
)] & data->currentMaskToUnicode)==0){ if(data->currentDeltaToUnicode
!=(DELTA * TELUGU) || targetUniChar!=0x0931){ targetUniChar=0xFFFF
; } } } while (false);

1317

*contextCharToUnicode = sourceChar;

1318

}

1319

break;

1320

case ISCII_HALANT0xE8:

1321

/* handle explicit halant */

1322

if (*contextCharToUnicode == ISCII_HALANT0xE8) {

1323

targetUniChar = ZWNJ0x200c;

1324

/* clear the context */

1325

*contextCharToUnicode = NO_CHAR_MARKER0xFFFE;

1326

} else {

1327

GET_MAPPING(sourceChar,targetUniChar,data)do { targetUniChar = toUnicodeTable[(sourceChar)] ; if(sourceChar
> 0xA0 && (validityTable[(targetUniChar & 0x7F
)] & data->currentMaskToUnicode)==0){ if(data->currentDeltaToUnicode
!=(DELTA * TELUGU) || targetUniChar!=0x0931){ targetUniChar=0xFFFF
; } } } while (false);

1328

*contextCharToUnicode = sourceChar;

1329

}

1330

break;

1331

case 0x0A:

1332

case 0x0D:

1333

data->resetToDefaultToUnicode = TRUE1;

1334

GET_MAPPING(sourceChar,targetUniChar,data)do { targetUniChar = toUnicodeTable[(sourceChar)] ; if(sourceChar
> 0xA0 && (validityTable[(targetUniChar & 0x7F
)] & data->currentMaskToUnicode)==0){ if(data->currentDeltaToUnicode
!=(DELTA * TELUGU) || targetUniChar!=0x0931){ targetUniChar=0xFFFF
; } } } while (false)

1335

;

1336

*contextCharToUnicode = sourceChar;

1337

break;

1338

1339

case ISCII_VOWEL_SIGN_E0xE0:

1340

i=1;

1341

found=FALSE0;

1342

for (; i<vowelSignESpecialCases[0][0]; i++) {

1343

U_ASSERT(i<UPRV_LENGTHOF(vowelSignESpecialCases))(void)0;

1344

if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {

1345

targetUniChar=vowelSignESpecialCases[i][1];

1346

found=TRUE1;

1347

break;

1348

}

1349

}

1350

if (found) {

1351

/* find out if the mapping is valid in this state */

1352

if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {

1353

/*targetUniChar += data->currentDeltaToUnicode ;*/

1354

*contextCharToUnicode= NO_CHAR_MARKER0xFFFE;

1355

*toUnicodeStatus = missingCharMarker0xFFFF;

1356

break;

1357

}

1358

}

1359

GET_MAPPING(sourceChar,targetUniChar,data)do { targetUniChar = toUnicodeTable[(sourceChar)] ; if(sourceChar
> 0xA0 && (validityTable[(targetUniChar & 0x7F
)] & data->currentMaskToUnicode)==0){ if(data->currentDeltaToUnicode
!=(DELTA * TELUGU) || targetUniChar!=0x0931){ targetUniChar=0xFFFF
; } } } while (false);

1360

*contextCharToUnicode = sourceChar;

1361

break;

1362

1363

case ISCII_NUKTA0xE9:

1364

/* handle soft halant */

1365

if (*contextCharToUnicode == ISCII_HALANT0xE8) {

1366

targetUniChar = ZWJ0x200d;

1367

/* clear the context */

1368

*contextCharToUnicode = NO_CHAR_MARKER0xFFFE;

1369

break;

1370

} else if (data->currentDeltaToUnicode == PNJ_DELTA0x0100 && data->contextCharToUnicode == 0xc0) {

1371

/* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */

1372

if (data->prevToUnicodeStatus) {

1373

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err)do { if(data->prevToUnicodeStatus>0xA0 && data->
prevToUnicodeStatus != 0x200d && data->prevToUnicodeStatus
!= 0x200c && data->prevToUnicodeStatus != 0x0964 &&
data->prevToUnicodeStatus != 0x0965){ data->prevToUnicodeStatus
+=(uint16_t)(0); } if(target<args->targetLimit){ *(target
)++ = (UChar)data->prevToUnicodeStatus; if(args->offsets
){ *(args->offsets)++ = (int32_t)((source-args->source -
1)); } }else{ args->converter->UCharErrorBuffer[args->
converter->UCharErrorBufferLength++] = (UChar)data->prevToUnicodeStatus
; *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1374

data->prevToUnicodeStatus = 0x0000;

1375

}

1376

/* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.

1377

* In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).

1378

*/

1379

targetUniChar = PNJ_RRA0x0A5C;

1380

WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err)do { if(targetUniChar>0xA0 && targetUniChar != 0x200d
&& targetUniChar != 0x200c && targetUniChar !=
0x0964 && targetUniChar != 0x0965){ targetUniChar+=(
uint16_t)(0); } if(target<args->targetLimit){ *(target)
++ = (UChar)targetUniChar; if(args->offsets){ *(args->offsets
)++ = (int32_t)((source-args->source)-2); } }else{ args->
converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength
++] = (UChar)targetUniChar; *err = U_BUFFER_OVERFLOW_ERROR; }
} while (false);

1381

if (U_SUCCESS(*err)) {

1382

targetUniChar = PNJ_SIGN_VIRAMA0x0A4D;

1383

WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err)do { if(targetUniChar>0xA0 && targetUniChar != 0x200d
&& targetUniChar != 0x200c && targetUniChar !=
0x0964 && targetUniChar != 0x0965){ targetUniChar+=(
uint16_t)(0); } if(target<args->targetLimit){ *(target)
++ = (UChar)targetUniChar; if(args->offsets){ *(args->offsets
)++ = (int32_t)((source-args->source)-2); } }else{ args->
converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength
++] = (UChar)targetUniChar; *err = U_BUFFER_OVERFLOW_ERROR; }
} while (false);

1384

if (U_SUCCESS(*err)) {

1385

targetUniChar = PNJ_HA0x0A39;

1386

WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err)do { if(targetUniChar>0xA0 && targetUniChar != 0x200d
&& targetUniChar != 0x200c && targetUniChar !=
0x0964 && targetUniChar != 0x0965){ targetUniChar+=(
uint16_t)(0); } if(target<args->targetLimit){ *(target)
++ = (UChar)targetUniChar; if(args->offsets){ *(args->offsets
)++ = (int32_t)((source-args->source)-2); } }else{ args->
converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength
++] = (UChar)targetUniChar; *err = U_BUFFER_OVERFLOW_ERROR; }
} while (false);

1387

} else {

1388

args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA0x0A39;

1389

}

1390

} else {

1391

args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA0x0A4D;

1392

args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA0x0A39;

1393

}

1394

*toUnicodeStatus = missingCharMarker0xFFFF;

1395

data->contextCharToUnicode = NO_CHAR_MARKER0xFFFE;

1396

continue;

1397

} else {

1398

/* try to handle <CHAR> + ISCII_NUKTA special mappings */

1399

i=1;

1400

found =FALSE0;

1401

for (; i<nuktaSpecialCases[0][0]; i++) {

1402

if (nuktaSpecialCases[i][0]==(uint8_t)

1403

*contextCharToUnicode) {

1404

targetUniChar=nuktaSpecialCases[i][1];

1405

found =TRUE1;

1406

break;

1407

}

1408

}

1409

if (found) {

1410

/* find out if the mapping is valid in this state */

1411

if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {

1412

/*targetUniChar += data->currentDeltaToUnicode ;*/

1413

*contextCharToUnicode= NO_CHAR_MARKER0xFFFE;

1414

*toUnicodeStatus = missingCharMarker0xFFFF;

1415

if (data->currentDeltaToUnicode == PNJ_DELTA0x0100) {

1416

/* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */

1417

if (data->prevToUnicodeStatus) {

1418

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err)do { if(data->prevToUnicodeStatus>0xA0 && data->
prevToUnicodeStatus != 0x200d && data->prevToUnicodeStatus
!= 0x200c && data->prevToUnicodeStatus != 0x0964 &&
data->prevToUnicodeStatus != 0x0965){ data->prevToUnicodeStatus
+=(uint16_t)(0); } if(target<args->targetLimit){ *(target
)++ = (UChar)data->prevToUnicodeStatus; if(args->offsets
){ *(args->offsets)++ = (int32_t)((source-args->source -
1)); } }else{ args->converter->UCharErrorBuffer[args->
converter->UCharErrorBufferLength++] = (UChar)data->prevToUnicodeStatus
; *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1419

data->prevToUnicodeStatus = 0x0000;

1420

}

1421

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err)do { if(targetUniChar>0xA0 && targetUniChar != 0x200d
&& targetUniChar != 0x200c && targetUniChar !=
0x0964 && targetUniChar != 0x0965){ targetUniChar+=(
uint16_t)(data->currentDeltaToUnicode); } if(target<args
->targetLimit){ *(target)++ = (UChar)targetUniChar; if(args
->offsets){ *(args->offsets)++ = (int32_t)((source-args
->source -2)); } }else{ args->converter->UCharErrorBuffer
[args->converter->UCharErrorBufferLength++] = (UChar)targetUniChar
; *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1422

continue;

1423

}

1424

break;

1425

}

1426

/* else fall through to default */

1427

}

1428

/* else fall through to default */

1429

U_FALLTHROUGH[[clang::fallthrough]];

1430

}

1431

default:GET_MAPPING(sourceChar,targetUniChar,data)do { targetUniChar = toUnicodeTable[(sourceChar)] ; if(sourceChar
> 0xA0 && (validityTable[(targetUniChar & 0x7F
)] & data->currentMaskToUnicode)==0){ if(data->currentDeltaToUnicode
!=(DELTA * TELUGU) || targetUniChar!=0x0931){ targetUniChar=0xFFFF
; } } } while (false)

1432

;

1433

*contextCharToUnicode = sourceChar;

1434

break;

1435

}

1436

1437

if (*toUnicodeStatus != missingCharMarker0xFFFF) {

1438

/* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */

1439

if (data->currentDeltaToUnicode == PNJ_DELTA0x0100 && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&

1440

(*toUnicodeStatus + PNJ_DELTA0x0100) == PNJ_SIGN_VIRAMA0x0A4D && ((UChar32)(targetUniChar + PNJ_DELTA0x0100) == data->prevToUnicodeStatus)) {

1441

/* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */

1442

offset = (int)(source-args->source - 3);

1443

tempTargetUniChar = PNJ_ADHAK0x0A71; /* This is necessary to avoid some compiler warnings. */

1444

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err)do { if(tempTargetUniChar>0xA0 && tempTargetUniChar
!= 0x200d && tempTargetUniChar != 0x200c && tempTargetUniChar
!= 0x0964 && tempTargetUniChar != 0x0965){ tempTargetUniChar
+=(uint16_t)(0); } if(target<args->targetLimit){ *(target
)++ = (UChar)tempTargetUniChar; if(args->offsets){ *(args->
offsets)++ = (int32_t)(offset); } }else{ args->converter->
UCharErrorBuffer[args->converter->UCharErrorBufferLength
++] = (UChar)tempTargetUniChar; *err = U_BUFFER_OVERFLOW_ERROR
; } } while (false);

1445

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err)do { if(data->prevToUnicodeStatus>0xA0 && data->
prevToUnicodeStatus != 0x200d && data->prevToUnicodeStatus
!= 0x200c && data->prevToUnicodeStatus != 0x0964 &&
data->prevToUnicodeStatus != 0x0965){ data->prevToUnicodeStatus
+=(uint16_t)(0); } if(target<args->targetLimit){ *(target
)++ = (UChar)data->prevToUnicodeStatus; if(args->offsets
){ *(args->offsets)++ = (int32_t)(offset); } }else{ args->
converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength
++] = (UChar)data->prevToUnicodeStatus; *err = U_BUFFER_OVERFLOW_ERROR
; } } while (false);

1446

data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */

1447

*toUnicodeStatus = missingCharMarker0xFFFF;

1448

continue;

1449

} else {

1450

/* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */

1451

if (data->prevToUnicodeStatus) {

1452

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err)do { if(data->prevToUnicodeStatus>0xA0 && data->
prevToUnicodeStatus != 0x200d && data->prevToUnicodeStatus
!= 0x200c && data->prevToUnicodeStatus != 0x0964 &&
data->prevToUnicodeStatus != 0x0965){ data->prevToUnicodeStatus
+=(uint16_t)(0); } if(target<args->targetLimit){ *(target
)++ = (UChar)data->prevToUnicodeStatus; if(args->offsets
){ *(args->offsets)++ = (int32_t)((source-args->source -
1)); } }else{ args->converter->UCharErrorBuffer[args->
converter->UCharErrorBufferLength++] = (UChar)data->prevToUnicodeStatus
; *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1453

data->prevToUnicodeStatus = 0x0000;

1454

}

1455

/* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.

1456

* If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.

1457

*/

1458

if (data->currentDeltaToUnicode == PNJ_DELTA0x0100 && (targetUniChar + PNJ_DELTA0x0100) == PNJ_BINDI0x0A02 && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA0x0100))) {

1459

targetUniChar = PNJ_TIPPI0x0A70 - PNJ_DELTA0x0100;

1460

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err)do { if(*toUnicodeStatus>0xA0 && *toUnicodeStatus !=
0x200d && *toUnicodeStatus != 0x200c && *toUnicodeStatus
!= 0x0964 && *toUnicodeStatus != 0x0965){ *toUnicodeStatus
+=(uint16_t)(0x0100); } if(target<args->targetLimit){ *
(target)++ = (UChar)*toUnicodeStatus; if(args->offsets){ *
(args->offsets)++ = (int32_t)((source-args->source -2))
; } }else{ args->converter->UCharErrorBuffer[args->converter
->UCharErrorBufferLength++] = (UChar)*toUnicodeStatus; *err
= U_BUFFER_OVERFLOW_ERROR; } } while (false);

1461

} else if (data->currentDeltaToUnicode == PNJ_DELTA0x0100 && (targetUniChar + PNJ_DELTA0x0100) == PNJ_SIGN_VIRAMA0x0A4D && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA0x0100))) {

1462

/* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */

1463

data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA0x0100;

1464

} else {

1465

/* write the previously mapped codepoint */

1466

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err)do { if(*toUnicodeStatus>0xA0 && *toUnicodeStatus !=
0x200d && *toUnicodeStatus != 0x200c && *toUnicodeStatus
!= 0x0964 && *toUnicodeStatus != 0x0965){ *toUnicodeStatus
+=(uint16_t)(data->currentDeltaToUnicode); } if(target<
args->targetLimit){ *(target)++ = (UChar)*toUnicodeStatus;
if(args->offsets){ *(args->offsets)++ = (int32_t)((source
-args->source -2)); } }else{ args->converter->UCharErrorBuffer
[args->converter->UCharErrorBufferLength++] = (UChar)*toUnicodeStatus
; *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1467

}

1468

}

1469

*toUnicodeStatus = missingCharMarker0xFFFF;

1470

}

1471

1472

if (targetUniChar != missingCharMarker0xFFFF) {

1473

/* now save the targetUniChar for delayed write */

1474

*toUnicodeStatus = (UChar) targetUniChar;

1475

if (data->resetToDefaultToUnicode==TRUE1) {

1476

data->currentDeltaToUnicode = data->defDeltaToUnicode;

1477

data->currentMaskToUnicode = data->defMaskToUnicode;

1478

data->resetToDefaultToUnicode=FALSE0;

1479

}

1480

} else {

1481

1482

/* we reach here only if targetUniChar == missingCharMarker

1483

* so assign codes to reason and err

1484

*/

1485

*err = U_INVALID_CHAR_FOUND;

1486

CALLBACK:

1487

args->converter->toUBytes[0] = (uint8_t) sourceChar;

1488

args->converter->toULength = 1;

1489

break;

1490

}

1491

1492

} else {

1493

*err =U_BUFFER_OVERFLOW_ERROR;

1494

break;

1495

}

1496

}

1497

1498

if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {

1499

/* end of the input stream */

1500

UConverter *cnv = args->converter;

1501

1502

if (*contextCharToUnicode==ATR0xEF || *contextCharToUnicode==EXT0xF0 || *contextCharToUnicode==ISCII_INV0xD9) {

1503

/* set toUBytes[] */

1504

cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;

1505

cnv->toULength = 1;

1506

1507

/* avoid looping on truncated sequences */

1508

*contextCharToUnicode = NO_CHAR_MARKER0xFFFE;

1509

} else {

1510

cnv->toULength = 0;

1511

}

1512

1513

if (*toUnicodeStatus != missingCharMarker0xFFFF) {

1514

/* output a remaining target character */

1515

WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err)do { if(*toUnicodeStatus>0xA0 && *toUnicodeStatus !=
0x200d && *toUnicodeStatus != 0x200c && *toUnicodeStatus
!= 0x0964 && *toUnicodeStatus != 0x0965){ *toUnicodeStatus
+=(uint16_t)(data->currentDeltaToUnicode); } if(target<
args->targetLimit){ *(target)++ = (UChar)*toUnicodeStatus;
if(args->offsets){ *(args->offsets)++ = (int32_t)((source
- args->source -1)); } }else{ args->converter->UCharErrorBuffer
[args->converter->UCharErrorBufferLength++] = (UChar)*toUnicodeStatus
; *err = U_BUFFER_OVERFLOW_ERROR; } } while (false);

1516

*toUnicodeStatus = missingCharMarker0xFFFF;

1517

}

1518

}

1519

1520

args->target = target;

1521

args->source = source;

1522

}

1523

1524

/* structure for SafeClone calculations */

1525

struct cloneISCIIStruct {

1526

UConverter cnv;

1527

UConverterDataISCII mydata;

1528

};

1529

1530

static UConverter * U_CALLCONV

1531

_ISCII_SafeClone(const UConverter *cnv,

1532

void *stackBuffer,

1533

int32_t *pBufferSize,

1534

UErrorCode *status)

1535

{

1536

struct cloneISCIIStruct * localClone;

1537

int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);

1538

1539

if (U_FAILURE(*status)) {

1540

return 0;

1541

}

1542

1543

if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */

1544

*pBufferSize = bufferSizeNeeded;

1545

return 0;

1546

}

1547

1548

localClone = (struct cloneISCIIStruct *)stackBuffer;

1549

/* ucnv.c/ucnv_safeClone() copied the main UConverter already */

1550

1551

uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII))do { clang diagnostic push clang diagnostic ignored "-Waddress"
(void)0; (void)0; clang diagnostic pop :: memcpy(&localClone
->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
} while (false);

1552

localClone->cnv.extraInfo = &localClone->mydata;

1553

localClone->cnv.isExtraLocal = TRUE1;

1554

1555

return &localClone->cnv;

1556

}

1557

1558

static void U_CALLCONV

1559

_ISCIIGetUnicodeSet(const UConverter *cnv,

1560

const USetAdder *sa,

1561

UConverterUnicodeSet which,

1562

UErrorCode *pErrorCode)

1563

{

1564

(void)cnv;

1565

(void)which;

1566

(void)pErrorCode;

1567

int32_t idx, script;

1568

uint8_t mask;

1569

1570

/* Since all ISCII versions allow switching to other ISCII

1571

scripts, we add all roundtrippable characters to this set. */

1572

sa->addRange(sa->set, 0, ASCII_END0xA0);

1573

for (script = DEVANAGARI; script <= MALAYALAM; script++) {

1574

mask = (uint8_t)(lookupInitialData[script].maskEnum);

1575

for (idx = 0; idx < DELTA; idx++) {

1576

/* added check for TELUGU character */

1577

if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) {

1578

sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN0x0900);

1579

}

1580

}

1581

}

1582

sa->add(sa->set, DANDA0x0964);

1583

sa->add(sa->set, DOUBLE_DANDA0x0965);

1584

sa->add(sa->set, ZWNJ0x200c);

1585

sa->add(sa->set, ZWJ0x200d);

1586

}

1587

U_CDECL_END}

1588

static const UConverterImpl _ISCIIImpl={

1589

1590

UCNV_ISCII,

1591

1592

NULL__null,

1593

NULL__null,

1594

1595

_ISCIIOpen,

1596

_ISCIIClose,

1597

_ISCIIReset,

1598

1599

UConverter_toUnicode_ISCII_OFFSETS_LOGIC,

1600

UConverter_toUnicode_ISCII_OFFSETS_LOGIC,

1601

UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,

1602

UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,

1603

NULL__null,

1604

1605

NULL__null,

1606

_ISCIIgetName,

1607

NULL__null,

1608

_ISCII_SafeClone,

1609

_ISCIIGetUnicodeSet,

1610

NULL__null,

1611

NULL__null

1612

};

1613

1614

static const UConverterStaticData _ISCIIStaticData={

1615

sizeof(UConverterStaticData),

1616

"ISCII",

1617

0,

1618

UCNV_IBM,

1619

UCNV_ISCII,

1620

1,

1621

4,

1622

{ 0x1a, 0, 0, 0 },

1623

0x1,

1624

FALSE0,

1625

FALSE0,

1626

0x0,

1627

0x0,

1628

{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */

1629

1630

};

1631

1632

const UConverterSharedData _ISCIIData_ISCIIData_71=

1633

UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl){ sizeof(UConverterSharedData), ~((uint32_t)0), __null, &
_ISCIIStaticData, false, false, &_ISCIIImpl, 0, { 0, 0, 0
, 0, __null, __null, __null, __null, __null, __null, { 0 }, __null
, __null, 0, 0, 0, false, 0, 0, __null, __null, __null, __null
} };

1634

1635

#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

File:	out/../deps/icu-small/source/common/ucnvisci.cpp
Warning:	line 1265, column 21 Value stored to 'targetUniChar' is never read

Bug Summary

Annotated Source Code