map.go 18.8 KB
Newer Older
Jeromy's avatar
Jeromy committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cases

// This file contains the definitions of case mappings for all supported
// languages. The rules for the language-specific tailorings were taken and
// modified from the CLDR transform definitions in common/transforms.

import (
	"strings"
	"unicode"
	"unicode/utf8"

	"gx/QmVgdgtv5rUcWWcHvHN1vdGkXi8UNztJ7JuT72YUJgG2ic/go-text/language"
	"gx/QmVgdgtv5rUcWWcHvHN1vdGkXi8UNztJ7JuT72YUJgG2ic/go-text/transform"
	"gx/QmVgdgtv5rUcWWcHvHN1vdGkXi8UNztJ7JuT72YUJgG2ic/go-text/unicode/norm"
)

// A mapFunc takes a context set to the current rune and writes the mapped
// version to the same context. It may advance the context to the next rune. It
// returns whether a checkpoint is possible: whether the pDst bytes written to
// dst so far won't need changing as we see more source bytes.
type mapFunc func(*context) bool

// maxIgnorable defines the maximum number of ignorables to consider for
// lookahead operations.
const maxIgnorable = 30

// supported lists the language tags for which we have tailorings.
const supported = "und af az el lt nl tr"

func init() {
	tags := []language.Tag{}
	for _, s := range strings.Split(supported, " ") {
		tags = append(tags, language.MustParse(s))
	}
	matcher = language.NewMatcher(tags)
	Supported = language.NewCoverage(tags)
}

var (
	matcher language.Matcher

	Supported language.Coverage

	// We keep the following lists separate, instead of having a single per-
	// language struct, to give the compiler a chance to remove unused code.

	// Some uppercase mappers are stateless, so we can precompute the
	// Transformers and save a bit on runtime allocations.
	upperFunc = []mapFunc{
		nil,              // und
		nil,              // af
		aztrUpper(upper), // az
		elUpper,          // el
		ltUpper(upper),   // lt
		nil,              // nl
		aztrUpper(upper), // tr
	}

	undUpper transform.Transformer = &undUpperCaser{}

	lowerFunc = []mapFunc{
		lower,     // und
		lower,     // af
		aztrLower, // az
		lower,     // el
		ltLower,   // lt
		lower,     // nl
		aztrLower, // tr
	}

	titleInfos = []struct {
		title, lower mapFunc
		rewrite      func(*context)
	}{
		{title, lower, nil},                // und
		{title, lower, afnlRewrite},        // af
		{aztrUpper(title), aztrLower, nil}, // az
		{title, lower, nil},                // el
		{ltUpper(title), ltLower, nil},     // lt
		{nlTitle, lower, afnlRewrite},      // nl
		{aztrUpper(title), aztrLower, nil}, // tr
	}
)

func makeUpper(t language.Tag, o options) transform.Transformer {
	_, i, _ := matcher.Match(t)
	f := upperFunc[i]
	if f == nil {
		return undUpper
	}
	return &simpleCaser{f: f}
}

func makeLower(t language.Tag, o options) transform.Transformer {
	_, i, _ := matcher.Match(t)
	f := lowerFunc[i]
	if o.noFinalSigma {
		return &simpleCaser{f: f}
	}
	return &lowerCaser{
		first:   f,
		midWord: finalSigma(f),
	}
}

func makeTitle(t language.Tag, o options) transform.Transformer {
	_, i, _ := matcher.Match(t)
	x := &titleInfos[i]
	lower := x.lower
	if o.noLower {
		lower = (*context).copy
	} else if !o.noFinalSigma {
		lower = finalSigma(lower)
	}
	return &titleCaser{
		title:   x.title,
		lower:   lower,
		rewrite: x.rewrite,
	}
}

// TODO: consider a similar special case for the fast majority lower case. This
// is a bit more involved so will require some more precise benchmarking to
// justify it.

type undUpperCaser struct{ transform.NopResetter }

// undUpperCaser implements the Transformer interface for doing an upper case
// mapping for the root locale (und). It eliminates the need for an allocation
// as it prevents escaping by not using function pointers.
func (t *undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	c := context{dst: dst, src: src, atEOF: atEOF}
	for c.next() {
		upper(&c)
		c.checkpoint()
	}
	return c.ret()
}

type simpleCaser struct {
	context
	f mapFunc
}

// simpleCaser implements the Transformer interface for doing a case operation
// on a rune-by-rune basis.
func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	t.context = context{dst: dst, src: src, atEOF: atEOF}
	c := &t.context
	for c.next() && t.f(c) {
		c.checkpoint()
	}
	return c.ret()
}

// lowerCaser implements the Transformer interface. The default Unicode lower
// casing requires different treatment for the first and subsequent characters
// of a word, most notably to handle the Greek final Sigma.
type lowerCaser struct {
	context

	first, midWord mapFunc
}

func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	t.context = context{dst: dst, src: src, atEOF: atEOF}
	c := &t.context

	for isInterWord := true; c.next(); {
		if isInterWord {
			if c.info.isCased() {
				if !t.first(c) {
					break
				}
				isInterWord = false
			} else if !c.copy() {
				break
			}
		} else {
			if c.info.isNotCasedAndNotCaseIgnorable() {
				if !c.copy() {
					break
				}
				isInterWord = true
			} else if !t.midWord(c) {
				break
			}
		}
		c.checkpoint()
	}
	return c.ret()
}

// titleCaser implements the Transformer interface. Title casing algorithms
// distinguish between the first letter of a word and subsequent letters of the
// same word. It uses state to avoid requiring a potentially infinite lookahead.
type titleCaser struct {
	context

	// rune mappings used by the actual casing algorithms.
	title, lower mapFunc

	rewrite func(*context)
}

// Transform implements the standard Unicode title case algorithm as defined in
// Chapter 3 of The Unicode Standard:
// toTitlecase(X): Find the word boundaries in X according to Unicode Standard
// Annex #29, "Unicode Text Segmentation." For each word boundary, find the
// first cased character F following the word boundary. If F exists, map F to
// Titlecase_Mapping(F); then map all characters C between F and the following
// word boundary to Lowercase_Mapping(C).
func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
	c := &t.context

	if !c.next() {
		return c.ret()
	}

	for {
		p := c.info
		if t.rewrite != nil {
			t.rewrite(c)
		}

		wasMid := p.isCaseIgnorableAndNonBreakStarter()
		// Break out of this loop on failure to ensure we do not modify the
		// state incorrectly.
		if p.isCased() && !p.isCaseIgnorableAndNotCased() {
			if !c.isMidWord {
				if !t.title(c) {
					break
				}
				c.isMidWord = true
			} else if !t.lower(c) {
				break
			}
		} else if !c.copy() {
			break
		}

		// TODO: make this an "else if" if we can prove that no rune that does
		// not match the first condition of the if statement can be a break.
		if p.isBreak() {
			c.isMidWord = false
		}

		// As we save the state of the transformer, it is safe to call
		// checkpoint after any successful write.
		c.checkpoint()

		if !c.next() {
			break
		}
		if wasMid && c.info.isCaseIgnorableAndNonBreakStarter() {
			c.isMidWord = false
		}
	}
	return c.ret()
}

// lower writes the lowercase version of the current rune to dst.
func lower(c *context) bool {
	if c.info&hasMappingMask == 0 || c.caseType() == cLower {
		return c.copy()
	}
	if c.info&exceptionBit == 0 {
		return c.copyXOR()
	}
	e := exceptions[c.info>>exceptionShift+1:]
	if nLower := (e[0] >> lengthBits) & lengthMask; nLower != noChange {
		return c.writeString(e[1 : 1+nLower])
	}
	return c.copy()
}

// upper writes the uppercase version of the current rune to dst.
func upper(c *context) bool {
	ct := c.caseType()
	if c.info&hasMappingMask == 0 || ct == cUpper {
		return c.copy()
	}
	if c.info&exceptionBit == 0 {
		return c.copyXOR()
	}
	e := exceptions[c.info>>exceptionShift+1:]
	// Get length of first special case mapping.
	n := (e[0] >> lengthBits) & lengthMask
	if ct == cTitle {
		// The first special case mapping is for lower. Set n to the second.
		if n == noChange {
			n = 0
		}
		n, e = e[0]&lengthMask, e[n:]
	}
	if n != noChange {
		return c.writeString(e[1 : 1+n])
	}
	return c.copy()
}

// title writes the title case version of the current rune to dst.
func title(c *context) bool {
	ct := c.caseType()
	if c.info&hasMappingMask == 0 || ct == cTitle {
		return c.copy()
	}
	if c.info&exceptionBit == 0 {
		if ct == cLower {
			return c.copyXOR()
		}
		return c.copy()
	}
	// Get the exception data.
	e := exceptions[c.info>>exceptionShift+1:]

	nFirst := (e[0] >> lengthBits) & lengthMask
	if nTitle := e[0] & lengthMask; nTitle != noChange {
		if nFirst != noChange {
			e = e[nFirst:]
		}
		return c.writeString(e[1 : 1+nTitle])
	}
	if ct == cLower && nFirst != noChange {
		// Use the uppercase version instead.
		return c.writeString(e[1 : 1+nFirst])
	}
	// Already in correct case.
	return c.copy()
}

// finalSigma adds Greek final Sigma handing to another casing function. It
// determines whether a lowercased sigma should be σ or ς, by looking ahead for
// case-ignorables and a cased letters.
func finalSigma(f mapFunc) mapFunc {
	return func(c *context) bool {
		// ::NFD();
		// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
		// Σ } [:case-ignorable:]* [:cased:] → σ;
		// [:cased:] [:case-ignorable:]* { Σ → ς;
		// ::Any-Lower;
		// ::NFC();

		if !c.hasPrefix("Σ") {
			return f(c)
		}

		p := c.pDst
		c.writeString("ς")
		// We need to do one more iteration after maxIgnorable, as a cased
		// letter is not an ignorable and may modify the result.
		for i := 0; i < maxIgnorable+1; i++ {
			if !c.next() {
				return false
			}
			if !c.info.isCaseIgnorable() {
				if c.info.isCased() {
					// p+1 is guaranteed to be in bounds: if writing ς was
					// successful, p+1 will contain the second byte of ς. If not,
					// this function will have returned after c.next returned false.
					c.dst[p+1]++ // ς → σ
				}
				c.unreadRune()
				return true
			}
			// A case ignorable may also introduce a word break, so we may need
			// to continue searching even after detecting a break.
			c.isMidWord = c.isMidWord && !c.info.isBreak()
			c.copy()
		}
		return true
	}
}

// elUpper implements Greek upper casing, which entails removing a predefined
// set of non-blocked modifiers. Note that these accents should not be removed
// for title casing!
// Example: "Οδός" -> "ΟΔΟΣ".
func elUpper(c *context) bool {
	// From CLDR:
	// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;
	// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;

	r, _ := utf8.DecodeRune(c.src[c.pSrc:])
	oldPDst := c.pDst
	if !upper(c) {
		return false
	}
	if !unicode.Is(unicode.Greek, r) {
		return true
	}
	i := 0
	// Take the properties of the uppercased rune that is already written to the
	// destination. This saves us the trouble of having to uppercase the
	// decomposed rune again.
	if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
		// Restore the destination position and process the decomposed rune.
		r, sz := utf8.DecodeRune(b)
		if r <= 0xFF { // See A.6.1
			return true
		}
		c.pDst = oldPDst
		// Insert the first rune and ignore the modifiers. See A.6.2.
		c.writeBytes(b[:sz])
		i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.
	}

	for ; i < maxIgnorable && c.next(); i++ {
		switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {
		// Above and Iota Subscript
		case 0x0300, // U+0300 COMBINING GRAVE ACCENT
			0x0301, // U+0301 COMBINING ACUTE ACCENT
			0x0304, // U+0304 COMBINING MACRON
			0x0306, // U+0306 COMBINING BREVE
			0x0308, // U+0308 COMBINING DIAERESIS
			0x0313, // U+0313 COMBINING COMMA ABOVE
			0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
			0x0342, // U+0342 COMBINING GREEK PERISPOMENI
			0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
			// No-op. Gobble the modifier.

		default:
			switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
			case cccZero:
				c.unreadRune()
				return true

			// We don't need to test for IotaSubscript as the only rune that
			// qualifies (U+0345) was already excluded in the switch statement
			// above. See A.4.

			case cccAbove:
				return c.copy()
			default:
				// Some other modifier. We're still allowed to gobble Greek
				// modifiers after this.
				c.copy()
			}
		}
	}
	return i == maxIgnorable
}

func ltLower(c *context) bool {
	// From CLDR:
	// # Introduce an explicit dot above when lowercasing capital I's and J's
	// # whenever there are more accents above.
	// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
	// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
	// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
	// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
	// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
	// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
	// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
	// ::NFD();
	// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
	// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
	// Į } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → į \u0307;
	// Ì → i \u0307 \u0300;
	// Í → i \u0307 \u0301;
	// Ĩ → i \u0307 \u0303;
	// ::Any-Lower();
	// ::NFC();

	i := 0
	if r := c.src[c.pSrc]; r < utf8.RuneSelf {
		lower(c)
		if r != 'I' && r != 'J' {
			return true
		}
	} else {
		p := norm.NFD.Properties(c.src[c.pSrc:])
		if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
			// UTF-8 optimization: the decomposition will only have an above
			// modifier if the last rune of the decomposition is in [U+300-U+311].
			// In all other cases, a decomposition starting with I is always
			// an I followed by modifiers that are not cased themselves. See A.2.
			if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.
				if !c.writeBytes(d[:1]) {
					return false
				}
				c.dst[c.pDst-1] += 'a' - 'A' // lower

				// Assumption: modifier never changes on lowercase. See A.1.
				// Assumption: all modifiers added have CCC = Above. See A.2.3.
				return c.writeString("\u0307") && c.writeBytes(d[1:])
			}
			// In all other cases the additional modifiers will have a CCC
			// that is less than 230 (Above). We will insert the U+0307, if
			// needed, after these modifiers so that a string in FCD form
			// will remain so. See A.2.2.
			lower(c)
			i = 1
		} else {
			return lower(c)
		}
	}

	for ; i < maxIgnorable && c.next(); i++ {
		switch c.info.cccType() {
		case cccZero:
			c.unreadRune()
			return true
		case cccAbove:
			return c.writeString("\u0307") && c.copy() // See A.1.
		default:
			c.copy() // See A.1.
		}
	}
	return i == maxIgnorable
}

func ltUpper(f mapFunc) mapFunc {
	return func(c *context) bool {
		// From CLDR:
		// ::NFD();
		// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
		// ::Any-Upper();
		// ::NFC();

		// TODO: See A.5. A soft-dotted rune never has an exception. This would
		// allow us to overload the exception bit and encode this property in
		// info. Need to measure performance impact of this.
		r, _ := utf8.DecodeRune(c.src[c.pSrc:])
		oldPDst := c.pDst
		if !f(c) {
			return false
		}
		if !unicode.Is(unicode.Soft_Dotted, r) {
			return true
		}

		// We don't need to do an NFD normalization, as a soft-dotted rune never
		// contains U+0307. See A.3.

		i := 0
		for ; i < maxIgnorable && c.next(); i++ {
			switch c.info.cccType() {
			case cccZero:
				c.unreadRune()
				return true
			case cccAbove:
				if c.hasPrefix("\u0307") {
					// We don't do a full NFC, but rather combine runes for
					// some of the common cases. (Returning NFC or
					// preserving normal form is neither a requirement nor
					// a possibility anyway).
					if !c.next() {
						return false
					}
					if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {
						s := ""
						switch c.src[c.pSrc+1] {
						case 0x80: // U+0300 COMBINING GRAVE ACCENT
							s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
						case 0x81: // U+0301 COMBINING ACUTE ACCENT
							s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
						case 0x83: // U+0303 COMBINING TILDE
							s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
						case 0x88: // U+0308 COMBINING DIAERESIS
							s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
						default:
						}
						if s != "" {
							c.pDst = oldPDst
							return c.writeString(s)
						}
					}
				}
				return c.copy()
			default:
				c.copy()
			}
		}
		return i == maxIgnorable
	}
}

func aztrUpper(f mapFunc) mapFunc {
	return func(c *context) bool {
		// i→İ;
		if c.src[c.pSrc] == 'i' {
			return c.writeString("İ")
		}
		return f(c)
	}
}

func aztrLower(c *context) (done bool) {
	// From CLDR:
	// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
	// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
	// İ→i;
	// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
	// # This matches the behavior of the canonically equivalent I-dot_above
	// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
	// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
	// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
	// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;
	// I→ı ;
	// ::Any-Lower();
	if c.hasPrefix("\u0130") { // İ
		return c.writeString("i")
	}
	if c.src[c.pSrc] != 'I' {
		return lower(c)
	}

	// We ignore the lower-case I for now, but insert it later when we know
	// which form we need.
	start := c.pSrc + c.sz

	i := 0
Loop:
	// We check for up to n ignorables before \u0307. As \u0307 is an
	// ignorable as well, n is maxIgnorable-1.
	for ; i < maxIgnorable && c.next(); i++ {
		switch c.info.cccType() {
		case cccAbove:
			if c.hasPrefix("\u0307") {
				return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307
			}
			done = true
			break Loop
		case cccZero:
			c.unreadRune()
			done = true
			break Loop
		default:
			// We'll write this rune after we know which starter to use.
		}
	}
	if i == maxIgnorable {
		done = true
	}
	return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
}

func nlTitle(c *context) bool {
	// From CLDR:
	// # Special titlecasing for Dutch initial "ij".
	// ::Any-Title();
	// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
	// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
	if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' {
		return title(c)
	}

	if !c.writeString("I") || !c.next() {
		return false
	}
	if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' {
		return c.writeString("J")
	}
	c.unreadRune()
	return true
}

// Not part of CLDR, but see http://unicode.org/cldr/trac/ticket/7078.
func afnlRewrite(c *context) {
	if c.hasPrefix("'") || c.hasPrefix("’") {
		c.isMidWord = true
	}
}