context.go 4.85 KB
Newer Older
Jeromy's avatar
Jeromy committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cases

import (
	"gx/QmVgdgtv5rUcWWcHvHN1vdGkXi8UNztJ7JuT72YUJgG2ic/go-text/transform"
)

// A context is used for iterating over source bytes, fetching case info and
// writing to a destination buffer.
//
// Casing operations may need more than one rune of context to decide how a rune
// should be cased. Casing implementations should call checkpoint on context
// whenever it is known to be safe to return the runes processed so far.
//
// It is recommended for implementations to not allow for more than 30 case
// ignorables as lookahead (analogous to the limit in norm) and to use state if
// unbounded lookahead is needed for cased runes.
type context struct {
	dst, src []byte
	atEOF    bool

	pDst int // pDst points past the last written rune in dst.
	pSrc int // pSrc points to the start of the currently scanned rune.

	// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
	nDst, nSrc int
	err        error

	sz   int  // size of current rune
	info info // case information of currently scanned rune

	// State preserved across calls to Transform.
	isMidWord bool // false if next cased letter needs to be title-cased.
}

func (c *context) Reset() {
	c.isMidWord = false
}

// ret returns the return values for the Transform method. It checks whether
// there were insufficient bytes in src to complete and introduces an error
// accordingly, if necessary.
func (c *context) ret() (nDst, nSrc int, err error) {
	if c.err != nil || c.nSrc == len(c.src) {
		return c.nDst, c.nSrc, c.err
	}
	// This point is only reached by mappers if there was no short destination
	// buffer. This means that the source buffer was exhausted and that c.sz was
	// set to 0 by next.
	if c.atEOF && c.pSrc == len(c.src) {
		return c.pDst, c.pSrc, nil
	}
	return c.nDst, c.nSrc, transform.ErrShortSrc
}

// checkpoint sets the return value buffer points for Transform to the current
// positions.
func (c *context) checkpoint() {
	if c.err == nil {
		c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
	}
}

// unreadRune causes the last rune read by next to be reread on the next
// invocation of next. Only one unreadRune may be called after a call to next.
func (c *context) unreadRune() {
	c.sz = 0
}

func (c *context) next() bool {
	c.pSrc += c.sz
	if c.pSrc == len(c.src) || c.err != nil {
		c.info, c.sz = 0, 0
		return false
	}
	v, sz := trie.lookup(c.src[c.pSrc:])
	c.info, c.sz = info(v), sz
	if c.sz == 0 {
		if c.atEOF {
			// A zero size means we have an incomplete rune. If we are atEOF,
			// this means it is an illegal rune, which we will consume one
			// byte at a time.
			c.sz = 1
		} else {
			c.err = transform.ErrShortSrc
			return false
		}
	}
	return true
}

// writeBytes adds bytes to dst.
func (c *context) writeBytes(b []byte) bool {
	if len(c.dst)-c.pDst < len(b) {
		c.err = transform.ErrShortDst
		return false
	}
	// This loop is faster than using copy.
	for _, ch := range b {
		c.dst[c.pDst] = ch
		c.pDst++
	}
	return true
}

// writeString writes the given string to dst.
func (c *context) writeString(s string) bool {
	if len(c.dst)-c.pDst < len(s) {
		c.err = transform.ErrShortDst
		return false
	}
	// This loop is faster than using copy.
	for i := 0; i < len(s); i++ {
		c.dst[c.pDst] = s[i]
		c.pDst++
	}
	return true
}

// copy writes the current rune to dst.
func (c *context) copy() bool {
	return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
}

// copyXOR copies the current rune to dst and modifies it by applying the XOR
// pattern of the case info. It is the responsibility of the caller to ensure
// that this is a rune with a XOR pattern defined.
func (c *context) copyXOR() bool {
	if !c.copy() {
		return false
	}
	if c.info&xorIndexBit == 0 {
		// Fast path for 6-bit XOR pattern, which covers most cases.
		c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
	} else {
		// Interpret XOR bits as an index.
		// TODO: test performance for unrolling this loop. Verify that we have
		// at least two bytes and at most three.
		idx := c.info >> xorShift
		for p := c.pDst - 1; ; p-- {
			c.dst[p] ^= xorData[idx]
			idx--
			if xorData[idx] == 0 {
				break
			}
		}
	}
	return true
}

// hasPrefix returns true if src[pSrc:] starts with the given string.
func (c *context) hasPrefix(s string) bool {
	b := c.src[c.pSrc:]
	if len(b) < len(s) {
		return false
	}
	for i, c := range b[:len(s)] {
		if c != s[i] {
			return false
		}
	}
	return true
}

// caseType returns an info with only the case bits, normalized to either
// cLower, cUpper, cTitle or cUncased.
func (c *context) caseType() info {
	cm := c.info & 0x7
	if cm < 4 {
		return cm
	}
	if cm >= cXORCase {
		// xor the last bit of the rune with the case type bits.
		b := c.src[c.pSrc+c.sz-1]
		return info(b&1) ^ cm&0x3
	}
	if cm == cIgnorableCased {
		return cLower
	}
	return cUncased
}