cases.go 3.67 KB
Newer Older
Jeromy's avatar
Jeromy committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run gen.go gen_trieval.go

// Package cases provides general and language-specific case mappers.
package cases

import (
	"gx/QmVgdgtv5rUcWWcHvHN1vdGkXi8UNztJ7JuT72YUJgG2ic/go-text/language"
	"gx/QmVgdgtv5rUcWWcHvHN1vdGkXi8UNztJ7JuT72YUJgG2ic/go-text/transform"
)

// References:
// - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18.
// - http://www.unicode.org/reports/tr29/
// - http://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
// - http://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
// - http://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
// - http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
// - http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
// - http://userguide.icu-project.org/transforms/casemappings

// TODO:
// - Case folding
// - Wide and Narrow?
// - Segmenter option for title casing.
// - ASCII fast paths
// - Encode Soft-Dotted property within trie somehow.

// A Caser transforms given input to a certain case. It implements
// transform.Transformer.
//
// A Caser may be stateful and should therefore not be shared between
// goroutines.
type Caser struct {
	t transform.Transformer
}

// Bytes returns a new byte slice with the result of converting b to the case
// form implemented by c.
func (c Caser) Bytes(b []byte) []byte {
	b, _, _ = transform.Bytes(c.t, b)
	return b
}

// String returns a string with the result of transforming s to the case form
// implemented by c.
func (c Caser) String(s string) string {
	s, _, _ = transform.String(c.t, s)
	return s
}

// Reset resets the Caser to be reused for new input after a previous call to
// Transform.
func (c Caser) Reset() { c.t.Reset() }

// Transform implements the Transformer interface and transforms the given input
// to the case form implemented by c.
func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	return c.t.Transform(dst, src, atEOF)
}

// Upper returns a Caser for language-specific uppercasing.
func Upper(t language.Tag, opts ...Option) Caser {
	return Caser{makeUpper(t, getOpts(opts...))}
}

// Lower returns a Caser for language-specific lowercasing.
func Lower(t language.Tag, opts ...Option) Caser {
	return Caser{makeLower(t, getOpts(opts...))}
}

// Title returns a Caser for language-specific title casing. It uses an
// approximation of the default Unicode Word Break algorithm.
func Title(t language.Tag, opts ...Option) Caser {
	return Caser{makeTitle(t, getOpts(opts...))}
}

// Fold returns a Caser that implements Unicode case folding.
//
// Folding is like mapping to lowercase without context-dependent mappings. Its
// primary use is for caseless matching. Note that case folding does not
// normalize the input and may not preserve a normal form.
func Fold(t language.Tag, opts ...Option) Caser {
	panic("TODO: implement")
}

// An Option is used to modify the behavior of a Caser.
type Option func(o *options)

var (
	// NoLower disables the lowercasing of non-leading letters for a title
	// caser.
	NoLower Option = noLower

	// Compact omits mappings in case folding for characters that would grow the
	// input.
	Compact Option = compact
)

// TODO: option to preserve a normal form, if applicable?

type options struct {
	noLower bool
	simple  bool

	// TODO: segmenter, max ignorable, alternative versions, etc.

	noFinalSigma bool // Only used for testing.
}

func getOpts(o ...Option) (res options) {
	for _, f := range o {
		f(&res)
	}
	return
}

func noLower(o *options) {
	o.noLower = true
}

func compact(o *options) {
	o.simple = true
}