update_emoji.pl 3.72 KB
Newer Older
1
2
3
4
5
6
7
#!/usr/bin/perl -w
#
# update_emoji.pl
#
# This script generates the emoji.plugin.zsh emoji definitions from the Unicode
# character data for the emoji characters.
#
Janosch Schwalm's avatar
Janosch Schwalm committed
8
9
# The data file can be found at https://unicode.org/Public/emoji/latest/emoji-data.txt
# as referenced in Unicode TR51 (https://www.unicode.org/reports/tr51/index.html).
10
11
12
13
14
15
#
# This is known to work with the data file from version 1.0. It may not work with later
# versions if the format changes. In particular, this reads line comments to get the
# emoji character name and unicode version.
#
# Country names have punctuation and other non-letter characters removed from their name,
Janosch Schwalm's avatar
Janosch Schwalm committed
16
# to avoid possible complications with having to escape the strings when using them as
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# array subscripts. The definition file seems to use some combining characters like accents
# that get stripped during this process.

use strict;
use warnings;
use 5.010;
use autodie;

use Path::Class;
use File::Copy;

# Parse definitions out of the data file and convert
sub process_emoji_data_file {
	my ( $infile, $outfilename ) = @_;
	my $file = file($infile);
	my $outfile = file($outfilename);
	my $outfilebase = $outfile->basename();
	my $tempfilename = "$outfilename.tmp";
	my $tempfile = file($tempfilename);
	my $outfh = $tempfile->openw();
	$outfh->print("
# $outfilebase - Emoji character definitions for oh-my-zsh emoji plugin
#
# This file is auto-generated by update_emoji.pl. Do not edit it manually.
#
# This contains the definition for:
#   \$emoji         - which maps character names to Unicode characters
Janosch Schwalm's avatar
Janosch Schwalm committed
44
#   \$emoji_flags   - maps country names to Unicode flag characters using region indicators
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

# Main emoji
typeset -gAH emoji
# National flags
typeset -gAH emoji_flags
# Combining modifiers
typeset -gAH emoji_mod

");

	my $fh = $file->openr();
	my $line_num = 0;
	while ( my $line = $fh->getline() ) {
		$line_num++;
		$_ = $line;
		# Skip all-comment lines (from the header) and blank lines
		# (But don't strip comments on normal lines; we need to parse those for
		# the emoji names.)
		next if /^\s*#/ or /^\s*$/;

		if (/^(\S.*?\S)\s*;\s*(\w+)\s*;\s*(\w+)\s*;\s*(\w+)\s*;\s*(\w.*?)\s*#\s*V(\S+)\s\(.*?\)\s*(\w.*\S)\s*$/) {
Janosch Schwalm's avatar
Janosch Schwalm committed
66
			my ($code, $style, $level, $modifier_status, $sources, $version, $keycap_name)
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
				= ($1, $2, $3, $4, $5, $6, $7);
			#print "code=$code style=$style level=$level modifier_status=$modifier_status sources=$sources version=$version name=$keycap_name\n";
			my @code_points = split /\s+/, $code;
			my @sources = split /\s+/, $sources;

			my $flag_country = "";
			if ( $keycap_name =~ /^flag for (\S.*?)\s*$/) {
				$flag_country = $1;
			}

			my $zsh_code = join '', map { "\\U$_" } @code_points;
			# Convert keycap names to valid associative array names that do not require any
			# quoting. Works fine for most stuff, but is clumsy for flags.
			my $omz_name = lc($keycap_name);
			$omz_name =~ s/[^A-Za-z0-9]/_/g;
			my $zsh_flag_country = $flag_country;
			$zsh_flag_country =~ s/[^\p{Letter}]/_/g;
			if ($flag_country) {
				$outfh->print("emoji_flags[$zsh_flag_country]=\$'$zsh_code'\n");
			} else {
Janosch Schwalm's avatar
Janosch Schwalm committed
87
				$outfh->print("emoji[$omz_name]=\$'$zsh_code'\n");
88
89
90
			}
			# Modifiers are included in both the main set and their separate map,
			# because they have a standalone representation as a color swatch.
91
			if ( $modifier_status eq "modifier" ) {
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
				$outfh->print("emoji_mod[$omz_name]=\$'$zsh_code'\n");
			}
		} else {
			die "Failed parsing line $line_num: '$_'";
		}
	}
	$fh->close();
	$outfh->print("\n");
	$outfh->close();

	move($tempfilename, $outfilename)
		or die "Failed moving temp file to $outfilename: $!";
}

my $datafile = "emoji-data.txt";
my $zsh_def_file = "emoji-char-definitions.zsh";
process_emoji_data_file($datafile, $zsh_def_file);

print "Updated definition file $zsh_def_file\n";