GIF89a;
1;
use constant FALSE => "";
use constant NOMATCHPOS => -1;
# A coderef to get combining class imported from Unicode::Normalize
# (i.e. \&Unicode::Normalize::getCombinClass).
# This is also used as a HAS_UNICODE_NORMALIZE flag.
my $CVgetCombinClass;
# Supported Levels
use constant MinLevel => 1;
use constant MaxLevel => 4;
# Minimum weights at level 2 and 3, respectively
use constant Min2Wt => 0x20;
use constant Min3Wt => 0x02;
# Shifted weight at 4th level
use constant Shift4Wt => 0xFFFF;
# A boolean for Variable and 16-bit weights at 4 levels of Collation Element
# PROBLEM: The Default Unicode Collation Element Table
# has weights over 0xFFFF at the 4th level.
# The tie-breaking in the variable weights
# other than "shift" (as well as "shift-trimmed") is unreliable.
use constant VCE_TEMPLATE => 'Cn4';
# A sort key: 16-bit weights
# See also the PROBLEM on VCE_TEMPLATE above.
use constant KEY_TEMPLATE => 'n*';
# Level separator in a sort key:
# i.e. pack(KEY_TEMPLATE, 0)
use constant LEVEL_SEP => "\0\0";
# As Unicode code point separator for hash keys.
# A joined code point string (denoted by JCPS below)
# like "65;768" is used for internal processing
# instead of Perl's Unicode string like "\x41\x{300}",
# as the native code point is different from the Unicode code point
# on EBCDIC platform.
# This character must not be included in any stringified
# representation of an integer.
use constant CODE_SEP => ';';
# NOTE: in regex /;/ is used for $jcps!
# boolean values of variable weights
use constant NON_VAR => 0; # Non-Variable character
use constant VAR => 1; # Variable character
# specific code points
use constant Hangul_SIni => 0xAC00;
use constant Hangul_SFin => 0xD7A3;
# Logical_Order_Exception in PropList.txt
my $DefaultRearrange = [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ];
sub UCA_Version { "24" }
sub Base_Unicode_Version { "6.1.0" }
######
sub pack_U {
return pack('U*', @_);
}
######
my (%VariableOK);
@VariableOK{ qw/
blanked non-ignorable shifted shift-trimmed
/ } = (); # keys lowercased
our @ChangeOK = qw/
alternate backwards level normalization rearrange
katakana_before_hiragana upper_before_lower ignore_level2
overrideHangul overrideCJK preprocess UCA_Version
hangul_terminator variable
/;
our @ChangeNG = qw/
entry mapping table maxlength contraction
ignoreChar ignoreName undefChar undefName rewrite
versionTable alternateTable backwardsTable forwardsTable
rearrangeTable variableTable
derivCode normCode rearrangeHash backwardsFlag
suppress suppressHash
__useXS /; ### XS only
# The hash key 'ignored' is deleted at v 0.21.
# The hash key 'isShift' is deleted at v 0.23.
# The hash key 'combining' is deleted at v 0.24.
# The hash key 'entries' is deleted at v 0.30.
# The hash key 'L3_ignorable' is deleted at v 0.40.
sub version {
my $self = shift;
return $self->{versionTable} || 'unknown';
}
my (%ChangeOK, %ChangeNG);
@ChangeOK{ @ChangeOK } = ();
@ChangeNG{ @ChangeNG } = ();
sub change {
my $self = shift;
my %hash = @_;
my %old;
if (exists $hash{variable} && exists $hash{alternate}) {
delete $hash{alternate};
}
elsif (!exists $hash{variable} && exists $hash{alternate}) {
$hash{variable} = $hash{alternate};
}
foreach my $k (keys %hash) {
if (exists $ChangeOK{$k}) {
$old{$k} = $self->{$k};
$self->{$k} = $hash{$k};
}
elsif (exists $ChangeNG{$k}) {
croak "change of $k via change() is not allowed!";
}
# else => ignored
}
$self->checkCollator();
return wantarray ? %old : $self;
}
sub _checkLevel {
my $level = shift;
my $key = shift; # 'level' or 'backwards'
MinLevel <= $level or croak sprintf
"Illegal level %d (in value for key '%s') lower than %d.",
$level, $key, MinLevel;
$level <= MaxLevel or croak sprintf
"Unsupported level %d (in value for key '%s') higher than %d.",
$level, $key, MaxLevel;
}
my %DerivCode = (
8 => \&_derivCE_8,
9 => \&_derivCE_9,
11 => \&_derivCE_9, # 11 == 9
14 => \&_derivCE_14,
16 => \&_derivCE_14, # 16 == 14
18 => \&_derivCE_18,
20 => \&_derivCE_20,
22 => \&_derivCE_22,
24 => \&_derivCE_24,
);
sub checkCollator {
my $self = shift;
_checkLevel($self->{level}, "level");
$self->{derivCode} = $DerivCode{ $self->{UCA_Version} }
or croak "Illegal UCA version (passed $self->{UCA_Version}).";
$self->{variable} ||= $self->{alternate} || $self->{variableTable} ||
$self->{alternateTable} || 'shifted';
$self->{variable} = $self->{alternate} = lc($self->{variable});
exists $VariableOK{ $self->{variable} }
or croak "$PACKAGE unknown variable parameter name: $self->{variable}";
if (! defined $self->{backwards}) {
$self->{backwardsFlag} = 0;
}
elsif (! ref $self->{backwards}) {
_checkLevel($self->{backwards}, "backwards");
$self->{backwardsFlag} = 1 << $self->{backwards};
}
else {
my %level;
$self->{backwardsFlag} = 0;
for my $b (@{ $self->{backwards} }) {
_checkLevel($b, "backwards");
$level{$b} = 1;
}
for my $v (sort keys %level) {
$self->{backwardsFlag} += 1 << $v;
}
}
defined $self->{rearrange} or $self->{rearrange} = [];
ref $self->{rearrange}
or croak "$PACKAGE: list for rearrangement must be store in ARRAYREF";
# keys of $self->{rearrangeHash} are $self->{rearrange}.
$self->{rearrangeHash} = undef;
if (@{ $self->{rearrange} }) {
@{ $self->{rearrangeHash} }{ @{ $self->{rearrange} } } = ();
}
$self->{normCode} = undef;
if (defined $self->{normalization}) {
eval { require Unicode::Normalize };
$@ and croak "Unicode::Normalize is required to normalize strings";
$CVgetCombinClass ||= \&Unicode::Normalize::getCombinClass;
if ($self->{normalization} =~ /^(?:NF)D\z/) { # tweak for default
$self->{normCode} = \&Unicode::Normalize::NFD;
}
elsif ($self->{normalization} ne 'prenormalized') {
my $norm = $self->{normalization};
$self->{normCode} = sub {
Unicode::Normalize::normalize($norm, shift);
};
eval { $self->{normCode}->("") }; # try
$@ and croak "$PACKAGE unknown normalization form name: $norm";
}
}
return;
}
sub new
{
my $class = shift;
my $self = bless { @_ }, $class;
### begin XS only ###
if (! exists $self->{table} && !defined $self->{rewrite} &&
!defined $self->{undefName} && !defined $self->{ignoreName} &&
!defined $self->{undefChar} && !defined $self->{ignoreChar}) {
$self->{__useXS} = \&_fetch_simple;
} else {
$self->{__useXS} = undef;
}
### end XS only ###
# keys of $self->{suppressHash} are $self->{suppress}.
if ($self->{suppress} && @{ $self->{suppress} }) {
@{ $self->{suppressHash} }{ @{ $self->{suppress} } } = ();
} # before read_table()
# If undef is passed explicitly, no file is read.
$self->{table} = $KeyFile if ! exists $self->{table};
$self->read_table() if defined $self->{table};
if ($self->{entry}) {
while ($self->{entry} =~ /([^\n]+)/g) {
$self->parseEntry($1, TRUE);
}
}
$self->{level} ||= MaxLevel;
$self->{UCA_Version} ||= UCA_Version();
$self->{overrideHangul} = FALSE
if ! exists $self->{overrideHangul};
$self->{overrideCJK} = FALSE
if ! exists $self->{overrideCJK};
$self->{normalization} = 'NFD'
if ! exists $self->{normalization};
$self->{rearrange} = $self->{rearrangeTable} ||
($self->{UCA_Version} <= 11 ? $DefaultRearrange : [])
if ! exists $self->{rearrange};
$self->{backwards} = $self->{backwardsTable}
if ! exists $self->{backwards};
$self->checkCollator();
return $self;
}
sub parseAtmark {
my $self = shift;
my $line = shift; # after s/^\s*\@//
if ($line =~ /^version\s*(\S*)/) {
$self->{versionTable} ||= $1;
}
elsif ($line =~ /^variable\s+(\S*)/) { # since UTS #10-9
$self->{variableTable} ||= $1;
}
elsif ($line =~ /^alternate\s+(\S*)/) { # till UTS #10-8
$self->{alternateTable} ||= $1;
}
elsif ($line =~ /^backwards\s+(\S*)/) {
push @{ $self->{backwardsTable} }, $1;
}
elsif ($line =~ /^forwards\s+(\S*)/) { # parhaps no use
push @{ $self->{forwardsTable} }, $1;
}
elsif ($line =~ /^rearrange\s+(.*)/) { # (\S*) is NG
push @{ $self->{rearrangeTable} }, _getHexArray($1);
}
}
sub read_table {
my $self = shift;
### begin XS only ###
if ($self->{__useXS}) {
my @rest = _fetch_rest(); # complex matter need to parse
for my $line (@rest) {
next if $line =~ /^\s*#/;
if ($line =~ s/^\s*\@//) {
$self->parseAtmark($line);
} else {
$self->parseEntry($line);
}
}
return;
}
### end XS only ###
my($f, $fh);
foreach my $d (@INC) {
$f = File::Spec->catfile($d, @Path, $self->{table});
last if open($fh, $f);
$f = undef;
}
if (!defined $f) {
$f = File::Spec->catfile(@Path, $self->{table});
croak("$PACKAGE: Can't locate $f in \@INC (\@INC contains: @INC)");
}
while (my $line = <$fh>) {
next if $line =~ /^\s*#/;
if ($line =~ s/^\s*\@//) {
$self->parseAtmark($line);
} else {
$self->parseEntry($line);
}
}
close $fh;
}
##
## get $line, parse it, and write an entry in $self
##
sub parseEntry
{
my $self = shift;
my $line = shift;
my $tailoring = shift;
my($name, $entry, @uv, @key);
if (defined $self->{rewrite}) {
$line = $self->{rewrite}->($line);
}
return if $line !~ /^\s*[0-9A-Fa-f]/;
# removes comment and gets name
$name = $1
if $line =~ s/[#%]\s*(.*)//;
return if defined $self->{undefName} && $name =~ /$self->{undefName}/;
# gets element
my($e, $k) = split /;/, $line;
croak "Wrong Entry: