https://github.com/mozilla/gecko-dev
Raw File
Tip revision: db407080c2299748eb528331ad8cfbfe14198a26 authored by ffxbld on 12 October 2011, 00:36:23 UTC
Added tag FENNEC_8_0b3_BUILD1 for changeset 9e5a89bbd98c. CLOSED TREE a=release
Tip revision: db40708
genignorable.pl
#!/usr/bin/perl 

# Build the global %names table (code point number => character name) from
# the first two semicolon-separated fields of every line of the Unicode
# character database file.  %names is deliberately left as a package global:
# the listing code later in this script reads it to annotate its output.
my $unicode_data_file = 'UnicodeData-Latest.txt';

# Three-argument open with a lexical handle: the file name can never be
# mistaken for an open mode and the handle cannot collide with other globals.
open my $unicode_data_fh, '<', $unicode_data_file
  or die "Cannot open $unicode_data_file: $!";

while (my $line = <$unicode_data_fh>) {
  # Only the code point and the name are needed; leave the rest unsplit.
  my ($code, $name) = split /;/, $line, 3;

  # Skip blank or malformed lines instead of recording an undef name that
  # could clobber a genuine entry.
  next unless defined $name;

  $names{hex($code)} = $name;
}
close $unicode_data_fh;

# Scan DerivedCoreProperties.txt for lines that carry the
# Default_Ignorable_Code_Point property and produce two outputs:
#   * STDOUT: one "0xXXXX // NAME" line per code point (the name is looked up
#     in the global %names table and omitted when it is unknown);
#   * STDERR: a bracketed expression of \uXXXX escapes, built by merging
#     adjacent ranges and passing each merged range to make_unicode_range().
my $properties_file = 'DerivedCoreProperties.txt';
open my $properties_fh, '<', $properties_file
  or die "Cannot open $properties_file: $!";

my $re = '[';
my ($prevstart, $prevend);    # merged range that has not been emitted yet

while (my $line = <$properties_fh>) {
  next unless $line =~ /Default_Ignorable_Code_Point/;
  # Data lines begin with "XXXX" or "XXXX..YYYY"; anything else is skipped.
  next unless $line =~ /^([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))?/;

  my $start = hex($1);
  # A single code point has no "..YYYY" part, so it is its own range end.
  my $end = defined $2 ? hex($2) : $start;

  for my $c ($start .. $end) {
    printf "0x%04X", $c;
    # print rather than printf: the name is data, not a format string.
    print " // $names{$c}" if $names{$c};
    print "\n";
  }

  # Start a new merged range unless this one touches (or overlaps) the
  # previous one.  Use defined-ness, not truth, so code point 0 is handled.
  if (!defined $prevend || $start > $prevend + 1) {
    $re .= make_unicode_range($prevstart, $prevend) if defined $prevstart;
    $prevstart = $start;
  }
  $prevend = $end;
}

# Flush the last pending range; with no matching lines at all there is
# nothing to flush and the expression stays empty.
$re .= make_unicode_range($prevstart, $prevend) if defined $prevstart;
$re .= ']';
print STDERR $re;
close $properties_fh;

# Render the inclusive code point range $start..$end as a fragment of a
# regular expression written with \uXXXX escapes of UTF-16 code units.
#
# The caller is expected to have a character class open ("[...") when the
# fragment is appended:
#   * a range inside the BMP (<= 0xFFFF) is returned as a plain class member,
#     "\uXXXX" or "\uXXXX-\uYYYY";
#   * a range above the BMP is returned as one or more alternatives of the
#     form "]|\uHIGH[\uLOW-\uLOW" -- each one closes the class that is
#     currently open, adds a high-surrogate prefix and leaves a new
#     low-surrogate class open for the caller (or the next fragment) to close.
# Because of that, BMP ranges must be appended before any supplementary one.
#
# Parameters: $start, $end -- numeric code points, $start <= $end.
# Returns:    the fragment as a string.
sub make_unicode_range
{
  my ($start, $end) = @_;

  if ($start <= 0xffff) {
    if ($end > 0xffff) {
      # The range straddles the BMP boundary: emit the BMP part as a class
      # member and the rest as surrogate-pair alternatives.
      return make_unicode_range($start, 0xffff)
           . make_unicode_range(0x10000, $end);
    }
    return sprintf("\\u%04x", $start) if $start == $end;
    return sprintf("\\u%04x-\\u%04x", $start, $end);
  }

  # Split both ends into their UTF-16 high and low surrogates.
  my $starths = ($start - 0x10000) >> 10 | 0xd800;
  my $startls = ($start - 0x10000) & 0x3ff | 0xdc00;
  my $endhs = ($end - 0x10000) >> 10 | 0xd800;
  my $endls = ($end - 0x10000) & 0x3ff | 0xdc00;

  # Both ends share a high surrogate: one alternative covers everything.
  if ($starths == $endhs) {
    return sprintf("]|\\u%04x[\\u%04x-\\u%04x", $starths, $startls, $endls);
  }

  # $firstfull..$lastfull are the high surrogates whose entire low-surrogate
  # block (dc00-dfff) lies inside the range.  They are tracked separately so
  # that $endhs stays intact for the trailing partial block.
  my $firstfull = $starths;
  my $lastfull = $endhs;
  my $re = '';

  # Leading partial block: the range starts in the middle of $starths.
  if ($startls > 0xdc00) {
    $re .= sprintf("]|\\u%04x[\\u%04x-\\udfff", $starths, $startls);
    $firstfull++;
  }

  # The last high surrogate is only partially covered, so it is not "full".
  $lastfull-- if $endls < 0xdfff;

  # All completely covered high surrogates, possibly just one.
  if ($lastfull >= $firstfull) {
    $re .= sprintf("]|[\\u%04x-\\u%04x][\\udc00-\\udfff", $firstfull, $lastfull);
  }

  # Trailing partial block: the range ends in the middle of $endhs.
  if ($endls < 0xdfff) {
    $re .= sprintf("]|\\u%04x[\\udc00-\\u%04x", $endhs, $endls);
  }
  return $re;
}
back to top