From c2ecaf87e462fd6600ce5ee6a29cb352de042714 Mon Sep 17 00:00:00 2001 From: Philip Wittamore Date: Wed, 24 Sep 2025 22:26:00 +0200 Subject: update --- scripts/tbl2html.pl | 364 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 364 insertions(+) create mode 100755 scripts/tbl2html.pl (limited to 'scripts/tbl2html.pl') diff --git a/scripts/tbl2html.pl b/scripts/tbl2html.pl new file mode 100755 index 0000000..ba8ae09 --- /dev/null +++ b/scripts/tbl2html.pl @@ -0,0 +1,364 @@ +#!/usr/bin/perl -w +# $LynxId: tbl2html.pl,v 1.5 2011/05/21 15:18:16 tom Exp $ +# +# Translate one or more ".tbl" files into ".html" files which can be used to +# test the charset support in lynx. Each of the ".html" files will use the +# charset that corresponds to the input ".tbl" file. + +use strict; + +use Getopt::Std; +use File::Basename; +use POSIX qw(strtod); + +sub field($$) { + my $value = $_[0]; + my $count = $_[1]; + + while ( $count > 0 ) { + $count -= 1; + $value =~ s/^\S*\s*//; + } + $value =~ s/\s.*//; + return $value; +} + +sub notes($) { + my $value = $_[0]; + + $value =~ s/^[^#]*//; + $value =~ s/^#//; + $value =~ s/^\s+//; + + return $value; +} + +sub make_header($$$) { + my $source = $_[0]; + my $charset = $_[1]; + my $official = $_[2]; + + printf FP "\n"; + printf FP "\n"; + printf FP "\n"; + printf FP "\n"; + printf FP "%s table\n", &escaped($official); + printf FP "\n", &escaped($charset); + printf FP "\n"; + printf FP "\n"; + printf FP " \n"; + printf FP "\n"; + printf FP "

%s table

\n", &escaped($charset); + printf FP "\n"; + printf FP "
\n";
+	printf FP "Code  Char  Entity   Render          Description\n";
+}
+
+sub make_mark() {
+	printf FP "----  ----  ------   ------          -----------------------------------\n";
+}
+
+sub escaped($) {
+	my $result = $_[0];
+	$result =~ s/&/&/g;
+	$result =~ s//>/g;
+	return $result;
+}
+
+sub make_row($$$) {
+	my $old_code = $_[0];
+	my $new_code = $_[1];
+	my $comments = $_[2];
+
+	# printf "# make_row %d %d %s\n", $old_code, $new_code, $comments;
+	my $visible = sprintf("&#%d;      ", $new_code);
+	if ($old_code < 256) {
+		printf FP "%4x    %c   %.13s  &#%d;             %s\n",
+			$old_code, $old_code,
+			$visible, $new_code,
+			&escaped($comments);
+	} else {
+		printf FP "%4x    .   %.13s  &#%d;             %s\n",
+			$old_code,
+			$visible, $new_code,
+			&escaped($comments);
+	}
+}
+
+sub null_row($$) {
+	my $old_code = $_[0];
+	my $comments = $_[1];
+
+	if ($old_code < 256) {
+		printf FP "%4x    %c                     %s\n",
+			$old_code, $old_code,
+			&escaped($comments);
+	} else {
+		printf FP "%4x    .                     %s\n",
+			$old_code,
+			&escaped($comments);
+	}
+}
+
+sub make_footer() {
+	printf FP "
\n"; + printf FP "\n"; + printf FP "\n"; +} + +# return true if the string describes a range +sub is_range($) { + return ($_[0] =~ /.*-.*/); +} + +# convert the U+'s to 0x's so strtod() can convert them. +sub zeroxes($) { + my $result = $_[0]; + $result =~ s/^U\+/0x/; + $result =~ s/-U\+/-0x/; + return $result; +} + +# convert a string to a number (-1's are outside the range of Unicode). +sub value_of($) { + my ($result, $oops) = strtod($_[0]); + $result = -1 if ($oops ne 0); + return $result; +} + +# return the first number in a range +sub first_of($) { + my $range = &zeroxes($_[0]); + $range =~ s/-.*//; + return &value_of($range); +} + +# return the last number in a range +sub last_of($) { + my $range = &zeroxes($_[0]); + $range =~ s/^.*-//; + return &value_of($range); +} + +sub one_many($$$) { + my $oldcode = $_[0]; + my $newcode = &zeroxes($_[1]); + my $comment = $_[2]; + + my $old_code = &value_of($oldcode); + if ( $old_code lt 0 ) { + printf "? Problem with number \"%s\"\n", $oldcode; + } else { + &make_mark if (( $old_code % 8 ) == 0 ); + + if ( $newcode =~ /^#.*/ ) { + &null_row($old_code, $comment); + } elsif ( &is_range($newcode) ) { + my $first_item = &first_of($newcode); + my $last_item = &last_of($newcode); + my $item; + + if ( $first_item lt 0 or $last_item lt 0 ) { + printf "? Problem with one:many numbers \"%s\"\n", $newcode; + } else { + if ( $comment =~ /^$/ ) { + $comment = sprintf("mapped: %#x to %#x..%#x", $old_code, $first_item, $last_item); + } else { + $comment = $comment . " (range)"; + } + for $item ( $first_item..$last_item) { + &make_row($old_code, $item, $comment); + } + } + } else { + my $new_code = &value_of($newcode); + if ( $new_code lt 0 ) { + printf "? Problem with number \"%s\"\n", $newcode; + } else { + if ( $comment =~ /^$/ ) { + $comment = sprintf("mapped: %#x to %#x", $old_code, $new_code); + } + &make_row($old_code, $new_code, $comment); + } + } + } +} + +sub many_many($$$) { + my $oldcode = $_[0]; + my $newcode = $_[1]; + my $comment = $_[2]; + + my $first_old = &first_of($oldcode); + my $last_old = &last_of($oldcode); + my $item; + + if (&is_range($newcode)) { + my $first_new = &first_of($newcode); + my $last_new = &last_of($newcode); + for $item ( $first_old..$last_old) { + &one_many($item, $first_new, $comment); + $first_new += 1; + } + } else { + for $item ( $first_old..$last_old) { + &one_many($item, $newcode, $comment); + } + } +} + +sub approximate($$$) { + my $values = $_[0]; + my $expect = sprintf("%-8s", $_[1]); + my $comment = $_[2]; + my $escaped = &escaped($expect); + my $left; + my $this; + my $next; + + $escaped =~ s/\\134/\\/g; + $escaped =~ s/\\015/\ \;/g; + $escaped =~ s/\\012/\ \;/g; + + while ( $escaped =~ /^.*\\[0-7]{3}.*$/ ) { + $left = $escaped; + $left =~ s/\\[0-7]{3}.*//; + $this = substr $escaped,length($left)+1,3; + $next = substr $escaped,length($left)+4; + $escaped = sprintf("%s&#%d;%s", $left, oct $this, $next); + } + + my $visible = sprintf("&#%d; ", $values); + if ($values < 256) { + printf FP "%4x %c %.13s &#%d; approx: %s\n", + $values, $values, + $visible, + $values, + $escaped; + } else { + printf FP "%4x . %.13s &#%d; approx: %s\n", + $values, + $visible, + $values, + $escaped; + } +} + +sub doit($) { + my $source = $_[0]; + + printf "** %s\n", $source; + + my $target = basename($source, ".tbl"); + + # Read the file into an array in memory. + open(FP,$source) || do { + print STDERR "Can't open input $source: $!\n"; + return; + }; + my (@input) = ; + chomp @input; + close(FP); + + my $n; + my $charset = ""; + my $official = ""; + my $empty = 1; + + for $n (0..$#input) { + $input[$n] =~ s/\s*$//; # trim trailing blanks + $input[$n] =~ s/^\s*//; # trim leading blanks + $input[$n] =~ s/^#0x/0x/; # uncomment redundant stuff + + next if $input[$n] =~ /^$/; + next if $input[$n] =~ /^#.*$/; + + if ( $empty + and ( $input[$n] =~ /^\d/ + or $input[$n] =~ /^U\+/ ) ) { + $target = $charset . ".html"; + printf "=> %s\n", $target; + open(FP,">$target") || do { + print STDERR "Can't open output $target: $!\n"; + return; + }; + &make_header($source, $charset, $official); + $empty = 0; + } + + if ( $input[$n] =~ /^M.*/ ) { + $charset = $input[$n]; + $charset =~ s/^.//; + } elsif ( $input[$n] =~ /^O.*/ ) { + $official = $input[$n]; + $official =~ s/^.//; + } elsif ( $input[$n] =~ /^\d/ ) { + + my $newcode = &field($input[$n], 1); + + next if ( $newcode eq "idem" ); + next if ( $newcode eq "" ); + + my $oldcode = &field($input[$n], 0); + if ( &is_range($oldcode) ) { + &many_many($oldcode, $newcode, ¬es($input[$n])); + } else { + &one_many($oldcode, $newcode, ¬es($input[$n])); + } + } elsif ( $input[$n] =~ /^U\+/ ) { + if ( $input[$n] =~ /^U\+\w+:/ ) { + my $values = $input[$n]; + my $expect = $input[$n]; + + $values =~ s/:.*//; + $values = &zeroxes($values); + $expect =~ s/^[^:]+://; + + if ( &is_range($values) ) { + printf "fixme:%s(%s)(%s)\n", $input[$n], $values, $expect; + } else { + &approximate(&value_of($values), $expect, ¬es($input[$n])); + } + } else { + my $value = $input[$n]; + $value =~ s/\s*".*//; + $value = &value_of(&zeroxes($value)); + if ($value gt 0) { + my $quote = $input[$n]; + my $comment = ¬es($input[$n]); + $quote =~ s/^[^"]*"//; + $quote =~ s/".*//; + &approximate($value, $quote, $comment); + } else { + printf "fixme:%d(%s)\n", $n, $input[$n]; + } + } + } else { + # printf "skipping line %d:%s\n", $n + 1, $input[$n]; + } + } + if ( ! $empty ) { + &make_footer(); + } + close FP; +} + +sub usage() { + print <= 0 ) { + &doit ( shift @ARGV ); + } +} +exit (0); -- cgit v1.2.3