#!/usr/bin/perl -w
#
# HelpPC 2.10 to HTML Converter v1.00 by Timo Bingmann
#
# This Perl script will convert the HelpPC quick reference library by David
# Jurgens into XHTML 1.0 files. The reference library may be old, but many
# parts are still applicable to present-day PCs.
#
# One main goal as to retain the look and feel of the original HelpPC browser
# program, so CSS was used to recreate the original colors and layout as close
# as possible. The script uses Template::Toolkit with embedded text templates
# to output the HTML files. All output files are valid XHTML 1.0 Strict.
#
# Usage:
#
# Download the original helpc21.zip from simtel.net:
# ftp://ftp.simtel.net/pub/simtelnet/msdos/info/helppc21.zip
#
# Extract the zip file and run the Perl script in the path containing the .TXT
# files. The script will create a subdirectory "html" with 1014 html files.
# You can modify the nostalgic.css or the embedded templates to fit your taste.
#
# Copyright 2006 by Timo Bingmann (tb AT idlebox.net)
# Published on 2006-07-10 under the GNU General Public Licence v2 (GPLv2)
#
use strict;
use warnings;
use Template;
# helppc txt files are dos style
$/ = "\r\n";
### Input functions
# global data
my @filetitles; # contains the files and their titles
my @nodelist; # contains all text nodes
my
# of the node
my
# duplicate checking
my $currnode = -1;
# calculate the file name of a node: removes illegal characters
sub calcfilename {
my ($node) = @_;
my $title = $$node{title};
$title =~ tr/ ,/__/;
$title =~ s/[\(\).]//g;
return $$node{file}."-".$title.".html";
}
# find the node referenced by a keyword
sub makekeywordlink {
my ($kwbase) = @_;
foreach my $kw ($kwbase, $kwbase."s", "_".$kwbase)
{
my $idx = $keywordslc{lc $kw};
if (defined $idx) {
return calcfilename( $nodelist[$idx] );
}
}
# check if only one keyword is a substring match
my @submatch = grep({ index(lc($_), lc($kwbase)) >= 0 } keys
if (scalar(@submatch) == 1) {
return calcfilename( $nodelist[ $keywordslc{$submatch[0]} ] );
}
print(STDERR "missing cross-reference keyword: $kwbase\n");
return "";
}
# character from cp437 to unicode entitiy transformation map
my @charmap =
(
# 0/8 1/9 2/A 3/B 4/C 5/D 6/E 7/F
0x0020, 0x263A, 0x263B, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022, # 0
0x25D8, 0x25E6, 0x25D9, 0x2642, 0x2640, 0x266A, 0x266B, 0x263C, # 8
0x25BA, 0x25C4, 0x2195, 0x203C, 0x00B6, 0x00A7, 0x25AC, 0x21A8, # 10
0x2191, 0x2193, 0x2192, 0x2190, 0x2310, 0x2194, 0x25B2, 0x25BC, # 18
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, # 20
0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, # 28
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, # 30
0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, # 38
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, # 40
0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, # 48
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, # 50
0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, # 58
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, # 60
0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, # 68
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, # 70
0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x0394, # 78
0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, # 80
0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, # 88
0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, # 90
0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x20A7, 0x0192, # 98
0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, # A0
0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, # A8
0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, # B0
0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, # B8
0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, # C0
0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567, # C8
0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, # D0
0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, # D8
0x03B1, 0x03B2, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x03BC, 0x03C4, # E0
0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x2205, 0x2208, 0x2229, # E8
0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248, # F0
0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0, # F8
);
# convert one line of ascii text into html. add hyperlinks for ~title~
sub transformline {
my ($in, $hyperref) = @_;
my $out = "";
my $col = 0; # count column to calculate tabs
my $ws = 0; # count number of consecutive whitespaces
my $inref = 0;
my $reftext;
my
foreach my $l (split(//,$in))
{
if ($hyperref and $l eq "~") # beginning or end of a hyperlink
{
if (!$inref) { # beginning
$inref = 1;
$reftext = "";
}
else { # end
if ($reftext eq "") { # was a ~~
$out .= "~";
$col++;
}
else {
$out .= sprintf('<a href="%s">%s</a>',
makekeywordlink($reftext), $reftext);
$col += length($reftext);
}
$inref = 0;
}
}
elsif ($inref) {
$reftext .= $l;
next;
}
elsif ($l eq "\t")
{
# expand tab.
$out .= " " if $ws
$out .= " " if $ws
$col++;
$ws++;
while($col
$out .= " " if $ws
$out .= " " if $ws
$col++;
$ws++;
}
}
elsif ($l eq " ")
{
$out .= " " if $ws
$out .= " " if $ws
$col++;
$ws++;
}
elsif (defined $htmlescape{$l})
{
$out .= $htmlescape{$l};
$col++;
$ws = 0;
}
elsif (ord($l) == $charmap[ord($l)])
{
$out .= $l;
$col++;
$ws = 0;
}
else {
$out .= sprintf("&#%d;",$charmap[ord($l)]);
$col++;
$ws = 0;
}
}
return $out;
}
# convert a whole text node into html. handles centering for lines beginning
# with
sub transformtext {
my ($intext) = @_;
my $out = "";
foreach my $ln (split(/\n/, $intext))
{
my $firstchar = substr($ln, 0, 1);
$ln = (length($ln) > 0) ? substr($ln, 1) : "";
my $highlight = 0;
# just text
if ($firstchar eq "%") { # highlight line
$highlight = 1;
}
elsif ($firstchar eq "^") { # center and highlight line
# center to 80 chars per line
$ln = (" " x int((80 - length($ln))/2)) . $ln;
$highlight = 1;
}
else {
$ln = $firstchar.$ln;
}
$out .= '<span class="highlight">' if $highlight;
$out .= transformline($ln, 1);
$out .= '</span>' if $highlight;
$out .= "<br />\n";
}
return $out;
}
# load one of the helppc txt files into the global data
sub loadtxt {
my ($filename, $fileshort) = @_;
open(F, $filename) or die("Could not open $filename: $!");
my $filetitle;
my
while( my $ln = <F> )
{
chomp($ln);
my $firstchar = substr($ln, 0, 1);
$ln = (length($ln) > 0) ? substr($ln, 1) : "";
if ($firstchar eq "@")
{
print(STDERR "loading file: $ln\n");
$filetitle = $ln;
push(@filetitles, { file => $fileshort, title => $filetitle });
}
elsif ($firstchar eq ":")
{
my @keywords = split(/:/, $ln);
print(STDERR "node title: $keywords[0]\n");
#print(STDERR "node keywords: ".join(" = ", @keywords)."\n");
# save current node
if ($node{title}) {
push(@nodelist, {
}
$currnode++;
$currnode == scalar(@nodelist) or die();
foreach my $kw (@keywords) {
if (defined $keywordslc{lc $kw}) {
print(STDERR "duplicate keywords: $kw\n");
next;
}
$keywords{$kw} = scalar(@nodelist);
$keywordslc{lc $kw} = scalar(@nodelist);
}
(
title => $keywords[0],
keywords => [@keywords],
file => $fileshort,
text => "",
);
}
else
{
$node{text} .= $firstchar.$ln."\n";
}
}
if ($node{title}) {
push(@nodelist, {
}
close(F);
return $filetitle;
}
### Output functions using Template::Toolkit
# base tt layout used as WRAPPER
my $ttlayout = <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<meta content="text/html; charset=ISO-8859-1" http-equiv="content-type" />
<title>HelpPC 2.10 - [
<link type="text/css" rel="stylesheet" href="nostalgic.css" title="nostalgic" />
</head>
<body>
<!-- Converted from the original text files by helppc21-convert.pl by Timo Bingmann - http://idlebox.net/ -->
<table class="bodytable" summary="">
<tr class="top">
<td class="unimono" style="text-align: left; width: 30%"><a class="menuplain" href="index.html">HelpPC 2.10</a></td>
<td class="unimono" style="text-align: center; width: 30%"><a class="menuplain" href="index.html">Quick Reference Utility</a></td>
<td class="unimono" style="text-align: right; width: 40%">Copyright 1991 David Jurgens</td>
</tr>
<tr>
<td colspan="3">
<table style="margin-left: auto; margin-right: auto" summary="">
<tr>
<td class="unimono">
<br />[
</td>
</tr>
</table>
</td>
</tr>
<tr class="bottom">
<td class="unimono" style="text-align: left; width: 30%">Esc or Alt-X to exit</td>
<td class="unimono" style="text-align: center; width: 40%">[
<td class="unimono" style="text-align: right; width: 30%">Home/PgUp/PgDn/End ←↑↓→</td>
</tr>
</table>
<div class="converter"><a href="http://idlebox.net/2006/helppc21/">Converted to HTML in 2006 by Timo Bingmann</a></div>
</body>
</html>
EOF
# reuse the same template object
my $templateproc = Template->new({ TRIM => 1,
PRE_CHOMP => 1,
POST_CHOMP => 1,
BLOCKS => {
ttlayout => $ttlayout
},
});
# write index.html containing the file titles
sub writeindex {
my $tttext = <<EOF;
[
<br /><br /><br />
<span class="highlight"> Main Topic Menu</span><br />
<br />
[
<a class="menulink" href="[% file.link %]">[
<br />
[
<br /><br /><br /><br />
[
EOF
# add the link to the converted HELPPC.DOC on the index page
my $helppcdocentry = {
file => "helppcdoc",
title => "HELPPC.DOC Readme",
};
my @filelist;
foreach my $f (@filetitles, $helppcdocentry)
{
push(@filelist,
{
title => " ".transformline($$f{title}.(" " x (40 - length($$f{title})))),
link => "$$f{file}.html",
}
);
}
my $vars = { filelist => \@filelist };
$templateproc->process(\$tttext, $vars, "html/index.html")
or die $templateproc->error();
}
# write a node index file for each txt contain the keywords
sub writefileindex {
my ($filehash) = @_;
my $tttext = <<EOF;
[
[
<br />
[
<a class="menulink" href="[% topic.link %]">[
[
[
<br />
[
[
<br />
[
EOF
# this creates the link's text: some magic to get the hover bars right
sub titleconvert {
my ($t) = @_;
(length $t > 20) and $t = substr($t, 1, 20);
$t .= (" " x (20 - length($t)));
return " ".transformline($t);
}
my @topiclist;
foreach my $key (sort { lc($a) cmp lc($b) } keys
{
my $nodeid = $keywords{$key};
next if $nodelist[$nodeid]{file} ne $$filehash{file};
push(@topiclist,
{
title => titleconvert($key),
link => calcfilename($nodelist[$nodeid])
}
);
}
my $vars =
{
title => transformtext("^".$$filehash{title}),
topiclist => \@topiclist
};
$templateproc->process(\$tttext, $vars, "html/$$filehash{file}.html")
or die $templateproc->error();
}
# write a node text file. quite straight forward
sub writenode {
my ($node) = @_;
my $fn = calcfilename($node);
my $tttext = <<EOF;
[
[
[
EOF
my $vars = { node => $node };
$templateproc->process(\$tttext, $vars, "html/$fn")
or die $templateproc->error();
}
# write a plain text file. pipe it through transformline
sub writeplaintext {
my ($infile, $outfile, $title) = @_;
open(IF, $infile)
or die("Could not open plain text file $infile: $!");
my $text = "";
my $firsttitle = 1;
while( my $ln = <IF> ) {
chomp($ln);
# remove page feeds and page titles
($ln =~ /^\s*\x0C\s*$/) and next;
if ($ln =~ /HelpPC 2\.10 \s+ Quick Reference Utility \s+ Copyright \(c\) 1991, David Jurgens/)
{
if ($firsttitle) { $firsttitle = 0; } # write it the first time
else { next; }
}
$text .= transformline($ln, 0)."<br />\n";
}
close(IF);
my $tttext = <<EOF;
[
[
[
EOF
my $vars = { title => $title, text => $text };
$templateproc->process(\$tttext, $vars, "html/$outfile")
or die $templateproc->error();
}
# write the nostalgic.css into the html dir
sub writecss {
open(F, "> html/nostalgic.css")
or die("Could not open html/nostalgic.css: $!");
my $css = <<EOF;
body { color: white; background: black; margin: 0px }
table.bodytable { width: 100
table.bodytable tr.top td { background: #0000AC; color: #FFFFFF; font-weight: bold }
table.bodytable tr.bottom td { background: #0000AC; color: #FFFFFF; font-weight: bold }
.unimono { font-family: monospace; font-size: 12pt; letter-spacing: 0; line-height: 11.9pt; color: #dddddd }
.unimono span.highlight { color: #52FFFF; font-weight: bold; text-decoration: none }
.unimono a { color: #ffff52; text-decoration: none }
.unimono a:hover { color: #5255FF; background: #ACAAAC }
.unimono a.menuplain { color: #ffffff; text-decoration: none }
.unimono a.menuplain:hover { color: #0000AC; background: #FFFFFF }
.unimono a.menulink { color: #dddddd; text-decoration: none }
.unimono a.menulink:hover { color: #ffffff; background: #0000AC; font-weight: bold; }
div.converter { text-align: right; font-family: monospace; font-size: 9pt; padding-top: 0.3ex }
div.converter a { color: #2222FF; text-decoration: none }
div.converter a:visited { color: #2222FF }
EOF
print(F $css);
close(F);
}
### Main Program begins here
## load each of the following helppc format txt files
my @scanfiles =
(
["ASM.TXT" => "asm"],
["C.TXT" => "c"],
["HARDWARE.TXT" => "hw"],
["INTERRUP.TXT" => "int"],
["TABLES.TXT" => "table"],
["MISC.TXT" => "misc"],
);
foreach my $f (@scanfiles) {
loadtxt($$f[0], $$f[1]);
}
# check that node filenames are unique
my
if (scalar(keys(
die("Duplicate filenames!");
}
# calculate text transformation and hyperlinks
foreach my $n (@nodelist) {
$$n{text} = transformtext($$n{text});
}
## write the files
writeindex();
writecss();
writeplaintext("HELPPC.DOC", "helppcdoc.html", "HelpPC Readme");
foreach my $f (@filetitles) {
writefileindex($f);
}
foreach my $n (@nodelist) {
writenode($n);
}
print(STDERR "All done.\n");
__END__