#!/usr/bin/perl

use strict;
use warnings;

# ----------------------------------------
# hardcoded config for help-map

# ignore these pages (they are linked too frequently)
my %ignore = (
              'glossary' => 1,
              'arb' => 1,
             );

my $start_from = 'pa_optimizer'; # start-page
my $depth      = 3; # show pages reachable by that many links (both link directions)

# ----------------------------------------


sub read_xml($);
sub read_xml($) {
  my ($xml_dir) = @_;

  my @xml = ();
  my @sub = ();

  opendir(DIR,$xml_dir) || die "Failed to read '$xml_dir' (Reason: $!)";
  foreach (readdir(DIR)) {
    if ($_ ne '.' and $_ ne '..') {
      my $full = $xml_dir.'/'.$_;
      if (-d $full) {
        push @sub, $_;
      }
      elsif (/\.xml$/o) {
        push @xml, $_;
      }
    }
  }
  closedir(DIR);

  foreach my $sub (@sub) {
    my @subxml = read_xml($xml_dir.'/'.$sub);
    foreach (@subxml) {
      push @xml, $sub.'/'.$_;
    }
  }

  return @xml;
}

sub print_index(\@) {
  my ($xml_r) = @_;

  my $header=<<HEADER;
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE PAGE SYSTEM 'arb_help.dtd' [
  <!ENTITY nbsp "&#160;">
  <!ENTITY acute "&#180;">
  <!ENTITY eacute "&#233;">
  <!ENTITY apostr "&#39;">
  <!ENTITY semi "&#59;">
]>
<!-- This file has been generated by ../generate_index.pl -->
<PAGE name="help_index" edit_warning="devel" source="generate_index.pl">
  <TITLE>ARB help index</TITLE>
  <SECTION name="List of existing helpfiles">
    <LIST>
      <ENTRY>
HEADER
  my $footer=<<FOOTER;
      </ENTRY>
    </LIST>
  </SECTION>
</PAGE>
FOOTER

  print $header;
  foreach my $xml (@$xml_r) {
    my $hlp  = $xml;
    $hlp =~ s/\.xml$/\.hlp/o;
    my $link = '        <T reflow="0"><LINK dest="'.$hlp.'" type="hlp" quoted="0"/></T>';
    print $link."\n";
  }
  print $footer;

}

sub find_indexed_xmls($$) {
  my ($index_name,$xml_dir) = @_;

  my @xml = read_xml($xml_dir);
  @xml = sort map {
    if ($_ eq $index_name) { ; } # dont index the index
    else { $_; }
  } @xml;
  return @xml;
}

my %link = (); # key='from>to' value=bitvalue(1=uplink,2=sublink)

sub storeLink($$$) {
  my ($from,$to,$us) = @_;
  # print STDERR "storeLink from='$from' to='$to' us='$us'\n";

  my $concat = $from.' '.$to;
  die "invalid char '>' in '$concat'" if $concat =~ />/o;

  my $bit = 0;
  if ($us eq 'UP') { $bit = 1; }
  elsif ($us eq 'SUB') { $bit = 2; }

  my $key = $from.'>'.$to;
  my $val = $link{$key};
  if (not defined $val) { $val = 0; }

  $val = $val | $bit;
  $link{$key} = $val;
}

my %title_line = (); # key=xml-filename, value=lineno of <TITLE>..
my %source_file = (); # key=xml-filename, value=source/filename.hlp

sub parse_titles($\@\%) {
  my ($xml_dir,$xml_r, $title_r) = @_;
  foreach my $name (@$xml_r) {
    my $xml = $xml_dir.'/'.$name;
    open(FILE,'<'.$xml) || die "can't read '$xml' (Reason: $!)";

    my $namePlain = $name;
    if ($namePlain=~ /\.xml$/o) { $namePlain = $`; }

    my $line;
  LINE: while (defined($line=<FILE>)) {
      if ($line =~ /<TITLE>(.*)<\/TITLE>/o) {
        $$title_r{$name} = $1;
        $title_line{$name} = $.;
        last LINE; # TITLE occurs behind UP/SUB links -> done here
      }
      if ($line =~ /<(UP|SUB)\s+dest=/o) {
        my $us = $1;
        if ($line =~ /"(.*)"\s+type="(.*)"/o) {
          my ($dest,$type) = ($1,$2);
          if ($dest =~ /\.hlp$/o) { $dest = $`; }
          storeLink($namePlain,$dest,$us);
        }
      }
      if ($line =~ /<PAGE.*source=/o) {
        my $rest = $';
        if ($rest =~ /\"([^\"]*)\"/o) {
          my $source = $1;
          $source_file{$name} = $source;
        }
      }
    }
    close(FILE);

    if (not defined $$title_r{$name}) {
      die "$xml:1: Failed to parse title\n ";
    }
  }
}

sub warn_duplicate_titles($$$\%) {
  my ($xml_dir, $hlp_dest_dir, $html_dest_dir, $title_r) = @_;
  my $hlpdir = $xml_dir;
  my %seen = ();
  foreach my $file (keys %$title_r) {
    my $title = $$title_r{$file};
    if (defined $seen{$title}) {
      my $firstFile = $seen{$title};
      my $thisLine  = $title_line{$file};
      my $firstLine = $title_line{$firstFile};

      print STDERR "${xml_dir}/${file}:${thisLine}: Warning: duplicated title '$title' ..\n";
      print STDERR "${xml_dir}/${firstFile}:${firstLine}: Warning: .. first seen here.\n";

      my $src2 = $source_file{$file};
      my $src1 = $source_file{$firstFile};
      my $auto_del = undef;
      my $miss_src = undef;

      if (-f $src1) {
        if (not -f $src2) {
          # src2 is missing -> auto-delete $file
          $miss_src = $src2;
          $auto_del = "${xml_dir}/${file}";
        }
      }
      elsif (-f $src2) {
        if (not -f $src1) {
          # src1 is missing -> auto-delete $firstFile
          $miss_src = $src1;
          $auto_del = "${xml_dir}/${firstFile}";
        }
      }

      # print STDERR "src1:     $src1\n";
      # print STDERR "src2:     $src2\n";
      # print STDERR "auto_del: $auto_del\n";
      # print STDERR "miss_src: $miss_src\n";

      if (defined $miss_src) {
        die 'internal error' if not defined $auto_del;
        print STDERR "Warning: source '${miss_src}' has disappeared\n";

        my @auto_deletes = ( $auto_del );

        my $name = undef;
        if ($auto_del =~ /^${xml_dir}/) {
          my $nameExt = $';
          if ($nameExt =~ /\.xml$/o) {
            $name = $`;
          }
        }
        defined $name || die "failed to detect name of generated help page from '$auto_del'";

        my $gen_hlp  = $hlp_dest_dir.$name.'.hlp';
        my $gen_html = $html_dest_dir.$name.'.html';
        if (-f $gen_hlp) { push @auto_deletes, $gen_hlp; } else { print "no such file: $gen_hlp (while checking for relict)\n"; }
        if (-f $gen_html) { push @auto_deletes, $gen_html; } else { print "no such file: $gen_html (while checking for relict)\n"; }

        foreach (@auto_deletes) {
          print STDERR "Note:    auto-deleting '${_}'\n";
          if (not unlink($_)) { print STDERR "Failed to auto-delete '$_' (Reason: $!)\n"; }
        }
      }
    }
    else {
      $seen{$title} = $file;
    }
  }
}

sub generate_index($$$$) {
  my ($index_name, $xml_dir, $hlp_dest_dir, $html_dest_dir) = @_;

  my @xml   = find_indexed_xmls($index_name,$xml_dir);
  my %title = ();
  parse_titles($xml_dir,@xml,%title);

  warn_duplicate_titles($xml_dir, $hlp_dest_dir, $html_dest_dir,%title);

  @xml = sort { $title{$a} cmp $title{$b}; } @xml;

  print_index(@xml);
}

sub dot_label($) {
  my ($target) = @_;
  return '"'.$target.'"';
}

sub generate_map($) {
  my ($map_name) = @_;

  # my $maxsize = 17; # inch
  # my $maxsize = 20; # inch
  # my $maxsize = 40; # inch
  my $maxsize = 80; # inch
  open(DOT,'>'.$map_name) || die "can't write '$map_name' (Reason: $!)";

  print DOT "digraph ARBHELPDEP {\n";
  # print DOT "  rankdir=LR;\n";
  print DOT "  concentrate=true;\n";
  print DOT "  searchsize=1000;\n";
  print DOT "  Damping=2.0;\n";
  print DOT "  size=\"$maxsize,$maxsize\";\n";
  # print DOT "  orientation=portrait;\n";
  print DOT "\n";

  my %use = ( $start_from => 1 );

  my $added = 1;
  while ($added==1) {
    $added = 0;
    foreach (keys %link) {
      die if (not $_ =~ />/o);
      my ($from,$to) = ($`,$');

      die "helpfile '$to' links to itself" if ($to eq $from);

      if (exists $use{$from}) {
        my $next = $use{$from}+1;
        if (($next<=$depth) and ((not exists $use{$to}) or ($use{$to}>$next)) and (not $ignore{$to})) {
          $use{$to} = $next;
          # print STDERR "'$to' set to $next (triggered by '$from' with use=".$use{$from}.")\n";
          $added = 1;
        }
      }
      if (exists $use{$to}) {
        my $next = $use{$to}+1;
        if (($next<=$depth) and ((not exists $use{$from}) or ($use{$from}>$next)) and (not $ignore{$from})) {
          $use{$from} = $next;
          # print STDERR "'$from' set to $next (triggered by '$to' with use=".$use{$to}.")\n";
          $added = 1;
        }
      }
    }
  }

  foreach (keys %link) {
    die if (not $_ =~ />/o);
    my ($from,$to) = ($`,$');
    if ((not exists $ignore{$from}) and (not exists $ignore{$to})) {
      if ((exists $use{$from}) and (exists $use{$to})) {
        ($from,$to) = (dot_label($from.'['.$use{$from}.']'),dot_label($to.'['.$use{$to}.']'));
        print DOT '    '.$from.' -> '.$to.';'."\n";
      }
    }
  }
  print DOT "}\n";
  close(DOT);
}

sub main() {
  my $args = scalar(@ARGV);
  if ($args != 5) {
    print "Usage: generate_index.pl XMLDIRECTORY HLPDESTDIR HTMLDESTDIR NAME_OF_INDEX.xml MAP.dot\n";
    print "Scans for xml-helpfiles in and below XMLDIRECTORY.\n";
    print "Generates\n";
    print "- list of all found helpfiles to STDOUT (assuming it is piped to NAME_OF_INDEX.xml)\n";
    print "- a (partial) help-map in file MAP.dot\n";
    print "Note: this script also handles cleanup after renaming/deleting help-files\n";
    die "Error: invalid number of arguments";
  }

  my $xml_dir       = $ARGV[0];
  my $hlp_dest_dir  = $ARGV[1];
  my $html_dest_dir = $ARGV[2];
  my $index_name    = $ARGV[3];
  my $map_name      = $ARGV[4];

  if (not -d $xml_dir) { die "No such directory '$xml_dir'"; }
  if (not -d $hlp_dest_dir) { die "No such directory '$hlp_dest_dir'"; }
  if (not -d $html_dest_dir) { die "No such directory '$html_dest_dir'"; }

  generate_index($index_name, $xml_dir, $hlp_dest_dir, $html_dest_dir);
  generate_map($map_name);
}
main();
