#!/usr/bin/perl

use strict;
use warnings;

my $ARBHOME = $ENV{ARBHOME};
defined $ARBHOME || die "Expected environment variable ARBHOME to be defined";
-d $ARBHOME || die "Expected directory '$ARBHOME' (from environment variable ARBHOME)";

my $ARBPERLDIR = $ARBHOME.'/PERL2ARB';
-d $ARBPERLDIR || die "Expected directory '$ARBPERLDIR'";

my %method = (); # ARB::name or BIO::name (value=1)
my %name   = (); # key = method, value = name only

sub parsePerlMethods() {
  my $arb_c = $ARBPERLDIR.'/ARB.c';
  open(CSRC, '<'.$arb_c) || die "failed to read '$arb_c' (Reason: $!)";

  my $line;
  my $reg = qr/(newXSproto|newXSproto_portable).*\"(ARB|BIO)::([a-zA-Z0-9_]+)\"/o;
  while (defined($line=<CSRC>)) {
    if ($line =~ $reg) {
      my ($package,$name) = ($2,$3);
      my $method = $package.'::'.$name;
      if (exists $method{$method}) {
        die "duplicated method '$method'";
      }
      $method{$method} = 1;
      $name{$method} = $name;
    }
  }

  close(CSRC);
  if (scalar(keys %method)<1) {
    die "No 'ARB perl interface'-functions detected in '$arb_c' (parsing error?)";
  }
}

my %prototype = (); # key = c++ method name, value=full prototype OR 0 (if duplicated)
my %file      = (); # key = method name, value=filename OR undef (if duplicated)

sub parsePrototypes() {
  my $proto_h = $ARBPERLDIR.'/proto.h';
  open(PROTO, '<'.$proto_h) || die "failed to read '$proto_h' (Reason: $!)";

  my $line;
  my $reg_file_start = qr/\/\*\s*([^\s]+)\s*\*\//o;
  my $reg_prototype = qr/([a-zA-Z0-9_]+)\(/o;

  my $current_file = undef;

  while (defined($line=<PROTO>)) {
    if ($line =~ $reg_prototype) {
      my $name = $1;
      chomp($line);
      if (exists $prototype{$name}) {
        $prototype{$name} = 0; # duplicated function name
        $file{$name} = undef;
      }
      else {
        $prototype{$name} = $line;
        $file{$name} = $current_file;
      }
    }
    elsif ($line =~ $reg_file_start) {
      my ($file) = $1;
      if ($file =~ /\.cxx$/o) { $current_file = $file; }
      # else { print "ignored file '$file'\n"; }
    }
  }

  close(PROTO);
  if (scalar(keys %prototype)<1) {
    die "No 'ARB perl interface'-functions detected in '$proto_h' (parsing error?)";
  }

  # hardcoded methods:
  $prototype{GB_await_error} = 'GB_ERROR GB_await_error(void);';
  $file{GB_await_error} = '../CORE/arb_msg.cxx';

  $prototype{GBP_prepare_to_die} = 'void GBP_prepare_to_die()';
  $file{GBP_prepare_to_die} = 'adperl.cxx';
}

my %cpp_method = (); # key = perl method (full), value=C++ method

sub linkMethods() {
  my $reg_cpp_method = qr/^([^_]+)_(.*)$/o;

  # see also ../PERLTOOLS/arb_proto_2_xsub.cxx@PREFIX_HANDLING
  my %ppackage =
    (
     'GB' => 'ARB',
     'GBP' => 'ARB',
     'GBT' => 'BIO',
     'GEN' => 'BIO',
    );

  foreach my $cpp_method (sort keys %prototype) {
    if ($cpp_method =~ $reg_cpp_method) {
      my ($prefix,$name) = ($1,$2);
      my $package = $ppackage{$prefix};
      if (defined $package) {
        my $perl_method = $package.'::'.$name;
        if (exists $method{$perl_method}) {
          if (exists $cpp_method{$perl_method}) {
            die "ambiguous function suffix matches for '$perl_method': $cpp_method and ".$cpp_method{$perl_method};
          }
          $cpp_method{$perl_method} = $cpp_method;
        }
        # else { print STDERR "Note: Found no perl-method for prototype '$cpp_method'\n"; } # shows unused prototypes
      }
      # else { print STDERR "Note: no package defined for prefix '$prefix' (cpp_method=$cpp_method)\n"; }
    }
    else {
      die "cannot parse method name: '$cpp_method'";
    }
  }

  foreach my $perl_method (sort keys %method) {
    if (not exists $cpp_method{$perl_method}) {
      die "failed to detect c++-function for '$perl_method'";
      # print "Error: failed to detect c++-function for '$perl_method'\n";
    }
  }
}

my $HEADER = <<'END_HEADER';
#
# The list below contains information about the functions provided by the arb PERL interface.
#
# The columns contain the following information:
# - function name (used to sort; not unique),
# - fully qualified name of perl function (unique),
# - name of C++ function and
# - name of C++ file where function is located.
#
# The list is automatically updated while arb is build.
#
# To find specific information for single C++ functions, refer to
#     http://dev.arb-home.de/source_doc/globals_func_g.html#index_g
# look there for the C++ function name (3rd column below) and
# click on one of the links behind the name.
#
# If no documentation is shown then, please also click on the line number at
#     "Definition at line ### of file MODULE.cxx."
# to look into the source code, which may contain documenting comments.
#
END_HEADER

sub dumpMethods() {
  my @sorted_method = sort {
    my $cmp = $name{$a} cmp $name{$b};
    if ($cmp == 0) {
      $cmp = $a cmp $b;
    }
    $cmp;
  } keys %method;

  my $name_width = 47;
  my $full_width = $name_width + 5;
  my $cpp_width  = $name_width + 3 + 1;

  print $HEADER;

  foreach my $perl_method (@sorted_method) {
    my $name = $name{$perl_method};
    my $name_length = length($name);
    if ($name_length > $name_width) { die "bad formatting: name_width < $name_length"; }

    my $cpp = $cpp_method{$perl_method};
    my $cpp_length = length($cpp);
    if ($cpp_length > $cpp_width) { die "bad formatting: cpp_width < $cpp_length"; }

    my $file = $file{$cpp};

    if (not defined $file) {
       die "Error: ambiguous function definition for '$cpp' (overloaded method exported to perl?)\n";
    }
    else {
      print sprintf('%-'.$name_width.'s %-'.$full_width.'s %-'.$cpp_width.'s %s'."\n", $name, $perl_method, $cpp, $file);
    }
  }
}

sub main() {
  parsePerlMethods();
  parsePrototypes();
  linkMethods();

  dumpMethods();
}

main();
