#!/usr/bin/perl

use strict;
use warnings;

my $seenGlobalDups = 0;
my $seenUnitDups   = undef;

my %gloc = (); # [global] key=symbol, value=location (or empty string)
my %lloc = (); # [local]  key=symbol, value=location (or empty string)
my %rloc = (); # [local]  key=symbol, value=location of reference

sub acceptDupSym($);
sub acceptDupSym($) {
  # return true for all symbols that shall be accepted if duplicated
  my ($sym) = @_;
  if ($sym =~ /^(ARB_main|aw_never_called_main)\(/o) {
    return 1; # special ARB function
  }
  if ($sym =~ /^aisc_main/o) {
    return 1; # special ARB variable
  }
  if ($sym =~ /^(_init|__bss_start|_edata|_fini|_end)$/o) {
    return 1; # shared lib entry/exit points
  }
  if ($sym =~ /^__odr_asan\./o) { # ignore ASAN prefixes
    return acceptDupSym($');
  }
  return 0;
}

sub isGlobalSymFlag($) {
  # return true for all flags that shall be checked for duplicates
  my ($f) = @_;
  if (($f ge 'A' and $f le 'D') or ($f eq 'G') or ($f ge 'R' and $f le 'T')) {
    return 1;
  }
  return 0;
}

sub isReferenceSymFlag($) {
  my ($f) = @_;
  if ($f eq 'U') {
    return 1;
  }
  return 0;
}

sub addSym($$) {
  my ($sym,$sloc) = @_;

  my $non_virtual_thunk = 0;
  if ($sym =~ /^non-virtual\sthunk\sto\s/o) {
    $sym = $';
    $non_virtual_thunk = 1;
  }

  my $haveLoc = $lloc{$sym};
  if (not defined $haveLoc) {
    $lloc{$sym} = $sloc; # first definition
  }
  elsif ($haveLoc eq '') { # first definition was empty
    $lloc{$sym} = $sloc; # overwrite empty definition
  }
  elsif ($haveLoc ne $sloc) { # differing locations reported for one symbol (inside one unit)
    my $ignore = 0;
    if ($sloc eq '') { $ignore = 1; } # ignore if second is empty (Note: does the same as overwrite above)
    if ($non_virtual_thunk) { $ignore = 1; } # ignore duplicate definitions for non_virtual_thunk|s

    if (not $ignore) {
      print "$haveLoc: Warning: first definition of $sym\n";
      print "$sloc: Error: duplicated definition of $sym (in same unit). Bug?\n";
      $seenUnitDups++;
    }
  }
  # debug: log all added symbols
  # print "$sloc: Note: added symbol: '$sym'\n";
}
sub addRef($$) {
  my ($sym,$sloc) = @_;
  my $haveRef = $rloc{$sym};
  if (not defined $haveRef) {
    die if $sloc eq ''; # not allowed here
    $rloc{$sym} = $sloc; # first reference
  }
  # ignore following references
}

my $reg_sym = qr/^[0-9a-f]*\s+([A-Z])\s(.*)$/i;
my $reg_loc = qr/^(.*)\s([^\s]+:[0-9]+)$/i;

sub scanSymdef($) {
  my ($line) = @_;
  if ($line ne '') {
    if ($line =~ $reg_sym) {
      my ($flag,$rest) = ($1,$2);
      if (isGlobalSymFlag($flag)) {
        if ($rest =~ $reg_loc) {
          my ($proto,$sloc) = ($1,$2);
          addSym($proto,$sloc);
        }
        else { # assume whole rest is prototype
          addSym($rest,'');
        }
      }
      elsif (isReferenceSymFlag($flag)) {
        if ($rest =~ $reg_loc) {
          my ($proto,$sloc) = ($1,$2);
          addRef($proto,$sloc);
        }
        # ignore refs w/o location (they are not useful)
      }
    }
    else {
      if ($line =~ /^[^\s]+:$/o) { # accept start of single objects
        ;
      }
      else {
        die "cannot parse line '$line'\n";
      }
    }
  }
}

sub nonEmpty($$) {
  my ($locMaybeEmpty,$fakeLoc) = @_;
  return $locMaybeEmpty eq '' ? $fakeLoc : $locMaybeEmpty;
}

sub addFileSymbols($) {
  # called after symfile has been completely scanned

  my ($symfile) = @_;
  my $fallback_loc = $ENV{ARBHOME}.'/UNIT_TESTER/'.$symfile.':1';

  foreach my $sym (keys %lloc) {
    my $loc = $lloc{$sym};
    if ($loc eq '') { # if location missing..
      my $seenRef = $rloc{$sym};
      if (defined $seenRef) {
        $loc = $seenRef; # .. use location of reference
      }
    }

    my $haveGlobal = $gloc{$sym};
    if (defined $haveGlobal) {
      if (not acceptDupSym($sym)) {
        print nonEmpty($haveGlobal,$fallback_loc).": Warning: previous occurrence of $sym\n";
        print nonEmpty($loc,$fallback_loc).": Warning: duplicated occurrence of $sym\n";
        $seenGlobalDups++;

        if ($loc ne '') {
          $gloc{$sym} = $loc;
        }
      }
    }
    else {
      $gloc{$sym} = $loc;
    }
  }

  # reset local symbol tracking:
  %lloc = ();
  %rloc = ();
}

sub scanSymfile($) {
  my ($symfile) = @_;
  open(IN,'<'.$symfile) || die "failed to open '$symfile' (Reason: $!)";

  my $lineno = 0;
  eval {
    my $line;
    while (defined($line=<IN>)) {
      chomp($line);
      $lineno++;
      scanSymdef($line);
    }
  };
  if ($@) {
    die "$symfile:$lineno: $@";
  }
  close(IN);
}

# --------------------------------------------------------------------------------

sub main() {
  my $args = scalar(@ARGV);
  if ($args<1) {
    die "Usage: dupsymwarn.pl nm-output [nm-output]+\n".
      "Collects and warns about duplicated global symbols\n";
  }

  foreach my $symfile (@ARGV) { # one symfile for each unit
    $seenUnitDups = 0;

    scanSymfile($symfile);
    addFileSymbols($symfile);

    if ($seenUnitDups>0) {
      print "Please note: detected $seenUnitDups duplicate symbols in unit ($symfile)\n";
      print "             This may occur e.g. after renaming a source file.\n";
      print "             You may attempt to clean that unit.\n";
    }
  }

  if ($seenGlobalDups>0) {
    print "Encountered $seenGlobalDups duplicated global symbols in arb-codebase\n";
    print "Notes:\n";
    print " * this is checked in TESTED modules and libraries only (i.e. it may miss some dups)\n";
    print " * this is unwanted => please fix it!\n";
  }
}
main();
