#!/usr/bin/perl

use strict;
use warnings;

# This script is called from ./make_symlist.sh@filter_objdump_syms
#
# For help on filtered format see .man objdump under '--syms'

my $show_converted = 1; # 1=result, 0=leftover(debug)
my $ENDIE = "\n "; # print error location, but in new line

sub list_symbol($$$) {
  my ($state, $type, $symbol) = @_;
  # $state: 'ref' = refer symbol
  #         'def' = define symbol
  # $type:  'sub' = subroutine
  #         'var' = variable
  #         'unk' = unknown ('ref' is always unknown)
  # $symbol: prototype or variable name
  #
  # see also ./make_symlist.sh@SYMLIST_FORMAT

  if ($symbol =~ /;/) {
    die "unexpected ';' contained in demangled symbol '$symbol'".$ENDIE;
  }
  if ($state eq 'def') {
    if ($type ne 'sub' and $type ne 'var') {
      die "unexpected type='$type' when defining symbol '$symbol'".$ENDIE;
    }
  }
  if ($show_converted==1) {
    print $state.';'.$type.';'.$symbol."\n";
  }
}

my $reg_objdump_symbol_linestart = qr/^([0-9a-f]+)\s([a-zA-Z !]{7})\s/o;
my $reg_symrest_lineend_elf = qr/^([^\s]+)\s([0-9a-f]+)\s+(.*)$/o;
my $reg_symrest_lineend_osx = qr/^([^\s]+)\s(.*)$/o;
my $reg_file_header = qr/^([^:]+):\s+(.*)$/o;

my %symbol_seen = ();

sub main() {
  my $style = undef;

 LINE: while (defined ($_=<STDIN>)) {
    # if ($_ =~ $reg_objdump_elf_line) {
    if ($_ =~ $reg_objdump_symbol_linestart) {
      my ($addr, $flags, $rest) = ($1, $2, $');
      my ($section, $symbol) = (undef, undef);

      my $thisStyle = undef;
      if ($rest =~ $reg_symrest_lineend_elf) {
        ($section, $symbol) = ($1, $3);
        $thisStyle = 'ELF';
      }
      elsif ($rest =~ $reg_symrest_lineend_osx) {
        ($section, $symbol) = ($1, $2);
        if ($symbol =~ /^\.hidden\s/o) { $symbol = $'; }
        $thisStyle = 'MACHO';
      }
      else {
        die "unexpected rest='$rest' in line: $_".$ENDIE;
      }

      if (defined $style) {
        if ($thisStyle ne $style) {
          die "objdump style change detected: $style -> $thisStyle (parser is broken)".$ENDIE;
        }
      }
      else {
        $style = $thisStyle;
      }

      # exclude useless symbols:
      if ($symbol =~ /^non-virtual thunk to /o) {
        next LINE;
      }

      if ($section eq '*UND*') {
        if ($symbol =~ /;/) { die "unexpected ';' contained in demangled symbol '$symbol'".$ENDIE; }
        list_symbol('ref', 'unk', $symbol); # references are stored with unknown type here
      }
      else {
        my $scope = substr($flags, 0, 1); # l=local, g=global, u=unique-global(gnu), !=broken, ' '=no-scope
        if ($scope   =~ /[ug]/o) {
          my $type = substr($flags, 6, 1);  # 'F'=function, 'f'=file, 'o'=object, ' '=normal

          if ($type eq 'F') { # function
            my $suppress = 0;
            if (exists $symbol_seen{$symbol}) {
              # duplicated symbol - accept only for ctors and dtors. no idea why these are duplicated.
              if ($symbol =~ /\(/o) {
                my ($name, $params) = ($`, $&.$');
                my @namepart = split /::/, $name;
                my $parts = scalar(@namepart);
                if ($parts>=2) {
                  my ($class, $method) = @namepart[$parts-2, $parts-1];
                  if (($class eq $method) or ($method eq '~'.$class)) { $suppress = 1; }
                  else { print STDERR "Warning: not ctor/dtor: class='$class' method='$method'\n"; }
                }
                else { print STDERR "Warning: non-class function\n"; }
              }
              else { print STDERR "Warning: function w/o parenthesis\n"; }
              if ($suppress==0) { die "detected duplicated symbol '$symbol'".$ENDIE; }
            }
            else { $symbol_seen{$symbol} = 1; }
            if ($suppress==0) { list_symbol('def', 'sub', $symbol); }
          }
          elsif ($type =~ /[O]/o) {
            if (($scope eq 'g') and (($section eq '.data') or ($section eq '.bss'))) {
              list_symbol('def', 'var', $symbol);
            }
          }
          elsif (($scope eq 'g') and (($section eq '.data') or ($section eq '.bss'))) {
            # silently accept some symbols occurring in dynamic libraries
            # (only occur in libCORE, libARBDB and libWINDOW; but neighter in libglAW nor in libglpng_arb)
          }
          else {
            if ($show_converted==0) { print $_; }
            else {
              die "unexpected line: $_".$ENDIE;
              # If this fails, set show_converted=0 => prints non-converted lines.
              # Decide whether to suppress or convert..
            }
          }
        }
      }
    }
    else {
      chomp;
      if (/./o) {
        if ($_ =~ $reg_file_header) {
          # print "matched reg_file_header: $_\n";
          ;
        }
        elsif (/:$/o) {
          my $b4 = $`;
          if (($b4 eq 'SYMBOL TABLE') or
              ($b4 =~ /^In\sarchive\s.*$/o)) {
            ;
          }
          else {
              die "can't parse line '$_'".$ENDIE;
          }
        }
        else {
          die "can't parse line '$_'".$ENDIE;
          # may be caused by non-ELF-object.
        }
      }
    }
  }
}

eval { main(); };
if ($@) { die "filter_objdump_syms.pl: Error: $@\n"; }
