#!/usr/bin/perl

use strict;
use warnings;

# This script is called from ./make_symlist.sh@annotate_dwarf_locations
#
# From STDIN it reads the output from objdump --dwarf
#
# It is passed the name of a symlist, it reads that list and
# annotates all locations found in STDIN.

my $ENDIE = "\n "; # print error location, but in new line

# ---------------------------------------- [ symlist ]

my %symbol_location = (); # key=demangled symbol; value=location(=file:line:col) or $LOCATION_UNKNOWN
my $LOCATION_UNKNOWN = '';

sub parse_symlist($) {
  # parse listed symbols into %symbol_location
  # Format is described at ./make_symlist.sh@SYMLIST_FORMAT
  my ($symlist) = @_;

  my $SYMS = undef;
  open($SYMS, '<'.$symlist) || die "Failed to read '$symlist' (Reason: $!)";
  my $line;
  while (defined($line = <$SYMS>)) {
    if ($line =~ /^def;(\w+);/o) {
      my ($type, $sym) = ($1, $');
      chomp($sym);
      if ($sym =~ /;/o) { die "found unexpected character ';' in symbol '$sym' ($type)".$ENDIE; }
      if (defined $symbol_location{$sym}) { die "found duplicated symbol '$sym' ($type)".$ENDIE; }
      $symbol_location{$sym} = $LOCATION_UNKNOWN;
    }
  }
  close($SYMS);
}

my %globally_located_symbol = (); # like %symbol_location, but collected for all object files of the artefact.

sub annotate_symlist($) {
  my ($symlist) = @_;

  my $SYMS = undef;
  open($SYMS, '<'.$symlist) || die "Failed to read '$symlist' (Reason: $!)";
  my $line;
  while (defined($line = <$SYMS>)) {
    if ($line =~ /^def;(\w+);/o) {
      my ($type, $sym) = ($1, $');
      chomp($sym);
      if ($sym =~ /;/o) { die "found unexpected character ';' in symbol '$sym'".$ENDIE; }
      my $location = $globally_located_symbol{$sym};
      if (defined $location) {
        print 'def;'.$type.';'.$sym.';'.$location."\n";
      }
      else {
        print $line;
      }
    }
    else {
      print $line;
    }
  }
  close($SYMS);
}

# ---------------------------------------- [ object-specific data ]

my %declaration_block = ();      # key=offset (of declaration-block), value=ref to block-hash (of declaration-block)
my %definition_referencing = (); # key=offset (of declaration-block), value=ref to block-hash (of definition-block) referencing declaration_block via 'specification' tag
my %combined_def_and_decl = ();  # key=offset (of declaration-block), value=1, if declaration-block has been combined with definition-block to define a symbol.

my $current_directory = undef; # directory of source file of current compile_unit
my $current_filename  = undef; # source file of current compile_unit
my $current_stmt_list = undef; # offset to file/directory table of current compile_unit

# ---------------------------------------- [ store object-specific data ]

sub dump_block(\%);

my %unit = (); # key = value of stmt_list, value=hash generated by finish_compile_unit

sub finish_compile_unit() {
  if (defined $current_stmt_list) {
    my %symbol_location_clone = %symbol_location;
    my $unit = {
                directory => $current_directory,
                filename => $current_filename,
                stmt_list => $current_stmt_list,
                location => \%symbol_location_clone,
               };

    $unit{$current_stmt_list} = $unit;

    $current_directory = undef;
    $current_filename  = undef;
    $current_stmt_list = undef;

    foreach my $decl_offset (keys %definition_referencing) {
      if (not exists $combined_def_and_decl{$decl_offset}) {
        my $block = $definition_referencing{$decl_offset};
        dump_block(%$block);
        die "missing declaration-block '$decl_offset' referenced from definition-block via 'specification'".$ENDIE;
      }
    }

    %declaration_block = ();
    %definition_referencing = ();
    %combined_def_and_decl = ();

    # reset collected locations:
    %symbol_location = map { $_ => $LOCATION_UNKNOWN; } keys %symbol_location;
  }
}

# ---------------------------------------- [ artefacts ]

my $current_artefact = undef;

sub finalize_artefact() {
  if (defined $current_artefact) {
    $current_artefact = undef;
  }
}

sub start_new_artefact($$) {
  my ($artefactName, $line) = @_;
  finalize_artefact();
  die if defined $current_artefact;
  $current_artefact = $artefactName;
}

sub ARB_building_with_DEBUG() {
  my $DEBUG = $ENV{DEBUG};
  return defined $DEBUG ? $DEBUG : 1;
}

sub detect_unlocated_symbols() {
  my $warn_unlocated = ARB_building_with_DEBUG();
  return if not $warn_unlocated;

  my $unlocated = 0;
  foreach my $symbol (keys %symbol_location) {
    if (not exists $globally_located_symbol{$symbol}) {
      if ($symbol =~ /^(_init|_fini)$/o) {
        # print STDERR "Note: accepting unlocated library ctor/dtor '$symbol'\n";
      }
      else {
        my $fail = 1;
        if ($symbol =~ / volatile/) {
          my $same_symbol = $`.$';
          if (exists $globally_located_symbol{$same_symbol}) {
            print STDERR "Warning: unlocated symbol '$symbol' was located as '$same_symbol' (accepting; known to occur with gcc-5.x)\n";
            $fail = 0;
          }
        }
        if ($fail==1) {
          print STDERR "Error: failed to locate symbol '$symbol'\n";
          $unlocated++;
        }
      }
    }
  }
  if ($unlocated>0) {
    die "failed to locate $unlocated symbols (see above)".$ENDIE;
  }
}

sub finish_scanning() {
  finalize_artefact();
  detect_unlocated_symbols();
}


# ---------------------------------------- [ debug info blocks ]

sub dump_block(\%) {
  my ($block) = @_; # $block is hash-ref: key=rest behind 'DW_AT_' tag; value=right side of same line

  print STDERR "- Block: <", $block->{offset}, "> ", $block->{number}, " (", $block->{tag}, ")\n";
  foreach my $attr (sort keys %$block) {
    next if $attr =~ /^(offset|number|tag)$/;
    print STDERR "  - $attr: ", $block->{$attr}, "\n";
  }
}

my %blockCount = (); # key=tag, value=count

# expression for removed substring: "(indirect string, offset: 0x31649): "
my $reg_indirect_string = qr/\(indirect string, offset: 0x[0-9a-f]+\):\s/o;

sub declare_symbol(\%) {
  my ($block) = @_; # $block is hash-ref: key=rest behind 'DW_AT_' tag; value=right side of same line

  die if $block->{external} ne 1;
  if (exists $block->{artificial}) {
    return;
  }

  my ($name, $decl_file, $decl_line, $decl_column)
    = (
       $block->{linkage_name},
       $block->{decl_file},
       $block->{decl_line},
       $block->{decl_column},
      );

  if (not defined $name) { $name = $block->{name}; }
  if (not defined $decl_file or not defined $name) {
    dump_block(%$block);
    die "no symbol-name or no location".$ENDIE;
  }
  $name =~ s/$reg_indirect_string//o;

  my $isListedSymbol = exists $symbol_location{$name};
  if ($isListedSymbol) {
    if (not defined $decl_line) {
      dump_block(%$block);
      die "invalid decl_line='$decl_line'".$ENDIE;
    }

    my $prevLocation = $symbol_location{$name};
    my $currLocation = "$decl_file:$decl_line"; # decl_file is a number. will be resolved later.
    if (defined $decl_column) { $currLocation .= ":$decl_column"; }

    if ($prevLocation ne $LOCATION_UNKNOWN) {
      dump_block(%$block);
      die "location redefined: previous='$prevLocation' current='$currLocation' symbol='$name'".$ENDIE;
    }
    $symbol_location{$name} = $currLocation;
  }
}

sub declare_symbol_by_combining_def_and_decl(\%\%) {
  my ($def_block, $decl_block) = @_; # $blocks are hash-refs: key=rest behind 'DW_AT_' tag; value=right side of same line

  die if not exists $def_block->{specification};
  die if $def_block->{specification} ne '<0x'.$decl_block->{offset}.'>';

  if (exists $decl_block->{external}) {
    my %mixed = %$decl_block;

    # copy from def_block -> mixed decl_block (overwrites entries if existing in both)
    my @transfer_defined = qw/decl_file decl_line decl_column linkage_name/;
    foreach my $xfer (@transfer_defined) {
      my $def_val = $def_block->{$xfer};
      if (defined $def_val) {
        $mixed{$xfer} = $def_val;
      }
    }

    declare_symbol(%mixed);

    $combined_def_and_decl{$decl_block->{offset}};
  }
}

sub process_block(\%) {
  my ($block) = @_; # $block is hash-ref: key=rest behind 'DW_AT_' tag; value=right side of same line

  my ($number, $tag, $external) = ($block->{number}, $block->{tag}, $block->{external});
  $blockCount{$tag}++;

  if ($tag eq 'compile_unit') {
    finish_compile_unit();

    my ($dir, $name, $stmt_list) = ($block->{comp_dir}, $block->{name}, $block->{stmt_list});

    if (not defined $dir or not defined $name or not defined $stmt_list) {
      die "expected data missing in compile_unit block (dir=$dir name=$name stmt_list=$stmt_list)".$ENDIE;
    }

    $dir  =~ s/$reg_indirect_string//o;
    $name =~ s/$reg_indirect_string//o;

    $current_directory = $dir;
    $current_filename  = $name;
    $current_stmt_list = $stmt_list;

    # print STDERR "compile_unit: stmt_list=$current_stmt_list file=$current_filename dir=$current_directory\n";
  }
  else {
    return if not $tag =~ /^(subprogram|variable|member)$/o;

    # Three block types are of interest here:
    # - declaration blocks  (contains 'definition' tag)
    # - definition blocks   (contains 'specification' tag)
    # - consolidated blocks (combines the two other block types; contains none of the tags mentioned above)
    #
    # The 'specification' tag refers to (the offset of) the corresponding declaration block.
    # Symbols from consolidated blocks are defined directly.
    # Symbols from the splitted block types get defined as soon as both corresponding blocks were seen, and
    # some contents of both blocks get mixed, with precedence for entries from the definition block.
    # (see declare_symbol_by_combining_def_and_decl)

    my ($declaration, $specification, $external)
      = ($block->{declaration}, $block->{specification}, $block->{external});

    if (defined $specification) {
      # this block is a definition block (containing a specification offset pointing to the corresponding declaration block)
      if ($specification =~ /^<0x([0-9a-f]+)>$/o) {
        my $declOffset = $1;
        # lookup referenced declaration block:
        if (exists $declaration_block{$declOffset}) {
          my $refBlock = $declaration_block{$declOffset};
          declare_symbol_by_combining_def_and_decl(%$block, %$refBlock);
        }
        else {
          $definition_referencing{$declOffset} = $block;
        }
      }
      else {
        dump_block(%$block);
        die "failed to parse specification='$specification'".$ENDIE;
      }
    }
    elsif (defined $declaration) {
      # this block is a declaration
      my $declOffset = $block->{offset};
      if (exists $definition_referencing{$declOffset}) {
        # directly declare if it has already been referenced:
        my $def_block = $definition_referencing{$declOffset};
        declare_symbol_by_combining_def_and_decl(%$def_block, %$block);
      }
      else {
        # otherwise store for optional later use:
        $declaration_block{$declOffset} = $block;
      }
    }
    elsif (defined $external) {
      if ($tag eq 'member') {
        dump_block(%$block);
        die "member block w/o declaration found";
      }
      declare_symbol(%$block);
    }
  }
}

# ---------------------------------------- [ dwarf info ]

my $reg_section_header = qr/^(?:Contents of the (\S+) (section):|(Raw) dump of debug contents of section (\S+):|(\S+):\s+(file format)\s+\S+)$/o;

my %skipped_section = ();

my $lines_parsed_debug_info = 0;
my $lines_skipped = 0;

sub skip_section($) {
  my ($section) = @_;

  $skipped_section{$section}++;

  my $line;
  while (defined($line = <>)) {
    chomp($line);
    return $line if $line =~ $reg_section_header;
    $lines_skipped++;
  }

  return undef;
}

sub parse_debug_info() {
  my $current_block; # hash ref
  my $line;

 LOOP: while (defined($line = <>)) {
    chomp($line);
    $lines_parsed_debug_info++;

    # Check if this is a new block (Abbrev Number line)
    if ($line =~ /^\s+([<>0-9a-f]+)/o) {
      my ($content, $addr) = ($', $1);
      if ($content =~ /^: Abbrev Number:\s+/o) {
        my $head = $';
        # Process new block if exists, before starting new one
        process_block(%$current_block) if (defined $current_block);
        $current_block = undef;

        if ($head eq '0') { ; } # skip "zero" blocks
        elsif ($head =~ /^([0-9]+)\s+\(([a-zA-Z0-9_]+)\)/) {
          my ($abbrev, $tag) = ($1, $2);
          if ($tag =~ /^DW_TAG_/o) { $tag = $'; }
          else { die "Expected tag '$tag' to start with 'DW_TAG_'".$ENDIE; }

          my $offset;
          if ($addr =~ /^<[0-9a-f]+><([0-9a-f]+)>$/o) { $offset = $1; }
          else { die "can't parse address '$addr'".$ENDIE; }

          $current_block = { offset => $offset, number => $abbrev, tag => $tag }; # Start new block
        }
        else { die "Unexpected content after 'Abbrev Number' in line '$line'".$ENDIE; }
      }
      elsif ($content =~ /\s+DW_AT_([^\s]+)\s*:\s(.*)/o) {
        my ($key, $value) = ($1, $2);
        if (defined $current_block) {
          $current_block->{$key} = $value;
          die "invalid key '$key'" if $key =~ /^(offset|number|tag)$/;
        }
        else { die "have no current_block (cannot set attribute) from line '$line'".$ENDIE; }
      }
      elsif ($content =~ /\s+Unknown AT value: ([0-9]+):/o) { # objdump does not know AT value generated by gcc used to build object
        my ($atValue, $rest) = ($1,$');
        my $atNum = int($atValue);
        die "expect '$atValue' to be numeric" if not $atNum;
        if ($atNum>=90 and $atNum<=91) {
          # accept some at-values (90+91) generated by gcc-15, which are not known by objdump 2.30
        }
        else {
          die "unknown and not tolerated atValue '$atValue'".$ENDIE;
          # this may occur with future gccs -> add documented exception in 'if' above, in case all works fine then.
        }
      }
      else { die "Unexpected line '$line'".$ENDIE; }
    }
    elsif ($line =~ $reg_section_header) { last LOOP; }
  }

  # Process final block if exists
  process_block(%$current_block) if defined $current_block;

  finish_compile_unit();

  return $line;
}

# ---------------------------------------- [ symbol promotion ]

my @file = (); # index -> filename (object-local)

sub translate_location($) {
  my ($location) = @_;
  my @locPart = split /:/, $location;
  my $count = scalar(@locPart);
  if ($count<2 or $count>3) {
    die "invalid number of parts ($count) in location='$location'".$ENDIE;
  }
  my $file = $file[$locPart[0]];
  if (not defined $file) {
    die 'could not resolve file at index ['.$locPart[0].']'.$ENDIE;
  }
  my $resolved_loc;
  if ($count==3) {
    $resolved_loc = $file.':'.$locPart[1].':'.$locPart[2];
  }
  else {
    $resolved_loc = $file.':'.$locPart[1];
  }
  return $resolved_loc;
}

sub promote_symbols(\%) {
  my ($sym_loc) = @_;
  # requires @file to be set correctly

  my %symbol_location = %$sym_loc; # instead directly use hash-ref below

  foreach my $symbol (keys %symbol_location) {
    my $location = $symbol_location{$symbol};
    if ($location ne $LOCATION_UNKNOWN) {
      my $transLoc = translate_location($location);
      if (exists $globally_located_symbol{$symbol}) {
        my $prevLoc = $globally_located_symbol{$symbol};
        if ($prevLoc ne $transLoc) {
          print STDERR "$prevLoc: Warning: previous location for $symbol\n";
          print STDERR "$transLoc: Warning: new location for $symbol\n";
          die "ambiguous re-define of location for symbol '$symbol'".$ENDIE;
        }
      }
      else {
        $globally_located_symbol{$symbol} = $transLoc;
      }
    }
  }
}

sub parse_debug_line() {

  use constant { SEARCH_OFFSET => 0,
                 FOUND_OFFSET => 1,
                 IN_DIRECTORY_TABLE => 2,
                 IN_FILE_NAME_TABLE => 3,
               };

  my @directory = (); # store directories

  my $state     = SEARCH_OFFSET;
  my $offset    = undef; # same as stmt_list
  my $curr_unit = undef;

  my $line;
 LINE: while (defined($line = <>)) {
    chomp($line);

    my $chewed = 0;
    if ($state eq IN_FILE_NAME_TABLE) {
      if ($line =~ /^\s+(\d+)\s+(\d+)\s+\d+\s+\d+\s+(.*)$/o) {
        my ($idx, $dirIdx, $filename) = ($1, $2, $3);
        my $directory = $directory[$dirIdx];
        if (not defined $directory) {
          die "Undefined directory index=$dirIdx referenced".$ENDIE;
        }
        if (defined $file[$idx]) {
          die "duplicated definition of filename at index=$idx".$ENDIE;
        }
        $file[$idx] = $directory.'/'.$filename;
        $chewed = 1;
      }
    }
    elsif ($state eq IN_DIRECTORY_TABLE) {
      if ($line =~ /^\s+(\d+)\s+(.*)$/o) {
        my ($idx, $directory) = ($1, $2);
        if (substr($directory, 0, 1) ne '/') {
          if ($directory eq '.') {
            # if directory is '.' -> replace by current object directory
            $directory = $directory[0];
          }
          else {
            $directory = $directory[0].'/'.$directory;
          }
        }
        if (defined $directory[$idx]) {
          die "duplicated definition of directory at index=$idx".$ENDIE;
        }
        $directory[$idx] = $directory;
        $chewed = 1;
      }
    }
    elsif ($state eq SEARCH_OFFSET) {
      if ($line =~ /^\s\sOffset:\s+(0x[0-9a-f]+)/o) {
        $offset = $1;
        $state = FOUND_OFFSET;
        $chewed = 1;

        $curr_unit = $unit{$offset};
        if (not defined $curr_unit) {
          die "No unit at offset '$offset'".$ENDIE;
        }

        $directory[0] = $curr_unit->{directory};
        die if not defined $directory[0];
      }
    }

    if (not $chewed) {
      if ($line =~ /^\sThe Directory Table.*:$/o) {
        die "did not find offset" if $state ne FOUND_OFFSET;
        $state = IN_DIRECTORY_TABLE;
      }
      elsif ($line =~ /^\sThe File Name Table.*:$/o) {
        die "did not find directory table" if $state ne IN_DIRECTORY_TABLE;
        $state = IN_FILE_NAME_TABLE;
      }
      elsif ($line =~ /^\sLine Number Statements:$/o or $line =~ /^\sNo Line Number Statements.$/o) {
        die "did not find file name table" if $state ne IN_FILE_NAME_TABLE;
        $state = SEARCH_OFFSET;

        die if not defined $curr_unit;
        promote_symbols(%{$curr_unit->{location}});
        @file = ();
        @directory = ();
      }
      elsif ($line =~ $reg_section_header) {
        return $line;
      }
    }
  }

  return $line;
}

sub scan_dwarf_info() {
  my $line = <>;
  while (defined $line) {
    chomp($line);
    if ($line =~ $reg_section_header) {
      if (defined $2 and ($2 eq 'section')) {
        my $current_section = $1;
        if ($current_section eq '.debug_info') {
          $line = parse_debug_info();
        }
        else {
          $line = skip_section($current_section);
        }
      }
      elsif (defined $3 and ($3 eq 'Raw')) {
        my $current_section = $4;
        if ($current_section eq '.debug_line') { $line = parse_debug_line(); }
        else { die "Unhandled raw section '$current_section'".$ENDIE; }
      }
      elsif (defined $6 and ($6 eq 'file format')) {
        my $artefactName = $5; # "name.o" or "/full/path/to/libSOME.so"
        $line = start_new_artefact($artefactName, $line);
      }
      else {
        print STDERR "1='$1' 2='$2' 3='$3' 4='$4' 5='$5' 6='$6'\n";
        die "Unexpected match on line '$line'".$ENDIE;
      }
    }
    else {
      $line = <>;
    }
  }

  finish_scanning();
}

# ---------------------------------------- [ main ]

sub main() {
  my $symlist = shift(@ARGV);
  if (not defined $symlist) {
    die "Missing argument: symbolList".$ENDIE;
  }
  if (not -f $symlist) {
    die "No such file: $symlist".$ENDIE;
  }

  parse_symlist($symlist);
  scan_dwarf_info();
  annotate_symlist($symlist);
}

eval { main(); };
if ($@) { die "annotate_dwarf_locations.pl: Error: $@\n"; }
exit(0);
