#!/usr/bin/perl
# ================================================================ #
#                                                                  #
#   File      : useFieldAsID.pl                                    #
#   Purpose   : hack IDs in ARB database                           #
#                                                                  #
#   Coded by Ralf Westram (coder@reallysoft.de) in February 2018   #
#   http://www.arb-home.de/                                        #
#                                                                  #
# ================================================================ #

use strict;
use warnings;

BEGIN {
  if (not exists $ENV{'ARBHOME'}) { die "Environment variable \$ARBHOME has to be defined"; }
  my $arbhome = $ENV{'ARBHOME'};
  push @INC, "$arbhome/lib";
  push @INC, "$arbhome/PERL_SCRIPTS/lib";
  1;
}

use ARB;
use tools;

sub hackIDs($) {
  my ($srcField) = @_;

  print "Overwriting field 'name' with content of '$srcField' ..\n";

  my $gb_main = ARB::open(":","r");
  $gb_main || expectError('db connect (no running ARB?)');

  dieOnError(ARB::begin_transaction($gb_main), 'begin_transaction');

  print " - scanning contents of '$srcField'\n";

  my $unique = 1;
  {
    my %content = ();
    for (my $gb_species = BIO::first_species($gb_main);
         $gb_species and $unique;
         $gb_species = BIO::next_species($gb_species)) {

      my $field_content = BIO::read_string($gb_species, $srcField);
      $field_content || expectError('read_string');

      if (exists $content{$field_content}) {
        $unique = 0;
        print " - field content '$field_content' occurs more than once\n";
        print "   => not usable as ID ('name')\n";
      }
      $content{$field_content} = 1;
    }
  }

  if ($unique) {
    print " - modifying IDs\n";
    my $count = 0;
    ARB::push_my_security($gb_main);
    for (my $gb_species = BIO::first_species($gb_main);
         $gb_species and $unique;
         $gb_species = BIO::next_species($gb_species)) {

      my $field_content = BIO::read_string($gb_species, $srcField);
      my $error = BIO::write_string($gb_species,"name",$field_content);
      if (defined $error) {
        die "Error writing to 'name': ".$error."\n";
      }
      $count++;
    }
    ARB::pop_my_security($gb_main);
    print "IDs of $count species were overwritten with content of $srcField\n";
  }
  else {
    print "Please choose a SOURCEFIELDNAME which has unique content for all species.\n";
  }

  ARB::commit_transaction($gb_main);
  ARB::close($gb_main);
}

sub main() {
  my $args = scalar(@ARGV);
  if ($args==0) {
    print "Usage: useFieldAsID.pl SOURCEFIELDNAME\n";
    print "Copies the content of field SOURCEFIELDNAME into the field 'name' (which is used as ID)\n";
    print "The contents of SOURCEFIELDNAME need to be unique\n";
    print "\n";
    print "Warning: Only use this script in special cases!\n";
    print "         Afterwards regenerate species IDs as soon as possible.\n";
  }
  else {
    my $srcField = shift @ARGV;
    hackIDs($srcField);
  }
}

main();
