#!/usr/bin/perl -w use lib "$ENV{'ARBHOME'}/lib/"; use ARB; $gb_main = ARB::open(":","r"); if (! $gb_main ) { $error = ARB::await_error(); print ("Error: $error\n"); exit 0; } sub message($) { my ($msg) = @_; BIO::message($gb_main, $msg); } sub error($) { my ($msg) = @_; $msg = "Error: ".$msg; ARB::abort_transaction($gb_main); # this undoes all changes made by this script ARB::begin_transaction($gb_main); BIO::message($gb_main, $msg); BIO::message($gb_main, "Script aborted!"); ARB::commit_transaction($gb_main); die $msg; } ARB::begin_transaction($gb_main); my $gb_species = BIO::first_marked_species($gb_main); my $current_alignment = BIO::get_default_alignment($gb_main); my $marked = 0; my $corr_ends = 0; my $corr_species = 0; message("Checking '$current_alignment' sequence ends of marked species.."); SPECIES: while ($gb_species) { $marked++; $gb_species = BIO::next_marked_species($gb_species); last SPECIES if not $gb_species; my $name = BIO::read_name($gb_species); my $gb_sequence = BIO::read_sequence($gb_species, $current_alignment); if ($gb_sequence) { my $sequence = ARB::read_string($gb_sequence); if ($sequence =~ /^([\.-]*)(.*[^\.-])([\.-]*)$/ig) { my $start = $1; my $mid = $2; my $end = $3; my $changed = 0; # translate '-' to '.' at sequence start if ($start =~ /-/) { $start =~ y/-/./; $changed = 1; $corr_ends++; } # translate '-' to '.' at sequence end if ($end =~ /-/) { $end =~ y/-/./; $changed = 1; $corr_ends++; } my $newSequence = $start.$mid.$end; # some safety belts: # # 1. check length my $oldLen = length($sequence); my $newLen = length($newSequence); if ($oldLen != $newLen) { error("sequence length of '$name' has changed"); } # 2. compare checksums my $old_crc = ARB::checksum($sequence,$oldLen,1,".-"); my $new_crc = ARB::checksum($newSequence,$newLen,1,".-"); if ($old_crc != $new_crc) { error("sequence checksum of '$name' changed"); } if ($changed == 1) { # store result in database ARB::write_string($gb_sequence, $newSequence); $corr_species++; } } else { error("Error in regexp - cannot analyze sequence"); } } else { message("Sequence '$name' has no data in alignment '$current_alignment' - skipped."); } } if ($marked) { if ($corr_ends == 0) { message("No sequence ends of your $marked marked species needed correction."); } else { message("Corrected $corr_ends sequence ends of $corr_species sequences (of $marked marked species)."); } } else { message("Please mark all species where sequence ends should be corrected."); } ARB::commit_transaction($gb_main); ARB::close($gb_main);