#!/bin/bash # renames output to 'treefile' usage() { arb_message "arb_raxml called with wrong parameters (look @ console for errors)" echo "Usage:" echo " arb_raxml 'DNA' SEQFILE WEIGHTS TREE CONSTRAINT RANDOMSTART OPTIMIZEPARAMETERS SEARCH INITIALREARRANGEMENT SEED NUMBEROFRUNS TAKETREES [import|consense] \"COMMENT\" RATEMODELNUC NUMCATEGORIES" echo " arb_raxml 'PROTEIN' SEQFILE WEIGHTS TREE CONSTRAINT RANDOMSTART OPTIMIZEPARAMETERS SEARCH INITIALREARRANGEMENT SEED NUMBEROFRUNS TAKETREES [import|consense] \"COMMENT\" RATEMODELPROT MATRIXNAME" exit 1 } if [ -z "${13}" ]; then usage else # parameters common for DNA and PROTEIN DATA=$1 ; shift SEQFILE=$1 ; shift WEIGHTS=$1 ; shift TREE=$1 ; shift CONSTRAINT=$1 ; shift RANDOMSTART=$1 ; shift OPTIMIZEPARAMETERS=$1 ; shift SEARCH=$1 ; shift INITIALREARRANGEMENT=$1 ; shift SEED=$1 ; shift NUMBEROFRUNS=$1 ; shift TAKETREES=$1 ; shift CONSENSE=$1 ; shift COMMENT=$1 ; shift if [ -z "$CONSENSE" ]; then usage fi if [ $(($NUMBEROFRUNS)) -lt 1 ]; then arb_message "Invalid number of runs specified (NUMBEROFRUNS=$NUMBEROFRUNS)" exit 1 fi DIST=unknown if [ "$DATA" == "DNA" ]; then if [ -z "$1" ]; then usage fi RATEMODELNUC=$1 NUMCATEGORIES=$2 DIST=${RATEMODELNUC}-${NUMCATEGORIES} else if [ "$DATA" == "PROTEIN" ]; then # if [ -z "$2" ]; then # usage # fi RATEMODELPROT=$1 MATRIXNAME=$2 DIST=${RATEMODELPROT}-${MATRIXNAME} else arb_message "Unknown datatype '$DATA'" exit 1 fi fi COMMENT=${COMMENT/ALGO=/DIST=$DIST ALGO=} if [ -z "$SEED" ]; then # seconds since 1970 SEED=`date +%s` fi # check inputfiles if [ \! -s $SEQFILE ]; then arb_message "Missing or empty sequence file '$SEQFILE'" exit 1 fi if [ \! -s $WEIGHTS ]; then arb_message "Missing or empty weights file '$WEIGHTS'" exit 1 fi # generate tree file TREEPARAMS= HAVEINPUTTREE=0 if [ "$TREE" != "tree_?????" -a ! -z "$TREE" ]; then # has to match ../TEMPLATES/arb_global_defs.h@NO_TREE_SELECTED CMD="arb_export_tree $TREE" echo "$CMD" echo `$CMD > treefile.in` if [ \! -s treefile.in ]; then arb_message "Couldn't export tree '$TREE'" exit 1 fi if [ "$CONSTRAINT" == "1" ]; then TREEPARAMS="-r treefile.in" else TREEPARAMS="-t treefile.in" fi HAVEINPUTTREE=1 else if [ "$RANDOMSTART" == "1" ]; then TREEPARAMS=-d fi # otherwise use default parsimony tree fi # model dependent parameters DEPENDENT_PARAMS= if [ "$OPTIMIZEPARAMETERS" == "1" ]; then if [ "$DATA" == "DNA" ]; then case "$RATEMODELNUC" in "GTRGAMMA" | "GTRGAMMAI" ) DEPENDENT_PARAMS="$DEPENDENT_PARAMS -k" ;; * ) arb_message "Ignored 'Optimize branches/parameters'\n(only usable with GTRGAMMA)" ;; esac else case "$RATEMODELPROT" in "PROTGAMMA" | "PROTGAMMAI" ) DEPENDENT_PARAMS="$DEPENDENT_PARAMS -k" ;; * ) arb_message "Ignored 'Optimize branches/parameters'\n(only usable with PROTGAMMA)" ;; esac fi fi if [ "$DATA" == "DNA" ]; then if [ "$RATEMODELNUC" == "GTRCAT" ]; then DEPENDENT_PARAMS="$DEPENDENT_PARAMS -c $NUMCATEGORIES" fi fi # search algorithm NEEDINPUTTREE=0 USESINPUTTREE=0 NEEDGAMMA=0 GENERATEDTREES=$NUMBEROFRUNS MODE=normal case "$SEARCH" in "e" ) # optimize input tree NEEDINPUTTREE=1 NEEDGAMMA=1 MODE=optimized ;; "t" ) # randomized tree searches NEEDINPUTTREE=1 ;; "d" ) # new rapid hill climbing # modes that use (but don't expect) an inputtree USESINPUTTREE=1 ;; "a" ) # rapid bootstrap analysis DEPENDENT_PARAMS="$DEPENDENT_PARAMS -k -x $SEED" if [ $(($NUMBEROFRUNS)) -lt 2 ]; then arb_message "You need to specify at least 2 runs (NUMBEROFRUNS=$NUMBEROFRUNS)" exit 1 fi MODE=bootstrapped ;; "p" ) # add new sequences (MP) NEEDINPUTTREE=1 GENERATEDTREES=1 if [ "$NUMBEROFRUNS" != "1" ]; then arb_message "Ignoring number of runs (only performing 1 run)" NUMBEROFRUNS=1 fi TAKETREES=1 ;; esac if [ $(($GENERATEDTREES)) -lt $((TAKETREES)) ]; then arb_message "Can't take $TAKETREES of $GENERATEDTREES tree (using all)" $TAKETREES = $GENERATEDTREES fi if [ "$NEEDINPUTTREE" != "$HAVEINPUTTREE" ]; then if [ "$NEEDINPUTTREE" == "1" ]; then arb_message "Please select an input tree" exit 1; else if [ "$USESINPUTTREE" == "0" ]; then arb_message "Given input tree ignored" # don't use tree: TREEPARAMS= fi fi fi if [ "$NEEDGAMMA" == "1" ]; then if [ "$DATA" == "DNA" ]; then case "$RATEMODELNUC" in "GTRGAMMA" | "GTRGAMMAI" ) ;; * ) arb_message "Please choose GAMMA substitution model" exit 1 ;; esac else case "$RATEMODELPROT" in "PROTGAMMA" | "PROTGAMMAI" ) ;; * ) arb_message "Please choose GAMMA rate model" exit 1 ;; esac fi fi MODELPARAMS= if [ "$DATA" == "DNA" ]; then MODELPARAMS="-m $RATEMODELNUC" else MODELPARAMS="-m $RATEMODELPROT$MATRIXNAME" fi # build parameters for raxml RAXMLPARAMS="$MODELPARAMS -f $SEARCH" if [ ! -z "$INITIALREARRANGEMENT" ]; then RAXMLPARAMS="$RAXMLPARAMS -i $INITIALREARRANGEMENT" # otherwise autodetect by RAxML fi if [ "$NUMBEROFRUNS" != "1" ]; then RAXMLPARAMS="$RAXMLPARAMS -N $NUMBEROFRUNS" fi RUNNAME=viaARB RAXMLPARAMS="$RAXMLPARAMS -p $SEED -s $SEQFILE -a $WEIGHTS $TREEPARAMS $DEPENDENT_PARAMS -n $RUNNAME" echo "------------------------------------------" echo "Calling raxmlHPC $RAXMLPARAMS" time raxmlHPC $RAXMLPARAMS echo "------------------------------------------" # ../PERL_SCRIPTS/ARBTOOLS/raxml2arb.pl raxml2arb.pl "$RUNNAME" "$GENERATEDTREES" "$MODE" "$TAKETREES" "$CONSENSE" "$COMMENT" fi