#!/bin/bash -x
# renames output to 'treefile'

usage() {
    arb_message "arb_raxml called with wrong parameters (look @ console for errors)"
    echo "Usage:"
    echo "   arb_raxml 'DNA'     SEQFILE WEIGHTS TREE CONSTRAINT RANDOMSTART OPTIMIZEPARAMETERS SEARCH INITIALREARRANGEMENT SEED NUMBEROFRUNS TAKETREES [import|consense] \"COMMENT\" RATEMODELNUC  NUMCATEGORIES"
    echo "   arb_raxml 'PROTEIN' SEQFILE WEIGHTS TREE CONSTRAINT RANDOMSTART OPTIMIZEPARAMETERS SEARCH INITIALREARRANGEMENT SEED NUMBEROFRUNS TAKETREES [import|consense] \"COMMENT\" RATEMODELPROT MATRIXNAME"
    exit 1
}

if [ -z "${13}" ]; then
    usage
else
    # parameters common for DNA and PROTEIN
    DATA=$1 ; shift
    SEQFILE=$1 ; shift
    WEIGHTS=$1 ; shift
    TREE=$1 ; shift
    CONSTRAINT=$1 ; shift

    RANDOMSTART=$1 ; shift
    OPTIMIZEPARAMETERS=$1 ; shift
    SEARCH=$1 ; shift
    INITIALREARRANGEMENT=$1 ; shift
    SEED=$1 ; shift

    NUMBEROFRUNS=$1 ; shift
    TAKETREES=$1 ; shift
    CONSENSE=$1 ; shift
    COMMENT=$1 ; shift

    if [ -z "$CONSENSE" ]; then
            usage
    fi

    if [ $(($NUMBEROFRUNS)) -lt 1 ]; then
        arb_message "Invalid number of runs specified (NUMBEROFRUNS=$NUMBEROFRUNS)"
        exit 1
    fi

    DIST=unknown
    if [ "$DATA" == "DNA" ]; then
        if [ -z "$1" ]; then
            usage
        fi
        RATEMODELNUC=$1
        NUMCATEGORIES=$2
        DIST=${RATEMODELNUC}-${NUMCATEGORIES}
    else
        if [ "$DATA" == "PROTEIN" ]; then
            # if [ -z "$2" ]; then
                # usage
            # fi
            RATEMODELPROT=$1
            MATRIXNAME=$2
            DIST=${RATEMODELPROT}-${MATRIXNAME}
        else
            arb_message "Unknown datatype '$DATA'"
            exit 1
        fi
    fi

    COMMENT=${COMMENT/ALGO=/DIST=$DIST ALGO=}

    if [ -z "$SEED" ]; then
        # seconds since 1970
        SEED=`date +%s`
    fi

    # check inputfiles
    if [ \! -s $SEQFILE ]; then
        arb_message "Missing or empty sequence file '$SEQFILE'"
        exit 1
    fi
    if [ \! -s $WEIGHTS ]; then
        arb_message "Missing or empty weights file '$WEIGHTS'"
        exit 1
    fi

    # generate tree file
    TREEPARAMS=
    HAVEINPUTTREE=0
    if [ "$TREE" != "tree_?????" -a ! -z "$TREE" ]; then # has to match ../TEMPLATES/arb_global_defs.h@NO_TREE_SELECTED
        CMD="arb_export_tree $TREE"
        echo "$CMD"
        echo `$CMD > treefile.in`
        if [ \! -s treefile.in ]; then
            arb_message "Couldn't export tree '$TREE'"
            exit 1
        fi
        if [ "$CONSTRAINT" == "1" ]; then
            TREEPARAMS="-r treefile.in"
        else
            TREEPARAMS="-t treefile.in"
        fi
        HAVEINPUTTREE=1
    else
        if [ "$RANDOMSTART" == "1" ]; then
            TREEPARAMS=-d
        fi
        # otherwise use default parsimony tree
    fi

    # model dependent parameters
    DEPENDENT_PARAMS=
    if [ "$OPTIMIZEPARAMETERS" == "1" ]; then
        if [ "$DATA" == "DNA" ]; then
            case "$RATEMODELNUC" in
                "GTRGAMMA" | "GTRGAMMAI" )
                    DEPENDENT_PARAMS="$DEPENDENT_PARAMS -k"
                    ;;
                * )
                    arb_message "Ignored 'Optimize branches/parameters'\n(only usable with GTRGAMMA)"
                    ;;
            esac
        else
            case "$RATEMODELPROT" in
                  "PROTGAMMA" | "PROTGAMMAI" )
                    DEPENDENT_PARAMS="$DEPENDENT_PARAMS -k"
                    ;;
                * )
                    arb_message "Ignored 'Optimize branches/parameters'\n(only usable with PROTGAMMA)"
                    ;;
            esac
        fi
    fi
    
    if [ "$DATA" == "DNA" ]; then
        if [ "$RATEMODELNUC" == "GTRCAT" ]; then
            DEPENDENT_PARAMS="$DEPENDENT_PARAMS -c $NUMCATEGORIES"
        fi
    fi

    # search algorithm 
    NEEDINPUTTREE=0
    USESINPUTTREE=0
    NEEDGAMMA=0
    GENERATEDTREES=$NUMBEROFRUNS
    MODE=normal

    case "$SEARCH" in
        "e" ) # optimize input tree
            NEEDINPUTTREE=1
            NEEDGAMMA=1
            MODE=optimized
            ;;

        "t" ) # randomized tree searches
            NEEDINPUTTREE=1
            ;;

        "d" ) # new rapid hill climbing
            # modes that use (but don't expect) an inputtree
            USESINPUTTREE=1
            ;;

        "a" ) # rapid bootstrap analysis
            DEPENDENT_PARAMS="$DEPENDENT_PARAMS -k -x $SEED"
            if [ $(($NUMBEROFRUNS)) -lt 2 ]; then
                arb_message "You need to specify at least 2 runs (NUMBEROFRUNS=$NUMBEROFRUNS)"
                exit 1
            fi
            MODE=bootstrapped
            ;;

        "p" ) # add new sequences (MP)
            NEEDINPUTTREE=1
            GENERATEDTREES=1
            if [ "$NUMBEROFRUNS" != "1" ]; then
                arb_message "Ignoring number of runs (only performing 1 run)"
                NUMBEROFRUNS=1
            fi
            TAKETREES=1
            ;;

    esac

    if [ $(($GENERATEDTREES)) -lt $((TAKETREES)) ]; then
        arb_message "Can't take $TAKETREES of $GENERATEDTREES tree (using all)"
        $TAKETREES = $GENERATEDTREES
    fi

    if [ "$NEEDINPUTTREE" != "$HAVEINPUTTREE" ]; then
        if [ "$NEEDINPUTTREE" == "1" ]; then
            arb_message "Please select an input tree"
            exit 1;
        else
            if [ "$USESINPUTTREE" == "0" ]; then
                arb_message "Given input tree ignored"
                # don't use tree:
                TREEPARAMS=
            fi
        fi
    fi

    if [ "$NEEDGAMMA" == "1" ]; then
        if [ "$DATA" == "DNA" ]; then
            case "$RATEMODELNUC" in
                "GTRGAMMA" | "GTRGAMMAI" )
                    ;;
                * )
                    arb_message "Please choose GAMMA substitution model"
                    exit 1
                    ;;
            esac
        else
            case "$RATEMODELPROT" in
                "PROTGAMMA" | "PROTGAMMAI" )
                    ;;
                * )
                    arb_message "Please choose GAMMA rate model"
                    exit 1
                    ;;
            esac
        fi
    fi

    MODELPARAMS=
    if [ "$DATA" == "DNA" ]; then
        MODELPARAMS="-m $RATEMODELNUC"
    else
        MODELPARAMS="-m $RATEMODELPROT$MATRIXNAME"
    fi

    # build parameters for raxml
    RAXMLPARAMS="$MODELPARAMS -f $SEARCH"
    if [ ! -z "$INITIALREARRANGEMENT" ]; then
        RAXMLPARAMS="$RAXMLPARAMS -i $INITIALREARRANGEMENT"
        # otherwise autodetect by RAxML
    fi
    if [ "$NUMBEROFRUNS" != "1" ]; then
        RAXMLPARAMS="$RAXMLPARAMS -N $NUMBEROFRUNS"
    fi

    RUNNAME=viaARB
    RAXMLPARAMS="$RAXMLPARAMS -p $SEED -s $SEQFILE -a $WEIGHTS $TREEPARAMS $DEPENDENT_PARAMS -n $RUNNAME"

    echo "------------------------------------------"
    echo "Calling raxmlHPC $RAXMLPARAMS"
    time raxmlHPC $RAXMLPARAMS
    echo "------------------------------------------"

    # ../PERL_SCRIPTS/ARBTOOLS/raxml2arb.pl
    raxml2arb.pl "$RUNNAME" "$GENERATEDTREES" "$MODE" "$TAKETREES" "$CONSENSE" "$COMMENT"
fi