// =============================================================== //
//                                                                 //
//   File      : arb_test.cxx                                      //
//   Purpose   : unit tester for tools                             //
//                                                                 //
//   Coded by Ralf Westram (coder@reallysoft.de) in February 2011  //
//   Institute of Microbiology (Technical University Munich)       //
//   http://www.arb-home.de/                                       //
//                                                                 //
// =============================================================== //

#include <arbdbt.h>
#include <arb_defs.h>
#include <arb_sleep.h>
#include <arb_diff.h>
#include <unistd.h>

int ARB_main(int , char *[]) {
    // Arguments are ignored: only emit a notice on stderr and report success.
    fprintf(stderr, "don't call us\n");
    return EXIT_SUCCESS;
}

// --------------------------------------------------------------------------------

#ifdef UNIT_TESTS
#include <arb_file.h>
#include <test_unit.h>
#include <test_runtool.h>

// --------------------------------------------------------------------------------


void TEST_AFTER_SLOW_ascii_2_bin_2_ascii() { // run after TEST_SLOW_loadsave
    // Converts an ascii database to binary and back again (file -> file, via pipe and
    // via pipe using compression flags). Each resulting ascii file is compared with the input.
    const char *asciiInput  = "TEST_loadsave_ascii.arb";
    const char *asciiResult = "bin2ascii.arb";
    const char *bin1        = "ascii2bin.arb";
    const char *bin2        = "ascii2bin2.arb";
    const char *bin3        = "ascii2bin3.arb";

    {
        // errors from _each_ part of a piped command have to propagate correctly:
        const char *brokenPipes[] = {
            "arb_weirdo | wc -l",      // first command fails
            "echo hello | arb_weirdo", // second command fails
            "arb_weirdo | arb_weirdo", // both commands fail

            "arb_weirdo | wc -l | sort",       // first command fails
            "echo hello | arb_weirdo | sort",  // second command fails
            "echo hello | wc -l | arb_weirdo", // third command fails
        };
        const size_t pipeCount = ARRAY_ELEMS(brokenPipes);

        for (size_t p = 0; p != pipeCount; p++) {
            const char *cmd = brokenPipes[p];
            TEST_EXPECT_ERROR_CONTAINS(RUN_TOOL(cmd), "System call failed");
        }
    }

    // checks proper documentation of available compression flags (in GB_get_supported_compression_flags)
    TEST_RUN_TOOL("arb_2_ascii --help");

    // --- conversion file -> file
    TEST_RUN_TOOL(GBS_global_string("arb_2_bin   %s %s", asciiInput, bin1));
    TEST_RUN_TOOL(GBS_global_string("arb_2_ascii %s %s", bin1, asciiResult));

    TEST_EXPECT_TEXTFILES_EQUAL(asciiResult, asciiInput);

    // --- conversion (bin->ascii->bin) via stream (this tests 'arb_repair')
    TEST_RUN_TOOL(GBS_global_string("arb_2_ascii %s - | arb_2_bin - %s", bin1, bin2));
    // The binary files can't be compared directly (2nd binary differs (keys?)),
    // so convert back to ascii and compare that result with the original.
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(asciiResult));
    TEST_RUN_TOOL(GBS_global_string("arb_2_ascii %s %s", bin2, asciiResult));
    TEST_EXPECT_FILES_EQUAL(asciiResult, asciiInput);

    // --- same again using compression (gzip and bzip2)
    TEST_RUN_TOOL(GBS_global_string("arb_2_ascii -Cz %s - | arb_2_bin -CB - %s", bin1, bin3));
    // Again the binary files can't be compared directly (3rd binary differs (keys?)),
    // so convert back to ascii and compare that result with the original.
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(asciiResult));
    TEST_RUN_TOOL(GBS_global_string("arb_2_ascii %s %s", bin3, asciiResult));
    TEST_EXPECT_FILES_EQUAL(asciiResult, asciiInput);

    // an unsupported compression flag has to fail ("Unknown compression flag 'q'")
    TEST_EXPECT_ERROR_CONTAINS(RUN_TOOL("arb_2_ascii -Cq -"), "System call failed");

    // --- remove all files generated above
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(asciiResult));
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(bin1));
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(bin2));
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(bin3));
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink("ascii2bin.ARF"));
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink("ascii2bin2.ARF"));
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink("ascii2bin3.ARF"));
}

void TEST_arb_primer() {
    // Feeds 'arb_primer' from a prepared stdin file and compares the generated output file
    // with the expected one (presumably the output filename is specified by the stdin input).
    const char *database   = "TEST_nuc.arb";
    const char *stdinFile  = "tools/arb_primer.in";
    const char *resultFile = "tools/arb_primer.out";
    const char *wantedFile = "tools/arb_primer_expected.out";

    TEST_RUN_TOOL(GBS_global_string("arb_primer %s < %s", database, stdinFile));

    TEST_EXPECT_FILES_EQUAL(resultFile, wantedFile);
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(resultFile)); // remove generated file
}

// Opens database 'dbname' and, for each tree found below the tree data container, replaces
// everything in its 'remark' entry that follows the marker "\nunittest-tree\n" by a fixed
// placeholder. Afterwards the database is saved under the same name (save mode "a").
//
// Remarks not containing the marker are left untouched.
//
// Returns NULp on success, otherwise an error (e.g. database could not be opened,
// a tree has no 'remark' entry, a remark could not be read or written).
static GB_ERROR removeVaryingDateFromTreeRemarks(const char *dbname) {
    GB_ERROR  error   = NULp;
    GB_shell  shell;
    GBDATA   *gb_main = GB_open(dbname, "rw");
    if (!gb_main) error = GB_await_error();
    else {
        {
            GB_transaction ta(gb_main);

            GBDATA     *gb_tree_data    = GBT_get_tree_data(gb_main);
            const char *truncate_after  = "\nunittest-tree\n";
            const char *replacement     = "<date removed for testing>";
            size_t      truncate_offset = strlen(truncate_after);
            size_t      replacement_len = strlen(replacement);

            if (!gb_tree_data) error = GB_await_error();
            else {
                for (GBDATA *gb_tree = GB_child(gb_tree_data);
                     gb_tree && !error;
                     gb_tree = GB_nextChild(gb_tree))
                {
                    GBDATA *gb_remark = GB_entry(gb_tree, "remark");
                    if (!gb_remark) {
                        error = "could not find 'remark' entry";
                    }
                    else {
                        char *remark = GB_read_string(gb_remark);
                        if (!remark) {
                            // do not pass a NULL pointer to strstr()
                            error = GB_await_error();
                        }
                        else {
                            const char *found = strstr(remark, truncate_after);

                            if (found) {
                                // Build the modified remark in a buffer of its own:
                                // the placeholder may be longer than the text it replaces,
                                // so writing it into 'remark' could overflow that buffer.
                                size_t  keep    = (found-remark) + truncate_offset;
                                char   *cleaned = (char*)malloc(keep+replacement_len+1);

                                if (!cleaned) {
                                    error = "out of memory";
                                }
                                else {
                                    memcpy(cleaned, remark, keep);
                                    memcpy(cleaned+keep, replacement, replacement_len+1); // includes terminating zero
                                    error = GB_write_string(gb_remark, cleaned);
                                    free(cleaned);
                                }
                            }
                            free(remark);
                        }
                    }
                }
            }

            ta.close(error);
        }
        if (!error) error = GB_save_as(gb_main, dbname, "a");
        GB_close(gb_main);
    }
    return error;
}

// #define TEST_AUTO_UPDATE_TREE // uncomment to auto-update expected tree

void TEST_SLOW_arb_read_tree() {
    // Imports several newick files into one database using 'arb_read_tree'
    // and compares the resulting database with the expected one.

    // One entry per imported tree:
    // 'name' selects the file "tools/<name>.tree" and the tree name "tree_<name>",
    // 'options' is appended to the command line.
    struct TreeImport {
        const char *name;
        const char *options;
    };
    const TreeImport jobs[] = {
        { "newick",           "" },
        { "newick_sq",        "-commentFromFile general/text.input" },
        { "newick_dq",        "-scale 0.5" },
        { "newick_group",     "-scale 10 -consense 10" },
        { "newick_len",       "" },
        { "newick_len_group", "" },
    };
    const size_t jobCount = ARRAY_ELEMS(jobs);

    const char *source   = "min_ascii.arb";
    const char *target   = "tools/read_tree_out.arb";
    const char *expected = "tools/read_tree_out_expected.arb";

    for (size_t j = 0; j != jobCount; j++) {
        const TreeImport&  job      = jobs[j];
        char              *treefile = GBS_global_string_copy("tools/%s.tree", job.name);
        char              *treename = GBS_global_string_copy("tree_%s", job.name);

        TEST_RUN_TOOL(GBS_global_string("arb_read_tree -db %s %s %s %s \"test %s\" %s",
                                        source, target, treename, treefile, job.name, job.options));

        // following imports read the database just written ( = all trees end up in one db)
        source = target;

        free(treename);
        free(treefile);
    }

    // the tree remarks contain a date, which has to be removed before comparing
    TEST_EXPECT_NO_ERROR(removeVaryingDateFromTreeRemarks(target));
#if defined(TEST_AUTO_UPDATE_TREE)
    TEST_COPY_FILE(target, expected);
#else // !defined(TEST_AUTO_UPDATE_TREE)
    TEST_EXPECT_TEXTFILES_EQUAL(expected, target);
#endif
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(target));
}

// Copies 'infile' to "<expected>.tmp", runs 'arb_replace <args>' on that copy,
// expects the modified copy to equal 'expected' (compared as text files)
// and finally removes the copy.
#define TEST_ARB_REPLACE(infile,expected,args) do {                                    \
        char *workfile = GBS_global_string_copy("%s.tmp", expected);                   \
        TEST_RUN_TOOL_NEVER_VALGRIND(GBS_global_string("cp %s %s", infile, workfile)); \
        TEST_RUN_TOOL(GBS_global_string("arb_replace %s %s", args, workfile));         \
        TEST_EXPECT_TEXTFILES_EQUAL(workfile, expected);                               \
        TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(workfile));                           \
        free(workfile);                                                                \
    } while (0)

void TEST_arb_replace() {
    // Each step applies 'arb_replace' to a copy of its input file and compares the
    // modified copy with an expected file (see TEST_ARB_REPLACE).
    const char *original = "tools/arb_replace.in";
    const char *replaced = "tools/arb_replace_1.out";
    const char *wrapped  = "tools/arb_replace_2.out";

    // original -> replaced
    TEST_ARB_REPLACE(original, replaced, "'gene=GONE'");
    // replaced -> original (reverse replacement has to restore the input)
    TEST_ARB_REPLACE(replaced, original, "-l 'GONE=gene'");
    // replaced -> wrapped (two replacements separated by ':')
    TEST_ARB_REPLACE(replaced, wrapped,  "-L 'GONE=gene:\"*\"=( * )'");
}

// --------------------------------------------------------------------------------

#include "command_output.h"

// Calls 'arb_message' with a message text and expects stderr
// to contain that text prefixed by "arb_message: ".
void TEST_arb_message() {
    TEST_STDERR_CONTAINS("arb_message \"this is the test message\"",
                         "arb_message: this is the test message\n");
}

// Calls 'arb_probe' twice (using serverid -666): once to match a probe sequence
// and once to design probes for a set of species. In both cases stdout has to
// equal the expected text exactly.
void TEST_SLOW_arb_probe() {
    // test called here currently are duplicating the tests in
    // arb_probe.cxx@TEST_SLOW_match_probe
    // and arb_probe.cxx@TEST_SLOW_design_probe
    //
    // Here test of functionality is secondary.
    // The primary goal here is to test calling the tools (i.e. arb_probe)

    TEST_SETUP_GLOBAL_ENVIRONMENT("ptserver");
    // probe match (expected output uses '\1' as separator):
    TEST_STDOUT_EQUALS("arb_probe"
                       " serverid=-666"
                       " matchsequence=UAUCGGAGAGUUUGA",

                       /* ---- */ "    name---- fullname mis N_mis wmis pos ecoli rev          'UAUCGGAGAGUUUGA'\1"
                       "BcSSSS00\1" "  BcSSSS00            0     0  0.0   3     2 0   .......UU-===============-UCAAGUCGA\1"
        );

    // probe design:
    TEST_STDOUT_EQUALS("arb_probe"
                       " serverid=-666"
                       " designnames=ClnCorin#CltBotul#CPPParap#ClfPerfr"
                       " designmintargets=100",

                       "Probe design parameters:\n"
                       "Length of probe    18\n"
                       "Temperature        [ 0.0 -400.0]\n"
                       "GC-content         [30.0 - 80.0]\n"
                       "E.Coli position    [any]\n"
                       "Max. nongroup hits 0\n"
                       "Min. group hits    100% (max. rejected coverage: 75%)\n"
                       "Target             le apos ecol qual grps   G+C temp     Probe sequence | Decrease T by n*.3C -> probe matches n non group species\n"
                       "CGAAAGGAAGAUUAAUAC 18 A=94   82   77    4  33.3 48.0 GUAUUAAUCUUCCUUUCG | - - - - - - - - - - - - - - - - - - - -\n"
                       "GAAAGGAAGAUUAAUACC 18 A+ 1   83   77    4  33.3 48.0 GGUAUUAAUCUUCCUUUC | - - - - - - - - - - - - - - - - - - - -\n"
                       "UCAAGUCGAGCGAUGAAG 18 B=18   17   61    4  50.0 54.0 CUUCAUCGCUCGACUUGA | - - - - - - - - - - - - - - - 2 2 2 2 2\n"
                       "AUCAAGUCGAGCGAUGAA 18 B- 1   16   45    4  44.4 52.0 UUCAUCGCUCGACUUGAU | - - - - - - - - - - - 2 2 2 2 2 2 2 2 2\n"
                       );
}

// Calls 'arb_probe_match' (connecting via port ':../sok/pt.socket') for the sequence
// GAGCGGUCAG allowing 3 mismatches and expects the tab-separated result table on stdout.
void TEST_SLOW_arb_probe_match() {
    TEST_SETUP_GLOBAL_ENVIRONMENT("ptserver");

    // this probe-match is also tested with 'arb_probe'. see arb_probe.cxx@TEST_arb_probe_match
    // (first expected line is the column header, each following line is one match)
    TEST_STDOUT_EQUALS("arb_probe_match"
                       " --port :../sok/pt.socket"
                       " --n-matches 0"
                       " --n-match-bound 4"
                       " --mismatches 3"
                       " --sequence GAGCGGUCAG",

                       "acc     \t"     "start\t" "stop\t" "pos\t" "mis\t" "wmis\t" "nmis\t" "dt\t" "rev\t" "seq\n"
                       "ARB_2CA9F764\t" "0\t"     "0\t"    "24\t"  "1\t"   "1.1\t"  "0\t"    "0\t"  "0\t"   "GAUCAAGUC-======A===-AUGGGAGCU\t" "\n"
                       "ARB_6B04C30A\t" "10\t"    "20\t"   "24\t"  "2\t"   "2.2\t"  "0\t"    "0\t"  "0\t"   "GAUCAAGUC-======A=C=-ACGGGAGCU\t" "\n"
                       "ARB_4C6C9E8C\t" "20\t"    "170\t"  "67\t"  "3\t"   "2.4\t"  "0\t"    "0\t"  "0\t"   "GGAUUUGUU-=g====CG==-CGGCGGACG\t" "\n"
                       "ARB_948948A3\t" "0\t"     "0\t"    "81\t"  "3\t"   "2.8\t"  "0\t"    "0\t"  "0\t"   "ACGAGUGGC-=gA===C===-UUGGAAACG\t" "\n"
                       "ARB_5BEE4C92\t" "0\t"     "0\t"    "85\t"  "3\t"   "3.2\t"  "0\t"    "0\t"  "0\t"   "CGGCGGGAC-=g==CU====-AACCUGCGG\t" "\n"
                       "ARB_2180C521\t" "0\t"     "0\t"    "24\t"  "3\t"   "3.6\t"  "0\t"    "0\t"  "0\t"   "GAUCAAGUC-======Aa=C-GAUGGAAGC\t" "\n"
                       "ARB_815E94DB\t" "0\t"     "0\t"    "94\t"  "3\t"   "3.6\t"  "0\t"    "0\t"  "0\t"   "GGACUGCCC-==Aa==A===-CUAAUACCG\t" "\n"
                       "ARB_948948A3\t" "0\t"     "0\t"    "24\t"  "3\t"   "4\t"    "0\t"    "0\t"  "0\t"   "GAUCAAGUC-==A====a=C-AGGUCUUCG\t" "\n"
                       "ARB_9E1D1B16\t" "0\t"     "0\t"    "28\t"  "3\t"   "4\t"    "0\t"    "0\t"  "0\t"   "GAUCAAGUC-==A====a=C-GGGAAGGGA\t" "\n"
                       "ARB_CEB24FD3\t" "0\t"     "0\t"    "24\t"  "3\t"   "4.1\t"  "0\t"    "0\t"  "0\t"   "GAUCAAGUC-=====A=G=A-GUUCCUUCG\t" "\n"
                       "ARB_4FCDD74F\t" "0\t"     "0\t"    "24\t"  "3\t"   "4.1\t"  "0\t"    "0\t"  "0\t"   ".AUCAAGUC-=====A=G=A-GCUUCUUCG\t" "\n"
                       "ARB_CF69AC5C\t" "0\t"     "0\t"    "24\t"  "3\t"   "4.1\t"  "0\t"    "0\t"  "0\t"   "GAUCAAGUC-=====A=G=A-GUUCCUUCG\t" "\n"
                       "ARB_5BEE4C92\t" "0\t"     "0\t"    "24\t"  "3\t"   "4.1\t"  "0\t"    "0\t"  "0\t"   "GAUCAAGUC-=====A=G=A-GUUUCCUUC\t" "\n"
                       "ARB_815E94DB\t" "0\t"     "0\t"    "156\t" "3\t"   "4.1\t"  "0\t"    "0\t"  "0\t"   "GUAGCCGUU-===GAA====-CGGCUGGAU\t" "\n"
                       "ARB_1763CF6\t"  "0\t"     "0\t"    "24\t"  "3\t"   "2.4\t"  "3\t"    "0\t"  "0\t"   "GAUCAAGUC-=======...-<more>\t" "\n"
                       "ARB_ED8B86F\t"  "0\t"     "0\t"    "28\t"  "3\t"   "2.4\t"  "3\t"    "0\t"  "0\t"   "GAUCAAGUC-=======...-<more>\t" "\n"
        );
}

// Databases used by TEST_SLOW_arb_dna_rates:
// IN_DB is passed as input, OUT_DB is generated (and removed afterwards),
// WANTED_DB contains the expected result.
#define IN_DB     "tools/dnarates.arb"
#define OUT_DB    "tools/dnarates_result.arb"
#define WANTED_DB "tools/dnarates_expected.arb"

// #define TEST_AUTO_UPDATE_SAI // uncomment to auto-update expected SAI

// Runs 'arb_dnarates' (with input file 'tools/dnarates.inp'), expects stdout to report
// writing of 'POS_VAR_BY_ML_1' and compares the generated database with the expected one.
void TEST_SLOW_arb_dna_rates() {
    TEST_STDOUT_CONTAINS("arb_dnarates tools/dnarates.inp " IN_DB " " OUT_DB, "\nWriting 'POS_VAR_BY_ML_1'\n");

#if defined(TEST_AUTO_UPDATE_SAI)
    TEST_COPY_FILE(OUT_DB, WANTED_DB);
#else // !defined(TEST_AUTO_UPDATE_SAI)
    TEST_EXPECT_TEXTFILES_EQUAL(WANTED_DB, OUT_DB);
#endif
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(OUT_DB));
}

// Database used by TEST_arb_export_rates
#define RATES_DB "tools/exportrates.arb"

// Runs 'arb_export_rates' with three different argument sets on RATES_DB
// and checks the output of each call against a fixed checksum.
void TEST_arb_export_rates() {
    // Note: just testing against regression here.
    // Since the output is quite longish, we just test the checksums of the results.
    //
    // If one of the checksums changes unexpectedly and you want to see more details about the change,
    // - go back to a revision with a correct checksum,
    // - add passing TEST_OUTPUT_EQUALS for broken command and
    // - move that test to broken revision.

    TEST_OUTPUT_HAS_CHECKSUM("arb_export_rates -d " RATES_DB " POS_VAR_BY_PARSIMONY", 0xc75a5fad);
    TEST_OUTPUT_HAS_CHECKSUM("arb_export_rates -d " RATES_DB " -r POS_VAR_BY_PARSIMONY", 0xd69fb01e);
    TEST_OUTPUT_HAS_CHECKSUM("arb_export_rates -d " RATES_DB " -r \"\"", 0xad0461ce);
}

// Database used by TEST_arb_export_tree
#define TREE_DB "tools/tree.arb"

// Exports 'tree_mini' from TREE_DB using 'arb_export_tree' with different options
// and compares stdout with the expected newick text. Also tests the output generated
// for an empty and for an unknown tree name.
void TEST_arb_export_tree() {
    // default export:
    TEST_STDOUT_EQUALS("arb_export_tree tree_mini " TREE_DB,
                       "((( 'VibFurni' :0.02952, 'VibVulni' :0.01880):0.04015, 'VibChole' :0.03760):1.00000,( 'AcnPleur' :0.12011, 'PrtVulga' :0.06756):1.00000, 'HlmHalod' :1.00000);\n");
    TEST_STDOUT_EQUALS("arb_export_tree --bifurcated tree_mini " TREE_DB,
                       "(((( 'VibFurni' :0.02952, 'VibVulni' :0.01880):0.04015, 'VibChole' :0.03760):0.04610,( 'AcnPleur' :0.12011, 'PrtVulga' :0.06756):0.01732):0.07176, 'HlmHalod' :0.12399);\n");
    TEST_STDOUT_EQUALS("arb_export_tree --doublequotes tree_mini " TREE_DB,
                       "((( \"VibFurni\" :0.02952, \"VibVulni\" :0.01880):0.04015, \"VibChole\" :0.03760):1.00000,( \"AcnPleur\" :0.12011, \"PrtVulga\" :0.06756):1.00000, \"HlmHalod\" :1.00000);\n");

    // The following expectation pins the currently generated (wrong) output:
    TEST_STDOUT_EQUALS("arb_export_tree --nobranchlens tree_mini " TREE_DB,
                       "((( 'VibFurni'  'VibVulni' ) 'VibChole' ),( 'AcnPleur'  'PrtVulga' ), 'HlmHalod' );\n");
    TEST_EXPECT__BROKEN(0); // the test above returns a wrong result (commas are missing)

    TEST_OUTPUT_EQUALS("arb_export_tree \"\" " TREE_DB,
                       ";\n",                                                                    // shall export an empty newick tree
                       "");                                                                      // without error!
    TEST_OUTPUT_EQUALS("arb_export_tree tree_nosuch " TREE_DB,
                       ";\n",                                                                    // shall export an empty newick tree
                       "arb_export_tree from '" TREE_DB "': ARB ERROR: Failed to read tree 'tree_nosuch' (Reason: tree not found)\n"); // with error!
}
TEST_PUBLISH(TEST_arb_export_tree);

// --------------------------------------------------------------------------------

// #define TEST_AUTO_UPDATE_EXP_SEQ // uncomment to auto-update expected sequence exports

// EXPECTED(file) names the file holding the expected content for 'file'
// ('file' has to be a string literal, ".expected" gets appended).
#define EXPECTED(file) file ".expected"
// UPDATE_OR_COMPARE(outfile) either copies 'outfile' over its expected file (auto-update mode)
// or expects both to be equal when compared as text files.
#if defined(TEST_AUTO_UPDATE_EXP_SEQ)
#define UPDATE_OR_COMPARE(outfile) TEST_COPY_FILE(outfile, EXPECTED(outfile))
#else // !defined(TEST_AUTO_UPDATE_EXP_SEQ)
#define UPDATE_OR_COMPARE(outfile) TEST_EXPECT_TEXTFILES_EQUAL(outfile, EXPECTED(outfile))
#endif
// TEST_OUTFILE_EXPECTED(outfile) checks (or updates) 'outfile' against its expected file
// and removes 'outfile' afterwards.
#define TEST_OUTFILE_EXPECTED(outfile) do{                     \
        UPDATE_OR_COMPARE(outfile);                            \
        TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(outfile));    \
    }while(0)

// Variant of TEST_OUTFILE_EXPECTED using the __BROKEN flavor of the textfile comparison
// (never auto-updates the expected file).
#define TEST_OUTFILE_EXPECTED__BROKEN(outfile) do{                       \
        TEST_EXPECT_TEXTFILES_EQUAL__BROKEN(outfile, EXPECTED(outfile)); \
        TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(outfile));              \
    }while(0)

// Files used by TEST_arb_export_sequences:
// source database, template database, export format file, generated output files
// and the file passed via '--accs'.
#define SEQ_DB          "TEST_loadsave.arb"
#define TEMPLATE_DB     "tools/min_template.arb"
#define EFT             "../../lib/export/fasta_wide.eft" // ../lib/export/fasta_wide.eft
#define EXSEQ_EFT       "tools/exseq_via_eft.fasta"
#define EXSEQ_FASTA     "tools/exseq.fasta"
#define EXSEQ_ARB       "tools/exseq.arb"
#define EXSEQ_ARB_ASCII "tools/exseq_ascii.arb"
#define EXSEQ_RESTRICT  "tools/acc.list"

// Exports sequences from SEQ_DB using 'arb_export_sequences' in three formats
// (FASTA, via export format file, ARB database) and compares each result
// with its expected file.
void TEST_arb_export_sequences() {
    TEST_RUN_TOOL("arb_export_sequences --source " SEQ_DB " --format FASTA   --dest " EXSEQ_FASTA);
    TEST_OUTFILE_EXPECTED(EXSEQ_FASTA);

    TEST_RUN_TOOL("arb_export_sequences --source " SEQ_DB " --format " EFT " --dest " EXSEQ_EFT   " --accs " EXSEQ_RESTRICT);
    TEST_OUTFILE_EXPECTED(EXSEQ_EFT);

    // the exported database is converted to ascii before it is compared:
    TEST_RUN_TOOL("arb_export_sequences --source " SEQ_DB " --format ARB     --dest " EXSEQ_ARB   " --arb-template " TEMPLATE_DB
                  " && "
                  "arb_2_ascii " EXSEQ_ARB " " EXSEQ_ARB_ASCII
        );
    TEST_OUTFILE_EXPECTED(EXSEQ_ARB_ASCII);
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(EXSEQ_ARB));
}

// Files used by TEST_arb_filtered_sequence_export:
// FILTSEQ_DB is the source database, the EXFLT_* names are generated fasta files
// (EXFLT_NOTSAVED is used as target for calls which are expected to fail).
#define FILTSEQ_DB              "TEST_prot_tiny.arb" // ../UNIT_TESTER/run/TEST_prot_tiny.arb
#define EXFLT_DNA_UNFILTERED    "tools/exflt_DNA_unfiltered.fasta" // ../UNIT_TESTER/run/tools/
#define EXFLT_AMI_UNFILTERED    "tools/exflt_AMI_unfiltered.fasta"
#define EXFLT_DNA_PARTIAL_ALI   "tools/exflt_DNA_partial.fasta"
#define EXFLT_AMI_SKIP_SHORT    "tools/exflt_AMI_skipShort.fasta"
#define EXFLT_DNA_PVP_FILT_GAPS "tools/exflt_DNA_pvp_gaps.fasta"
#define EXFLT_DNA_PVP_FILT_G03  "tools/exflt_DNA_pvp.fasta"
#define EXFLT_DNA_PVP_FILT_G05  "tools/exflt_DNA_pvp_g05.fasta"
#define EXFLT_DNA_MF_FILT       "tools/exflt_DNA_mf.fasta"
#define EXFLT_DNA_MFPVP_FILT    "tools/exflt_DNA_mfpvp.fasta"
#define EXFLT_AMI_MF_FILT       "tools/exflt_AMI_mf.fasta"
#define EXFLT_EMPTY             "tools/exflt_empty.fasta"
#define EXFLT_NOTSAVED          "tools/exflt_notSaved.fasta"

// Calls 'arb_export_seq_filtered' with many different option combinations.
// Successful calls are verified by comparing the generated fasta file with its
// expected file; failing calls are verified by the error message they report.
void TEST_arb_filtered_sequence_export() {
    // see also ../SL/FILTSEQEXP/FilteredExport.cxx@SAI_FILTERED_EXPORT_TESTS
    // ./arb_export_seq_filtered.cxx@show_help

    // common command prefix (the name of the fasta file to write has to follow directly):
#define EXPORT_FILTERED_TO "arb_export_seq_filtered --db " FILTSEQ_DB " --fasta "

    // fail to export from missing database:
    TEST_FAILURE_OUTPUT_CONTAINS("arb_export_seq_filtered --db " FILTSEQ_DB "xxx --fasta " EXFLT_NOTSAVED,
                                 NULp,
                                 "Database 'TEST_prot_tiny.arbxxx' not found");

    // export default alignment ('ali_dna'):
    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_UNFILTERED " --id \"readdb(name);\\\" \\\";readdb(acc)\"");
    TEST_OUTFILE_EXPECTED(EXFLT_DNA_UNFILTERED);

    // export 'ali_prot':
    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_AMI_UNFILTERED " --ali ali_prot");
    TEST_OUTFILE_EXPECTED(EXFLT_AMI_UNFILTERED);

    // fail to export unknown alignment:
    TEST_FAILURE_OUTPUT_CONTAINS(EXPORT_FILTERED_TO EXFLT_NOTSAVED " --ali ali_nosuch",
                                 NULp,
                                 "alignment 'ali_nosuch' not found");

    // fail if not all species contain data
    TEST_FAILURE_OUTPUT_CONTAINS(EXPORT_FILTERED_TO EXFLT_NOTSAVED " --ali ali_dna_incomplete",
                                 NULp,
                                 "species 'TaxOcell' has no data in 'ali_dna_incomplete'");

    // fail if ACI is broken
    TEST_FAILURE_OUTPUT_CONTAINS(EXPORT_FILTERED_TO EXFLT_NOTSAVED " --id \"readdb(name)|invalid\"",
                                 NULp,
                                 "Command 'readdb(name)|invalid' failed:\nReason: Unknown command 'invalid'");

    // export 'ali_dna_incomplete' (where only 1 species has data):
    // (the same command is executed twice: first to test it succeeds, then to test its output)
    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_PARTIAL_ALI " --ali ali_dna_incomplete --accept-missing-data");
    TEST_EXPECTATION(CommandOutput(EXPORT_FILTERED_TO EXFLT_DNA_PARTIAL_ALI " --ali ali_dna_incomplete --accept-missing-data", true).Contains(NULp, "Skipped species 'StrCoel9' (Reason: has no data)"));
    TEST_OUTFILE_EXPECTED(EXFLT_DNA_PARTIAL_ALI);

    // skip "short" species:
    TEST_EXPECTATION(CommandOutput(EXPORT_FILTERED_TO EXFLT_AMI_SKIP_SHORT " --ali ali_prot --min-bases 62", true).Contains(NULp, "Skipped species 'BctFra12' (Reason: not enough base-characters left)")); // below 62 fewer species are filtered
    TEST_OUTFILE_EXPECTED(EXFLT_AMI_SKIP_SHORT);
    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_AMI_UNFILTERED " --ali ali_prot --min-bases 57"); // all sequences contain 57 AA
    TEST_OUTFILE_EXPECTED(EXFLT_AMI_UNFILTERED);
    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_EMPTY " --ali ali_prot --min-bases 94");          // no sequence contains 94 AA
    TEST_OUTFILE_EXPECTED(EXFLT_EMPTY);

    // pass custom bases to count
    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_AMI_SKIP_SHORT " --ali ali_prot --min-bases 62 --count-bases \"AC-IK-NP-TV-Y\"");
    TEST_OUTFILE_EXPECTED(EXFLT_AMI_SKIP_SHORT);
    TEST_EXPECTATION(CommandOutput(EXPORT_FILTERED_TO EXFLT_EMPTY " --count-bases \"x-z\" --min-bases 1", true).Contains(NULp, "warning: generated empty file")); // no data contains 'xyz' -> skips all
    TEST_OUTFILE_EXPECTED(EXFLT_EMPTY);

    // apply SAI filters:
    TEST_FAILURE_OUTPUT_CONTAINS(EXPORT_FILTERED_TO EXFLT_NOTSAVED " --ali ali_prot --filterby \"POS_VAR_BY_PARSIMONY\" --block \"-.=012345\"",
                                 NULp,
                                 "SAI 'POS_VAR_BY_PARSIMONY' has no data in alignment 'ali_prot'");

    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_PVP_FILT_G03  " --filterby POS_VAR_BY_PARSIMONY --block \"-.=0123\"");
    TEST_OUTFILE_EXPECTED(EXFLT_DNA_PVP_FILT_G03);

    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_PVP_FILT_GAPS " --filterby POS_VAR_BY_PARSIMONY --pass allbut \"-.=\"");
    TEST_OUTFILE_EXPECTED(EXFLT_DNA_PVP_FILT_GAPS);

    // the following two calls are expected to produce the same file as '--block "-.=0123"' above:
    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_PVP_FILT_G03  " --filterby POS_VAR_BY_PARSIMONY --pass allbut \"-.=0123\"");
    TEST_OUTFILE_EXPECTED(EXFLT_DNA_PVP_FILT_G03);

    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_PVP_FILT_G03  " --filterby POS_VAR_BY_PARSIMONY --pass allbut \".-=0123\""); // ".-=" no longer interpreted as range
    TEST_OUTFILE_EXPECTED(EXFLT_DNA_PVP_FILT_G03);

    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_PVP_FILT_G05  " --filterby POS_VAR_BY_PARSIMONY --pass allbut \"-.=012345\"");
    TEST_OUTFILE_EXPECTED(EXFLT_DNA_PVP_FILT_G05);

    TEST_FAILURE_OUTPUT_CONTAINS(EXPORT_FILTERED_TO EXFLT_NOTSAVED "  --filterby POS_VAR_BY_PARSIMONY --pass allbut \"-.=0123456789\"",
                                 NULp,
                                 "Sequence completely filtered out");

    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_MF_FILT " --filterby MAX_FREQUENCY --pass \"789\"");
    TEST_OUTFILE_EXPECTED(EXFLT_DNA_MF_FILT);

    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_MF_FILT " --filterby MAX_FREQUENCY --pass \"7-9\""); // use char-range
    TEST_OUTFILE_EXPECTED(EXFLT_DNA_MF_FILT);

    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_AMI_MF_FILT " --ali ali_prot --filterby MAX_FREQUENCY --block allbut 5-9");
    TEST_OUTFILE_EXPECTED(EXFLT_AMI_MF_FILT);

    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_AMI_MF_FILT " --ali ali_prot --filterby MAX_FREQUENCY --pass \"5-9\"");
    TEST_OUTFILE_EXPECTED(EXFLT_AMI_MF_FILT);

    // test failure of block/pass w/o filterby:
    TEST_FAILURE_OUTPUT_CONTAINS(EXPORT_FILTERED_TO EXFLT_NOTSAVED " --block \"-.=012345\"",
                                 NULp,
                                 "--pass and --block have to be preceeded by --filterby");
    // test failure of filterby w/o block/pass:
    TEST_FAILURE_OUTPUT_CONTAINS(EXPORT_FILTERED_TO EXFLT_NOTSAVED " --filterby \"POS_VAR_BY_PARSIMONY\"",
                                 NULp,
                                 "--filterby has to be followed by --pass or --block");

    // apply combined filters:
    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_MF_FILT
                  " --filterby MAX_FREQUENCY --pass 0-9"
                  " --filterby MAX_FREQUENCY --block allbut 987");
    TEST_OUTFILE_EXPECTED(EXFLT_DNA_MF_FILT);

    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_MFPVP_FILT
                  " --seqpp :.=-"
                  " --filterby MAX_FREQUENCY        --pass 7-9"
                  " --filterby POS_VAR_BY_PARSIMONY --pass 4-7");
    TEST_OUTFILE_EXPECTED(EXFLT_DNA_MFPVP_FILT);

}

// --------------------------------------------------------------------------------

#undef UPDATE_OR_COMPARE

// #define TEST_AUTO_UPDATE_EXP_PVP // uncomment to auto-update expected pvp data

// Redefine UPDATE_OR_COMPARE (used by TEST_OUTFILE_EXPECTED) so that
// auto-updating the expected pvp data is controlled by a switch of its own.
#if defined(TEST_AUTO_UPDATE_EXP_PVP)
#define UPDATE_OR_COMPARE(outfile) TEST_COPY_FILE(outfile, EXPECTED(outfile))
#else // !defined(TEST_AUTO_UPDATE_EXP_PVP)
#define UPDATE_OR_COMPARE(outfile) TEST_EXPECT_TEXTFILES_EQUAL(outfile, EXPECTED(outfile))
#endif

// Dumps part 'sub' of database 'pvp/calc.arb' into "pvp/<saveName>.dump" (using 'arb_sub2ascii')
// and compares that dump with its expected file.
// 'sub' and 'saveName' have to be string literals.
// Wrapped into do/while(0) to make the macro behave like a single statement.
#define TEST_DBPART_EQUALS(sub,saveName) do {                                      \
        TEST_RUN_TOOL("arb_sub2ascii pvp/calc.arb " sub " pvp/" saveName ".dump"); \
        TEST_OUTFILE_EXPECTED("pvp/" saveName ".dump");                            \
    } while(0)

// Runs 'arb_calc_pvp' on 'TEST_nuc.arb' (tree 'treeName', alignment 'ali_16s', additional
// arguments 'addArgs'), saving to 'pvp/calc.arb'; then checks part 'sub' of the saved
// database via TEST_DBPART_EQUALS. All arguments have to be string literals.
// Wrapped into do/while(0) to make the macro behave like a single statement.
#define TEST_CALC_PVP(treeName,addArgs,sub,saveName) do {                                                 \
        TEST_RUN_TOOL("arb_calc_pvp TEST_nuc.arb --savename pvp/calc.arb " treeName " ali_16s " addArgs); \
        TEST_DBPART_EQUALS(sub,saveName);                                                                 \
    } while(0)

// Tests 'arb_calc_pvp': first the error messages reported for missing or invalid
// arguments, then several successful calculations. For each of these a part of
// the saved database is dumped and compared with its expected dump.
void TEST_arb_calc_pvp() {
    // see also ../SL/PVP/pvp.cxx@TEST_pvp
    // ./arb_calc_pvp.cxx@show_help

    // missing arguments:
    TEST_FAILURE_OUTPUT_CONTAINS("arb_calc_pvp",         NULp, "argument <database> has not been provided");
    TEST_FAILURE_OUTPUT_CONTAINS("arb_calc_pvp db",      NULp, "argument <treename>");
    TEST_FAILURE_OUTPUT_CONTAINS("arb_calc_pvp db tree", NULp, "argument <aliname>");

    // invalid arguments:
    TEST_FAILURE_OUTPUT_CONTAINS("arb_calc_pvp db tree_bla ali_123 xtra",      NULp, "too many arguments");
    TEST_FAILURE_OUTPUT_CONTAINS("arb_calc_pvp db tree_bla ali_123",           NULp, "Database 'db' not found");
    TEST_FAILURE_OUTPUT_CONTAINS("arb_calc_pvp TEST_nuc.arb tree_bla ali_123", NULp, "alignment 'ali_123' not found");
    TEST_FAILURE_OUTPUT_CONTAINS("arb_calc_pvp TEST_nuc.arb tree_bla ali_16s", NULp, "Failed to read tree 'tree_bla'");

    // expected results in ../UNIT_TESTER/run/pvp

    TEST_CALC_PVP("tree_nuc",    "",                                     "extended_data", "standard");          // use tree in DB
    TEST_CALC_PVP("tree_import", "--tree-import trees/nuc_flipped.tree", "extended_data", "flippedTreeImported"); // load tree from file
    TEST_DBPART_EQUALS("tree_data/tree_import/node",     "gottree");                                            // test imported tree was saved to DB (just a hacked test, otherwise problems with date in remark)
    TEST_CALC_PVP("tree_nuc",    "--tree-import trees/nuc_flipped.tree", "extended_data", "standard");          // load + overwrite tree
    TEST_CALC_PVP("tree_nuc",    "--tree-delete",                        "tree_data",     "notree");            // use tree in DB + delete tree
    TEST_CALC_PVP("tree_nuc",    "--sainame custPVP",                    "extended_data", "custom");            // test alternate SAI-name

    // remove the database saved by the calls above
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink("pvp/calc.arb"));
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink("pvp/calc.ARF"));
}

// --------------------------------------------------------------------------------

// Command prefixes for 'arb_export_newick' (further options may be appended; both end with a space):
// ARB_EXPORT_NEWICK_CMD  exports 'tree_test' from 'tools/export_newick.arb' to /dev/stdout,
// ARB_EXPORT_NEWICK_CMD2 exports 'tree_test' to file 'test.tree' (no database specified here).
#define ARB_EXPORT_NEWICK_CMD  "arb_export_newick --tree tree_test --newick-file /dev/stdout --db tools/export_newick.arb "
#define ARB_EXPORT_NEWICK_CMD2 "arb_export_newick --tree tree_test --newick-file test.tree "


// Tests the command line tool 'arb_export_newick'.
//
// First part : calls the tool with different option combinations (see ARB_EXPORT_NEWICK_CMD)
//              and compares what it writes to /dev/stdout against the expected newick text.
// Second part: calls the tool with missing or illegal arguments and compares against the
//              expected usage/error text.
//
// NOTE(review): the TEST_... macros are defined outside this file; judging from their names and the
// way they are used here, TEST_FAILURE_OUTPUT_CONTAINS(cmd, a, b) presumably expects 'cmd' to fail
// and checks its output for 'b' (with 'a' == NULp meaning "not checked") - confirm in the test headers.
void TEST_arb_export_newick() {
    // Using 'contains' instead of 'equals' here: found no way to suppress the
    // tree comment (or at least the date contained in it).

    // default call (no additional options)
    TEST_STDOUT_CONTAINS(ARB_EXPORT_NEWICK_CMD,
                         "((((((((((LacViri2,\nLacVirid\n),\nLacBulga\n),\nLacPlant\n),\nLacBrevi\n),\nEntFaeca\n),\n((StaAureu,\nStaEpide\n),\nAnaAbact\n)\n),\n(((CloTyro3,\nCloTyro4\n),\nCloTyro2\n),\nCloTyrob\n)\n),\n(BacMegat,\nBacPaste\n)\n),\n(PlaCitre,\nPlaKocur\n)\n),\n((BacSpec2,\nBacSubt2\n),\nBacLich2\n)\n);\n");
    // quoting modes ('none' expects exactly the same text as the default call above)
    TEST_STDOUT_CONTAINS(ARB_EXPORT_NEWICK_CMD "--quoting none",
                         "((((((((((LacViri2,\nLacVirid\n),\nLacBulga\n),\nLacPlant\n),\nLacBrevi\n),\nEntFaeca\n),\n((StaAureu,\nStaEpide\n),\nAnaAbact\n)\n),\n(((CloTyro3,\nCloTyro4\n),\nCloTyro2\n),\nCloTyrob\n)\n),\n(BacMegat,\nBacPaste\n)\n),\n(PlaCitre,\nPlaKocur\n)\n),\n((BacSpec2,\nBacSubt2\n),\nBacLich2\n)\n);\n");
    TEST_STDOUT_CONTAINS(ARB_EXPORT_NEWICK_CMD "--quoting single",
                         "(((((((((('LacViri2',\n'LacVirid'\n),\n'LacBulga'\n),\n'LacPlant'\n),\n'LacBrevi'\n),\n'EntFaeca'\n),\n(('StaAureu',\n'StaEpide'\n),\n'AnaAbact'\n)\n),\n((('CloTyro3',\n'CloTyro4'\n),\n'CloTyro2'\n),\n'CloTyrob'\n)\n),\n('BacMegat',\n'BacPaste'\n)\n),\n('PlaCitre',\n'PlaKocur'\n)\n),\n(('BacSpec2',\n'BacSubt2'\n),\n'BacLich2'\n)\n);\n");
    TEST_STDOUT_CONTAINS(ARB_EXPORT_NEWICK_CMD "--quoting double",
                         "((((((((((\"LacViri2\",\n\"LacVirid\"\n),\n\"LacBulga\"\n),\n\"LacPlant\"\n),\n\"LacBrevi\"\n),\n\"EntFaeca\"\n),\n((\"StaAureu\",\n\"StaEpide\"\n),\n\"AnaAbact\"\n)\n),\n(((\"CloTyro3\",\n\"CloTyro4\"\n),\n\"CloTyro2\"\n),\n\"CloTyrob\"\n)\n),\n(\"BacMegat\",\n\"BacPaste\"\n)\n),\n(\"PlaCitre\",\n\"PlaKocur\"\n)\n),\n((\"BacSpec2\",\n\"BacSubt2\"\n),\n\"BacLich2\"\n)\n);\n");
    // single quotes combined with branch lengths, bootstrap values and group names
    TEST_STDOUT_CONTAINS(ARB_EXPORT_NEWICK_CMD "--quoting single --add-branch-lengths --add-bootstraps --add-group-names",
                         "(((((((((('LacViri2':0.01905,\n'LacVirid':0.01905\n):0.17308,\n'LacBulga':0.14286\n)'19':0.03261,\n'LacPlant':0.05714\n)'36':0.00990,\n'LacBrevi':0.06667\n)'19':0.02985,\n'EntFaeca':0.09524\n):1.35323,\n(('StaAureu':0.06087,\n'StaEpide':0.04348\n)'99':0.08929,\n'AnaAbact':0.15044\n)'21:xx':0.01923\n)'35':0.01163,\n((('CloTyro3':1.04569,\n'CloTyro4':0.06061\n)'40':0.02581,\n'CloTyro2':0.01732\n)'0':0.01717,\n'CloTyrob':0.00866\n)'97:test':0.06639\n)'0':0.00000,\n('BacMegat':0.04386,\n'BacPaste':0.04348\n)'53':0.02667\n)'outer':0.14592,\n('PlaCitre':0.00862,\n'PlaKocur':0.01724\n):0.13913\n)'0':0.00889,\n(('BacSpec2':0.05714,\n'BacSubt2':0.25743\n):1.21875,\n'BacLich2':0.15652\n)'0':0.04571\n);\n");
    // pretty-printed output (expected text is indented by two spaces per nesting level)
    TEST_STDOUT_CONTAINS(ARB_EXPORT_NEWICK_CMD "--pretty",
                         "(\n  (\n    (\n      (\n        (\n          (\n            (\n              (\n                (\n                  (\n                    LacViri2,\n                    LacVirid\n                  ),\n                  LacBulga\n                ),\n                LacPlant\n              ),\n              LacBrevi\n            ),\n            EntFaeca\n          ),\n          (\n            (\n              StaAureu,\n              StaEpide\n            ),\n            AnaAbact\n          )\n        ),\n        (\n          (\n            (\n              CloTyro3,\n              CloTyro4\n            ),\n            CloTyro2\n          ),\n          CloTyrob\n        )\n      ),\n      (\n        BacMegat,\n        BacPaste\n      )\n    ),\n    (\n      PlaCitre,\n      PlaKocur\n    )\n  ),\n  (\n    (\n      BacSpec2,\n      BacSubt2\n    ),\n    BacLich2\n  )\n);\n");
    // leaf labels generated via --leaf-aci (here: reading field 'ber_tax'), combined with --replace-problem-chars
    TEST_STDOUT_CONTAINS(ARB_EXPORT_NEWICK_CMD "--replace-problem-chars --leaf-aci \"readdb(\\\"ber_tax\\\")\"",
                         "((((((((((PROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.,\nPROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.\n),\nPROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.\n),\nPROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.\n),\nPROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.\n),\nPROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.\n),\n((PROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.,\nPROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.\n),\nPROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA_|MOLLICUTES__ACHOLEPLASMATALES__ANAEROPLASMATACEAE.\n)\n),\n(((PROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.,\nPROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.\n),\nPROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.\n),\nPROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.\n)\n),\n(PROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.,\nPROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.\n)\n),\n(PROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.,\nPROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.\n)\n),\n((PROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.,\nPROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.\n),\nPROKARYOTA__EUBACTERIA__GRAMPOSITIVES__CLOSTRIDIOBACTERIA.\n)\n);\n");

    // The following format is required by the SILVA pipeline.
    // (leaf labels are built from the fields 'acc', 'start' and 'stop', separated by '.')
    TEST_STDOUT_CONTAINS(ARB_EXPORT_NEWICK_CMD "--quoting single --add-branch-lengths --leaf-aci \"readdb(\\\"acc\\\");\\\".\\\";readdb(\\\"start\\\");\\\".\\\";readdb(\\\"stop\\\");merge\"",
                         "(((((((((('ARB_3951DE2B.1.117':0.01905,\n'ARB_C57D21E1.1.117':0.01905\n):0.17308,\n'ARB_9B3FAE39.1.117':0.14286\n):0.03261,\n'ARB_F2164787.1.117':0.05714\n):0.00990,\n'ARB_8C51921E.1.117':0.06667\n):0.02985,\n'ARB_831183A2.1.117':0.09524\n):1.35323,\n(('ARB_9F013D16.1.115':0.06087,\n'ARB_E5492501.1.115':0.04348\n):0.08929,\n'ARB_30492909.1.113':0.15044\n):0.01923\n):0.01163,\n((('ARB_D9683416.1.117':1.04569,\n'ARB_3A0C4A19.1.117':0.06061\n):0.02581,\n'ARB_44CFBEF7.1.116':0.01732\n):0.01717,\n'ARB_9B15850C.1.116':0.00866\n):0.06639\n):0.00000,\n('ARB_2F6EC950.1.116':0.04386,\n'ARB_1E14162F.1.117':0.04348\n):0.02667\n):0.14592,\n('ARB_FDEBCF58.1.116':0.00862,\n'ARB_CE41B26E.1.116':0.01724\n):0.13913\n):0.00889,\n(('ARB_B9CB66D2.1.116':0.05714,\n'ARB_914AF372.1.116':0.25743\n):1.21875,\n'ARB_15DC3437.1.116':0.15652\n):0.04571\n);\n");

    // very briefly test if help is supported
    // (uses a 'failure' check, i.e. calling with --help is expected to count as failed call)
    TEST_FAILURE_OUTPUT_CONTAINS(ARB_EXPORT_NEWICK_CMD "--help", (const char *)NULp, "Usage: arb_export_newick");

    // missing mandatory arguments
    // (each call supplies one more argument than the previous one; the expected message names the next missing one)
    TEST_FAILURE_OUTPUT_CONTAINS("arb_export_newick ",                               (const char *)NULp, "Error: no input database specified\n");
    TEST_FAILURE_OUTPUT_CONTAINS("arb_export_newick --db DUMMY_DB",                  (const char *)NULp, "Error: no tree name specified\n");
    TEST_FAILURE_OUTPUT_CONTAINS("arb_export_newick --db DUMMY_DB --tree tree_test", (const char *)NULp, "Error: no output file specified\n");

    // illegal arguments
    // (with the non-existing database 'DUMMY_DB' an illegal ACI is expected to go unnoticed: the
    // 'Database not found' message is expected instead; the ACI error is only expected with an existing database)
    TEST_FAILURE_OUTPUT_CONTAINS(ARB_EXPORT_NEWICK_CMD2 "--db DUMMY_DB --quoting n0N€",                                (const char *)NULp, "unknown quoting mode 'n0N€'");
    TEST_FAILURE_OUTPUT_CONTAINS(ARB_EXPORT_NEWICK_CMD2 "--db DUMMY_DB --illegal-command",                             (const char *)NULp, "unexpected argument '--illegal-command'");
    TEST_FAILURE_OUTPUT_CONTAINS(ARB_EXPORT_NEWICK_CMD2 "--db DUMMY_DB                --leaf-aci \"illegal-command\"", (const char *)NULp, "Database 'DUMMY_DB' not found (While opening 'DUMMY_DB': No such file or directory)");
    TEST_FAILURE_OUTPUT_CONTAINS(ARB_EXPORT_NEWICK_CMD2 "--db tools/export_newick.arb --leaf-aci \"illegal-command\"", (const char *)NULp, "Command 'illegal-command' failed:\nReason: Unknown command 'illegal-command'");

    // cleanup: remove the output file named in ARB_EXPORT_NEWICK_CMD2
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink("test.tree"));
}
TEST_PUBLISH(TEST_arb_export_newick);

#endif // UNIT_TESTS

