// =============================================================== //
//                                                                 //
//   File      : arb_2_bin.cxx                                     //
//   Purpose   : Convert an ARB database to binary format          //
//                                                                 //
//   Institute of Microbiology (Technical University Munich)       //
//   http://www.arb-home.de/                                       //
//                                                                 //
// =============================================================== //


#include <arbdbt.h>
#include <TreeRead.h>
#include <TreeNode.h>

int ARB_main(int argc, char *argv[]) {
    GB_ERROR error = NULp;

    fprintf(stderr, "arb_2_bin - ARB database ascii to binary converter\n");

    if (argc <= 1 || strcmp(argv[1], "--help") == 0) {
        fprintf(stderr,
                "\n"
                "Purpose: Converts a database to binary format\n"
                "Syntax:  arb_2_bin [Options] database [newdatabase]\n"
                "Options: -m            create map file too\n"
                "         -r            try to repair destroyed database\n"
                "         -c[tree_xxx]  optimize database using tree_xxx or largest tree\n"
                "         -C<type>      use extra compression\n"
                "                       (known <type>s: %s)\n"
                "         -e <treefile> optimize the database using the tree from the\n"
                "                       file. The tree is not stored in the database.\n"
                "\n"
                "database my be '-' in which case arb_2_bin reads from stdin.\n"
                "\n",
                GB_get_supported_compression_flags(true));

        if (argc>1 && strcmp(argv[1], "--help") != 0) { error = "Missing arguments"; }
    }
    else {
        char rtype[256];
        char wtype[256];
        int  ci   = 1;
        int  nidx = 0;

        const char *opt_tree = NULp;
        const char *ext_tree = NULp;

        {
            char *rtypep = rtype;
            char *wtypep = wtype;

            memset(rtype, 0, 10);
            memset(wtype, 0, 10);
            *(wtypep++) = 'b';
            *(rtypep++) = 'r';
            *(rtypep++) = 'w';

            while (argv[ci] && argv[ci][0] == '-' && argv[ci][1] != 0 && !error) {
                if      (!strcmp(argv[ci], "-m")) { ci++; *(wtypep++) = 'm'; }
                else if (!strcmp(argv[ci], "-r")) { ci++; *(rtypep++) = 'R'; }
                else if (!strncmp(argv[ci], "-c", 2)) { opt_tree = argv[ci]+2; ci++; }
                else if (!strncmp(argv[ci], "-i", 2)) { nidx = atoi(argv[ci]+2); ci++; }
                else if (!strncmp(argv[ci], "-C", 2)) {
                    char cflag = argv[ci][2]; ci++;
                    if (!strchr(GB_get_supported_compression_flags(false), cflag)) {
                        error = GBS_global_string("Unknown compression flag '%c'", cflag);
                    }
                    else {
                        *(wtypep++) = cflag;
                    }
                }
                else if (!strcmp(argv[ci], "-e")) { ci++; ext_tree = argv[ci]; ci++; }
                else {
                    error = GBS_global_string("Unknown argument '%s'", argv[ci]);
                    break;
                }
            }
        }

        const char *in  = argv[ci++];
        const char *out = ci >= argc ? in : argv[ci++];

        if (!error && !in) error = "missing arguments";

        if (!error) {
            printf("Reading database...\n");
            GB_shell  shell;
            GBDATA   *gb_main = GBT_open(in, rtype);
            if (!gb_main) {
                error = GB_await_error();
            }
            else {
                TreeNode *tree = NULp;

                if (ext_tree) {
                    printf("Reading tree from '%s' ..\n", ext_tree);
                    {
                        char *warnings = NULp;
                        tree = TREE_load(ext_tree, new SimpleRoot, NULp, false, &warnings);
                        if (tree) {
                            if (warnings) {
                                printf("Warning from TREE_load: %s\n", warnings);
                                free(warnings);
                            }
                            opt_tree = "tree_zzzz_comp_opt_tree_zzzz";
                            error = GB_begin_transaction(gb_main);
                            if (tree->is_leaf()) error = "Cannot load tree (need at least 2 leafs)";
                            if (!error) error = GBT_write_tree(gb_main, opt_tree, tree);
                            error = GB_end_transaction(gb_main, error);
                        }
                        else {
                            error = GBS_global_string("Failed to load tree: %s", GB_await_error());
                        }
                    }
                }

                if (!error && opt_tree) {
                    char *ali_name = GBT_get_default_alignment(gb_main);
                    if (!ali_name) {
                        error = GB_await_error();
                    }
                    else {
                        if (!strlen(opt_tree)) opt_tree = NULp;

                        printf("Optimizing database...\n");
                        error = GBT_compress_sequence_tree2(gb_main, opt_tree, ali_name);
                        free(ali_name);
                    }
                    if (error) error = GBS_global_string("Failed to optimize database: %s", error);
                }

                if (!error && ext_tree) {
                    printf("Deleting compression tree from database...\n");
                    error = GB_begin_transaction(gb_main);
                    GBDATA *gb_comp_tree = GBT_find_tree(gb_main, opt_tree);
                    if (!error) error = GB_delete(gb_comp_tree);
                    if (error) error = GBS_global_string("Failed to delete optimization tree: %s", error);
                    error = GB_end_transaction(gb_main, error);
                }

                if (!error) {
                    GB_set_next_main_idx(nidx);
                    printf("Saving database...\n");
                    error = GB_save(gb_main, out, wtype);
                }
                GB_close(gb_main);
            }
        }
    }

    if (error) {
        fprintf(stderr, "arb_2_bin: Error: %s\n", error);
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
