// ============================================================ //
//                                                              //
//   File      : FilteredExport.h                               //
//   Purpose   : encapsulate SAI-filtered fasta exporter        //
//                                                              //
//   Coded by Ralf Westram (coder@reallysoft.de) in June 2017   //
//   http://www.arb-home.de/                                    //
//                                                              //
// ============================================================ //

#ifndef FILTEREDEXPORT_H
#define FILTEREDEXPORT_H

#ifndef AP_FILTER_HXX
#include <AP_filter.hxx>
#endif
#ifndef _GLIBCXX_STRING
#include <string>
#endif
#ifndef _STDINT_H
#include <stdint.h>
#endif

// Interpretation of the character set of a filter definition:
// BLOCK -> the listed characters are blocking, PASS -> the listed characters are permeable.
enum FilterDefType { BLOCK, PASS };

class CharRangeTable {
    // Membership table for a set of characters (one flag per possible byte value).
    //
    // The set is built from a specification string which may contain ranges (e.g. "a-zA-Z"):
    // - '-' at start or end of the specification gets accepted as plain char!
    // - Does only expand alphanumeric, forward ranges. Any other "range" (e.g. "z-a" or "9-A")
    //   inserts its three characters (from, '-', to) literally.
    // - A '-' directly following a range is taken literally as well ("a-c-e" -> "-abce").

    bool table[256];

    static bool is_expandable(uint8_t from, uint8_t to) {
        // true if 'from'..'to' is a forward range consisting of alphanumeric characters only.
        // (expanding anything else is considered dangerous)
        if (from>to) return false;
        for (unsigned b = from; b<=to; ++b) {
            if (!isalnum(b)) return false;
        }
        return true;
    }

public:
    // Builds the table from the specification 'chars'.
    // A NULL or empty specification results in an empty table.
    CharRangeTable(const char *chars) {
        memset(table, 0, sizeof(table)); // do not rely on sizeof(bool)==1
        if (!chars) return;

        uint8_t prevchar = 0; // previously seen specification character (0 at start)
        for (int i = 0; chars[i]; ++i) {
            const uint8_t c = chars[i];

            // a '-' that has a predecessor and a successor denotes a range:
            uint8_t toChar = 0;
            if (c == '-' && prevchar) toChar = static_cast<uint8_t>(chars[i+1]);

            if (toChar) {
                ++i; // consume range end
                if (is_expandable(prevchar, toChar)) {
                    for (unsigned b = prevchar; b<=toChar; ++b) {
                        table[b] = true;
                    }
                }
                else { // do not expand -> insert literally
                    table[prevchar] = true;
                    table['-']      = true;
                    table[toChar]   = true;
                }
            }
            else {
                table[c] = true; // plain character (includes '-' at start or end)
            }

            // Note: after a range this stores '-' (not the range end).
            // That is why a '-' directly behind a range never starts another range.
            prevchar = c;
        }
    }

    // Returns true if byte value 'i' is member of the set.
    bool isSet(uint8_t i) const { return table[i]; }

    // Returns all members of the set as 0-terminated string (ascending byte order).
    // The result points to a static buffer, i.e. it gets overwritten by the next call
    // of expandedRange() (for any CharRangeTable).
    const char *expandedRange() const {
        static char buf[256+1];
        int         b = 0;
        for (unsigned i = 0; i<256; ++i) {
            if (isSet(i)) {
                buf[b++] = char(i);
            }
        }
        buf[b] = 0;
        return buf;
    }
};

class FilterDefinition {
    // Definition of one SAI-based filter:
    // names the SAI to use and the set of its characters which block or pass (depending on 'type').

    FilterDefType type;

    std::string sai_name;
    std::string characters; // type == BLOCK -> blocking characters; type==PASS -> permeable characters

    bool inverse; // true -> do not use 'characters',  use rest of ASCII set

    // std::string must not be constructed from a NULL pointer (undefined behavior)
    // -> treat NULL like an empty string.
    static const char *null2empty(const char *str) { return str ? str : ""; }

public:
    // sai_name_    : name of the SAI (NULL is stored as empty name)
    // type_        : BLOCK or PASS (see 'characters')
    // filter_chars : true -> use 'characters_' as given; false -> use the inverse set (sets 'inverse')
    // characters_  : the characters to block or pass (NULL is stored as empty set)
    FilterDefinition(const char *sai_name_, FilterDefType type_, bool filter_chars, const char *characters_) :
        type(type_),
        sai_name(null2empty(sai_name_)),
        characters(null2empty(characters_)),
        inverse(!filter_chars)
    {}

    FilterDefType get_type() const { return type; }

    // Implemented outside this header.
    // NOTE(review): presumably creates the AP_filter described by this definition for the
    // alignment 'aliName' with length 'aliSize' - confirm result ownership and error
    // reporting against the implementation.
    AP_filter *make_filter(GBDATA *gb_main, const char *aliName, size_t aliSize) const;
};


// Exporter writing SAI-filtered sequences in fasta format (see write_fasta).
// Configure it using the 'configuration' methods below, then call write_fasta().
// Instances cannot be copied (derives from Noncopyable).
class FilteredExport : virtual Noncopyable {
    GBDATA *gb_main;
    char   *aliname; // alignment name (returned by get_aliname)
    size_t  alisize; // alignment size (used to create a fresh 'filter' in clear_SAI_filters)

    bool accept_missing_data; // set to true by do_accept_missing_data()

    // ACI expressions (set via set_header_ACI / set_sequence_ACI).
    // NOTE(review): presumably applied to generate the fasta header resp. to postprocess
    // the sequence - confirm against implementation.
    char *header_ACI;
    char *sequence_ACI;

    // min requirements for export (which chars to count + min. counts required)
    CharRangeTable count_table;
    int            minCount;


    AP_filter filter;
    bool      filter_added; // add_SAI_filter called yet?

    // Helpers implemented outside this header:
    char *get_filtered_sequence(GBDATA *gb_species, const char*& reason) const;
    char *get_fasta_header(GBDATA *gb_species) const; // w/o leading '>'

#if defined(UNIT_TESTS)
    friend void TEST_FilteredExport(); // allow test inspection
#endif

    int count_bases(const char *seq) const;

public:
    FilteredExport(GBDATA *gb_main_, const char *aliname_, size_t alisize_);
    ~FilteredExport();

    // configuration:
    void do_accept_missing_data() { accept_missing_data = true; }

    // Defines the characters to count ('basesToCount' is a CharRangeTable specification,
    // e.g. "a-zA-Z") and the required minimum count.
    // 'basesToCount' is expected to be specified whenever 'minCount_' is positive (asserted).
    void set_required_baseCount(const char *basesToCount, int minCount_) {
        minCount    = minCount_;
        count_table = CharRangeTable(basesToCount);
        arb_assert(implicated(minCount>0, basesToCount));
    }
    // Removes the requirement (empty count table, minimum count 0).
    void reset_required_baseCount() { set_required_baseCount(NULp, 0); }

    // NOTE(review): freedup presumably frees the old value and stores a copy of 'aci' - confirm.
    void set_header_ACI(const char *aci) { freedup(header_ACI, aci); }
    void set_sequence_ACI(const char *aci) { freedup(sequence_ACI, aci); }

    // The result has to be used by the caller (__ATTR__USERESULT).
    GB_ERROR add_SAI_filter(const FilterDefinition& filterDef) __ATTR__USERESULT;

    // Replaces 'filter' by a freshly constructed AP_filter for 'alisize'
    // and marks that no SAI filter has been added.
    void clear_SAI_filters() {
        filter       = AP_filter(alisize);
        filter_added = false;
    }

    // access:
    const char *get_aliname() const {
        return aliname;
    }

    // action:
    GB_ERROR write_fasta(FILE *out);
};


#else
#error FilteredExport.h included twice
#endif // FILTEREDEXPORT_H
