SectionList; // -------------------- // class Link class Link { string target; size_t source_lineno; public: Link(const string& target_, size_t source_lineno_) : target(target_), source_lineno(source_lineno_) {} const string& Target() const { return target; } size_t SourceLineno() const { return source_lineno; } }; typedef list Links; // ------------------------ // class Helpfile class Helpfile { Links uplinks; Links references; Links auto_references; Section title; SectionList sections; string inputfile; void check_self_ref(const string& link) { size_t slash = inputfile.find('/'); if (slash != string::npos) { if (inputfile.substr(slash+1) == link) { throw string("Invalid link to self"); } } } public: Helpfile() : title("TITLE", SEC_FAKE, NO_LINENUMBER_INFO) {} virtual ~Helpfile() {} void readHelp(istream& in, const string& filename); void writeXML(FILE *out, const string& page_name); void extractInternalLinks(); const Section& get_title() const { return title; } }; inline bool isSpace(char c) { return c == ' '; } inline bool isWhitespace(char c) { return isSpace(c) || c == '\n'; } inline bool isComment(const char *s) { return s[0] == '#'; } inline bool isEmptyOrComment(const char *s) { if (isComment(s)) return true; for (int off = 0; ; ++off) { if (s[off] == 0) return true; if (!isSpace(s[off])) break; } return false; } static void checkControlComment(const char *line) { const int DEFAULT_WIDTH = 91; // has to match value in to_help.xsl@preformatted-default-width if (isComment(line)) { // Note: currently all valid control comments start with the same keyword // Please document control comments in help.readme@Control const char *KEYWORD = "PREFORMATTED "; const char *found = strstr(line, KEYWORD); if (found) { const char *rest = found + strlen(KEYWORD); if (strcmp(rest, "RESET") == 0) { Ostring::set_current_preformatted_width(0); } #define WLEN 6 else if (strncmp(rest, "WIDTH ", WLEN) == 0) { const char *rest2 = rest + WLEN; int width = atoi(rest2); if (width<1) { if (strncmp(rest2, "DEFAULT", 7) == 0) { width = DEFAULT_WIDTH; } else { throw strf("invalid width %i in control comment '%s'", width, line); } } Ostring::set_current_preformatted_width(width); } #undef WLEN else if (strcmp(rest, "1") == 0) { // @@@ allow higher numbers for more lines? Ostring::set_current_preformatted_width(DEFAULT_WIDTH); Ostring::set_preformatted_blocks_wanted(1); } else { throw strf("invalid control comment '%s' (while parsing at '%s')", line, rest); } } } } inline const char *extractKeyword(const char *line, string& keyword) { // returns NULp if no keyword was found // otherwise returns position behind keyword and sets value of 'keyword' const char *space = strchr(line, ' '); if (space && space>line) { keyword = string(line, 0, space-line); return space; } else if (!space) { // test for keyword w/o content behind if (line[0]) { // not empty keyword = line; return strchr(line, 0); } } return NULp; } inline const char *eatSpace(const char *line) { // skip over spaces at start of 'line' while (isSpace(*line)) ++line; return line; } inline const char *eatWhitespace(const char *paragraph) { // skip over spaces and empty lines at start of 'paragraph' while (isWhitespace(*paragraph)) ++paragraph; return paragraph; } inline void pushParagraph(Section& sec, string& paragraph, size_t lineNo, ParagraphType& type, EnumerationType& etype, unsigned num) { if (paragraph.length()) { if (type == ENUMERATED) { sec.Content().push_back(Ostring(paragraph, lineNo, type, etype, num)); } else { sec.Content().push_back(Ostring(paragraph, lineNo, type)); } type = PLAIN_TEXT; etype = NONE; paragraph = ""; } } inline const char *firstChar(const char *s) { while (isSpace(s[0])) ++s; return s; } inline bool is_startof_itemlist_element(const char *contentStart) { return (contentStart[0] == '-' || contentStart[0] == '*') && isspace(contentStart[1]) && !(isspace(contentStart[2]) || contentStart[2] == '-'); } #define MAX_ALLOWED_ENUM 99 // otherwise it starts interpreting years as enums static EnumerationType startsWithLetter(string& s, unsigned& number) { // tests if first line starts with 'letter.' // if true then 'letter.' is removed from the string // the letter is converted and returned in 'number' ('a'->1, 'b'->2, ..) size_t off = s.find_first_not_of(" \n"); if (off == string::npos) return NONE; if (!isalpha(s[off])) return NONE; size_t astart = off; EnumerationType etype = isupper(s[off]) ? ALPHA_UPPER : ALPHA_LOWER; number = s[off]-(etype == ALPHA_UPPER ? 'A' : 'a')+1; ++off; h2x_assert(number>0 && numberMAX_ALLOWED_ENUM) return false; if (s[off] != '.' && s[off] != ')') return false; if (s[off+1] != ' ') return false; // remove 'number.' from string : ++off; while (s[off+1] == ' ') ++off; s.erase(num_start, off-num_start+1); return true; } static EnumerationType detectLineEnumType(string& line, unsigned& number) { if (startsWithNumber(line, number)) return DIGITS; return startsWithLetter(line, number); } static void parseSection(Section& sec, const char *line, int indentation, Reader& reader) { string paragraph = line; size_t para_start_lineno = reader.getLineNo(); if (sec.line_number() == NO_LINENUMBER_INFO) { // linenumber is not known yet // assume section just started (this happens with TITLE) sec.set_line_number(para_start_lineno); } ParagraphType type = PLAIN_TEXT; EnumerationType etype = NONE; unsigned num = 0; unsigned last_alpha_num = -1; h2x_assert(sec.Content().empty()); while (1) { line = reader.getNext(); if (!line) break; if (isEmptyOrComment(line)) { pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); checkControlComment(line); check_TODO(line, reader); } else { string keyword; const char *rest = extractKeyword(line, keyword); if (rest) { // a new keyword reader.back(); break; } check_TODO(line, reader); string Line = line; if (sec.get_type() == SEC_OCCURRENCE) { pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); } else { const char *firstNonWhite = firstChar(line); if (is_startof_itemlist_element(firstNonWhite)) { h2x_assert(firstNonWhite != line); pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); Line[firstNonWhite-line] = ' '; type = ITEM; // is reset in call to pushParagraph } else { unsigned foundNum; EnumerationType foundEtype = detectLineEnumType(Line, foundNum); if (foundEtype == ALPHA_UPPER || foundEtype == ALPHA_LOWER) { if (foundNum == (last_alpha_num+1) || foundNum == 1) { last_alpha_num = foundNum; } else { #if defined(WARN_IGNORED_ALPHA_ENUMS) add_warning(reader.attached_message("Ignoring non-consecutive alpha-enum")); #endif foundEtype = NONE; reader.back(); Line = reader.getNext(); last_alpha_num = -1; } } if (foundEtype != NONE) { pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); type = ENUMERATED; num = foundNum; etype = foundEtype; if (!num) { h2x_assert(etype == DIGITS); throw "Enumerations starting with zero are not supported"; } } } } if (paragraph.length()) { paragraph = paragraph+"\n"+Line; } else { paragraph = string("\n")+Line; para_start_lineno = reader.getLineNo(); } } } pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); if (sec.Content().size()>0 && indentation>0) { string spaces; spaces.reserve(indentation); spaces.append(indentation, ' '); string& ostr = sec.Content().front(); ostr = string("\n") + spaces + ostr; } } inline void check_specific_duplicates(const string& link, const Links& existing, bool add_warnings) { for (Links::const_iterator ex = existing.begin(); ex != existing.end(); ++ex) { if (ex->Target() == link) { if (add_warnings) add_warning(strf("First Link to '%s' was found here.", ex->Target().c_str()), ex->SourceLineno()); throw strf("Link to '%s' duplicated here.", link.c_str()); } } } inline void check_duplicates(const string& link, const Links& uplinks, const Links& references, bool add_warnings) { check_specific_duplicates(link, uplinks, add_warnings); check_specific_duplicates(link, references, add_warnings); } static void warnAboutDuplicate(SectionList& sections) { set seen; SectionList::iterator end = sections.end(); for (SectionList::iterator s = sections.begin(); s != end; ++s) { const string& sname = s->getName(); if (sname == "NOTES") continue; // do not warn about multiple NOTES sections SectionList::iterator o = s; ++o; for (; o != end; ++o) { if (sname == o->getName()) { o->attach_warning("duplicated SECTION name"); if (seen.find(sname) == seen.end()) { s->attach_warning("name was first used"); seen.insert(sname); } } } } } void Helpfile::readHelp(istream& in, const string& filename) { if (!in.good()) throw unattached_message(strf("Can't read from '%s'", filename.c_str())); Reader read(in); inputfile = filename; // remember file read (for comment) const char *line; const char *name_only = strrchr(filename.c_str(), '/'); h2x_assert(name_only); ++name_only; try { while (1) { line = read.getNext(); if (!line) break; if (isEmptyOrComment(line)) { checkControlComment(line); check_TODO(line, read); continue; } check_TODO(line, read); string keyword; const char *rest = extractKeyword(line, keyword); if (rest) { // found a keyword if (keyword == "UP") { rest = eatSpace(rest); if (strlen(rest)) { check_duplicates(rest, uplinks, references, true); if (strcmp(name_only, rest) == 0) throw "UP link to self"; uplinks.push_back(Link(rest, read.getLineNo())); } } else if (keyword == "SUB") { rest = eatSpace(rest); if (strlen(rest)) { check_duplicates(rest, uplinks, references, true); if (strcmp(name_only, rest) == 0) throw "SUB link to self"; references.push_back(Link(rest, read.getLineNo())); } } else if (keyword == "TITLE") { rest = eatSpace(rest); parseSection(title, rest, 0, read); if (title.Content().empty()) throw "empty TITLE not allowed"; const string& t = title.Content().front(); if (t.find("Standard help file form") != string::npos) { throw strf("Illegal title for help file: '%s'", t.c_str()); } const size_t len = t.length(); if (len>MAX_TITLE_CHARS) { // ignore non-alphanumeric characters at end of string: size_t last_alnum_pos = len-1; while (!isalnum(t[last_alnum_pos])) { --last_alnum_pos; } ++last_alnum_pos; arb_assert(last_alnum_pos<=len); const size_t ignored = len-last_alnum_pos; if ((len-ignored)>MAX_TITLE_CHARS) { title.attach_warning(strf("TITLE too verbose (max. %i chars allowed; found %zu%s)", MAX_TITLE_CHARS, len, ignored ? strf("; acceptable trailing chars: %zu", ignored).c_str() : "" )); } } } else { if (keyword == "NOTE") keyword = "NOTES"; if (keyword == "EXAMPLE") keyword = "EXAMPLES"; if (keyword == "WARNING") keyword = "WARNINGS"; SectionType stype = SEC_NONE; int idx; for (idx = 0; idx= KNOWN_SECTION_TYPES) throw strf("unknown keyword '%s'", keyword.c_str()); if (stype == SEC_SECTION) { string section_name = eatSpace(rest); Section sec(section_name, stype, lineno); parseSection(sec, "", 0, read); sections.push_back(sec); } else { Section sec(keyword, stype, lineno); rest = eatSpace(rest); parseSection(sec, rest, rest-line, read); sections.push_back(sec); } } } else { throw strf("Unhandled line"); } } warnAboutDuplicate(sections); } catch (string& err) { throw read.attached_message(err); } catch (const char *err) { throw read.attached_message(err); } } static bool shouldReflow(const string& s, int& foundIndentation) { // foundIndentation is only valid if shouldReflow() returns true enum { START, CHAR, SPACE, MULTIPLE, DOT, DOTSPACE } state = START; bool equal_indent = true; int lastIndent = -1; int thisIndent = 0; for (string::const_iterator c = s.begin(); c != s.end(); ++c, ++thisIndent) { if (*c == '\n') { state = START; thisIndent = 0; } else if (isSpace(*c)) { if (state == DOT || state == DOTSPACE) state = DOTSPACE; // multiple spaces after DOT are allowed else if (state == SPACE) state = MULTIPLE; // now seen multiple spaces else if (state == CHAR) state = SPACE; // now seen 1 space } else { if (state == MULTIPLE) return false; // character after multiple spaces if (state == START) { if (lastIndent == -1) lastIndent = thisIndent; else if (lastIndent != thisIndent) equal_indent = false; } state = (*c == '.' || *c == ',') ? DOT : CHAR; } } if (lastIndent<0) { equal_indent = false; } if (equal_indent) { foundIndentation = lastIndent-1; h2x_assert(foundIndentation >= 0); } return equal_indent; } static string correctSpaces(const string& text, int change) { h2x_assert(text.find('\n') == string::npos); if (!change) return text; size_t first = text.find_first_not_of(' '); if (first == string::npos) return ""; // empty line if (change<0) { int remove = -change; h2x_assert(remove <= int(first)); return text.substr(remove); } h2x_assert(change>0); // add spaces return string(change, ' ')+text; } static string correctIndentation(const string& text, int change) { // removes 'remove' spaces from every line size_t this_lineend = text.find('\n'); string result; if (this_lineend == string::npos) { result = correctSpaces(text, change); } else { result = correctSpaces(text.substr(0, this_lineend), change); while (this_lineend != string::npos) { size_t next_lineend = text.find('\n', this_lineend+1); if (next_lineend == string::npos) { // last line result = result+"\n"+correctSpaces(text.substr(this_lineend+1), change); } else { result = result+"\n"+correctSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1), change); } this_lineend = next_lineend; } } return result; } inline size_t countSpaces(const string& text) { size_t first = text.find_first_not_of(' '); if (first == string::npos) return INT_MAX; // empty line return first; } static size_t scanMinIndentation(const string& text) { size_t this_lineend = text.find('\n'); size_t min_indent = INT_MAX; if (this_lineend == string::npos) { min_indent = countSpaces(text); } else { while (this_lineend != string::npos) { size_t next_lineend = text.find('\n', this_lineend+1); if (next_lineend == string::npos) { min_indent = min(min_indent, countSpaces(text.substr(this_lineend+1))); } else { min_indent = min(min_indent, countSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1))); } this_lineend = next_lineend; } } if (min_indent == INT_MAX) min_indent = 0; // only empty lines return min_indent; } // ----------------------------- // class ParagraphTree class ParagraphTree FINAL_TYPE : public MessageAttachable, virtual Noncopyable { ParagraphTree *brother; // has same indentation as this ParagraphTree *son; // indentation + 1 Ostring otext; // text of the Section (containing linefeeds) bool reflow; // should the paragraph be reflown ? (true if indentation is equal for all lines of text) int indentation; // the real indentation of the blank (behind removed enumeration) string location_description() const OVERRIDE { return "in paragraph starting here"; } size_t line_number() const OVERRIDE { return otext.get_lineno(); } ParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end) : son(NULp), otext(*begin), indentation(0) { h2x_assert(begin != end); string& text = otext; reflow = otext.get_preformatted_width()==0 && shouldReflow(text, indentation); if (!reflow) { size_t reststart = text.find('\n', 1); if (reststart == 0) { attach_warning("[internal] Paragraph starts with LF -> reflow calculation will probably fail"); } if (reststart != string::npos) { int rest_indent = -1; string rest = text.substr(reststart); bool rest_reflow = shouldReflow(rest, rest_indent); if (rest_reflow) { int first_indent = countSpaces(text.substr(1)); if (get_type() == PLAIN_TEXT) { size_t last = text.find_last_not_of(' ', reststart-1); bool is_header = last != string::npos && text[last] == ':'; if (!is_header && rest_indent == (first_indent+8)) { #if defined(DEBUG) size_t textstart = text.find_first_not_of(" \n"); h2x_assert(textstart != string::npos); #endif // DEBUG text = text.substr(0, reststart)+correctIndentation(rest, -8); reflow = shouldReflow(text, indentation); } } else { int diff = rest_indent-first_indent; if (diff>0) { text = text.substr(0, reststart)+correctIndentation(rest, -diff); reflow = shouldReflow(text, indentation); } else if (diff<0) { // paragraph with more indent on first line (occurs?) attach_warning(strf("[internal] unhandled: more indentation on the 1st line (diff=%i)", diff)); } } } } } if (!reflow) { indentation = scanMinIndentation(text); } text = correctIndentation(text, -indentation); if (get_type() == ITEM) { h2x_assert(indentation >= 2); indentation -= 2; } brother = buildParagraphTree(++begin, end); } void brothers_to_sons(ParagraphTree *new_brother); unsigned get_preformatted_width() const { return otext.get_preformatted_width(); } public: virtual ~ParagraphTree() { delete brother; delete son; } ParagraphType get_type() const { return otext.get_type(); } bool is_itemlist_member() const { return get_type() == ITEM; } unsigned get_enumeration() const { return get_type() == ENUMERATED ? otext.get_number() : 0; } EnumerationType get_enum_type() const { return otext.get_enum_type(); } const char *readable_type() const { const char *res = NULp; switch (get_type()) { case PLAIN_TEXT: res = "PLAIN_TEXT"; break; case ITEM: res = "ITEM"; break; case ENUMERATED: res = "ENUMERATED"; break; } return res; } size_t countTextNodes() { size_t nodes = 1; // this if (son) nodes += son->countTextNodes(); if (brother) nodes += brother->countTextNodes(); return nodes; } #if defined(DUMP_PARAGRAPHS) void print_indent(ostream& out, int indent) { while (indent-->0) out << ' '; } char *masknl(const char *text) { char *result = ARB_strdup(text); for (int i = 0; result[i]; ++i) { if (result[i] == '\n') result[i] = '|'; } return result; } void dump(ostream& out, int indent = 0) { print_indent(out, indent+1); { char *mtext = masknl(otext.as_string().c_str()); out << "text='" << mtext << "'\n"; free(mtext); } print_indent(out, indent+1); out << "type='" << readable_type() << "' "; if (get_type() == ENUMERATED) { out << "enumeration='" << otext.get_number() << "' "; } out << "reflow='" << reflow << "' "; out << "indentation='" << indentation << "'\n"; if (son) { print_indent(out, indent+2); cout << "son:\n"; son->dump(out, indent+2); cout << "\n"; } if (brother) { print_indent(out, indent); cout << "brother:\n"; brother->dump(out, indent); } } #endif // DUMP_PARAGRAPHS private: static ParagraphTree* buildParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end) { if (begin == end) return NULp; return new ParagraphTree(begin, end); } public: static ParagraphTree* buildParagraphTree(const Section& sec) { const Ostrings& txt = sec.Content(); if (txt.empty()) throw "attempt to build an empty ParagraphTree"; return buildParagraphTree(txt.begin(), txt.end()); } bool contains(ParagraphTree *that) { return this == that || (son && son->contains(that)) || (brother && brother->contains(that)); } ParagraphTree *predecessor(ParagraphTree *before_this) { if (brother == before_this) return this; if (!brother) return NULp; return brother->predecessor(before_this); } void append(ParagraphTree *new_brother) { if (!brother) brother = new_brother; else brother->append(new_brother); } bool is_some_brother(const ParagraphTree *other) const { return (other == brother) || (brother && brother->is_some_brother(other)); } ParagraphTree* takeAllInFrontOf(ParagraphTree *after) { ParagraphTree *removed = this; ParagraphTree *after_pred = this; h2x_assert(is_some_brother(after)); while (1) { h2x_assert(after_pred); h2x_assert(after_pred->brother); // takeAllInFrontOf called with non-existing 'after' if (after_pred->brother == after) { // found after after_pred->brother = NULp; // unlink break; } after_pred = after_pred->brother; } return removed; } ParagraphTree *firstListMember() { switch (get_type()) { case PLAIN_TEXT: break; case ITEM: return this; case ENUMERATED: { if (get_enumeration() == 1) return this; break; } } if (brother) return brother->firstListMember(); return NULp; } ParagraphTree *nextListMemberAfter(const ParagraphTree& previous) { if (indentation previous.get_enumeration()) return this; return NULp; } if (!brother) return NULp; return brother->nextListMemberAfter(previous); } ParagraphTree *nextListMember() const { return brother ? brother->nextListMemberAfter(*this) : NULp; } ParagraphTree* firstWithLessIndentThan(int wanted_indentation) { if (indentation < wanted_indentation) return this; if (!brother) return NULp; return brother->firstWithLessIndentThan(wanted_indentation); } void format_indentations(); void format_lists(); private: static ParagraphTree* buildNewParagraph(const string& Text, size_t beginLineNo, ParagraphType type) { Ostrings S; S.push_back(Ostring(Text, beginLineNo, type)); return new ParagraphTree(S.begin(), S.end()); } ParagraphTree *xml_write_list_contents(); ParagraphTree *xml_write_enum_contents(); void xml_write_textblock(); public: void xml_write(); }; #if defined(DUMP_PARAGRAPHS) static void dump_paragraph(ParagraphTree *para) { // helper function for use in gdb para->dump(cout, 0); } #endif void ParagraphTree::brothers_to_sons(ParagraphTree *new_brother) { /*! folds down brothers to sons * @param new_brother brother of 'this->brother', will become new brother. * If new_brother == NULp -> make all brothers sons. */ if (new_brother) { h2x_assert(is_some_brother(new_brother)); if (brother != new_brother) { #if defined(DEBUG) if (son) { son->attach_warning("Found unexpected son (in brothers_to_sons)"); brother->attach_warning("while trying to transform paragraphs from here .."); new_brother->attach_warning(".. to here .."); attach_warning(".. into sons of this paragraph."); return; } #endif h2x_assert(!son); h2x_assert(brother); if (!new_brother) { // all brothers -> sons son = brother; brother = NULp; } else { son = brother->takeAllInFrontOf(new_brother); brother = new_brother; } } } else { h2x_assert(!son); son = brother; brother = NULp; } } void ParagraphTree::format_lists() { // reformats tree such that all items/enumerations are brothers ParagraphTree *member = firstListMember(); if (member) { for (ParagraphTree *curr = this; curr != member; curr = curr->brother) { h2x_assert(curr); if (curr->son) curr->son->format_lists(); } for (ParagraphTree *next = member->nextListMember(); next; member = next, next = member->nextListMember()) { member->brothers_to_sons(next); h2x_assert(member->brother == next); if (member->son) member->son->format_lists(); } h2x_assert(!member->son); // member is the last item if (member->brother) { ParagraphTree *non_member = member->brother->firstWithLessIndentThan(member->indentation+1); member->brothers_to_sons(non_member); } if (member->son) member->son->format_lists(); if (member->brother) member->brother->format_lists(); } else { for (ParagraphTree *curr = this; curr; curr = curr->brother) { h2x_assert(curr); if (curr->son) curr->son->format_lists(); } } } void ParagraphTree::format_indentations() { if (brother) { ParagraphTree *same_indent = brother->firstWithLessIndentThan(indentation+1); #if defined(WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION) if (same_indent && indentation != same_indent->indentation) { same_indent->attach_warning("indentation is assumed to be same as .."); attach_warning(".. here"); } #endif brothers_to_sons(same_indent); // if same_indent is NULp -> make all brothers childs if (brother) brother->format_indentations(); } if (son) son->format_indentations(); } // ----------------- // LinkType enum LinkType { LT_UNKNOWN = 0, LT_HTTP = 1, LT_HTTPS = 2, LT_FTP = 4, LT_FILE = 8, LT_EMAIL = 16, LT_HLP = 32, LT_PS = 64, LT_PDF = 128, LT_TICKET = 256, }; static const char *link_id[] = { "unknown", "www", // "http:" "www", // "https:" "www", // "ftp:" "www", // "file:" "email", "hlp", "ps", "pdf", "ticket", }; static string LinkType2id(LinkType type) { size_t idx = 0; while (type >= 1) { idx++; type = LinkType(type>>1); } arb_assert(idx not a ticketref size_t afterhash = hashpos+1; return text.substr(0, afterhash) + autolink_ticket_references(text.substr(afterhash)); } size_t hashlength = 2; while (isdigit(text[hashpos+hashlength])) ++hashlength; return text.substr(0, hashpos) + "LINK{" + text.substr(hashpos, hashlength) + "}" + autolink_ticket_references(text.substr(hashpos+hashlength)); } inline void print_XML_Text(const string& text, size_t lineNo) { string autolinkedText = autolink_ticket_references(text); print_XML_Text_expanding_links(autolinkedText, lineNo); } void ParagraphTree::xml_write_textblock() { XML_Tag textblock("T"); textblock.add_attribute("reflow", reflow ? "1" : "0"); if (!reflow) { unsigned width = get_preformatted_width(); if (width>0) { textblock.add_attribute("width", strf("%i", width)); } } { string usedText; const string& text = otext; if (reflow) { usedText = correctIndentation(text, (textblock.Indent()+1) * the_XML_Document->indentation_per_level); } else { usedText = text; } print_XML_Text(usedText, otext.get_lineno()); } } ParagraphTree *ParagraphTree::xml_write_list_contents() { h2x_assert(is_itemlist_member()); #if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS) if (!reflow) attach_warning("ITEM not reflown (check output)"); #endif { XML_Tag entry("ENTRY"); entry.add_attribute("item", "1"); xml_write_textblock(); if (son) son->xml_write(); } if (brother && brother->is_itemlist_member()) { return brother->xml_write_list_contents(); } return brother; } ParagraphTree *ParagraphTree::xml_write_enum_contents() { h2x_assert(get_enumeration()); #if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS) if (!reflow) attach_warning("ENUMERATED not reflown (check output)"); #endif { XML_Tag entry("ENTRY"); switch (get_enum_type()) { case DIGITS: entry.add_attribute("enumerated", strf("%i", get_enumeration())); break; case ALPHA_UPPER: entry.add_attribute("enumerated", strf("%c", 'A'-1+get_enumeration())); break; case ALPHA_LOWER: entry.add_attribute("enumerated", strf("%c", 'a'-1+get_enumeration())); break; default: h2x_assert(0); break; } xml_write_textblock(); if (son) son->xml_write(); } if (brother && brother->get_enumeration()) { int diff = brother->get_enumeration()-get_enumeration(); if (diff != 1) { attach_warning("Non-consecutive enumeration detected between here.."); brother->attach_warning(".. and here"); } return brother->xml_write_enum_contents(); } return brother; } void ParagraphTree::xml_write() { try { ParagraphTree *next = NULp; if (get_enumeration()) { XML_Tag enu("ENUM"); if (get_enumeration() != 1) { attach_warning(strf("First enum starts with '%u.' (maybe previous enum was not detected)", get_enumeration())); } next = xml_write_enum_contents(); #if defined(WARN_LONESOME_ENUM_ELEMENTS) if (next == brother) attach_warning("Suspicious single-element-ENUM"); #endif } else if (is_itemlist_member()) { XML_Tag list("LIST"); next = xml_write_list_contents(); #if defined(WARN_LONESOME_LIST_ELEMENTS) if (next == brother) attach_warning("Suspicious single-element-LIST"); #endif } else { { XML_Tag para("P"); xml_write_textblock(); if (son) son->xml_write(); } next = brother; } if (next) next->xml_write(); } catch (string& err) { throw attached_message(err); } catch (const char *err) { throw attached_message(err); } } static void create_top_links(const Links& links, const char *tag) { for (Links::const_iterator s = links.begin(); s != links.end(); ++s) { XML_Tag link(tag); add_link_attributes(link, detectLinkType(s->Target()), s->Target(), s->SourceLineno()); } } inline string remove_LF_and_indentation(string paragraph) { // remove linefeeds + spaces behind linefeed (=indentation) size_t pos = 0; while (1) { size_t lf = paragraph.find('\n', pos); if (lf == string::npos) break; // all LFs handled // eliminate spaces before the LF: if (lf>0 && paragraph[lf-1] == ' ') { // LF is preceeded by space(s) size_t sp = lf-1; while (sp>=1 && paragraph[sp-1] == ' ') --sp; // position to 1st space arb_assert(sp replace LF by single space pos = lf+1; } else { size_t as = paragraph.find_first_not_of(' ', ns); // pos after consecutive space(s) size_t ls = as == string::npos ? ns : as-1; // last consecutive space paragraph.erase(lf, ls-lf); // keep one space (between concatenated line contents) } } // remove trailing spaces: size_t ls = paragraph.find_last_not_of(' '); if (ls == string::npos) { // only spaces paragraph.clear(); } else { ++ls; paragraph.erase(ls, paragraph.length()-ls); } return paragraph; } void Helpfile::writeXML(FILE *out, const string& page_name) { XML_Document xml("PAGE", "arb_help.dtd", out); xml.skip_empty_tags = true; xml.indentation_per_level = 2; xml.getRoot().add_attribute("name", page_name); #if defined(DEBUG) xml.getRoot().add_attribute("edit_warning", "devel"); // inserts a edit warning into development version #else xml.getRoot().add_attribute("edit_warning", "release"); // inserts a different edit warning into release version #endif // DEBUG xml.getRoot().add_attribute("source", inputfile.c_str()); { XML_Comment(string("automatically generated from ../")+inputfile+' '); } create_top_links(uplinks, "UP"); create_top_links(references, "SUB"); create_top_links(auto_references, "SUB"); try { string titleText, subtitleText; const Ostrings& T = title.Content(); Ostrings::const_iterator s = T.begin(); if (s != T.end()) titleText = *s++; bool subtitleAdded = false; // @@@ not needed! (use !subtitleText.empty()) for (; s != T.end(); ++s) { if (s->get_type() != PLAIN_TEXT) { throw s->attached_message("wrong paragraph type (plain text expected)"); } string text = s->as_string(); if (!text.empty()) { // ignore empty lines text = eatWhitespace(text.c_str()); if (!text.empty()) { if (subtitleAdded) throw s->attached_message("only one subtitle accepted"); text = remove_LF_and_indentation(text); if (text.length()>MAX_SUBTITLE_CHARS) { s->attach_warning(strf("subtitle too verbose (max. %i chars allowed; found %zu)", MAX_SUBTITLE_CHARS, text.length())); } subtitleText = text; subtitleAdded = true; // accept only one line } } } { XML_Tag title_tag("TITLE"); { XML_Text text(titleText); } } if (!subtitleText.empty()) { XML_Tag title_tag("SUBTITLE"); { XML_Text text(subtitleText); } } } catch (string& err) { throw title.attached_message(err); } catch (const char *err) { throw title.attached_message(err); } for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) { try { XML_Tag section_tag("SECTION"); section_tag.add_attribute("name", sec->getName()); ParagraphTree *ptree = ParagraphTree::buildParagraphTree(*sec); #if defined(DEBUG) size_t textnodes = ptree->countTextNodes(); #endif #if defined(DUMP_PARAGRAPHS) cout << "Dump of section '" << sec->getName() << "' (before format_lists):\n"; ptree->dump(cout); cout << "----------------------------------------\n"; #endif ptree->format_lists(); #if defined(DUMP_PARAGRAPHS) cout << "Dump of section '" << sec->getName() << "' (after format_lists):\n"; ptree->dump(cout); cout << "----------------------------------------\n"; #endif #if defined(DEBUG) size_t textnodes2 = ptree->countTextNodes(); h2x_assert(textnodes2 == textnodes); // if this occurs format_lists has an error #endif ptree->format_indentations(); #if defined(DUMP_PARAGRAPHS) cout << "Dump of section '" << sec->getName() << "' (after format_indentations):\n"; ptree->dump(cout); cout << "----------------------------------------\n"; #endif #if defined(DEBUG) size_t textnodes3 = ptree->countTextNodes(); h2x_assert(textnodes3 == textnodes2); // if this occurs format_indentations has an error #endif ptree->xml_write(); delete ptree; } catch (string& err) { throw sec->attached_message(err); } catch (const char *err) { throw sec->attached_message(err); } } } void Helpfile::extractInternalLinks() { for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) { try { const Ostrings& s = sec->Content(); for (Ostrings::const_iterator li = s.begin(); li != s.end(); ++li) { const string& line = *li; size_t start = 0; while (1) { size_t found = line.find("LINK{", start); if (found == string::npos) break; found += 5; size_t close = line.find('}', found); if (close == string::npos) break; string link_target = line.substr(found, close-found); if (link_target.find("http://") == string::npos && link_target.find("https://")== string::npos && link_target.find("ftp://") == string::npos && link_target.find("file://") == string::npos && link_target.find('@') == string::npos) { check_self_ref(link_target); try { check_specific_duplicates(link_target, references, false); // check only sublinks here check_specific_duplicates(link_target, uplinks, false); // check only uplinks here check_specific_duplicates(link_target, auto_references, false); // check only sublinks here // only auto-add inline reference if none of the above checks has thrown auto_references.push_back(Link(link_target, li->line_number())); } catch (string& err) { ; // silently ignore inlined } } start = close+1; } } } catch (string& err) { throw sec->attached_message("'"+err+"' while scanning LINK{}"); } } } static void show_err(const string& err, size_t lineno, const string& helpfile) { if (err.find(helpfile+':') != string::npos) { cerr << err; } else if (lineno == NO_LINENUMBER_INFO) { cerr << helpfile << ":1: [in unknown line] " << err; } else { cerr << helpfile << ":" << lineno << ": " << err; } cerr << '\n'; } inline void show_err(const LineAttachedMessage& line_err, const string& helpfile) { show_err(line_err.Message(), line_err.Lineno(), helpfile); } inline void show_warning(const LineAttachedMessage& line_err, const string& helpfile) { show_err(string("Warning: ")+line_err.Message(), line_err.Lineno(), helpfile); } inline void show_warnings(const string& helpfile) { for (list::const_iterator wi = warnings.begin(); wi != warnings.end(); ++wi) { show_warning(*wi, helpfile); } } static void show_error_and_warnings(const LineAttachedMessage& error, const string& helpfile) { show_err(error, helpfile); show_warnings(helpfile); } int ARB_main(int argc, char *argv[]) { if (argc != 3) { cerr << "Usage: arb_help2xml \n"; return EXIT_FAILURE; } Helpfile help; string arb_help; try { try { arb_help = argv[1]; string xml_output = argv[2]; { ifstream in(arb_help.c_str()); help.readHelp(in, arb_help); } help.extractInternalLinks(); { FILE *out = std::fopen(xml_output.c_str(), "wt"); if (!out) throw string("Can't open '")+xml_output+'\''; try { // arb_help contains 'source/name.hlp' size_t slash = arb_help.find('/'); size_t dot = arb_help.find_last_of('.'); if (slash == string::npos || dot == string::npos) { throw string("parameter has to be in format 'source/name.hlp' (not '"+arb_help+"')"); } string page_name(arb_help, slash+1, dot-slash-1); help.writeXML(out, page_name); fclose(out); } catch (...) { fclose(out); remove(xml_output.c_str()); throw; } } show_warnings(arb_help); return EXIT_SUCCESS; } catch (string& err) { throw unattached_message(err); } catch (const char * err) { throw unattached_message(err); } catch (LineAttachedMessage& err) { throw; } catch (...) { throw unattached_message("unknown exception in arb_help2xml"); } } catch (LineAttachedMessage& err) { show_error_and_warnings(err, arb_help); } catch (...) { h2x_assert(0); } return EXIT_FAILURE; } // -------------------------------------------------------------------------------- #ifdef UNIT_TESTS #include #include #include // Hint: you may set ONLY_DO_UNITTEST = 1 to speed up code/test-cycle // see ./Makefile@ONLY_DO_UNITTEST #define TEST_REMOVE_LF_AND_INDENTATION(i,want) TEST_EXPECT_EQUAL(remove_LF_and_indentation(i).c_str(), want) #define TEST_REMOVE_LF_AND_INDENTATION__BROKEN(i,want,got) TEST_EXPECT_EQUAL__BROKEN(remove_LF_and_indentation(i).c_str(), want, got) void TEST_remove_LF_and_indentation() { TEST_REMOVE_LF_AND_INDENTATION("", ""); TEST_REMOVE_LF_AND_INDENTATION(" \n \n \n ", ""); TEST_REMOVE_LF_AND_INDENTATION("hello\nNewline", "hello Newline"); TEST_REMOVE_LF_AND_INDENTATION("hello\nNewline\n 1\n2 \n 3 \n4\n5\n 6 \n 7 \n 8\n", "hello Newline 1 2 3 4 5 6 7 8"); TEST_REMOVE_LF_AND_INDENTATION("Visualization of Three-dimensional\n structure of small subunit (16S) rRNA", "Visualization of Three-dimensional structure of small subunit (16S) rRNA"); } static arb_test::match_expectation help_file_compiles(const char *helpfile, const char *expected_title, const char *expected_error_part) { using namespace arb_test; expectation_group expected; ifstream in(helpfile); LineAttachedMessage *error = NULp; Helpfile help; try { help.readHelp(in, helpfile); help.extractInternalLinks(); FILE *devnul = fopen("/dev/null", "wt"); if (!devnul) throw unattached_message("can't write to null device"); help.writeXML(devnul, "dummy"); fclose(devnul); } catch (LineAttachedMessage& err) { error = new LineAttachedMessage(err); } catch (...) { error = new LineAttachedMessage(unattached_message("unknown exception")); } if (expected_error_part) { expected.add(that(error).does_differ_from_NULL()); if (error) expected.add(that(error->Message()).does_contain(expected_error_part)); } else { expected.add(that(error).is_equal_to_NULL()); if (!error) { Section title = help.get_title(); const Ostrings& title_strings = title.Content(); expected.add(that(title_strings.front().as_string()).is_equal_to(expected_title)); expected.add(that(title_strings.size()).is_equal_to(1)); } else { show_error_and_warnings(*error, helpfile); } } delete error; return all().ofgroup(expected); } #define HELP_FILE_COMPILES(name,expTitle) TEST_EXPECTATION(help_file_compiles(name,expTitle,NULp)) #define HELP_FILE_COMPILE_ERROR(name,expError) TEST_EXPECTATION(help_file_compiles(name,NULp,expError)) void TEST_hlp2xml_conversion() { TEST_EXPECT_ZERO(chdir("../../HELP_SOURCE")); HELP_FILE_COMPILES("genhelp/agde_treepuzzle.hlp", "treepuzzle"); // genhelp/agde_treepuzzle.hlp HELP_FILE_COMPILES("source/markbyref.hlp", "Mark by reference"); // source/markbyref.hlp HELP_FILE_COMPILES("source/ad_align.hlp", "Alignment Administration"); // source/ad_align.hlp HELP_FILE_COMPILES("genhelp/copyright.hlp", "Copyrights and licenses"); // genhelp/copyright.hlp // @@@ add test for helpfile with subtitle HELP_FILE_COMPILE_ERROR("akjsdlkad.hlp", "Can't read from"); // no such file } TEST_PUBLISH(TEST_hlp2xml_conversion); // #define TEST_AUTO_UPDATE // uncomment to update expected xml void TEST_hlp2xml_output() { string tested_helpfile[] = { "unittest" }; string HELP_SOURCE = "../../HELP_SOURCE/"; string LIB = "../../lib/"; string EXPECTED = "help/"; for (size_t i = 0; i