#Please insert up references in the next lines (line starts with keyword UP) UP arb.hlp UP glossary.hlp #Please insert subtopic references (line starts with keyword SUB) #SUB subtopic.hlp # Hypertext links in helptext can be added like this: LINK{ref.hlp|http://add|bla@domain} #************* Title of helpfile !! and start of real helpfile ******** TITLE ARB: Database OCCURRENCE ARB_NT DESCRIPTION A central database of sequences and additional information (taken from public databases or supplied by the user) is stored in a binary or ASCII file (*.arb) (and, in future releases, archive and delta files). The database reader auto-detects binary or ASCII mode. Brief comparison of the different file types: binary with fast load file: (+) very fast (+) runs on slow and old computers (-) needs a lot of hard disk space => for normal operation on old machines binary: (+) very fast (+) small (compression rate: 60%-95%) => for normal operation ASCII: (+) editable by standard text editors (+) information can be extracted by hand (-) needs an extreme amount of hard disk space => to check and correct a database All ARB tools for database handling and most of the ARB tools for data analysis act directly upon the database. The database is kept consistent at all times. Any local modifications by individual ARB tools are immediately exported to the database and all other active tools. NOTES ASCII format DATA FORMAT [xxx] means xxx is optional [xxx]* means xxx is optional and can occur many times xxx|yyy means xxx or yyy // means comment ARBDB HIERARCHY ARB DB is a hierarchical database system, so here's a short description of the hierarchy: ARBDB ::= species_data // container containing all species presets // global alignment and db field information [extended_data] // all SAIs [tmp] // temporary data [tree_data] // all trees ... 
// user defined entries (programmers) species_data::= [species]* extended_data::= [extended]* gene_data::= [gene]* // container for genes (species local) species::= 'name' // species identifier ['full_name'] ... // (end) user defined fields [ali_xxx] // the alignment container(s) [gene_data] // container containing genes extended::= // analogous to species gene::= // analogous to species ali_xxx::= 'data' // the sequence ... // additional sequence information presets::= 'use' // default alignment [alignment]* [key_data] // description of the user defined keys alignment::= 'alignment_name' // name of the alignment (prefix 'ali_') 'alignment_len' // length of longest sequence 'alignment_write_security' // default write security 'alignment_type' // dna or pro 'aligned' // ==1 when all sequences have the same // length else 0 key_data::= [key]* key::= 'key_name' // name of a user-defined field 'key_type' // type (12=string 3=int) ******************************************* *************** ASCII BASIC ************** ******************************************* Note: - /* xxx */ marks a comment and is ignored by the reader - A grammar is used to describe the data format. All terminal symbols are enclosed in single quotes ('). 
ASCII::= ['/*ARBDB ASCII*/'] [FIELD]* FIELD::= KEY [PROTECTION] [TYPE] VALUE | KEY [PROTECTION] '%%' (% [FIELD]* %) /* Comment */ KEY::= 'Any string of a-z|A-Z|0-9|"_"' |KEY| > 2 < 256 PROTECTION::= ':''delete protection level''write p.l.''00' // 00 are reserved for future use TYPE::= '%s' // STRING '%i' // INTEGER '%f' // FLOAT '%N' // BYTES '%I' // BITS '%F' // FLOATS VALUE::= '"string"' | '"^Astring^A"' | 'string' //type = STRING | 'int_number' //type = INT | 'real_number' //type = FLOAT | 'coded bytestring' //type = BYTES,FLOATS, // BITS EXAMPLES None ******************************************* ************** ASCII EXAMPLE ************* ******************************************* /*ARBDB ASCII*/ species_data %% (% species :5000 %% (% name :7600 "EscCol10" file "ecrna3.empro" full_name "Escherichia coli" acc "V00331;" ali_23all :5000 %% (% data :7500 "...........ACGTUUU........... mark %I "---------------++++--------- %) /*ali_23all*/ species :5000 %% (% name :7600 "EscCol11" file "ecrr23s.empro" full_name "Escherichia coli" ali_23all :5000 %% (% data :7500 "...........ACGTUUUGGG....... 
mark %I "---------------++++--------- %) /*ali_23all*/ %) /*species*/ %) /*species_data*/ presets %% (% use "ali_23all" max_alignment_len %i 2000 alignment_len %i 0 max_name_len %i 9 alignment %% (% alignment_name "ali_23all" alignment_len %i 4205 aligned %i 1 alignment_write_security %i 5 alignment_type "rna" %) /*alignment*/ key_data %% (% key %% (% key_name "name" key_type %i 12 %) /*key*/ key %% (% key_name "group_name" key_type %i 12 %) /*key*/ key %% (% key_name "acc" key_type %i 12 %) /*key*/ key %% (% key_name "ali_23all/data" key_type %i 12 %) /*key*/ key %% (% key_name "ali_23all/mark" key_type %i 6 %) /*key*/ key %% (% key_name "aligned" key_type %i 12 %) /*key*/ key %% (% key_name "author" key_type %i 12 %) /*key*/ %) /*key_data*/ %) /*presets*/ tree_data %% (% tree_main :4400 %% (% nnodes %i 2 tree "N0.014808,0.015168;N0.000360,0.000360;LEscCol10^ALEscColi^ALEscCol11^A" ruler %% (% size %f 0.100000 RADIAL %% (% ruler_y %f 0.341577 ruler_x %f 0.000000 %) /*RADIAL*/ text_x %f 0.000000 text_y %f 0.000000 ruler_width %i 0 LIST %% (% ruler_y %f 0.000000 ruler_x %f 0.000000 %) /*LIST*/ %) /*ruler*/ %) /*tree_main*/ %) /*tree_data*/ extended_data :7000 %% (% extended %% (% name "HELIX_PAIRS" ali_23all %% (% data "............................1a.. %) /*ali_23all*/ %) /*extended*/ extended %% (% name "gpl5rr" ali_23all %% (% phyl_options %N 10000106D02:0C03.0D02-07.87.DB6 bits %I "-----------------------+++++++++-+-+++ floats %F 10000106D04:0A.C816.425C03.5D.802F.BF03 %) /*ali_23all*/ %) /*extended*/ %) /*extended_data*/ tmp %% (% focus %% (% species_name "EscColi" cursor_position %i 323 %) /*focus*/ message "" %) /*tmp*/ WARNINGS The ASCII version of arb needs a lot of virtual memory when loaded.