# gnomap.properties
# for dmel het gff feature data

LIB_NAME=gff

# leave these to command line args?
DATA_ROOT=web/data/genomes/Drosophila_melanogaster/current/gff/
INDEX_PATH=indices/lucene/gff/

# locate data with regex file, folder patterns
# (the *_skip* patterns name the files/folders to leave out)
regex_folder=^\\w.+$
regex_file=^.*\\.gff$
regex_skipfile=.*\\.old
regex_skipfolder=tmp|old.*|.*\\.old

INDEX_CLASS=org.eugenes.index.LuceneTableIndexer
analyzer=org.eugenes.index.BiodataAnalyzer

# column names are listed explicitly below rather than taken from the data
fieldnames_lastcomment=false
fieldnames_firstline=false
fieldnames=chr source feature start stop score strand phase attributes
## attributes == group+others; feature == method in gff-speak; source == sub-feature

# lines starting (after optional whitespace) with ! or # are comments
regex_comment=^\\s*[!#]

## table field separator regex
## (alternatives are kept commented out; the tabs-only pattern is the active one)
## any whitespace
# regex_keyval=\\s*(\\S+)
## tabs only
regex_keyval=\\t*([^\\t]+)
## commas
# regex_keyval=,*([^,]+)
## commas, optional quotes
# regex_keyval=,*([^,]+)|,*"([^"]+)"

MIME_TYPE=text/tsv

## append existing index or create new
INDEX_APPEND=false
## index names as values (as well as field names)?
INDEX_TAGS=false
## index values ?
INDEX_ATTRIBUTES=false
## use fieldname xpath; full top.middle.last field name?
INDEX_XPATH=false
## INDEX_LEVEL=0 means index main records one level below xml tag
INDEX_LEVEL=2
INDEX_BLANKS=false

## search properties
## default search field
searchfield=all
format=native
outfields=chr,feature,start,stop,strand,attributes
title = GFF Features
# NOTE(review): column separators in header.native may originally have been tabs -- confirm
header.native=# Chr Feature Start Stop Strand Attributes
footer.native=
header.xml=\n
footer.xml=
xsl = gnomap.xsl
# xsl = fban.xsl

merge_factor=10
max_field_length=1000000
## memory crash cure:
# NOTE(review): MAX_FIELDS is assumed to be an active setting, not part of the comment above -- confirm
MAX_FIELDS=10000

# to create "contents" field of all text
indexall=false

## field indexing parameters
# special summary fields -- replace w/ fieldalias.TAG=newtag
sumfields=docid,docclass
field.docid=id
field.docclass=feature

## default - UnStored = index but don't store text
fieldtype=Text
# fieldtype.contents=UnStored
# fieldtype.summary=UnIndexed
fieldtype.ancestors=ignore
## these must always be stored; Text or UnIndexed
fieldtype.docid=Text
fieldtype.docclass=Text
fieldtype.url=Text
fieldtype.modified=Keyword

# NOTE(review): analyzer is also set earlier in this file with the same value
analyzer=org.eugenes.index.BiodataAnalyzer

## field filters
tokenfilter=org.eugenes.index.BiodataAnalyzer$DataFilter
#tokenfilter.contents=org.eugenes.index.BiodataAnalyzer$LowerWordFilter
tokenfilter.start=org.eugenes.index.BiodataAnalyzer$NumberFilter
tokenfilter.stop=org.eugenes.index.BiodataAnalyzer$NumberFilter

## field tokenizers -- replace with filters
tokenizer=org.eugenes.index.BiodataAnalyzer$LowerDataTokenizer
#tokenizer.contents=org.eugenes.index.BiodataAnalyzer$LowerWordTokenizer
#tokenizer.db_xref=org.eugenes.index.BiodataAnalyzer$LowerWordTokenizer

## need a fieldrecoder for attributes; group, ID, etc.
fieldrecoder.attributes=LucegeneIndexers$GFFAttribute_FieldRecoder
## fieldrecoder.range=LucegeneIndexers$Location_FieldRecoder