# dbs/lucegene/blasttab.properties
# d.gilbert, jan 05
# NCBI BLAST tabular output (-m 8, or -m 9 which adds comment lines); sample -m 9 header follows:
# BLASTX 2.2.10 [Oct-19-2004]
# Query: contig_0
# Database: dmel-blast/dmel-translation
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings,
# q. start, q. end, s. start, s. end, e-value, bit score
# ---- library identity and locations -----------
# Library name and display title for this data set.
LIB_NAME=blasttab
title = BLAST output table
# dmel-dvir-tblastn.xml
# Source data folder and index folder
# (presumably relative to the application root -- TODO confirm against the loader).
DATA_ROOT=web/data/blast/
INDEX_PATH=indices/lucene/blasttab/
# NOTE(review): the IANA-registered media type for tab-separated data is
# text/tab-separated-values; confirm nothing matches on "text/tsv" before changing it.
MIME_TYPE=text/tsv
# text/table ? tsv ?
## --------- search/report keys -------------------
# NOTE(review): indexall=false elsewhere in this file; confirm that "all"
# resolves to a searchable field.
searchfield=all
# outfields and storefields list the same 12 columns, in the order of the BLAST
# "Fields:" header line; presumably they must stay in sync with the fieldnames key.
outfields=QueryID SubjectID PctIdent AlignLen Mismatch Gaps Qstart Qend Sstart Send Evalue Bitscore
storefields=QueryID SubjectID PctIdent AlignLen Mismatch Gaps Qstart Qend Sstart Send Evalue Bitscore
docurl=lookup.jsp?id=
batch.forward=lookup.jsp
# The linkto value continues onto the next line (trailing backslash);
# do not insert comments or blank lines between these two lines.
linkto=\
QueryID>seqs-docid\n
## for xslt to produce the result page tables
# resultxsl = conf/blast_result.xsl
# resultspage = resultxsl.jsp
# header.native / footer.native: column heading (and empty footer) for native file data
header.native=QueryID SubjectID PctIdent AlignLen Mismatch Gaps Qstart Qend Sstart Send Evalue Bitscore
footer.native=
## header./footer. values differ for native file data versus lucegene index fields
# header.xml / footer.xml are each a single newline escape
header.xml=\n
footer.xml=\n
# ---- indexing values -----------------------
# locate data with regex file, folder patterns
# Backslashes in regex values are doubled, as in regex_comment and regex_keyval:
# loading a .properties file consumes one level of backslash escaping, and a
# single backslash before "." would be silently dropped.
regex_folder=
# file names containing "blast" and ending in a literal ".tsv"
regex_file=^.*blast.*\\.tsv$
regex_skipfile=
# NOTE(review): ".*" matches every folder name, so presumably all sub-folders
# are skipped; confirm this is intended. Earlier alternative kept below.
regex_skipfolder=.*
# (tmp|.*\.old)
# ---- indexer class and table parsing ----------
# Indexer class for this library.
INDEX_CLASS=org.eugenes.index.LuceneTableIndexer
# Both automatic sources of column names are switched off; the names come from
# the explicit fieldnames list below (presumably -- confirm in the indexer).
fieldnames_lastcomment=false
fieldnames_firstline=false
#fieldnames=chr start feature gene map range id db_xref notes
fieldnames=QueryID SubjectID PctIdent AlignLen Mismatch Gaps Qstart Qend Sstart Send Evalue Bitscore
# comment lines in the data: optional leading whitespace, then ! or #
regex_comment=^\\s*[!#]
## table field separator regex; exactly one regex_keyval should be active
## any whitespace
# regex_keyval=\\s*(\\S+)
## tabs only
regex_keyval=\\t*([^\\t]+)
## commas
# regex_keyval=,*([^,]+)
## commas, optional quotes
# regex_keyval=,*([^,]+)|,*"([^"]+)"
# ---- index build options ----------------------
## append to an existing index (true) or create a new one (false)
INDEX_APPEND=false
## index field names as values (as well as field names)?
INDEX_TAGS=false
## index attribute values ?
INDEX_ATTRIBUTES=false
## use fieldname xpath; full top.middle.last field name?
INDEX_XPATH=false
## INDEX_LEVEL=0 means index main records one level below xml tag
INDEX_LEVEL=0
# presumably controls whether empty values are indexed -- TODO confirm
INDEX_BLANKS=false
## IndexWriter opts
## merge=10 is default; 4 == less memory usage; 2 is the minimum
merge_factor=6
## max_field_length is the maximum number of terms per field
max_field_length=1000000
# NOTE(review): no description of MAX_FIELDS is given here; presumably an upper
# bound on the number of fields -- confirm against the indexer.
MAX_FIELDS=50000
# set to true to create a "contents" field of all text
indexall=false
## field indexing parameters
## sumfields list needs to match field.xxx common summary fields
sumfields=docid
# special summary fields -- replace w/ fieldalias.TAG=newtag
# examples from other libraries, kept for reference:
# field.docid=ID
# field.docclass=CLA
# field.title=RETE
# field.summary=GeneSummary.SUMMARY.text
## default field type - Text, or UnStored = index but don't store text
fieldtype=Text
# the docid summary field is taken from the QueryID column
field.docid=QueryID
# docid = SubjectID or QueryID ??
# Summary.text
# fieldtype.text=UnStored
## these must always be stored; Text or UnIndexed
## Keyword is a problem: Search wants to lc() everything first, but Keyword is case-sensitive
fieldtype.docid=Text
fieldtype.docclass=Text
fieldtype.url=UnIndexed
fieldtype.modified=Keyword
fieldtype.title=UnIndexed
fieldtype.summary=UnIndexed
# ---- analyzer, tokenizer and per-field token filters ----
analyzer=org.eugenes.index.BiodataAnalyzer2
# all field defaults
tokenizer=org.eugenes.index.BiodataFilters$LowerDataTokenizer
# NOTE(review): DebugFilter is the default filter and is also appended to several
# per-field chains below; presumably diagnostic -- confirm it should stay enabled.
tokenfilter=fbacode$DebugFilter
tokenfilter.EOR=fbacode$DebugEndOfRecordFilter
fieldrecoder=fbacode$FBID_Recoder
# query,subject are cv/symbol terms; rest are numbers (some floats)
# NumberFilter won't accept floats (e.g. Evalue), so Evalue has no tokenfilter entry.
# NOTE(review): PctIdent and Bitscore may also hold non-integer values in BLAST
# tabular output; confirm NumberFilter handles them as intended.
tokenfilter.Bitscore=fbacode$NumberFilter,fbacode$DebugFilter
tokenfilter.Send=fbacode$NumberFilter
tokenfilter.Sstart=fbacode$NumberFilter,fbacode$DebugFilter
tokenfilter.Qend=fbacode$NumberFilter
tokenfilter.Qstart=fbacode$NumberFilter,fbacode$DebugFilter
tokenfilter.Gaps=fbacode$NumberFilter
tokenfilter.Mismatch=fbacode$NumberFilter
tokenfilter.AlignLen=fbacode$NumberFilter,fbacode$DebugFilter
tokenfilter.PctIdent=fbacode$NumberFilter,fbacode$DebugFilter
#----- fix blast.xml output (disabled; applies to BLAST -m 7 XML output, not to this tab table) ----
# perlfix= \
# #!/usr/bin/perl
# # slimblastxml.pl -- cut verbosity down for NCBI BLAST -m 7 xml output
# print "\n\n";
# while(<>){
# next if (m,^<\?xml, || m,^<\!DOCTYPE,);
# if (m,<(BlastOutput_reference|BlastOutput_param|Iteration_stat),) { $skipto= $1; }
# if ($skipto) {
# $skipto='' if (m,$skipto>,);
# next;
# }
# print;
# }
# print "\n"; # trailer