# dbs/lucegene/blasttab.properties
# d.gilbert, jan 05
#
# Settings for indexing and searching NCBI BLAST tabular output.
# One "key=value" entry per line; a comment must sit on its own line
# (text after a value, including '#', would become part of the value).
#
# NCBI BLAST output table -m 8,9
# BLASTX 2.2.10 [Oct-19-2004]
# Query: contig_0
# Database: dmel-blast/dmel-translation
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings,
# q. start, q. end, s. start, s. end, e-value, bit score

LIB_NAME=blasttab
title = BLAST output table
# dmel-dvir-tblastn.xml
DATA_ROOT=web/data/blast/
INDEX_PATH=indices/lucene/blasttab/
MIME_TYPE=text/tsv
# text/table ? tsv ?

## --------- search/report keys -------------------
searchfield=all
outfields=QueryID SubjectID PctIdent AlignLen Mismatch Gaps Qstart Qend Sstart Send Evalue Bitscore
storefields=QueryID SubjectID PctIdent AlignLen Mismatch Gaps Qstart Qend Sstart Send Evalue Bitscore
docurl=lookup.jsp?id=
batch.forward=lookup.jsp
# value continues on the next line (trailing backslash)
linkto=\
 QueryID>seqs-docid\n

## for xslt to produce the result page tables
# resultxsl = conf/blast_result.xsl
# resultspage = resultxsl.jsp

header.native=QueryID SubjectID PctIdent AlignLen Mismatch Gaps Qstart Qend Sstart Send Evalue Bitscore
footer.native=

## difference in header. footer. is for native file data versus lucegene index fields
# NOTE(review): these two values hold only a newline; any XML markup they
# were meant to carry is not present here -- confirm against the original file.
header.xml=\n
footer.xml=\n

# ---- indexing values -----------------------
# locate data with regex file, folder patterns
regex_folder=
# backslash is doubled so the regex receives "\." (a literal dot), the same
# escaping used by regex_comment and regex_keyval below
regex_file=^.*blast.*\\.tsv$
regex_skipfile=
regex_skipfolder=.*
# (tmp|.*\.old)

INDEX_CLASS=org.eugenes.index.LuceneTableIndexer

fieldnames_lastcomment=false
fieldnames_firstline=false
#fieldnames=chr start feature gene map range id db_xref notes
fieldnames=QueryID SubjectID PctIdent AlignLen Mismatch Gaps Qstart Qend Sstart Send Evalue Bitscore

regex_comment=^\\s*[!#]

## table field separator regex
## any whitespace
# regex_keyval=\\s*(\\S+)
## tabs only
regex_keyval=\\t*([^\\t]+)
## commas
# regex_keyval=,*([^,]+)
## commas, optional quotes
# regex_keyval=,*([^,]+)|,*"([^"]+)"

## append existing index or create new
INDEX_APPEND=false

## index names as values (as well as field names)?
INDEX_TAGS=false
## index values ?
INDEX_ATTRIBUTES=false
## use fieldname xpath; full top.middle.last field name?
INDEX_XPATH=false
## INDEX_LEVEL=0 means index main records one level below xml tag
INDEX_LEVEL=0
INDEX_BLANKS=false

## IndexWriter opts
## merge=10 is default; 4 == less mem usage ; 2 minimum
merge_factor=6
## max_field_length is max # terms/field
max_field_length=1000000
MAX_FIELDS=50000

# to create "contents" field of all text
indexall=false

## field indexing parameters
## sumfields list needs to match field.xxx common summary fields
sumfields=docid

# special summary fields -- replace w/ fieldalias.TAG=newtag
# field.docid=ID
# field.docclass=CLA
# field.title=RETE
# field.summary=GeneSummary.SUMMARY.text

## default - Text or UnStored = index but dont store text
fieldtype=Text

field.docid=QueryID
# docid = SubjectID or QueryID ??
# Summary.text
# fieldtype.text=UnStored

## these must always be stored; Text or UnIndexed
## Keyword is problem as Search wants to lc() all 1st, Keyword is casefull
fieldtype.docid=Text
fieldtype.docclass=Text
fieldtype.url=UnIndexed
fieldtype.modified=Keyword
fieldtype.title=UnIndexed
fieldtype.summary=UnIndexed

analyzer=org.eugenes.index.BiodataAnalyzer2

# all field defaults
tokenizer=org.eugenes.index.BiodataFilters$LowerDataTokenizer
tokenfilter=fbacode$DebugFilter
tokenfilter.EOR=fbacode$DebugEndOfRecordFilter
fieldrecoder=fbacode$FBID_Recoder

# per-field token filters
# query,subject are cv/symbol terms; rest are numbers (some floats)
# this wont accept floats .. e.g. eval
# (no tokenfilter.Evalue entry is defined below)
tokenfilter.Bitscore=fbacode$NumberFilter,fbacode$DebugFilter
tokenfilter.Send=fbacode$NumberFilter
tokenfilter.Sstart=fbacode$NumberFilter,fbacode$DebugFilter
tokenfilter.Qend=fbacode$NumberFilter
tokenfilter.Qstart=fbacode$NumberFilter,fbacode$DebugFilter
tokenfilter.Gaps=fbacode$NumberFilter
tokenfilter.Mismatch=fbacode$NumberFilter
tokenfilter.AlignLen=fbacode$NumberFilter,fbacode$DebugFilter
tokenfilter.PctIdent=fbacode$NumberFilter,fbacode$DebugFilter

#----- fix blast.xml output ----
# (disabled: every line of the perl script below is commented out)
# perlfix= \
# #!/usr/bin/perl
# # slimblastxml.pl -- cut verbosity down for NCBI BLAST -m 7 xml output
# print "\n\n";
# while(<>){
#   next if (m,^<\?xml, || m,^<\!DOCTYPE,);
#   if (m,<(BlastOutput_reference|BlastOutput_param|Iteration_stat),) { $skipto= $1; }
#   if ($skipto) {
#     $skipto='' if (m,,);
#     next;
#   }
#   print;
# }
# print "\n"; # trailer