# dbs/lucegene/ugpxml.properties
# d.gilbert, apr 2004
# Library name, title, data/index locations and MIME type for this library.
# NOTE(review): DATA_ROOT and INDEX_PATH are relative paths -- presumably
# resolved against the application root; confirm.
LIB_NAME=ugpxml
title = FlyBase Unified Gene Page XML
DATA_ROOT=web/data/unified-gene/
INDEX_PATH=indices/lucene/ugpxml/
MIME_TYPE=text/xml
## --------- search/report keys -------------------
# Search/report field lists. Every name in outfields also appears in storefields.
searchfield=all
outfields=Symbol,Full_name,Chromosome,Genome_map,Date,url
storefields=docid,GeneID,Symbol,Full_name,Date,Chromosome,Genome_map,url
# linkto is a single value spread over several physical lines: each entry ends
# with a literal \n, and every line except the last ends with a continuation
# backslash. The last entry must NOT end with a continuation backslash,
# otherwise the linkto.seqs line that follows is absorbed into the linkto
# value and linkto.seqs is never defined as its own key.
linkto=\
GeneID>seqs-db_xref\n\
GeneID>bindxml-DBX\n
linkto.seqs=GeneID-db_xref
# reporthtmlxsl=conf/ugpxml2.xsl
# xsl = ugpxml2.xsl
# header.* / footer.* values per output kind. header.native is one value
# continued over three physical lines; do not insert comments inside it.
# NOTE(review): as written, every header/footer value here consists only of
# \n escapes -- confirm no XML prolog / root-tag text is missing.
header.native=\n\
\n\
\n
footer.native=\n
# header.xml-ugpxml=\n\
# \n
# footer.xml-ugpxml=\n
## the header./footer. variants differ by data source: .native is for native
## file data, .xml presumably for lucegene index fields
header.xml=\n
footer.xml=\n
# batchformat (text/xml) is one entry of the batchformats list.
batchformat = text/xml
batchformats = text/xml, text/plain, text/csv, text/tsv
title.text/xml=UGP XML
nativeformat = text/xml
# text/xml-ugpxml
# docurl ends with '?'; presumably the value of the docurl.field field
# (GeneID) is appended as the query string -- confirm against the caller.
docurl=http://flybase.net/cgi-bin/fbidq.html?
docurl.field=GeneID
# batchurl=/cgi-bin/fbidq.html?
# batchproc=cgi-bin/fbidq.html
batch.forward=lookup.jsp
batch.idfield=GeneID
## ? instead add as stylesheet to xml, with http://xxx/templates/*.xsl ?
# xsl = GeneSummary2.xsl
# reporthtmlxsl=conf/GeneSummary2.xsl
# header.native=\n
# footer.native=\n
## really want to shorten these xpath names in index;
## the index xpath needs something else;
# title.GeneSummaries.GeneSummary.BASIC_INFORMATION.Full_name=Name
# title.GeneSummaries.GeneSummary.BASIC_INFORMATION.Symbol=Symbol
# title.GeneSummaries.GeneSummary.BASIC_INFORMATION.Date=Date
# title.GeneSummaries.GeneSummary.GENE_PRODUCT.Gene_type=Gene_type
# outfields=docid,\
# GeneSummaries.GeneSummary.BASIC_INFORMATION.Symbol,\
# GeneSummaries.GeneSummary.BASIC_INFORMATION.Full_name,\
# GeneSummaries.GeneSummary.BASIC_INFORMATION.Date,\
# GeneSummaries.GeneSummary.GENE_PRODUCT.Gene_type,\
# url
# storefields=docid,\
# GeneSummaries.GeneSummary.BASIC_INFORMATION.Symbol,\
# GeneSummaries.GeneSummary.BASIC_INFORMATION.Full_name,\
# GeneSummaries.GeneSummary.BASIC_INFORMATION.Date,\
# GeneSummaries.GeneSummary.GENE_PRODUCT.Gene_type,\
# url
# ---- indexing values -----------------------
# locate data files and folders with regex patterns
# NOTE(review): if this file is loaded with java.util.Properties, a backslash
# before a non-escape character is dropped on load, so "\." below is read as
# "." (any character) and regex_file becomes ^.*ugp.*.xml$ -- confirm the
# loader, and write "\\." if a literal dot is required.
regex_folder=
#regex_file=^\w?.*\.xml$
#regex_file=^flybase-ugp\.xml$
regex_file=^.*ugp.*\.xml$
regex_skipfile=
# NOTE(review): ".*" matches every folder name -- presumably intended to keep
# the indexer out of all subfolders; the narrower alternative is kept below.
regex_skipfolder=.*
# (tmp|.*\.old)
# Indexer class and its options.
INDEX_CLASS=org.eugenes.index.LuceneXmlIndexer
## append to existing index, or create new
INDEX_APPEND=false
## index tag names as values (as well as field names)?
INDEX_TAGS=true
## index attribute values ? (NOTE(review): original comment said only
## "index values ?" -- confirm)
INDEX_ATTRIBUTES=true
## use xpath as field name; i.e. full top.middle.last field name?
INDEX_XPATH=true
## INDEX_LEVEL=0 means index main records one level below xml tag
INDEX_LEVEL=0
INDEX_BLANKS=false
## IndexWriter opts
## merge_factor: 10 is default; 4 == less mem usage; 2 is minimum
merge_factor=6
## max_field_length is max # terms/field
max_field_length=1000000
MAX_FIELDS=100000
# indexall: whether to create a "contents" field of all text
indexall=false
## field indexing parameters
## sumfields list needs to match field.xxx common summary fields
sumfields=docid,docclass
# special summary fields -- replaced with fieldalias.TAG=newtag
# field.docid=ID
# field.docclass=CLA
# field.title=RETE
# field.summary=GeneSummary.SUMMARY.text
# these two aliases supply the docid and docclass names listed in sumfields
fieldalias.GeneSummaries.GeneSummary.att_id=docid
fieldalias.GeneSummaries.GeneSummary.Type=docclass
## default field type: Text or UnStored (UnStored = index but don't store text)
fieldtype=UnStored
## looks like indexer mem blowup was on trying to index residues/alignment by mistake
# fieldtype.residues=ignore
# per-field overrides of the default field type
fieldtype.att_id=Text
fieldtype.GeneID=Text
fieldtype.Symbol=Text
fieldtype.Full_name=Text
fieldtype.Species=Text
fieldtype.Date=Text
fieldtype.Title=Text
fieldtype.Source=Text
fieldtype.Type=Text
fieldtype.Chromosome=Text
fieldtype.Genome_map=Text
fieldtype.Scaffold=Text
# Summary.text
# fieldtype.text=UnStored
## these must always be stored; Text or UnIndexed
## Keyword is a problem: Search wants to lc() everything first, but Keyword is case-sensitive
fieldtype.docid=Text
fieldtype.docclass=Text
fieldtype.doclink=Text
fieldtype.url=UnIndexed
fieldtype.modified=Keyword
fieldtype.title=UnIndexed
fieldtype.summary=UnIndexed
# Analyzer plus tokenizer / token filter / field recoder class names.
analyzer=org.eugenes.index.BiodataAnalyzer2
# all field defaults (keys without a field suffix); tokenfilter.<field>
# entries presumably override the default for that field -- confirm
tokenizer=fbacode$LowerWordTokenizer
# NOTE(review): the default and docid token filters are Debug* classes --
# confirm this is intended outside of debugging.
tokenfilter=fbacode$DebugFilter
tokenfilter.docid=fbacode$DebugEndOfRecordFilter
# NOTE(review): no field or alias named start/end is defined in the visible
# part of this file -- confirm these two entries are still needed.
tokenfilter.start=fbacode$NumberFilter
tokenfilter.end=fbacode$NumberFilter
#fieldrecoder.start=LucegeneIndexers$GameSpan_FieldRecoder
#fieldrecoder.end=LucegeneIndexers$GameSpan_FieldRecoder
## FIXME - add Genome_map location recoder ? or change xml ?
#fieldrecoder.name=LucegeneIndexers$AddCommonField_FieldRecoder
fieldrecoder.att_id=LucegeneIndexers$AddCommonField_FieldRecoder
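# perl snippet that derives short alias names from the long xpath field names
# (reads one xpath name per input line):
# while(<>){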
# chomp;
# $f=$t=$_;
# s/GeneSummaries.GeneSummary.//;
# s/GENE_ONTOLOGY./GO./;
# s/goterm.//;
# s/FUNCTION.Expressions.//;
# s/FUNCTION.//;
# s/LITERATURE.//;
# s/LOCATION.//;
# s/BASIC_INFORMATION.//;
# s/GENE_PRODUCT.Transcripts.//;
# s/GENE_PRODUCT.//;
# s/Synonyms.Name/Synonyms/;
# s/Reference.ref./Reference./;
# s/att_//;
# print "fieldalias.$f=$_\n";
# }
# recode all those long xpaths to short alias names
# (fieldalias.<full xpath>=<short name>; the entries match the output of the
# commented perl snippet above)
# NOTE(review): only Cellular_component has a goterm.ref alias; presumably the
# other GENE_ONTOLOGY branches had no ref element in the data this list was
# generated from -- confirm.
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.Date=Date
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.Full_name=Full_name
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.GeneID=GeneID
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.Species=Species
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.Species.att_id=Species.id
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.Species.up=Species.up
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.Symbol=Symbol
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.Synonyms.Name=Synonyms
fieldalias.GeneSummaries.GeneSummary.FUNCTION.Expressions.Expression=Expression
fieldalias.GeneSummaries.GeneSummary.FUNCTION.Expressions.Expression.Phenotype=Expression.Phenotype
fieldalias.GeneSummaries.GeneSummary.FUNCTION.Expressions.Expression.att_type=Expression.type
fieldalias.GeneSummaries.GeneSummary.FUNCTION.External_links.db_xref=External_links.db_xref
fieldalias.GeneSummaries.GeneSummary.FUNCTION.Protein_domains.db_xref=Protein_domains.db_xref
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Biological_process.goterm=GO.Biological_process.goterm
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Biological_process.goterm.att_id=GO.Biological_process.id
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Biological_process.goterm.goevidence=GO.Biological_process.goevidence
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Cellular_component.goterm=GO.Cellular_component.goterm
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Cellular_component.goterm.att_id=GO.Cellular_component.id
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Cellular_component.goterm.goevidence=GO.Cellular_component.goevidence
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Cellular_component.goterm.ref=GO.Cellular_component.ref
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Molecular_function.goterm=GO.Molecular_function.goterm
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Molecular_function.goterm.att_id=GO.Molecular_function.id
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Molecular_function.goterm.goevidence=GO.Molecular_function.goevidence
fieldalias.GeneSummaries.GeneSummary.GENE_PRODUCT.Gene_type=Gene_type
fieldalias.GeneSummaries.GeneSummary.GENE_PRODUCT.Transcripts.Transcript=Transcript
fieldalias.GeneSummaries.GeneSummary.GENE_PRODUCT.Transcripts.Transcript.Polypeptide=Transcript.Polypeptide
fieldalias.GeneSummaries.GeneSummary.GENE_PRODUCT.Transcripts.Transcript.Polypeptide.att_id=Transcript.Polypeptide.id
fieldalias.GeneSummaries.GeneSummary.GENE_PRODUCT.Transcripts.Transcript.att_id=Transcript.id
fieldalias.GeneSummaries.GeneSummary.LITERATURE.Reference.ref.Creator=Reference.Creator
fieldalias.GeneSummaries.GeneSummary.LITERATURE.Reference.ref.Date=Reference.Date
fieldalias.GeneSummaries.GeneSummary.LITERATURE.Reference.ref.Identifier=Reference.Identifier
fieldalias.GeneSummaries.GeneSummary.LITERATURE.Reference.ref.Source=Reference.Source
fieldalias.GeneSummaries.GeneSummary.LITERATURE.Reference.ref.Title=Reference.Title
fieldalias.GeneSummaries.GeneSummary.LITERATURE.Reference.ref.Type=Reference.Type
fieldalias.GeneSummaries.GeneSummary.LOCATION.Chromosome=Chromosome
fieldalias.GeneSummaries.GeneSummary.LOCATION.Genome_map=Genome_map
fieldalias.GeneSummaries.GeneSummary.LOCATION.Map=Map
fieldalias.GeneSummaries.GeneSummary.LOCATION.Map.att_type=Map.type
fieldalias.GeneSummaries.GeneSummary.LOCATION.Scaffold=Scaffold
fieldalias.GeneSummaries.GeneSummary.SUMMARY.text=SUMMARY.text
fieldalias.GeneSummaries.GeneSummary.Source=Source
fieldalias.GeneSummaries.GeneSummary.Title=Title