# dbs/lucegene/ugpxml.properties
# d.gilbert, apr 2004
#
# Library definition for the FlyBase Unified Gene Page (UGP) XML data set:
# where the data and index live, which fields are searched/reported, and how
# results are wrapped and linked.

LIB_NAME=ugpxml
title = FlyBase Unified Gene Page XML
DATA_ROOT=web/data/unified-gene/
INDEX_PATH=indices/lucene/ugpxml/
MIME_TYPE=text/xml

## --------- search/report keys -------------------
searchfield=all
outfields=Symbol,Full_name,Chromosome,Genome_map,Date,url
storefields=docid,GeneID,Symbol,Full_name,Date,Chromosome,Genome_map,url

# Cross-library links, one entry per "\n"-terminated segment.
# The last entry must NOT end with a "\" line continuation: a trailing
# backslash joins the following physical line to this value, which would
# swallow the linkto.seqs definition below.
linkto=\
GeneID>seqs-db_xref\n\
GeneID>bindxml-DBX\n
linkto.seqs=GeneID-db_xref

# reporthtmlxsl=conf/ugpxml2.xsl
# xsl = ugpxml2.xsl

# NOTE(review): header.native / footer.native currently contain only newline
# escapes. If an XML declaration or wrapper element was intended here, it is
# not present in this copy -- confirm against the deployed file.
header.native=\n\
\n\
\n
footer.native=\n
# header.xml-ugpxml=\n\
# \n
# footer.xml-ugpxml=\n

## The difference between the header./footer. variants: ".native" is for
## native file data, ".xml" is for lucegene index fields.
header.xml=\n
footer.xml=\n

batchformat = text/xml
batchformats = text/xml, text/plain, text/csv, text/tsv
title.text/xml=UGP XML
nativeformat = text/xml
# text/xml-ugpxml

docurl=http://flybase.net/cgi-bin/fbidq.html?
docurl.field=GeneID
# batchurl=/cgi-bin/fbidq.html?
# batchproc=cgi-bin/fbidq.html
batch.forward=lookup.jsp
batch.idfield=GeneID

## ? instead add as stylesheet to xml, with http://xxx/templates/*.xsl ?
# ---- disabled legacy report settings (kept for reference) ----
# xsl = GeneSummary2.xsl
# reporthtmlxsl=conf/GeneSummary2.xsl
# header.native=\n
# footer.native=\n

## Really want to shorten these xpath names in the index;
## the index xpath needs something else.
## (The long xpath names below are disabled; this file defines fieldalias.*
## entries that map the long xpaths to short names instead.)
# title.GeneSummaries.GeneSummary.BASIC_INFORMATION.Full_name=Name
# title.GeneSummaries.GeneSummary.BASIC_INFORMATION.Symbol=Symbol
# title.GeneSummaries.GeneSummary.BASIC_INFORMATION.Date=Date
# title.GeneSummaries.GeneSummary.GENE_PRODUCT.Gene_type=Gene_type
# outfields=docid,\
# GeneSummaries.GeneSummary.BASIC_INFORMATION.Symbol,\
# GeneSummaries.GeneSummary.BASIC_INFORMATION.Full_name,\
# GeneSummaries.GeneSummary.BASIC_INFORMATION.Date,\
# GeneSummaries.GeneSummary.GENE_PRODUCT.Gene_type,\
# url
# storefields=docid,\
# GeneSummaries.GeneSummary.BASIC_INFORMATION.Symbol,\
# GeneSummaries.GeneSummary.BASIC_INFORMATION.Full_name,\
# GeneSummaries.GeneSummary.BASIC_INFORMATION.Date,\
# GeneSummaries.GeneSummary.GENE_PRODUCT.Gene_type,\
# url

# ---- indexing values -----------------------
# Locate data using regex patterns for file and folder names.
# NOTE(review): if this file is read with java.util.Properties, a backslash
# before a non-escape character is dropped, so "\." reaches the regex engine
# as "." (any character). Confirm the loader; if so, write "\\." instead.
regex_folder=
#regex_file=^\w?.*\.xml$
#regex_file=^flybase-ugp\.xml$
regex_file=^.*ugp.*\.xml$
regex_skipfile=
regex_skipfolder=.*
# (tmp|.*\.old)

INDEX_CLASS=org.eugenes.index.LuceneXmlIndexer
## append to existing index (true) or create new (false)
INDEX_APPEND=false
## index names as values (as well as field names)?
INDEX_TAGS=true
## index values ?
INDEX_ATTRIBUTES=true
## use fieldname xpath; full top.middle.last field name?
INDEX_XPATH=true
## INDEX_LEVEL=0 means index main records one level below xml tag
INDEX_LEVEL=0
INDEX_BLANKS=false

## IndexWriter options
## merge=10 is default; 4 == less memory usage; 2 is the minimum
merge_factor=6
## max_field_length is the maximum number of terms per field
max_field_length=1000000
MAX_FIELDS=100000

# set to true to create a "contents" field of all text
indexall=false

## field indexing parameters
## sumfields list needs to match field.xxx common summary fields
sumfields=docid,docclass

# special summary fields -- replaced with fieldalias.TAG=newtag
# field.docid=ID
# field.docclass=CLA
# field.title=RETE
# field.summary=GeneSummary.SUMMARY.text
fieldalias.GeneSummaries.GeneSummary.att_id=docid
fieldalias.GeneSummaries.GeneSummary.Type=docclass

## default field type - Text or UnStored; UnStored = index but don't store text
fieldtype=UnStored

## looks like indexer memory blowup was from trying to index residues/alignment by mistake
# fieldtype.residues=ignore

fieldtype.att_id=Text
fieldtype.GeneID=Text
fieldtype.Symbol=Text
fieldtype.Full_name=Text
fieldtype.Species=Text
fieldtype.Date=Text
fieldtype.Title=Text
fieldtype.Source=Text
fieldtype.Type=Text
fieldtype.Chromosome=Text
fieldtype.Genome_map=Text
fieldtype.Scaffold=Text
# Summary.text
# fieldtype.text=UnStored

## these must always be stored; Text or UnIndexed
## Keyword is a problem: Search wants to lc() everything first, but Keyword is case-sensitive
fieldtype.docid=Text
fieldtype.docclass=Text
fieldtype.doclink=Text
fieldtype.url=UnIndexed
fieldtype.modified=Keyword
fieldtype.title=UnIndexed
fieldtype.summary=UnIndexed

analyzer=org.eugenes.index.BiodataAnalyzer2
# defaults for all fields; tokenfilter.<field> entries override per field
tokenizer=fbacode$LowerWordTokenizer
tokenfilter=fbacode$DebugFilter
tokenfilter.docid=fbacode$DebugEndOfRecordFilter
tokenfilter.start=fbacode$NumberFilter
tokenfilter.end=fbacode$NumberFilter

#fieldrecoder.start=LucegeneIndexers$GameSpan_FieldRecoder
#fieldrecoder.end=LucegeneIndexers$GameSpan_FieldRecoder
## FIXME - add Genome_map location recoder ? or change xml ?
#fieldrecoder.name=LucegeneIndexers$AddCommonField_FieldRecoder
fieldrecoder.att_id=LucegeneIndexers$AddCommonField_FieldRecoder

# Perl filter for generating fieldalias.<xpath>=<short name> lines
# (presumably fed a list of full xpath field names, one per line -- verify):
#
# while(<>){
# chomp;
# $f=$t=$_;
# s/GeneSummaries.GeneSummary.//;
# s/GENE_ONTOLOGY./GO./;
# s/goterm.//;
# s/FUNCTION.Expressions.//;
# s/FUNCTION.//;
# s/LITERATURE.//;
# s/LOCATION.//;
# s/BASIC_INFORMATION.//;
# s/GENE_PRODUCT.Transcripts.//;
# s/GENE_PRODUCT.//;
# s/Synonyms.Name/Synonyms/;
# s/Reference.ref./Reference./;
# s/att_//;
# print "fieldalias.$f=$_\n";
# }

# recode all those long xpaths to short field names
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.Date=Date
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.Full_name=Full_name
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.GeneID=GeneID
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.Species=Species
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.Species.att_id=Species.id
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.Species.up=Species.up
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.Symbol=Symbol
fieldalias.GeneSummaries.GeneSummary.BASIC_INFORMATION.Synonyms.Name=Synonyms
fieldalias.GeneSummaries.GeneSummary.FUNCTION.Expressions.Expression=Expression
fieldalias.GeneSummaries.GeneSummary.FUNCTION.Expressions.Expression.Phenotype=Expression.Phenotype
fieldalias.GeneSummaries.GeneSummary.FUNCTION.Expressions.Expression.att_type=Expression.type
fieldalias.GeneSummaries.GeneSummary.FUNCTION.External_links.db_xref=External_links.db_xref
fieldalias.GeneSummaries.GeneSummary.FUNCTION.Protein_domains.db_xref=Protein_domains.db_xref
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Biological_process.goterm=GO.Biological_process.goterm
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Biological_process.goterm.att_id=GO.Biological_process.id
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Biological_process.goterm.goevidence=GO.Biological_process.goevidence
# Short-name aliases for long xpath field names: GENE_ONTOLOGY, GENE_PRODUCT,
# LITERATURE and LOCATION sections.
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Cellular_component.goterm=GO.Cellular_component.goterm
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Cellular_component.goterm.att_id=GO.Cellular_component.id
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Cellular_component.goterm.goevidence=GO.Cellular_component.goevidence
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Cellular_component.goterm.ref=GO.Cellular_component.ref
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Molecular_function.goterm=GO.Molecular_function.goterm
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Molecular_function.goterm.att_id=GO.Molecular_function.id
fieldalias.GeneSummaries.GeneSummary.GENE_ONTOLOGY.Molecular_function.goterm.goevidence=GO.Molecular_function.goevidence
fieldalias.GeneSummaries.GeneSummary.GENE_PRODUCT.Gene_type=Gene_type
fieldalias.GeneSummaries.GeneSummary.GENE_PRODUCT.Transcripts.Transcript=Transcript
fieldalias.GeneSummaries.GeneSummary.GENE_PRODUCT.Transcripts.Transcript.Polypeptide=Transcript.Polypeptide
fieldalias.GeneSummaries.GeneSummary.GENE_PRODUCT.Transcripts.Transcript.Polypeptide.att_id=Transcript.Polypeptide.id
fieldalias.GeneSummaries.GeneSummary.GENE_PRODUCT.Transcripts.Transcript.att_id=Transcript.id
fieldalias.GeneSummaries.GeneSummary.LITERATURE.Reference.ref.Creator=Reference.Creator
fieldalias.GeneSummaries.GeneSummary.LITERATURE.Reference.ref.Date=Reference.Date
fieldalias.GeneSummaries.GeneSummary.LITERATURE.Reference.ref.Identifier=Reference.Identifier
fieldalias.GeneSummaries.GeneSummary.LITERATURE.Reference.ref.Source=Reference.Source
fieldalias.GeneSummaries.GeneSummary.LITERATURE.Reference.ref.Title=Reference.Title
fieldalias.GeneSummaries.GeneSummary.LITERATURE.Reference.ref.Type=Reference.Type
fieldalias.GeneSummaries.GeneSummary.LOCATION.Chromosome=Chromosome
fieldalias.GeneSummaries.GeneSummary.LOCATION.Genome_map=Genome_map
fieldalias.GeneSummaries.GeneSummary.LOCATION.Map=Map
# Short-name aliases for long xpath field names: remaining LOCATION entries,
# SUMMARY text, and the record-level Source/Title fields.
fieldalias.GeneSummaries.GeneSummary.LOCATION.Map.att_type=Map.type
fieldalias.GeneSummaries.GeneSummary.LOCATION.Scaffold=Scaffold
fieldalias.GeneSummaries.GeneSummary.SUMMARY.text=SUMMARY.text
fieldalias.GeneSummaries.GeneSummary.Source=Source
fieldalias.GeneSummaries.GeneSummary.Title=Title