// fbacode.java

/**
  * fbacode
  Data class and field specific parsers to handle
  flybase reference acode data.
  
  See also fbacode.properties which links these classes
  to general acode parser.

  .. some of these are not for Acode data; move out ?
  
  E.g., fbgnslim2.properties:
    field.docid=MAIN_ID
    fieldrecoder.ID=fbacode$FBMainID_Recoder  -- decide if ID  is for main record
    tokenizer.MAIN_ID=fbacode$IDTokens

    fieldrecoder.GSYM=fbacode$SYM_Recoder   -- parses species field; create lowercase field
    tokenizer.GSYM=fbacode$SymTokens
    tokenizer.GSYM.lc=fbacode$LowerSymTokens

    tokenizer.ENZ=fbacode$words      -- parse as words
    tokenizer.ENZ.cv=fbacode$cvterms -- parse as cv phrases
    fieldrecoder.ENZ=fbacode$GO_Recoder   -- creates .cv subfield w/ cv term string, GO/EC ids

  Equivalent to these flybase srs/icarus/db/fbgn.is, fbrules.is parsers
 
			## take FBgn's only
  pri_acc: ~  { $In:[fields c:{ID ID2}] $Out }
         # ( lrec?  key ( /FBgn[0-9]+/ {if:$tp=='ID' $Uniq:ID else $Uniq}
           ( lrec? key
             ( /FBgn[0-9]+/ { if:$tp=='ID' $Uniq:ID else $Uniq}
             | /PFgn([0-9]+)/ { $Uniq:[s:"FBgn$1"] if:$tp=='ID' $Uniq:ID }
             | word | nonword )+ nl )+
           ~
                       ## take FBgn's only
  ID: ~         { $In:[pri_acc c:ID] $Out }
         (/(FBgn|PFgn)[0-9]+/ { $Uniq $entryName=$Ct } | word | nonword)+
      ~ 

  SYN: ~	{ $In:[preSYMlike c:{GSYM SYN}] $Out} 
  		( /([0-9A-Za-z.]+)\\\\(.+)/ {$Uniq  $Uniq:[s:$2]}
  		| /.+/ {$Uniq}
  		)* ~		

  SYM: ~	{ $In:[preSYMlike c:GSYM] $Out } 
  		( /([0-9A-Za-z.]+)\\\\(.+)/ {$Uniq $Uniq:[s:$2] $species=$1} 
  		| /.+/ {$Uniq}
  		)* ~

    # Enzyme - CV field + GO field + EC num field  
		#  'serine-tRNA ligase ; GO:0004828 ; EC:6.1.1.11 .....' - new May20
    #  'ENZ|arylalkylamine N-acetyltransferase == EC 2.3.1.87' - old
  enzfuncRecoder: ~
		 x{ $cv='' }
		 ( word {$Uniq $StrApp:[$cv s:$Ct]} | / +/ {$StrApp:[$cv s:'_']} | /[^ ;\n]/ {$StrApp:[$cv s:$Ct]} )*
     x{ $Uniq:[s:$cv] $Uniq:[TERM s:$cv] 
				$StrApp:[$cv s:'_'] $Uniq:[s:$cv] $Uniq:[TERM s:$cv] } 
     ( /EC[ =:-]* / /[0-9-]+\\.[0-9-]+\\.[0-9\\.-]+/ { 
		 			$Uniq $Uniq:[ECNUM s:$Ct] $Uniq:[s:"EC:$Ct"] }  
		 | /GO[ =:-]* / /[0-9]+/ { 
		 			$Uniq $Uniq:[GO s:$Ct] $Uniq:[s:"GO:$Ct"] }  
     | / *;/ 
     | (/ *[|][^\n]+/) # skip comment about homology in new format 
		 | /[^\n]/
     )*
     ~

  $fbrules.ENZ -- enzfuncRecoder
	$fbrules.FNC -- enzfuncRecoder
	$fbrules.CEL -- enzfuncRecoder
  

  Usage, given location in dbs/lucegene/ that index script finds:
  
    bin/lucegene-index.sh  -debug -l fbrftest >& tmp/log.fbrft

Some SRS icarus fbgn.is rules
##	fbid: 		~ /[FP][BF][A-Za-z][A-Za-z][0-9]+/ ~
##	fbidpatt:  ~ /(FB|PF)[a-z][a-z][A-Za-z0-9-]+/  ~   ## this one includes FBst-BL0123 stocks
##  word: 		~ /[0-9A-Za-z_]+/ ~  
##  nonword: 	~ (/[^0-9A-Za-z_\n]+/ | /\n +/) ~  
##  symword: 	~ /[^ \/|\n]+/ ~ # new style - sure delimiters  
##	glocword: ~ /[0-9A-Za-z_.\[\]\-]+/ ~
##	clocword: ~ /[<>*?-]*[0-9h][0-9A-Za-z_<>?+*-] / ~


*/

import java.io.*;
import java.util.*;
import java.util.regex.*;
import java.text.SimpleDateFormat;

import org.eugenes.index.LuceneBaseIndexer;
import org.eugenes.index.LuceneBaseIndexer.FieldRecoder;

import org.eugenes.index.biodata.DataFilter;
import org.eugenes.index.biodata.DataTokenizer;
import org.eugenes.index.BiodataFilters.NumberField;


import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Token;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DateField;


public class fbacode
{

  // Named values for the last (boolean) argument passed to idx.addField(...) throughout this file.
  // NOTE(review): presumably nodups=true asks the indexer not to add a second value for the same field; confirm in LuceneBaseIndexer.
  final static boolean nodups=true, withdups=false; // add >1 field value to doc?
  // Threshold that the recoders below compare fieldPath.indexOf(xpathDelim) against
  // to decide whether a field belongs to the main record or to a sub-record.
  final static int kSubrecLevel = 1; // for fieldpath count
  
  // Debug flag and log stream, copied from LuceneBaseIndexer when this class is initialized.
  static boolean debug    = LuceneBaseIndexer.debug;
  static PrintStream logp = LuceneBaseIndexer.logp;
  
 /**
   Small field recoder that adds a "MAIN_ID" field for acode records that carry
   several nested ID fields (the main acode indexer should really handle this).

   A field is treated as belonging to the main record when the index of
   LuceneBaseIndexer.xpathDelim in its fieldPath is not greater than kSubrecLevel;
   any other field is left untouched and kNoChange is returned.
  */
  public static class FBMainID_Recoder implements FieldRecoder {

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      int delimAt = fieldPath.indexOf(LuceneBaseIndexer.xpathDelim);
      boolean mainRecord = (delimAt <= kSubrecLevel);
      if (mainRecord) {
        idx.addField("MAIN_ID", val.toString(), doc, nodups);
        return kFieldAdded;
      }
      return kNoChange;
    }
  }

  /**
   Greek_Recoder rewrites SGML greek entities (e.g. &agr;) and
   <up>/<down> tags in a field value as plain text.

   The value buffer is replaced in place and kValChanged is returned when
   isgreek() reports something to convert; otherwise kNoChange.
  */
  public static class Greek_Recoder implements FieldRecoder {

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      String original = val.toString();
      if (!isgreek(original)) {
        return kNoChange;
      }
      String plain = greek2text(original);
      val.replace(0, val.length(), plain);
      return kValChanged;
    }
  }

  /**
   Generalisation of FBMainID_Recoder usable for any field: adds a copy of the
   value under the name "MAIN_"+fieldName -- use other syntax?

   Only fields whose fieldPath has xpathDelim at an index not greater than
   kSubrecLevel are copied; others return kNoChange.
  */
  public static class FBMainField_Recoder implements FieldRecoder {

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      int delimAt = fieldPath.indexOf(LuceneBaseIndexer.xpathDelim);
      if (delimAt <= kSubrecLevel) {
        String mainName = "MAIN_" + fieldName;
        idx.addField(mainName, val.toString(), doc, nodups);
        return kFieldAdded;
      }
      return kNoChange;
    }
  }

  /**
   Adds a "MAIN_"+fieldName copy of the value for fields that either sit in a
   path ending with "ALESR" or have xpathDelim at an index not greater than
   kSubrecLevel. All other fields are skipped with kNoChange.
  */
  public static class FB_ALESR_Field_Recoder implements FieldRecoder {

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      boolean inAlleleSubrec = fieldPath.endsWith("ALESR");
      boolean wanted = inAlleleSubrec
           || fieldPath.indexOf(LuceneBaseIndexer.xpathDelim) <= kSubrecLevel;
      if (!wanted) {
        return kNoChange;
      }
      idx.addField("MAIN_" + fieldName, val.toString(), doc, nodups);
      return kFieldAdded;
    }
  }


  /**
   add default gene class value if not already added
   EOR is a special-case field sent at the end of each record; we don't really want to index it
  */
//  public static class FBGN_EOR_Recoder implements FieldRecoder { 
//    static String kDefaultClass="gene";
//    static String kDefaultSpecies="Dmel";
//
//    public int recodeField(LuceneBaseIndexer idx, Document doc, 
//      String fieldName, String fieldPath, StringBuffer val)
//    {
//      // should be able to use one class here and get FBGN/FBAN from fieldPath: GENR.* = FBgn; 
//      //if (fieldPath.indexOf(LuceneBaseIndexer.xpathDelim)>1) return kNoChange;  
//      boolean added= idx.addField("MAIN_CLA", kDefaultClass, doc, nodups); 
//      added = added || idx.addField("species", kDefaultSpecies, doc, nodups);  
//      if (debug) logp.println("# EOR "+fieldPath);
//      return kSkipField + kFieldAdded; // dont index or leave this to props ?
//   }
//  }
  
  //? need FB_EOSUBR_Recoder for FBgn/FBal ?
  
  /**
   End-of-record recoder: adds a "MAIN_CLA" class value chosen from the
   fieldPath, and a default "species" value for every class except papers.

   Class selection, first match wins:
     fieldPath contains "ALESR"     -> kFBalClass
     fieldPath starts with "GENR"   -> kFBgnClass
     fieldPath starts with "GADR"   -> kFBanClass
     fieldPath starts with "REFR"   -> kFBrfClass
     otherwise                      -> "" (empty string)

   Always returns kSkipField + kFieldAdded.
  */
  public static class FB_EOR_Recoder implements FieldRecoder {
    static String kFBanClass="annotation"; // gene? now use annotation
    static String kFBgnClass="gene";
    static String kFBalClass="allele";
    static String kFBrfClass="paper";
    static String kDefaultSpecies="Dmel";

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      String mclass = "";
      if (fieldPath.indexOf("ALESR") >= 0) mclass = kFBalClass;
      else if (fieldPath.startsWith("GENR")) mclass = kFBgnClass;
      else if (fieldPath.startsWith("GADR")) mclass = kFBanClass;
      else if (fieldPath.startsWith("REFR")) mclass = kFBrfClass;

      // NOTE(review): an unrecognised fieldPath still adds MAIN_CLA with an empty value, as before.
      idx.addField("MAIN_CLA", mclass, doc, nodups);

      // Compare by content, not by reference; papers get no default species.
      boolean isPaper = mclass.equals(kFBrfClass);
      if (!isPaper) {
        idx.addField("species", kDefaultSpecies, doc, nodups);
      }

      if (debug) logp.println("# EOR "+fieldPath);
      return kSkipField + kFieldAdded;
    }
  }

    // should rename this: FBidDBX_Recoder
  /**
   Scans the whole text of a field for FlyBase-style identifiers (a word
   boundary, "FB" or "PF", one or more letters, then digits) and adds each
   one found to the "DBX" field. Returns kFieldAdded when at least one
   identifier was added, else kNoChange.
  */
  public static class Contents_Recoder implements FieldRecoder {
    static Pattern fbid  = Pattern.compile("\\b((FB|PF)[a-zA-Z]+\\d+)");

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      int found = 0;
      for (Matcher m = fbid.matcher(val.toString()); m.find(); found++) {
        idx.addField("DBX", m.group(1), doc, withdups);
      }
      if (found == 0) {
        return kNoChange;
      }
      return kFieldAdded;
    }
  }
  
  /**
   Like Contents_Recoder but with a stricter identifier pattern: "FB" or "PF"
   followed by 2 to 5 letters and then digits. Every match in the field text
   is added to the "DBX" field; returns kFieldAdded if any match was found,
   else kNoChange.
  */
  public static class FBID_Recoder implements FieldRecoder {
    static Pattern fbid  = Pattern.compile("\\b(FB|PF)[A-Za-z]{2,5}[0-9]+");

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      Matcher m = fbid.matcher(val.toString());
      boolean any = false;
      while (m.find()) {
        idx.addField("DBX", m.group(), doc, withdups);
        any = true;
      }
      return any ? kFieldAdded : kNoChange;
    }
  }

  /**
    BINDInter_Recoder for bindxml with Interaction_a and Interaction_b sections:
    indexes a field a second time under fieldName+"_a" or fieldName+"_b",
    depending on which section name occurs in the fieldPath ("Interaction_a"
    is checked first). Fields outside both sections return kNoChange.
  */
  public static class BINDInter_Recoder implements FieldRecoder {

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      boolean sideA = fieldPath.indexOf("Interaction_a") >= 0;
      boolean sideB = !sideA && fieldPath.indexOf("Interaction_b") >= 0;
      if (!sideA && !sideB) {
        return kNoChange;
      }
      String suffix = sideA ? "_a" : "_b";
      idx.addField(fieldName + suffix, val.toString(), doc, withdups); // add _mol field
      return kFieldAdded;
    }
  }
  
  
  /**
   Derives an "ID" field from a definition line: the text up to the first
   space (when that space is not at position 0), with one leading '>' removed.
   Also adds the value last saved by mainrecsave_Recoder, when non-null,
   as "ID2". Always returns kFieldAdded.
  */
  public static class defline_Recoder implements FieldRecoder {

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      String id = val.toString();
      int firstSpace = id.indexOf(' ');
      if (firstSpace > 0) {
        id = id.substring(0, firstSpace);
      }
      if (id.length() > 0 && id.charAt(0) == '>') {
        id = id.substring(1);
      }
      idx.addField("ID", id, doc, nodups);

      String saved = mainrecsave_Recoder.theVal;
      if (saved != null) {
        idx.addField("ID2", saved, doc, nodups);
      }
      return kFieldAdded;
    }
  }

  /**
   Remembers the most recent field value in the static theVal so that
   defline_Recoder can add it later as "ID2". The field itself is not
   modified (returns kNoChange).
  */
  public static class mainrecsave_Recoder implements FieldRecoder {
    public static String theVal=""; //??

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      // alternative idea from the original author: savedfields.put(fieldName, val.toString());
      String latest = val.toString();
      theVal = latest;
      return kNoChange;
    }
  }
  
  /**
   Tokenizer for definition lines: a token character is anything above
   the space character that is not one of , ; : = ( ). Tokens are lower-cased.
  */
  public static class deflineTokens extends DataTokenizer
  {
    public deflineTokens() { super(); }
    public deflineTokens(Reader in) { super(in); }

    protected boolean isTokenChar(char c) {
      return c > ' ' && ",;:=()".indexOf(c) < 0;
    }

    // usually case insensitive
    protected char normalize(char c) {
      return Character.toLowerCase(c);
    }
  }
  
  
  /**
    Get species from symbol.

    When the value starts with a species prefix (a word character, then one or
    more non-backslash characters, then a backslash):
      - for main-record fields (xpathDelim index not greater than kSubrecLevel)
        the prefix is added as "species";
      - the remainder after the backslash is appended to the value, separated
        by a space, so the non-species part is indexed too.
    In every case a copy of the resulting value is added as fieldName+".cs".
  */
  public static class SYM_Recoder implements FieldRecoder {
    static Pattern regex= Pattern.compile("(\\w[^\\\\]+)\\\\"); // awful patt for \ escape char
    static String kDefaultSpecies="Dmel";

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      String symbol = val.toString();
      Matcher prefix = regex.matcher(symbol);

      // the .cs copy is added unconditionally below, so the result always carries kFieldAdded
      int flags = kFieldAdded;

      if (prefix.lookingAt()) {
        boolean mainRecord =
          fieldPath.indexOf(LuceneBaseIndexer.xpathDelim) <= kSubrecLevel; // ==1 ?
        if (mainRecord) {
          idx.addField("species", prefix.group(1), doc, nodups);  // only for MAIN record
        }
        String withoutSpecies = symbol.substring(prefix.end());
        val.append(' ');
        val.append(withoutSpecies);
        flags |= kValChanged;
      }
      // no prefix: a default species is not added here (see FB_EOR_Recoder)

      // changed .lc to .cs (case-sense); make lowercase default field search
      // ?? change .cs to _cs for backward compat; more sensible
      idx.addField(fieldName + ".cs", val.toString(), doc, withdups);
      return flags;
    }
  }


  /**
   Symbol tokenizer: splits on whitespace only and keeps character case.
  */
  public static class SymTokens  extends DataTokenizer
  {
    public SymTokens() { super(); }
    public SymTokens(Reader in) { super(in); }

    protected boolean isTokenChar(char c) {
      if (Character.isWhitespace(c)) return false;
      return true;
    }

    protected char normalize(char c) {
      return c;
    }
  }
  
  /**
   Symbol tokenizer: splits on whitespace only and lower-cases every character.
  */
  public static class LowerSymTokens  extends DataTokenizer
  {
    public LowerSymTokens() { super(); }
    public LowerSymTokens(Reader in) { super(in); }

    protected boolean isTokenChar(char c) {
      if (Character.isWhitespace(c)) return false;
      return true;
    }

    protected char normalize(char c) {
      return Character.toLowerCase(c);
    }
  }
  
  /**
   Tokenizer for database cross references: a token character is anything
   above the space character that is not one of , ; ( ) { } [ ] < > | /.
   Tokens are lower-cased.
  */
  public static class dbxrefTokens extends DataTokenizer
  {
    public dbxrefTokens() { super(); }
    public dbxrefTokens(Reader in) { super(in); }

    protected boolean isTokenChar(char c) {
      return c > ' ' && ",;(){}[]<>|/".indexOf(c) < 0;
    }

    // usually case insensitive
    protected char normalize(char c) {
      return Character.toLowerCase(c);
    }
  }


  /**
   Plain word tokenizer: tokens are runs of letters and digits, lower-cased.
  */
  public static class words extends DataTokenizer
  {
    public words() { super(); }
    public words(Reader in) { super(in); }

    protected boolean isTokenChar(char c) {
      boolean letterOrDigit = Character.isLetterOrDigit(c);
      return letterOrDigit;
    }

    protected char normalize(char c) {
      return Character.toLowerCase(c);
    }
  }

  /**
   Controlled-vocabulary phrase tokenizer: a token runs until a ';', a ','
   or any character below space (so newlines end a term, but blanks
   inside a term are kept). Tokens are lower-cased.
  */
  public static class cvterms  extends DataTokenizer
  {
    public cvterms() { super(); }
    public cvterms(Reader in) { super(in); }

    protected boolean isTokenChar(char c) {
      return c >= ' ' && c != ';' && c != ',';
    }

    protected char normalize(char c) {
      return Character.toLowerCase(c);
    }
  }

    // ??change .cv suffix to _cv
  /**
   Splits a field value on ';' and adds each piece (leading whitespace
   skipped) to the fieldName+".cv" sub-field. Always returns kFieldAdded.
  */
  public static class CV_Recoder
    implements FieldRecoder {
    static Pattern cvterm= Pattern.compile("\\s*([^;]+)\\s*;?",Pattern.CASE_INSENSITIVE);

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      String cvField = fieldName + ".cv";
      // takes every term in the field (not just the first, as .lookingAt() would)
      for (Matcher term = cvterm.matcher(val.toString()); term.find(); ) {
        idx.addField(cvField, term.group(1), doc, withdups);
      }
      //? kSkipField or cut indexed parts and put rest in generic WORDs/XXX field ?
      return kFieldAdded;
    }
  }


  /**  Allele class recoder -- put direct to 'docclass'? or ALC.cv
    ## ALC + CLA for alleles == class; CLA == wild-type generic if no ALC?
    fieldrecoder.CLA=fbacode$FB_ALESR_Field_Recoder,fbacode$ALC_Recoder
    fieldrecoder.ALC=fbacode$ALC_Recoder

    Example input:  ALC|antimorph with @Scer\GAL4<up>GMR.PF</up>@

    @...@ symbols and the word "with" are replaced by ';' and the remaining
    terms (more than one character after trimming) are added to "ALC.cv".
    A "CLA" field of the main record (xpathDelim index not greater than
    kSubrecLevel) is left alone and returns kNoChange.
  */
  public static class ALC_Recoder
    implements FieldRecoder {
    static String  ALCfield = "ALC.cv"; // not docclass?
    static Pattern withx  = Pattern.compile("\\s*(with)\\s*");
    static Pattern symbol = PhenotypeCV_Recoder.symbol; //Pattern.compile("@([^@]+)@");
    static Pattern cvterm = Pattern.compile("\\s*(\\w[^;,&(|\\\\]+)\\s*"); // require leading char so '/+' is skipped

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      if ("CLA".equals(fieldName)
          && fieldPath.indexOf(LuceneBaseIndexer.xpathDelim) <= kSubrecLevel) {
        return kNoChange;
      }

      // this is the full field with newlines ! do by line?
      String noSymbols = symbol.matcher(val.toString()).replaceAll(";");
      String text = withx.matcher(noSymbols).replaceAll(";");

      for (Matcher term = cvterm.matcher(text); term.find(); ) {
        String cv = term.group(1).trim();
        if (cv.length() <= 1) continue;
        idx.addField(ALCfield, cv, doc, withdups); //? no dups
      }
      return kFieldAdded;
    }

  }
  
  /**
   PhenotypeCV_Recoder variant for fields that contain backslash escapes, e.g.
     MU:spontaneous \mutator
     MU:recombination \between transposable elements
   Everything from a backslash to the end of its line is replaced by a newline
   in the value buffer before the inherited recoding runs.
  */
  public static class MU_Recoder extends PhenotypeCV_Recoder
  {
    static Pattern escaped = Pattern.compile("\\\\[^\n]*");

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      String cleaned = escaped.matcher(val.toString()).replaceAll("\n");
      val.setLength(0);
      val.append(cleaned);
      return super.recodeField(idx, doc, fieldName, fieldPath, val);
    }

  }

 
  /**
   Extracts controlled-vocabulary terms and gene/allele symbols from a
   phenotype-style field.

   Steps, in order:
     1. "(with ...) " groups: the inner text goes to the "symbols" field, the group becomes ';'
     2. "@...@" symbols: the inner text goes to "symbols", the symbol becomes ';'
     3. the words of/by/with, {...} blocks and (...) blocks become ';'
     4. each remaining term longer than one character that does not look like
        an allele symbol (word[...]) is added to fieldName+".cv"
   Always returns kFieldAdded.

   Known leftovers noted by the original author: occasional symbols still reach
   the .cv field, e.g. GIC.cv:sxl[-] and scer\gal4[1].
  */
  public static class PhenotypeCV_Recoder
    implements FieldRecoder {
    static Pattern cvterm = Pattern.compile("\\s*(\\w[^;,&(|\\\\]+)\\s*[;,&(|\\\\]?"); // require leading char so '/+' is skipped
    static Pattern prewith= Pattern.compile("\\(with\\s+(.+)\\) "); // need trailing space; problems
    static Pattern symbol = Pattern.compile("@([^@]+)@");
    static Pattern squiggles = Pattern.compile("\\{[^}]*\\}");
    static Pattern parens = Pattern.compile("\\([^)]*\\)");
    static Pattern withx  = Pattern.compile("\\s*(of|by|with)\\s*");
    static Pattern alesym = Pattern.compile("\\w+\\[.+\\]\\S*");

    /** Adds group(1) of every match to "symbols" and returns text with each match replaced by ';'. */
    private static String harvestSymbols(Pattern pat, String text,
      LuceneBaseIndexer idx, Document doc)
    {
      StringBuffer out = new StringBuffer();
      Matcher m = pat.matcher(text);
      while (m.find()) {
        idx.addField("symbols", m.group(1), doc, withdups);
        m.appendReplacement(out, ";");
      }
      m.appendTail(out);
      return out.toString();
    }

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      // this is the full field with newlines ! do by line?
      String text = val.toString();

      text = harvestSymbols(prewith, text, idx, doc);
      text = harvestSymbols(symbol, text, idx, doc);

      text = withx.matcher(text).replaceAll(";");
      text = squiggles.matcher(text).replaceAll(";");
      text = parens.matcher(text).replaceAll(";");

      String cvField = fieldName + ".cv";
      for (Matcher term = cvterm.matcher(text); term.find(); ) {
        String cv = term.group(1).trim();
        boolean longEnough = cv.length() > 1;
        if (longEnough && !alesym.matcher(cv).matches()) {
          idx.addField(cvField, cv, doc, withdups);
        }
      }
      //? kSkipField or cut indexed parts and put rest in generic WORDs/XXX field ?
      return kFieldAdded;
    }
  }


  /**
   Recoder for GO/enzyme style fields such as
     'serine-tRNA ligase ; GO:0004828 ; EC:6.1.1.11'
   The leading term (text before the first ';') is added to fieldName+".cv";
   every GO:nnn id and then every EC:n.n.n number found anywhere in the value
   is added to the "DBX" field. Always returns kFieldAdded.
  */
  public static class GO_Recoder
    implements FieldRecoder {
    static Pattern goterm= Pattern.compile("\\s*([^;]+)\\s*;?",Pattern.CASE_INSENSITIVE);
    static Pattern goid  = Pattern.compile("\\bGO:[0-9-]+",Pattern.CASE_INSENSITIVE);
    static Pattern ecnum = Pattern.compile("EC:[0-9-]+\\.[0-9-]+\\.[0-9\\.-]+",Pattern.CASE_INSENSITIVE);

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      String text = val.toString();

      Matcher lead = goterm.matcher(text);
      if (lead.lookingAt()) {
        idx.addField(fieldName + ".cv", lead.group(1), doc, withdups);
      }

      //? put the ids in separate field? .go .ec ...
      //? or separate toplevel field: GOID, ECID, or generic DBXREF ? - ACC/AccNumber now in SRS index
      Pattern[] idPatterns = { goid, ecnum };
      for (int i = 0; i < idPatterns.length; i++) {
        Matcher ids = idPatterns[i].matcher(text);
        while (ids.find()) {
          idx.addField("DBX", ids.group(), doc, withdups);
        }
      }
      //? kSkipField or cut indexed parts and put rest in generic WORDs/XXX field ?
      return kFieldAdded;
    }
  }

  /**
   Adds the first "FBbt:nnn" identifier found in the value (case-insensitive)
   as the "docid" field. Applied to the FBcv.acode LNK field.
   Returns kFieldAdded when an id was found, else kNoChange.
  */
  public static class FBbtId_Recoder // apply to FBcv.acode/LNK field now
    implements FieldRecoder {
    static Pattern btid  = Pattern.compile("\\bFBbt:[0-9-]+",Pattern.CASE_INSENSITIVE);

    public int recodeField(LuceneBaseIndexer idx, Document doc,
      String fieldName, String fieldPath, StringBuffer val)
    {
      Matcher id = btid.matcher(val.toString());
      if (!id.find()) {
        return kNoChange;
      }
      idx.addField("docid", id.group(), doc, nodups);
      return kFieldAdded;
    }
  }

  /**
    * Location_FieldRecoder
    Parse flybase sequence location fields = genbank standard locations, e.g.
      BLOC|join(100..200,300..400)
      BLOC|complement(2000..3000)
      BLOC|1..2
    and also
      loc=2L:100..200,300..400 ; loc=2L:complement(2000..3000)

    Adds these sub-fields:
      fieldName.chr     leading "name:" prefix, when present
      fieldName.start   first run of digits after the prefix
      fieldName.stop    last run of digits (same as start if there is only one)
      fieldName.strand  "-1" if the text contains "complement(", else "1"

    Fields whose fieldPath contains xpathDelim at an index above 0 are skipped,
    as are values without any digits (kNoChange).
  */

  public static class Location_Recoder
    implements FieldRecoder {

    //? final static String regex_loc="(\\d+)\\.\\.(\\d+)";
    static Pattern regexLoc= Pattern.compile("(\\d+)");
    static Pattern regexChr= Pattern.compile("(\\w+):");

    public int recodeField(LuceneBaseIndexer idx, Document doc,
        String fieldName, String fieldPath, StringBuffer val)
    {
      // skip subrec locations always ?  (instead use addField(... doc, nodup) ?)
      if (fieldPath.indexOf(LuceneBaseIndexer.xpathDelim) > 0) {
        return kNoChange;
      }

      String loc = val.toString();
      String chr = null;
      Matcher chrPrefix = regexChr.matcher(loc);
      if (chrPrefix.lookingAt()) {
        chr = chrPrefix.group(1);
        // only the local copy is trimmed; the caller's buffer stays complete
        loc = loc.substring(chrPrefix.end());
      }

      Matcher numbers = regexLoc.matcher(loc);
      if (!numbers.find()) {
        return kNoChange;
      }
      String start = numbers.group();
      String stop = start;
      while (numbers.find()) {
        stop = numbers.group();   // keep the last number seen
      }
      String strand = "1";
      if (loc.indexOf("complement(") >= 0) strand = "-1";

      if (chr != null) {
        idx.addField(fieldName + ".chr", chr, doc, withdups);
      }
      idx.addField(fieldName + ".start", start, doc, withdups);
      idx.addField(fieldName + ".stop", stop, doc, withdups);
      idx.addField(fieldName + ".strand", strand, doc, withdups);
      return kFieldAdded;
    }
  }
  
  /**
   Tokenizer for location strings. It must keep all characters of
   "Chr:complement(1111..22222,33333..44444)" in one token, so only ';' and
   characters at or below space end a token. Case is preserved.
  */
  public static class LocationTokens extends DataTokenizer
  {
    public LocationTokens() { super(); }
    public LocationTokens(Reader in) { super(in); }

    protected boolean isTokenChar(char c) {
      return c != ';' && c > ' ';
    }

    protected char normalize(char c) {
      return c;
    }
  }
  
  
  /**
   Identifier tokenizer: tokens are runs of letters and digits, lower-cased.
  */
  public static class IDTokens extends DataTokenizer
  {
    public IDTokens() { super(); }
    public IDTokens(Reader in) { super(in); }

    protected boolean isTokenChar(char c) {
      boolean idChar = Character.isLetterOrDigit(c);
      return idChar;
    }

    protected char normalize(char c) {
      return Character.toLowerCase(c);
    }
  }


  //##  dateword: ~ /[^0-9]*([0-9]+)[ \/\-]*([A-Z][a-z][a-z])[a-z]*[ \/\-]*([0-9]+)/  ~
  // use java date parser?
  /**
   Tokenizer for date fields: a token runs until a ';' or any character
   below space, so blanks inside a date such as "28 Feb 04" are kept.
   Case is preserved.
  */
  public static class DateTokens extends DataTokenizer
  {
    public DateTokens() { super(); }
    public DateTokens(Reader in) { super(in); }

    protected boolean isTokenChar(char c) {
      return c >= ' ' && c != ';';
    }

    protected char normalize(char c) {
      return c;
    }
  }
  
  /**
   Token filter that rewrites a date token as "yyyyMMdd".

   Formats are tried in this order, the first successful parse wins:
     MM/dd/yy     only when the text has a '/' after position 0
     dd MMM yy    the flybase format, e.g. 28 Feb 04
     yyyy.MM.dd   only when the text has a '.' after position 0
                  ('-' is converted to '.' first when a '-' occurs after position 0)
     yyyy         applied to the text before the first '.', or to the whole text
   All parsers are lenient.

   Result of next():
     - null when the input is exhausted, or when no format parses the token
     - the reformatted token on success
     - the unchanged token if an unexpected exception occurs
   NOTE(review): returning null for an unparsable token presumably ends the
   token stream for the caller -- confirm this is intended (original: "t or null?").
  */
  public static class DateFilter extends DataFilter
  {
    static SimpleDateFormat df1, df2, df3, df4, todf;
    static {
      df1 = lenientFormat("dd MMM yy");
      df2 = lenientFormat("yyyy.MM.dd");
      df3 = lenientFormat("yyyy");
      df4 = lenientFormat("MM/dd/yy");
      // what of dd/mm/yy or mm/dd/yy ?
      todf = new SimpleDateFormat("yyyyMMdd");
      }

    /** Builds a SimpleDateFormat for the pattern with lenient parsing switched on. */
    private static SimpleDateFormat lenientFormat(String pattern)
    {
      SimpleDateFormat f = new SimpleDateFormat(pattern);
      f.setLenient(true);
      return f;
    }

    /** Parses text with fmt; any failure yields null instead of an exception. */
    private static Date tryParse(SimpleDateFormat fmt, String text)
    {
      try { return fmt.parse(text); }
      catch (Exception dx) { return null; }
    }

    public Token next() throws IOException {
      Token t = input.next();
      if (t == null) return t;
      try {
        String text = t.termText();   // not trimmed: whitespace is allowed
        Date dt = null;

        if (text.indexOf('/') > 0) dt = tryParse(df4, text);

        if (text.indexOf('-') > 0) text = text.replace('-', '.');
        int dot = text.indexOf('.');

        if (dt == null) dt = tryParse(df1, text);
        if (dt == null && dot > 0) dt = tryParse(df2, text);
        if (dt == null) {
          if (dot > 0) text = text.substring(0, dot);
          dt = tryParse(df3, text);
          }

        if (dt == null) return null; // t or null?
        String stamp = todf.format(dt); //DateField.dateToString(dt);
        return new Token( stamp, t.startOffset(), t.endOffset(), t.type());
        }
      catch (Exception ex) {
        //? eat it; this is mostly failing .. need to handle messy date formats better
        // 2004.5.17 -- in refs > drop '.', '-', ..
        }
      return t;
      }
  }


  /**
   DataTokenizer with its inherited token-character rule; only adds lower-casing.
  */
  public static class LowerDataTokenizer extends DataTokenizer
  {
    public LowerDataTokenizer() { super(); }
    public LowerDataTokenizer(Reader in) { super(in); }

    protected char normalize(char c) {
      char lower = Character.toLowerCase(c);
      return lower;
    }
  }
  /**
   Word tokenizer: tokens are runs of letters and digits, lower-cased.
  */
  public static class LowerWordTokenizer extends DataTokenizer
  {
    public LowerWordTokenizer() { super(); }
    public LowerWordTokenizer(Reader in) { super(in); }

    protected boolean isTokenChar(char c) {
      boolean wordChar = Character.isLetterOrDigit(c);
      return wordChar;
    }

    protected char normalize(char c) {
      return Character.toLowerCase(c);
    }
  }


  /**
   Pass-through filter that, when debug is on, logs each token as
   "field:term" to logp. Tokens are returned unchanged.
  */
  public static class DebugFilter extends DataFilter
  {
    public Token next() throws IOException {
      Token t = input.next();
      if (t != null && debug) {
        logp.println(this.getField()+":"+t.termText());
      }
      return t;
      }
  }

  /**
   Pass-through filter that, when debug is on, logs each token as
   "field:term" followed by a separator line and a blank line.
   Tokens are returned unchanged.
  */
  public static class DebugEndOfRecordFilter extends DataFilter
  {
    public Token next() throws IOException {
      Token t = input.next();
      boolean report = (t != null) && debug;
      if (report) {
        logp.println(this.getField()+":"+t.termText());
        logp.println("-----------------");
        logp.println();
        }
      return t;
      }
  }

  /**
   Converts an integer token to the string form produced by
   NumberField.numToString(). Anything from the first '.' on (when the '.' is
   not at position 0) is cut off before parsing.

   Returns null when the input is exhausted and also when the token is not
   numeric or the conversion fails.
  */
  public static class NumberFilter // should return null unless is numeric string
    extends DataFilter
  {
    public Token next() throws IOException {
      Token t = input.next();
      if (t == null) return null;
      try {
        String digits = t.termText(); // need word tokenizer + num parts: .+-
        int dot = digits.indexOf('.');
        if (dot > 0) {
          digits = digits.substring(0, dot);
        }
        int ival = Integer.parseInt(digits);
        String encoded = NumberField.numToString( ival ); // can except
        return new Token( encoded, t.startOffset(), t.endOffset(), t.type());
        }
      catch (Exception e) {
        return null; //? eat it or throw IOException
        }
      }
  }

    // not much use as filter given that most tokenizers skip ';' -- use in recoder chain
  /**
   Token filter that replaces SGML greek entities and up/down tags in a token
   by plain text (see greek2text). Tokens without such markup pass through
   unchanged; offsets and type of a rewritten token are kept.
  */
  public static class GreekFilter  extends DataFilter
  {
    public Token next() throws IOException {
      Token t = input.next();
      if (t == null) return null;

      String term = t.termText();
      if (!isgreek(term)) {
        return t;
        }
      String plain = greek2text(term);
      return new Token( plain, t.startOffset(), t.endOffset(), t.type());
      }
  }

	// SGML superscript/subscript tags and, at the same index, the plain-text
	// bracket that replaces each one (the two arrays are zipped by makemap).
	public final  static String[] updownSGML= { "<up>", "</up>", "<down>", "</down>" };
	public final  static String[] updownTEXT= { "[", "]", "[[", "]]" };

	// SGML greek-letter entities, listed as lower-case / upper-case pairs.
	// The order must stay aligned with greekTEXT below: makemap pairs entries by index.
	public static String[] greekSGML = { 
		"&agr;", "&Agr;", 
		"&bgr;", "&Bgr;",
		"&dgr;", "&Dgr;",
		"&egr;", "&Egr;",
		"&ggr;", "&Ggr;",
	  "&kgr;", "&Kgr;",
		"&lgr;", "&Lgr;", 
	  "&ngr;", "&Ngr;",
		"&pgr;", "&Pgr;", 
		"&ohgr;", "&OHgr;", 
		"&zgr;", "&Zgr;", 
		"&psgr;", "&PSgr;", 
		"&eegr;", "&EEgr;", 
		"&thgr;", "&THgr;",
	  "&igr;", "&Igr;",
	  "&mgr;", "&Mgr;",
	  "&xgr;", "&Xgr;",
	  "&ogr;", "&Ogr;",
	  "&rgr;", "&Rgr;",
	  "&sgr;", "&Sgr;",
	  "&tgr;", "&Tgr;",
	  "&ugr;", "&Ugr;",
	  "&phgr;", "&PHgr;",
	  "&khgr;", "&KHgr;"
		 };
	// Spelled-out letter names; entry i is the replacement for greekSGML[i].
	public static String[]  greekTEXT = { 
		"alpha", "Alpha", 
		"beta", "Beta",
		"delta", "Delta", 
		"epsilon", "Epsilon", 
		"gamma", "Gamma", 
	  "kappa","Kappa",
		"lambda", "Lambda", 
	  "nu", "Nu",
		"pi", "Pi",   
		"omega", "Omega", 
		"zeta", "Zeta",   
		"psi", "Psi",   
		"eta", "Eta",  
		"theta", "Theta",
		"iota", "Iota",
		"mu", "Mu",
		"xi", "Xi",
		"omicron","Omicron",
		"rho", "Rho",
		"sigma", "Sigma",
		"tau", "Tau",
		"upsilon", "Upsilon",
		"phi", "Phi",
		"chi", "Chi"
		 };

  // anyxPattern: a greek entity, an up/down tag, or a lone '&' or '<' -- used by isgreek() and greek2text().
  static Pattern anyxPattern    = Pattern.compile("(&\\w{1,3}gr;|</?(up|down)>|[&<])"); 
  // greekPattern / updownPattern classify what anyxPattern matched.
  static Pattern greekPattern   = Pattern.compile("(&\\w{1,3}gr;)"); // agr, pgr...
  static Pattern updownPattern  = Pattern.compile("(</?(up|down)>)"); 

  // SGML -> text lookup tables, built from the parallel arrays declared above.
  // These initializers run in textual order, so the arrays must be declared before this point.
  static Properties greektextmap = makemap(greekSGML, greekTEXT );  
  static Properties updntextmap  = makemap(updownSGML, updownTEXT );  
  /**
   Builds a lookup table from two parallel arrays: alist[i] maps to tolist[i].
   tolist must be at least as long as alist.
  */
  static Properties makemap(String[] alist, String[] tolist)
  {
    Properties table = new Properties();
    int i = 0;
    while (i < alist.length) {
      table.setProperty(alist[i], tolist[i]);
      i++;
    }
    return table;
  }

  /**
   Appends the replacement for the current match of ma to sb: the value that
   matchmap holds for 'from', or 'from' itself when the map has no entry.
   The matcher must be positioned on a successful find().
  */
  static void appendmatch(String from, Matcher ma, Properties matchmap, StringBuffer sb)
  {
    String mapped = matchmap.getProperty(from);
    String replacement = (mapped != null) ? mapped : from;
    ma.appendReplacement(sb, replacement);
  }

  /**
   Reports whether s contains anything greek2text() would look at: a greek
   SGML entity, an up/down tag, or a bare '&' or '<'. Null and empty
   strings yield false.
  */
  static boolean isgreek(String s)
  {
    return s != null
        && s.length() > 0
        && anyxPattern.matcher(s).find();
  }
  
  /**
   Returns s with SGML greek entities replaced by their spelled-out names and
   up/down tags replaced by brackets. Entities or tags without a table entry,
   and bare '&' or '<' characters, are copied through unchanged.
   Null, empty, and markup-free strings are returned as is.
  */
  static String greek2text(String s)
  {
    if (s==null || s.length()==0) return s;
    Matcher ma = anyxPattern.matcher(s);
    if (!ma.find()) return s;

    StringBuffer sb = new StringBuffer();
    do {
      String from = ma.group(1);

      // pick the lookup table that fits the kind of markup matched
      Properties table = null;
      if (greekPattern.matcher(from).matches()) table = greektextmap;
      else if (updownPattern.matcher(from).matches()) table = updntextmap;

      if (table != null) appendmatch(from, ma, table, sb);
      else ma.appendReplacement(sb, from);
    } while (ma.find());

    ma.appendTail(sb);
    return sb.toString();
  }
  
  
}

/*
reuse these field names where possible
flybase/dbs/srs/ic/db/fbsrs-codes.is

## acode field keys for SRS
## generated by flybase.report.FbSRScodes
## acode_key: { srscode:NEWCODE  vl:"visible label"  } 

$fldinfo={
#fban-oct03 added/changed
AANAM:  { vl:"Protein name" }
AFFY:  {   vl:"Affy oligo" }
EST:  { vl:"EST" }
CDS:  { noindex:1   vl:"Coding sequence location" }
CLOCC:  { srscode:"CLOC" vl:"Computed cytology" }
PEPST: { srscode:CMT   vl:"Peptide status"}
#---
AAB:     {  vl:"Assoc. Aberration"}
AAL:     {  vl:"Length in a.a."}
AALEN:   {  vl:"Peptide length"}
ABA:     {  vl:"Associated aberration"}
ABOD:    {  vl:"Antibodies generated"}
ABODURL: {  vl:"Antibody URL (DSHB Hybridomas)"}
ABREFDSR:{  vl:"Data from ref."}
ABSR:    {  vl:"Aberration Record"}
ABSSR:   {  vl:"Parent Aberration"}
ABSTR:   {  srscode:skip vl:"Abstract" noview:1}
ABSY:    {  srscode:SYM vl:"Symbol"}
ACC:   {  vl:"Accession"}
ACLA:    {  srscode:CLA vl:"Class of aberration"}
ACM:     {  vl:"Complements"}
AFC:     {  vl:"Fails to complement"}
AFS:     {  vl:"Fails to rescue"}
AGT:     {  srscode:ASAL vl:"Allele or genotype used"}
ALC:     {  srscode:CLA vl:"Allele class"}
ALER:    {  vl:"Allele Record"}
ALESR:   {  vl:"Allele"}
ALETAB:  {  srscode:skip vl:"Allele table" noview:1}
ALSO:    {  vl:"Also published as"}
ALTSQ:   {  vl:"Alternative sequence"}
AM:      {  vl:"Allelism info."}
AMD:     {  vl:"Deletes/disrupts"}
AMDD:    {  vl:"Does not delete/disrupt"}
AMDP:    {  vl:"Duplicated for"}
AMIS:    {  vl:"Misc. allele information"}
AMND:    {  vl:"Not duplicated for"}
AMP:     {  vl:"Partially disrupts"}
AMPD:    {  vl:"Partially duplicated for"}
AMSO:    {  vl:"Rescue and compl. info."}
ANRB:    {  vl:"Not rescued by"}
APC:     {  vl:"Partially complements"}
APR:     {  vl:"Partially rescues"}
APRB:    {  vl:"Partially rescued by"}
ARB:     {  vl:"Rescued by"}
ARG2:    {  vl:"FlyBase gene annotation"}
ARGS:    {  vl:"FlyBase gene annotation"}
ARM:     {  vl:"Chromosome arm"}
ARS:     {  vl:"Rescues"}
ASAL:    {  vl:"Associated allele"}
ASBA:    {  vl:"Associated balancer"}
ASCO:    {  vl:"Associated construct"}
ASCRGN:  {  srscode:ASGN vl:"Reflects expression of"}
ASGN:    {  vl:"Associated gene"}
ASM:     {  vl:"Assay mode"}
ASPP:    {  vl:"Associated polypeptide"}
ASQ:     {  vl:"Genomic sequence analysis"}
ASTI:    {  vl:"Associated insertion"}
ASTP:    {  vl:"Associated transposon"}
ASTR:    {  vl:"Associated transcript"}
ASYM:    {  vl:"Symbol"}
AU:      {  vl:"Author"}
AU1:     {  vl:"Author"}
BFD:     {  vl:"BFD Line"}
BGV:     {  vl:"Balancer/ Genotype variant"}
BGVR:    {  vl:"Variant Record"}
BGVSY:   {  srscode:SYM vl:"Symbol"}
BI:      {  vl:"Biosis"}
BIP:     {  vl:"Breakpoints inherited from progenitors"}
BLOC:    {  vl:"Sequence map"}
BM:      {  vl:"Aberration info."}
BMD:     {  vl:"Disrupted in"}
BMDD:    {  vl:"Not disrupted in"}
BMDP:    {  vl:"Duplicated in"}
BMND:    {  vl:"Not duplicated in"}
BMP:     {  vl:"Partially disrupted in"}
BMPD:    {  vl:"Partially duplicated in"}
BODP:    {  vl:"Expression data"}
BPT:     {  vl:"Breakpoints"}
BSN:     {  srscode:SYN vl:"Balancer short name"}
CC:      {  vl:"Comments"}
CCL:     {  srscode:CLA vl:"Construct type"}
#CCLOC:  {  srscode:CLOC vl:"Cytological location"}
CCM:     {  vl:"Comments on cytology"}
CDNA:    {  vl:"cDNA"}
#CDS:    {  vl:"Coding sequence"}
CEL:     {  vl:"Cellular component"}
CGSYM:   {  vl:"CG Symbol"}
CH:      {  vl:"Characteristics"}
CLA:     {  vl:"Class of gene"}
CLNR:    {  vl:"Clone Record"}
CLNSR:   {  vl:"Clone"}
CLOC:    {  vl:"Cytogenetic map"}
CMT:   {  vl:"Annotator comment"}
CNM:     {  vl:"Common Name"}
CNS:     {  srscode:ASTP vl:"Carried in construct"}
CO:      {  vl:"Coden"}
COR:     {  vl:"Comments on origin"}
CPW:     {  vl:"Library plate-well"}
CS:      {  vl:"Cloning site"}
CSQ:     {  srscode:skip vl:"Sequence"}
CTG:     {  vl:"Physical contig"}
CTSYM:   {  vl:"Transcript symbol"}
CVBODP:  {  vl:"Expression pattern"}
CVBODPC:         {  vl:"Expression pattern comment"}
CVCEL:   {  vl:"Cell location (CV)"}
CVEC:    {  vl:"Vector"}
CYA:     {  vl:"Associated cytology"}
CYC:     {  vl:"Notes on cytogenetic map"}
DARTS:   {  vl:"GenBank sequence report"}
DBA:     {  vl:"DNA/RNA sequences"}
DBAF:    {  vl:"Flanking sequence"}
DB:      {  vl:"External database name"}
DBX:     {  vl:"External database link"}
DEC:     {  vl:"Descendant"}
DES:     {  vl:"Description"}
DHO:     {  vl:"Probable reference ortholog"}
DIS:     {  vl:"Discoverer"}
DOMAIN:  {  vl:"Domain"}
DT:      {  vl:"Date"}
ENL:     {  vl:"Left entity"}
ENR:     {  vl:"Right entity"}
ENZ:     {  vl:"Molecular function"}
EOR:     {  srscode:skip vl:"End Record" noview:1}
EPA:     {  vl:"Tissue/Position" noview:1}
EPAT:    {  vl:"Expression pattern"}
EPATR:   {  vl:"Expression pattern"}
EPP:     {  vl:"Pattern" noview:1}
EPT:     {  vl:"Stage" noview:1}
ER:      {  vl:"Related publication"}
EVD:     {  vl:"Evidence"}
EXNSQ:   {  vl:"Exon composition"}
ExpatTable:      {  vl:"Expression pattern"}
FGD:     {  vl:"Formalized genetic data"}
FI:      {  srscode:skip vl:"File index" noview:1}
FLQ:     {  vl:"Flanking sequence"}
FNC:     {  vl:"Biological process"}
FSQ:     {  vl:"Foreign sequence"}
GADR:    {  vl:"Annotation Record"}
GADSR:   {  vl:"Genome Annotation"}
GENR:    {  vl:"Gene Record"}
GENSR:   {  vl:"Parent Gene"}
GIA:     {  vl:"Genetic interaction (effect, anatomy)"}
GIA2:    {  vl:"Genetic interaction (anatomy, effect)"}
GID:     {  srscode:ID vl:"FlyBase ID"}  
GIC:     {  vl:"Genetic interaction (effect, class)"}
GIC2:    {  vl:"Genetic interaction (class, effect)"}
GII:     {  vl:"Genetic interaction info."}
GLC:     {  vl:"Comments on genetic locn."}
GLOC:    {  vl:"Recombination map"}
GO:      {  vl:"Gene Ontology" noview:1}
GOTERM:  {  vl:"GO Term"}
GPD:     {  vl:"Gene product"}
GSNA:    {  vl:"Short name"}
GSYM:    {  vl:"Symbol"}
GeneInterTable:  {  vl:"Genetic Interactions"}
HG:      {  vl:"Similar genes"}
HGTAB:   {  srscode:skip vl:"Similar genes table" noview:1}
HITS:    {  vl:"Positive PCR assay"}
IBAL:    {  vl:"Usable as balancer"}
ICC:     {  vl:"Comment"}
ICL:     {  srscode:CLA vl:"Insertion class"}
ID:      {  vl:"FlyBase ID"}
ID2:     {  vl:"Secondary ID"}
IFL:     {  vl:"Interactive Fly"}
INSITU:  {  vl:"Insitu image"}   
INSR:    {  vl:"Insertion"}
INSY:    {  srscode:ASTI vl:"Insertion synonym" noview:1}
IPRO:    {  vl:"InterPro motifs"}
ISBN:    {  vl:"Isbn"}
ISS:     {  vl:"Issue"}
ITP:     {  vl:"Insertion's transposon type"}
JR:      {  vl:"Journal"}
KLOC:    {  vl:"Computed kilobase location" noview:1}
LG:      {  vl:"Language"}
LGA:     {  vl:"Addn. lang.s"}
LIB:     {  srscode:CLA vl:"Library class"}
LOCB:    {  vl:"Localization basis"}
LOI:     {  vl:"Location inferred from insertion in"}
LOST:    {  vl:"Availability"}
MABST:   {  vl:"Abstract"}
MCR:     {  vl:"Transposon Record"}
MD:      {  vl:"Molecular data"}
MED:     {  vl:"PubMed"}
MK:      {  vl:"Markers"}
MMP:     {  vl:"Molecular map"}
MOD:     {  vl:"Modification of progenitor"}
MOLDR:   {  vl:"Molecular data Record"}
MRK:     {  vl:"Body part marker"}
MSR:     {  vl:"Transposon Record"}
MU:      {  vl:"Mutagen"}
NAF:     {  vl:"DNA Features"}
NAM:     {  vl:"Full name"}
NB:      {  vl:"Neighbor"}
NBR:     {  vl:"Neighbor Record"}
NBV:     {  vl:"Neighbor value"}
NCO:     {  vl:"New cytological order"}
OAB:     {  vl:"Genetic data about other aber."}
OLAP:    {  vl:"Overlaps"}
OP:      {  vl:"Offprint"}
ORI:     {  vl:"Orientation"}
OTH:     {  vl:"Other information"}
PAC:     {  vl:"Protein sequences"}
PB:      {  vl:"Publisher"}
PCL:     {  vl:"Cell loc. summary"}
PDOM:    {  vl:"Protein domains"}
PED:     {  vl:"Position-effect data"}
PEV:     {  vl:"Position-effect variegation"}
PEVD:    {  vl:"Dominant PEV in"}
PEVN:    {  vl:"No PEV in"}
PEVR:    {  vl:"Recessive PEV in"}
PG:      {  vl:"Pages"}
PHC:     {  vl:"Phenotypic class"}
PHI:     {  vl:"Mutant phenotype"}
PHM:     {  vl:"Phenotype manifest in"}
PHP:     {  vl:"Phenotypic info."}
PHS:     {  vl:"Status"}
PID:     {  vl:"Antibody ID (DSHB Hybridomas) "}
POL:     {  vl:"Polymorphic variant"}
PPB:     {  vl:"Place pub."}
PPC:     {  vl:"Population comments"}
PPR:     {  vl:"Polypeptide Record"}
PPS:     {  vl:"Population info"}
PPSR:    {  vl:"Polypeptide"}
PRG:     {  vl:"Progenitor"}
PSZ:     {  vl:"Protein size (kD)"}
PT:      {  vl:"Part"}
PTD:     {  vl:"Protein & Transcript"}
PTR:     {  vl:"Protein & Transcript"}
PTRR:    {  vl:"Protein & Transcript Record"}
PhenoTable:      {  vl:"Allele phenotypes"}
ProtTransTable:  {  vl:"Proteins & Transcripts"}
RCI:     {  vl:"Related transgene constructs and insertions"}
RCIR:    {  vl:"Related transgene constructs and insertions"}
RDID:    {  srscode:ID vl:"Ref."}
RDL:     {  vl:"Data from reference"}
RDL1:    {  srscode:RDL vl:"Data from reference"}
RDLSR:   {  vl:"Data from reference"}
REF:     {  vl:"References"}
REFDR:   {  vl:"Ref. Data Record"}
REFDSR:  {  vl:"Data from ref."}
REFF:    {  vl:"Reference"}
REFM:    {  vl:"Mini ref."}
REFR:    {  vl:"Reference Record"}
REFTAB:  {  srscode:skip vl:"Reference table" noview:1}
RESZ:    {  vl:"Record size" noview:1}
RETE:    {  srscode:skip vl:"Table Entry" noview:1}
REV:     {  vl:"Recent reviews"}
RG:      {  vl:"Ring"}
RL:      {  vl:"Ref. list"}
RLR:     {  vl:"References"}
RLSR:    {  vl:"References"}
RNAT:    {  vl:"RNA type"}
RPA:     {  vl:"Rep. protein sequence"}
RPTC2:   {  vl:"Available reports"}
RPTCONTENT:      {  vl:"Available reports"}
RPTL:    {  srscode:skip vl:"Data report" noview:1}
RS:      {  vl:"Restriction site"}
RSQ:     {  vl:"Rep. DNA sequence"}
SCAF:    {  vl:"Scaffold"}
SEG:     {  vl:"Segment"}
SEP:     {  vl:"Separable component"}
SEPR:    {  vl:"Sep. component Record"}
SER:     {  vl:"Series"}
SGTP:    {  vl:"Segment type"}
SK:      {  vl:"Stocks"}
SKC:     {  vl:"Stocks count" noview:1}
SPSY:    {  srscode:SYM vl:"Symbol"}
SQ:      {  srscode:skip vl:"Sequence"}
SQLEN:   {  vl:"mRNA length"}
STS:     {  vl:"STS"}
STSNAM:  {  srscode:NAM vl:"Name"}
STSR:    {  vl:"STS Record"}
SUMX:    {  srscode:skip vl:"Summary" noview:1}
SYM:     {  vl:"Symbol"}
SYN:     {  vl:"Synonyms"}
TCC:     {  vl:"Comments"}
TCH:     {  vl:"Characteristics"}
TCL:     {  vl:"Description"}
TCSR:    {  vl:"Transcript"}
TCTP:    {  vl:"Transposon construct type"}
TE:      {  vl:"Transposable el. data"}
TFT:     {  vl:"Feature"}
TGT:     {  srscode:ASGN vl:"Genotype"}
TI:      {  vl:"Title"}
TIR:     {  vl:"Insertion Record"}
TMPLCONT:        {  vl:"Report content"}
TP:      {  srscode:CLA vl:"Type"}
TPFBMMP:         {  vl:"Molecular map"}
TPFBSQ:  {  vl:"Transposon sequence" noview:1}
TPR:     {  vl:"Transposon Record"}
TPSY:    {  srscode:ASTP vl:"Transposon synonym" noview:1}
TPU:     {  vl:"Uses"}
TRL:     {  vl:"Length (Kb)"}
TRN:     {  srscode:ASTP vl:"Responsible transposon"}
TRNA:    {  srscode:ASTI vl:"Transposon insertions"}
TRR:     {  vl:"Transcript Record"}
TRREC:   {  vl:"AnnoTranscript Record"}
TRRECSR:         {  vl:"Transcript"}
TRSR:    {  vl:"Transposon"}
UAB:     {  vl:"Useful aneuploid aberrations"}
VCL:     {  vl:"Is valid record" noview:1}
VERS:    {  vl:"Release vers."}
VL:      {  vl:"Volume"}
VPR:     {  vl:"5' experiment info"}
WT:      {  vl:"Wild-type function"}
WTI:     {  vl:"Interacts genetically with"}
XDA:     {  vl:"Expression data available"}
YR:      {  vl:"Year"}
ZR:      {  vl:"Zool. Rec."}
env:     {  vl:"null" noview:1}
fbsym:   {  vl:"FlyBase symbol" noview:1}
self:    {  vl:"null" noview:1}
'{':     {  srscode:SOR vl:"Start Record" noview:1}
'}':     {  srscode:EOR vl:"End Record" noview:1}
skip:    {  vl:"skip field" noview:1}
}

*/