#!/usr/local/bin/perl
# GeneOntol.pm
# see Meow.pm caller

=head1 NAME

GeneOntol - read Gene Ontology (GO) data

=head1 USAGE

  use GeneOntol;
  $GeneOntol::debug= 1;

  # get standard data file names
  my $datapath= "/c6/tmp/geneont/data/";
  my $outpath= $datapath;
  my @args= GeneOntol::getDefaults($datapath, $outpath [, $fbobspath]);

  # parse GO data files, create FBgo.acode and BerkeleyDB go.bdb
  $result= GeneOntol::readData(@args);

  # read GO data files with better parser, in $args{jlib} classpath
  $result= GeneOntol::javaParser( @args );

  # create BerkeleyDB hash of FBgo.acode
  @args= GeneOntol::getDefaults( '', $fbobspath, $fbobspath); # data in fbobspath/FBgo.acode
  GeneOntol::acode2bdb(@args); # make go.bdb

  # open database 'r' to read
  GeneOntol::openBDB( $outpath, "go.bdb", 'r');
  if (GeneOntol::dbIsOpen()) {}

  # for each gene database id $did, get associated GO terms
  $val= GeneOntol::getGoTerms( $did, 'F');

  # list BDB info
  GeneOntol::printBDBstatus(*STDOUT);
  GeneOntol::printBDB(*STDOUT);
  GeneOntol::closeBDB();

  use GeneOntol;
  open(O,">go.dump") || die;
  $outh= *O;
  $GeneOntol::debug= 1;
  # GeneOntol::openBDB( '/bio/work/meow/server/.etc/jdata/fbobs/', "go.bdb", 'r');
  GeneOntol::openBDB( '/bio/work/meow/tmp/work/fbobs/', "go.bdb", 'r');
  GeneOntol::printBDBstatus($outh);
  GeneOntol::printBDB($outh);
  GeneOntol::closeBDB();
  close(O);

apr01 - revised to parse data with java gostore.zip, making fbobs/FBgo.acode
use from eugenes datagen

see also go2eugenes.pl - recode FBgo.acode database gene ids to eugenes ids
-- copy into this package?

=cut

package GeneOntol;
## from flybase::cvread;

my $macos= 0;

use Acodes;
use BerkeleyDB;
# unless ($macos) { eval( "use BerkeleyDB;" ); }

# Package-wide settings and lookup tables.  They are package globals
# (no 'my') because the subs below read them by bare name.
BEGIN {
  # Kinds of parts found on an ontology term line (see nextPart/addone).
  $kComponent = 1;
  $kDerived= 2;
  $kSeries= 4;
  $kInstance = 5;
  $kToplevel= 6;
  $kComment = 30;
  $kSynonym = 31;
  $kAbbrev= $kSynonym;
  $kPossibleSynonym= 32;
  $kReference = 33;
  $kTermQuestionable= 34;
  $kGoID= 35;
  $kUnknownData= 99;

  $kDontPack= 1;       # passed to addField for the GN gene-link fields
  $GOkey= 'GOR';       # record key given to $acode->newRecord
  $notree= 0;
  $useGenelinks= 1;    # putGeneLink also fills the in-memory %genelinks

  # ontology section name => one-letter section code
  %gosections = (
    biological_process => 'P',
    molecular_function => 'F',
    cellular_component => 'C',
  );

  # database tag used in gene_association files => name used for the db id prefix
  %dbnames = (
    FB => 'FlyBase', #? use FB: as alias for FlyBase: ?
    SGD => 'SGDID', #? SGD: as alias for SGDID: ?
    MGI => 'MGI',
    WB => 'WB', #?? what the heck is wormbase/acedb/wormpep db id/name?
    PMB => 'PomBase',
    # in current gene_association.wb, have db=WB, dbid=WP:CE12345
    # eugenes uses ACEDB:symbol, ACEPRED:symbol, acedb/wormbase use symbol as ID
  );
}

# getDefaults( $datapath, $outpath, $fbobs, $workpath )
#
# Build the standard key => value argument list consumed by readData,
# javaParser, acode2bdb, isNewData and checkstats.
#   $datapath - prefix of the GO data tree; file names are appended
#               directly, so it should end with a path separator
#   $outpath  - prefix for the generated FBgo.acode / bdb files
#   $fbobs    - prefix of the existing FBgo.acode (key 'acode')
#   $workpath - parent of the temporary directory (key 'tmppath' is
#               "$workpath/tmp/")
# Returns a flat list; 'terms', 'docs' and 'genes' hold array refs, and
# 'genes' is itself an organism => file list.
sub getDefaults {
  my ($datapath, $outpath, $fbobs, $workpath )= @_;

  ## may02 - revise to new file structure and in .gz format
  ## ${datapath} now is bio-mirror/geneontology/ with these folders:
  #   defs/ gene-associations/ gp2protein/
  #   docs/ gobo/ ontology/

  my @args= (
    terms => [
      "${datapath}ontology/component.ontology",
      "${datapath}ontology/function.ontology",
      "${datapath}ontology/process.ontology",
    ],
    docs => [
      "${datapath}defs/GO.bib",
      "${datapath}defs/GO.defs",
    ],
    ## docs/defs also has (may02)
    #   word_dictionary = !Gene Ontology dictionary of words
    #   xrf_abbs= !Abbreviations for cross-referenced databases.
    #   ^^ use these, e.g. has flybase url, others and 'standard' dbid
    genes => [
      # fly => "${datapath}/../flybase/gene_association.fb", # apr01 - change to use flybase's version
      fly => "${datapath}gene-associations/gene_association.fb", # may02 - back to GO source of this
      mouse => "${datapath}gene-associations/gene_association.mgi",
      yeast => "${datapath}gene-associations/gene_association.sgd",
      weed => "${datapath}gene-associations/gene_association.tair",
      worm => "${datapath}gene-associations/gene_association.wb", # added jul01
      pombe => "${datapath}gene-associations/gene_association.pombase",# added nov01
      man => "${datapath}locuslink/gene_association.ll", # added nov01
    ],
    acode => "${fbobs}FBgo.acode",
    outacode => "${outpath}FBgo.acode",
    outfile => "${outpath}FBgo-new.acode", # not used?
    # treefile => "${outpath}FBgotree.acode",
    bdbfile => "${outpath}go-go.bdb", ## urk! these are really go-did.bdb, go-go.bdb
    datapath => "${datapath}",
    outpath => "${outpath}",
    fbobs => "${fbobs}",
    tmppath => "${workpath}/tmp/",
  );
  return @args;
}

## as of may02, these are available assoc.s
# gene_association.compugen.Genbank.gz
# gene_association.compugen.Swissprot.gz
# gene_association.fb.gz
# gene_association.goa.gz -- ??
# gene_association.gramene_oryza.gz - rice
# gene_association.mgi.gz
# gene_association.pombase.gz
# gene_association.rgd.gz - rat
# gene_association.sgd.gz
# gene_association.tair.gz
# gene_association.tigr_ath.gz -- use instead of tair ?
# gene_association.tigr_gene_index.gz --?
# gene_association.tigr_vibrio.gz
# gene_association.wb.gz
## and
# /bio/biomir-pub/biomirror/geneontology
# oat% ls
# defs/ gene-associations/ gp2protein/ ontology/
# docs/ gobo/ locuslink/
#
# ./defs:
# GO.bib.gz GO.word_dictionary.gz GO.xrf_abbs_spec.gz
# GO.defs.gz GO.xrf_abbs.gz
#
# ./gobo/anatomy.ontology:
# Mouse_anatomy_by_time_xproduct.gz anatomy.tair.gz
# README anatomy_defs.fb.gz
# TAIR_ontology.defs.gz anatomy_defs.tair.gz
# anatomy.fb.gz
#
# ./gobo/mutation.ontology:
# mutationevents.defs.gz mutationevents.ontology.gz
#
# ./gobo/phenotype.ontology:
# phenotype.txt.gz
#
# ./gobo/sequence.ontology:
# sequence.defs.gz sequence.ontology.gz
#
# ./gobo/temporal.ontology:
# README temporal.tair.gz temporal_defs.tair.gz

# _gunzipToTmp( $file, $datapath, $tmppath )
# Private helper for isNewData.  When $file is missing but "$file.gz"
# exists, unpack it to the same relative location under $tmppath (the
# leading $datapath is replaced) and give the copy the .gz timestamp.
# Returns the unpacked path, or nothing when no unpacking was needed.
sub _gunzipToTmp {
  my ($file, $datapath, $tmppath)= @_;
  return if (-f $file || ! -f "$file.gz");

  my $t= $file;
  $t =~ s,^\Q$datapath\E,$tmppath,;   # \Q: the path is literal text, not a pattern
  my $td= $t;
  $td =~ s,/[^/]+$,/,;
  unless (-d $td) {
    # create missing parents too; stay best-effort like the old mkdir()
    require File::Path;
    eval { File::Path::mkpath($td, 0, 0777); };
    warn "GeneOntol: cannot create $td: $@" if ($@);
  }
  system("gunzip -c $file.gz > $t; touch -r $file.gz $t");
  return $t;
}

# isNewData( %goargs )
# Takes the key => value list from getDefaults.  Unpacks any .gz-only
# term, doc or gene-association file into $goargs{tmppath}, and checks
# whether any term or gene file is newer than $goargs{bdbfile}.
# Scalar context: returns the is-old flag.
# List context: returns ($isold, $datapath, \@goargs), where @goargs is
# the (possibly path-adjusted) argument list and $datapath is the tmp
# path when something was unpacked.
sub isNewData {
  my %goargs = @_; # convert into associative array

  ## test for .gz of files
  my $bdbfile= $goargs{bdbfile};
  my $tmppath= $goargs{tmppath};
  my $datapath= $goargs{datapath};
  my $isold= 0;
  my $update= 0;

  my @cvf= @{$goargs{terms}};
  foreach my $f (@cvf) {
    my $t= _gunzipToTmp($f, $datapath, $tmppath);
    if (defined $t) { $f= $t; $update= 1; }  ## $f aliases the @cvf element
    $isold |= isOldTarget($f, $bdbfile);
  }
  $goargs{terms}= \@cvf if ($update);

  my @defs= @{$goargs{docs}};
  foreach my $f (@defs) {
    my $t= _gunzipToTmp($f, $datapath, $tmppath);
    if (defined $t) { $f= $t; $update= 1; }
  }
  $goargs{docs}= \@defs if ($update);

  my %geneass= @{$goargs{genes}};
  foreach my $org (keys %geneass) {
    my $t= _gunzipToTmp($geneass{$org}, $datapath, $tmppath);
    if (defined $t) { $geneass{$org}= $t; $update= 1; }
    $isold |= isOldTarget($geneass{$org}, $bdbfile);
  }
  if ($update) {
    $datapath= $tmppath; ##?
    # 'genes' must stay an ARRAY ref: every consumer does @{$args{genes}}
    $goargs{genes}= [ %geneass ];
  }

  my @goargs= %goargs;
  return (wantarray) ? ($isold, $datapath, \@goargs) : $isold;
}

# isOldTarget( $source, $target )
# True when $target does not exist or $source was modified more
# recently than $target.  Reports the result on STDERR when $debug.
sub isOldTarget( $$) { ## a standard lib ...
  my ($source,$target) = @_;
  my $res= 0;
  if (! -f $target) {
    $res= 1;
  }
  else {
    my $targtime= -M $target; ## -M is file age in days.hrs before now
    $res= (-M $source) < $targtime;
  }
  print STDERR "isOldTarget: $target older than $source? " if $debug;
  print STDERR (($res) ? "yes\n" : "no\n") if $debug;
  return $res;
}

# checkstats( %args )
# Prints a comparison of the new acode file ($args{outacode}) with the
# previous one ($args{acode}): per-field line counts, and whether every
# GO id found in the term files also appears in the new acode file.
# Returns '' when nothing is missing, else a short error string.
sub checkstats {
  my %args = @_; # convert into associative array
  # cwd would be $workpath
  my @cvf= @{$args{terms}};
  my %geneass= @{$args{genes}};
  my $outacode= $args{outacode};
  my $origacode= $args{acode};
  my $err= '';

  print "GeneOntol::checkstats()\n";

  # check that outacode >= origacode ?
  #? count flds: SYM, in outacode, origacode ?
  my @newkeys= `egrep '^[A-z]' $outacode |sed 's/\|.*//' |sort |uniq -c`;
  my @oldkeys= `egrep '^[A-z]' $origacode |sed 's/\|.*//' |sort |uniq -c`;
  # each line is "count field"; turn into field => count
  my %newkeys= map { my ($n,$f)= split; ($f,$n); } @newkeys;
  my %oldkeys= map { my ($n,$f)= split; ($f,$n); } @oldkeys;

  print "Field Differences(new,old)\nField\tNew\tOld\n";
  foreach my $k (sort keys %newkeys) {
    if ($newkeys{$k} ne $oldkeys{$k}) {
      print "$k\t$newkeys{$k}\t$oldkeys{$k}\n";
      #? is it $err if new < old?
    }
  }
  foreach my $k (sort keys %oldkeys) {
    print "$k\t0\t$oldkeys{$k}\n" unless($newkeys{$k});
  }
  print "\n";

  # collect the distinct GO ids of the new acode file ...
  my %gonew= ();
  if (open(my $fh, '<', $outacode)) {
    while (<$fh>) {
      while (/(GO:\d+)/g) { $gonew{$1}++; }
    }
    close($fh);
  }
  else { warn "Cannot open '$outacode'"; }

  # ... and of the source term files
  my %gorig= ();
  foreach my $f (@cvf) {
    if (open(my $fh, '<', $f)) {
      while (<$fh>) {
        while (/(GO:\d+)/g) { $gorig{$1}++; }
      }
      close($fh);
    }
    else { warn "Cannot open '$f'"; }
  }

  if (scalar(keys %gonew) < scalar(keys %gorig)) {
    print "GO IDs missing in $outacode\n";
    $err .= "Missing IDs ; ";
    my $ni= 0;
    foreach my $id (keys %gorig) {
      unless($gonew{$id}) {
        print "$id " ;
        $ni++;
        print "\n" if (($ni %10) == 0);
      }
    }
  }
  else {
    print "All GO IDs are in $outacode\n" if $debug;
  }
  print "\n";

  # oat% grep 'GO:' {component,function,process}.ontology | sed -e 's/^.*GO:/GO:/' \
  #  -e 's/ .*$//' |sort | uniq | wc
  # 5995 5995 65945
  # ^^ this is bad, using perl got
  # Sum of GO: = 7648
  # oat% grep 'GO:' $ob/FBgo.acode | sed 's/.*GO:/GO:/' | sort | uniq |wc
  # 7680 7698 84622

  # check that number of terms in cvf == number of terms in outacode
  # >> hash of GO:nnn for cvf and outacode?
  # for geneass, count? FBgnxxx in each, compare to count in outacode ?

  return $err;
}

# readData( %args )
# Perl parser: reads the gene-association files, then the ontology term
# files, writing an acode library to $args{outfile} and filling the
# BerkeleyDB files named by $args{bdbfile}.  Dies if the acode output
# cannot be created; returns 0 otherwise.
sub readData {
  my %args = @_; # convert into associative array
  my @cvf= @{$args{terms}};
  my $outfile= $args{outfile};
  my $treefile= $args{treefile};
  my %geneass= @{$args{genes}};

  unless( openBDB( '', $args{bdbfile}, 'c') ) {
    warn "Cannot openBDB( $args{bdbfile} )\n";
  }

  if (-r $outfile) {
    unlink($outfile); ##? unlink symlink if it is such ?
    unlink("$outfile.idx");
  }
  $acode= Acodes->new();
  unless( $acode->create($outfile) ) { die "bad outfile: $outfile\n"; }

  if ($treefile) {
    $treecode= Acodes->new();
    unless( $treecode->create($treefile)) {
      $treecode= undef;
      warn "bad treefile: $treefile\n";
    }
  }

  $cvid= 1;
  $secid= 1;
  %genelinks= ();

  # gene links first: addone() attaches them to terms from %genelinks
  foreach my $org (sort keys %geneass) {
    my $cv= $geneass{$org};
    print STDERR "gene assoc. org=$org, file=$cv\n" if $debug;
    ## patch for $cv.gz -> make tmp/$cv
    if (open(my $fh, '<', $cv)) {
      $acode->putComment('organism: '.$org );
      $cv =~ s=.+[\/\:]==g;
      $acode->putComment('source: '.$cv );
      readassoc($fh) ; #unless($debug);
      close($fh);
    }
    else { warn "Cannot open '$cv'"; }
  }

  foreach my $cv (@cvf) {
    ## patch for $cv.gz -> make tmp/$cv
    if (open(my $fh, '<', $cv)) {
      print STDERR "term file=$cv\n" if $debug;
      $cv =~ s=.+[\/\:]==g;
      $acode->putComment('source: '.$cv );
      readcv($fh) ; #unless($debug);
      close($fh);
    }
    else { warn "Cannot open '$cv'"; }
  }

  $acode->close();
  if ($treecode) {
    dumpTree($treecode, $section, $secid);
    $treecode->close();
  }
  closeBDB();
  return 0;
}

# javaParser( %args )
# Runs the java GO parser (default class flybase.geneont.GOMain) in up
# to four steps: golinks, golink2acode, cv, countlinks.  A step is
# skipped when $args{<step>} matches /^no|-1/.  Optional keys:
# JAVA_HOME, CLASSPATH, jlib, japp, jflags, jars.
# Returns the first non-zero status from callJava, else 0.
sub javaParser {
  my %args = @_; # convert into associative array
  my @cvf= @{$args{terms}};
  my @docs= @{$args{docs}};
  my %geneass= @{$args{genes}};

  # $ENV{'JAVA_HOME'}= "/usr/java/" unless ($ENV{'JAVA_HOME'});
  # $ENV{'CLASSPATH'}= "$ENV{JAVA_HOME}/lib/classes.zip" unless ($ENV{'CLASSPATH'});
  # $ENV{'MO_JLIB_PATH'}= $jpath;

  my $javahome= $args{JAVA_HOME} || $ENV{JAVA_HOME};
  my $cp= $args{CLASSPATH} || $ENV{CLASSPATH};
  my $jlib= $args{jlib} || $ENV{MO_JLIB_PATH};
  my $japp= $args{japp} || 'flybase.geneont.GOMain';
  my $jflags= $args{jflags} || '-ms20m -mx150m'; # nov01 - 150m; ran out of mem w/ mx50m, 14may01 on countlinks
  my $jars= $args{jars} || "/bio/work/meow/new/gostore.zip:${jlib}/gostore.zip:${jlib}/pse.zip:${jlib}/djgl_3_1_0.jar:${jlib}/jgl3.1.0.jar";
  $cp = $jars . ":$cp";

  # callJava builds the java path from $ENV{JAVA_HOME}; make a JAVA_HOME
  # passed in %args effective for the duration of this call.
  local $ENV{JAVA_HOME} = $javahome if ($javahome);

  ## this makes .odb files - need to chdir to proper directory for that?
  ## or set param: obpath=$args{obpath}
  ## assume caller has done this: chdir($args{outpath});

  my ($parms, $err);

  # NOTE(review): /^no|-1/ reads as (^no) or (-1 anywhere); kept as found.
  unless($args{golinks} =~ /^no|-1/) {
    $parms = "kind=golinks update";
    foreach my $org (sort keys %geneass) { $parms .= " data=$geneass{$org}"; }
    foreach my $doc (@docs) { $parms .= " data=$doc"; }
    $err= callJava( $japp, $cp, $jflags, $parms);
    # makes tempfiles fbgo-links.odb , fbobs/FBgo.links.acode
    return $err if ($err);
  }

  unless($args{golink2acode} =~ /^no|-1/) {
    #set parms="debug kind=golink2acode update path=$dpath "
    $parms = "kind=golink2acode update path=$args{datapath}";
    $err= callJava( $japp, $cp, $jflags, $parms);
    return $err if ($err);
  }

  unless($args{cv} =~ /^no|-1/) {
    #set parms="debug kind=cv update path=$dpath data=component.ontology data=function.ontology data=process.ontology"
    $parms = "kind=cv update";
    foreach my $cv (@cvf) { $parms .= " data=$cv"; }
    $err= callJava( $japp, $cp, $jflags, $parms);
    # makes fbgo.odb, fbobs/FBgo.acode
    return $err if ($err);
  }

  #set parms="debug kind=countlinks update outpath=fbobnew "
  my $tmppath= $args{tmppath};
  my $madetmp= 0;
  unless (-d $tmppath) {
    print STDERR "mkdir $tmppath\n";
    mkdir($tmppath, 0777);
    $madetmp=1;
  }

  unless($args{countlinks} =~ /^no|-1/) {
    $parms = "kind=countlinks update path=$args{datapath} outpath=$tmppath"; #? needs to be different from $fbobs
    $err= callJava( $japp, $cp, $jflags, $parms);
    return $err if ($err);
  }

  # need to mv newpath/FBgo.acode* to $fbobs/
  if (-e "$tmppath/FBgo.acode") {
    if ($view||$debug) {
      print STDERR " system( /bin/mv $tmppath/FBgo.acode* $args{outpath} )\n";
    }
    system("/bin/mv $tmppath/FBgo.acode* $args{outpath}");
    rmdir($tmppath) if ($madetmp);
  }

  #? remove temporaries: fbgo-links.odb , fbobs/FBgo.links.acode
  return 0;
}

# callJava( $app, $classpath, $jflags, $args )
# Runs "$JAVA_HOME/bin/java $jflags $app [debug] $args" with CLASSPATH
# set to $classpath for the child only.  With $view set the command is
# printed but not run.  Returns the status from system() (0 = ok).
sub callJava {
  my( $app, $classpath, $jflags, $args)= @_;
  my $result= 0;
  my $jbin= "$ENV{JAVA_HOME}/bin/java";
  # local: CLASSPATH is put back (or removed again) when this sub returns
  local $ENV{'CLASSPATH'}= $classpath; ## set env or pass as -classpath $cp ?
  my $jdebug= ($debug) ? 'debug' : '';
  # $args = "env=SERVER_PATH=$SERVER_PATH " . $args; #?? or dump env to file and env=$envfile

  if ($view||$debug) {
    print STDERR "system( $jbin $jflags $app $jdebug $args )\n";
  }
  if (!$view) {
    $result= system("$jbin $jflags $app $jdebug $args");
  }
  warn "Error with callJava( $app, $args)" if ($result);
  return $result;
}

# acode2bdb( %args ) -- make go.bdb
# Reads the acode library $args{acode} record by record (records end
# with "# EOR\n") and stores each GO id/term/section and each gene link
# in the BerkeleyDB files named by $args{bdbfile}.
# Returns 0 on success, -1 when the database or acode file cannot be
# opened.
sub acode2bdb { # (@args) -- make go.bdb
  my %args = @_; # convert into associative array
  # my @cvf= @ {$args{terms}};
  # my $outfile= $args{outfile};
  # my $treefile= $args{treefile};
  # my %geneass= @ {$args{genes}};
  my $acode= $args{acode};

  unless( openBDB( '', $args{bdbfile}, 'c') ) {
    warn "Cannot openBDB( $args{bdbfile} )\n";
    return -1;
  }

  my $fh;
  unless (open($fh, '<', $acode)) {
    warn "open $acode: $!";
    closeBDB();   # do not leave the just-created databases open
    return -1;
  }

  local $/= "# EOR\n";   # one acode record per read; restored on return
  my $data= '';
  while ($data= <$fh>) {
    # processAcodeRec( $data );
    # SYM|heterotrimeric G-protein GTPase
    # LNK|GO:0003927
    # SEC|F
    my $goid= ($data =~ m/\nLNK\|(GO:\d+)/ ) ? $1 : undef;
    my $term= ($data =~ m/\nSYM\|([^\n]+)/ ) ? $1 : undef;
    unless(defined($goid) && defined($term)) {
      warn "acode2bdb - error in record: goid=$goid\nterm=$term\nrecord=\n$data\n";
      next;
    }
    my $gosection= ($data =~ m/\nSEC\|(\w)/ ) ? $1 : 'U'; #!! SEC is bad in apr01 data -- all 'O' !

    # secondary GO ids: the LNK field plus its "|..." continuation lines
    my @goid2= (); # in LNK| continue
    my $pat= "\nLNK\|";
    my $at= index( $data, $pat, 0);
    if ($at>=0) {
      my $e= $at + length($pat);
      $at= $e; ## skip fldtag|
      my $x;
      while (($x= index($data,"\n", $e)) > 0) {
        if (substr($data,$x+1,1) eq '|') { $e = $x+1; }
        else { $e= $x; last; }
      }
      my $val= substr($data, $at, $e-$at);
      while ($val =~ m/(GO:\d+)/g) {
        my $goid2= $1;
        push(@goid2, $goid2) unless($goid2 eq $goid);
      }
    }

    putGoDb( $goid, \@goid2, $term, $gosection);

    # GNF|FB:FBgn0004435
    # |G&agr;49B
    # |TAS
    # GNM|MGI:95785
    # |Gnb3
    # |ISS
    my ($kv, $dbid, $sym);
    my @d = split(/\n/,$data);
    for (my $i= 0; $i<$#d; $i++) {
      $kv= $d[$i];
      if ($kv =~ m/^GN\w+\|(\S+)/) {
        $dbid= $1;
        $i++;            # the symbol is on the following "|sym" line
        $sym= $d[$i];
        $sym =~ s/^\|//;
        putGeneLink( $goid, $dbid, $sym);
      }
    }
  }

  close($fh);
  closeBDB();
  return 0;
}

# changeToEugenesID( oldlib => .., newlib => .., yeastob => .., mouseob => ..,
#                    weedob => .., fixtairsym => .., wormob => .. )
# Copies every record of the acode library $oldlib into $newlib,
# rewriting the database ids on GN* lines to the ids returned by the
# per-organism objects (hasDID / hasAGID); lines with no match are
# copied unchanged.
sub changeToEugenesID {
  # my( $oldlib, $newlib, $yeastob, $mouseob, $weedob, $fixtairsym, $wormob ) = @_; ## acode libs, Meow dataclass obs
  my %parms= @_; ## switch to key => val parameter format
  my $oldlib= $parms{oldlib};
  my $newlib= $parms{newlib};
  my $yeastob= $parms{yeastob};
  my $mouseob= $parms{mouseob};
  my $weedob= $parms{weedob};
  my $fixtairsym= $parms{fixtairsym};
  my $wormob= $parms{wormob};

  my ($data, $i);
  my $oldfh= $oldlib->inlib;
  my $idtag= 'FBgo';

  local $/= "# EOR\n";   # record-at-a-time reads; restored on return
  while ( $data= <$oldfh> ) {
    my $id='';
    if ($data =~ /\nLID\|(\d+)/) { $id= $idtag.$1; }
    else { warn "No id for $data\n"; next; }

    # $newdata= changeToEugenesID( \$data);
    my ($db, $did, $egid, $tag, $changed, $newdat);
    my @dat= split(/\n/, $data);
    # stops at the first empty line, as the original loop did
    while (my $line= shift @dat) {
      if ($line =~ /^(GN\w*\|)(\w+):(\S+)/) {
        $tag= $1; $db= $2; $did= $3;
        if ($db eq 'SGD' && $yeastob) {
          $egid= $yeastob->hasDID($did);
          $line= $tag.$egid if ($egid);
        }
        elsif ($db eq 'MGI' && $mouseob) {
          $egid= $mouseob->hasDID('MGI:'.$did);
          $line= $tag.$egid if ($egid);
        }
        elsif (($db eq 'WB' || $db eq 'ACEDB') && $wormob) {
          ## use SYMBOL not $did here - $did is odd WP:CE... I don't have any table for
          ## dang, wormbase GO is using WB as db name, WP:CE20433 as gene id, not same as eugenes
          # $egid= $wormob->hasWPID('WP:'.$did); #???
          # $egid= $wormob->hasDID('ACEDB:'.$did) unless($egid);
          # my $sym= $dat[0]; # next line always?
          # $sym =~ s/^\|//; chomp($sym); #?
          $egid= $wormob->hasDID('ACEDB:'.$did);
          $egid= $wormob->hasDID('ACEPRED:'.$did) unless($egid);
          $line= $tag.$egid if ($egid);
        }
        elsif ($db eq 'TAIR' && $weedob) {
          ## dang, fix TAIR:TIGR_AT3g29420 -> At3g29420
          ## double dang, TAIR is getting more useless nov01- GO db id is now non-traceable TAIR:gene:2018308
          # GNA|TAIR:gene:2062859
          # |gene:2062859
          # |AT2G47180
          # |IEA
          my $dbtag= 'TAIR:'; # $weedob->sourcedb; ##'AGI:' in eugenes is better
          if ($did =~ /gene\:/) {
            if ($dat[0] =~ /^\|gene:/) { my $x= shift @dat; }
            my $sym= $dat[0]; ## usable as of nov01
            $sym =~ s/\|//;
            chomp($sym);
            $sym =~ s/^AT/At/i;
            $sym =~ s/(\d)G/$1g/i;
            $dat[0]= '|'.$sym;
            $did= $sym;
            $line= $tag . $weedob->sourcedb . $did if ($did); ## fallback - not finding egiud
            $fixtairsym= 0;
          }
          else { $did =~ s/TIGR_AT/At/; } ## obsolete !?

          $egid= $weedob->hasAGID($dbtag.$did);
          $egid= $weedob->hasDID($dbtag.$did) unless($egid);
          $line= $tag.$egid if ($egid);
          if ($fixtairsym) {
            $line .= "\n\|$did";
            # fix for missing symbols in go.gene_association.tair
            # GNA|TAIR:TIGR_AT5g58330
            # |IEA
          }
        }
        elsif ($db eq 'LL') {
          $line= $tag.'HUgn'.$did;
        }
        elsif ($db eq 'FB') {
          $line= $tag.$did;
        }
      }
      $newdat .= "$line\n" if ($line);
    }

    $newlib->addRecord( $id, $newdat);
    ++$i;

    if ($debug) {
      if (($i % 10) == 0) { print STDERR '.'; }
      if (($i % 500) == 0) { print STDERR " $i:$id\n"; }
      if ($changed && $i>1 && $i<10) { # $debug
        print STDERR "\nOld $id\n$data\n";
        print STDERR "\nNew $id\n$newdat\n";
      }
    }
  }
}

# readSome( %args )
# Like readData but writes no acode file: gene-association files are
# read into the link tables, and each parsed term is handed to
# $args{cvadder} (default \&addone) as ($lev, $cvid, \@parts).
sub readSome {
  my %args = @_; # convert into associative array
  my @cvf= @{$args{terms}};
  my $outfile= $args{outfile};
  my $treefile= $args{treefile};
  my %geneass= @{$args{genes}};
  my $cvadder= $args{cvadder} || \&addone;

  %genelinks= ();
  if (scalar(%geneass)) {
    foreach my $org (sort keys %geneass) {
      my $cv= $geneass{$org};
      print STDERR "gene assoc. org=$org, file=$cv\n" if $debug;
      if (open(my $fh, '<', $cv)) {
        # $acode->putComment('organism: '.$org );
        # $cv =~ s=.+[\/\:]==g; $acode->putComment('source: '.$cv );
        readassoc($fh) ; #unless($debug);
        close($fh);
      }
      else { warn "Cannot open '$cv'"; }
    }
  }

  if (scalar(@cvf) && $cvadder) {
    foreach my $cv (@cvf) {
      if (open(my $fh, '<', $cv)) {
        print STDERR "term file=$cv\n" if $debug;
        # $cv =~ s=.+[\/\:]==g; $acode->putComment('source: '.$cv );
        readcvTo($fh, $cvadder) ; # unless($debug);
        close($fh);
      }
      else { warn "Cannot open '$cv'"; }
    }
  }
}


## BerkeleyDB parts

# openBDB( $pathto, $afile, $openflag )
# Opens the two hash databases "<base>-did.bdb" (gene id => GO ids,
# duplicates allowed) and "<base>-go.bdb" (GO id => "section\tterm"),
# where <base> is $pathto . $afile with any "-did"/"-go" suffix and
# extension removed.  $openflag 'c' deletes and recreates both files,
# 'r' opens them read-only.  Returns true when both are open.
sub openBDB {
  # my $self= shift;
  my($pathto, $afile, $openflag)= @_;
  return unless($afile);
  $afile =~ s/(-did|-go)?\.\w*$//;
  $afile = $pathto . $afile;
  $didfile= $afile . '-did.bdb';
  $gofile = $afile . '-go.bdb';

  my $flags= 0;
  if ($openflag eq 'c') {
    unlink $gofile;
    unlink $didfile;
    $flags= BerkeleyDB::DB_CREATE;
  }
  elsif ($openflag eq 'r') {
    $flags= BerkeleyDB::DB_RDONLY;
  }

  # $idb = new BerkeleyDB::Btree( -Filename => $didfile, -Flags => $flags,
  #    -Compare => \&nidcompare)
  #  or die "Cannot open $idfile: [flags=$flags] $!\n" ;

  print STDERR "new BerkeleyDB::Hash( $didfile, $flags)\n" if $debug;
  $didb= undef;
  $didb = BerkeleyDB::Hash->new( -Filename => $didfile, -Flags => $flags,
      -Property => DB_DUP)
    or warn "Cannot open BerkeleyDB $didfile: [flags=$flags] $!\n" ;

  print STDERR "new BerkeleyDB::Hash( $gofile, $flags)\n" if $debug;
  $godb= undef;
  $godb = BerkeleyDB::Hash->new( -Filename => $gofile, -Flags => $flags)
    or warn "Cannot open BerkeleyDB $gofile: [flags=$flags] $!\n" ;

  return ($didb && $godb);
}

# True when both databases are open.
sub dbIsOpen {
  return ($didb && $godb);
}

# Close whichever of the two databases is open.
sub closeBDB {
  # my $self= shift;
  $err = $didb->db_close() if ($didb);
  undef $didb ;
  # untie %didh ;
  $err = $godb->db_close() if ($godb) ;
  undef $godb ;
  # untie %didh ;
}

# putGeneLink( $goid, $dbid, $sym )
# Records that gene $dbid (symbol $sym) is annotated with $goid: in
# %genelinks when $useGenelinks, and in the did database when open.
sub putGeneLink {
  my($goid,$dbid,$sym)= @_;
  if ($useGenelinks) { $genelinks{$goid} .= "$dbid\t$sym\n"; } # can be large list - 2+ MB files
  if ($didb) {
    $err = $didb->db_put($dbid, $goid);
  }
}

# putGoDb( $goid, \@goid2, $term, $seccode )
# Stores "$seccode\t$term" under the primary GO id and under every
# secondary id in @goid2.
sub putGoDb {
  my ($goid, $rgoid2, $term, $seccode) = @_;
  #? also go term kind? == C,F,P,
  if ($godb) {
    $err = $godb->db_put($goid, "$seccode\t$term");
    if ($rgoid2) {
      foreach my $g2 (@$rgoid2) {
        $err = $godb->db_put($g2, "$seccode\t$term");
      }
    }
  }
}

# getGoLinks( $did )
# Scalar context: the first GO id stored for gene id $did (undef if
# none).  List context: all GO ids stored for $did; as before, the
# first element is the db_get result even when that is undef.
sub getGoLinks {
  # my $self= shift;
  my( $did)= @_;
  my $goid= undef;
  $err = $didb->db_get($did, $goid); # is hash w/ dup links
  return $goid unless (wantarray);

  my @goid= ($goid);
  my ($k, $v)= ($did, undef);
  my $cursor = $didb->db_cursor() ;
  # DB_NEXT_DUP only works on a positioned cursor: DB_SET puts it on the
  # first record for $did (the one db_get returned), then walk the rest.
  if ($cursor->c_get($k, $v, BerkeleyDB::DB_SET()) == 0) {
    while ($cursor->c_get($k, $v, BerkeleyDB::DB_NEXT_DUP) == 0) {
      push( @goid, $v);
    }
  }
  $cursor->c_close();
  return @goid;
}

# getGoTerm( $goid )
# Scalar context: the term for $goid.  List context: ($term, $seccode).
sub getGoTerm {
  # my $self= shift;
  my( $goid)= @_;
  my $term= undef;
  my $secterm= undef;
  my $seccode;   # lexical: must not disturb the parser's global $seccode
  $err = $godb->db_get($goid, $secterm);
  ($seccode,$term)= split(/\t/,$secterm);
  return wantarray ? ($term,$seccode) : $term;
}

# getGoTerms( $did, $seccode )
# Returns one "term ; GO:id\n" line per GO term linked to gene $did,
# limited to section $seccode (C, F or P) when one is given.
sub getGoTerms {
  my( $did, $seccode)= @_;
  my ( $term,$secterm,$sec,$err);
  my $rec= '';
  my @ids= getGoLinks($did);
  # need separate GO categories
  foreach my $goid (@ids) {
    # ($term, $sec)= getGoTerm($goid);
    $err = $godb->db_get($goid, $secterm);
    unless ($err) {
      ($sec,$term)= split(/\t/,$secterm);
      $rec .= "$term ; $goid\n" unless ($seccode && ($sec ne $seccode));
    }
  }
  return $rec;
}

# getAllGoTerms( $did )
# Returns a section code => "term ; GO:id\n..." list for gene $did.
sub getAllGoTerms {
  my( $did)= @_;
  my ($seccode,$term,$secterm,$sec,$err);
  my %rec= ();
  my @ids= getGoLinks($did);
  # need separate GO categories
  foreach my $goid (@ids) {
    # ($term, $sec)= getGoTerm($goid);
    $err = $godb->db_get($goid, $secterm);
    unless ($err) {
      ($sec,$term)= split(/\t/,$secterm);
      $rec{$sec} .= "$term ; $goid\n";
    }
  }
  return %rec;
}

# sub nidcompare {
#   my ($nid1, $nid2) = @_ ; return ($nid1 <=> $nid2); ## cmp for string compare
# }

# printBDB( $fh )
# Dumps both databases to $fh: the last record of each, then every
# GO and DID record, then the two record counts.  Returns 0.
sub printBDB {
  # my $self= shift;
  my($fh)= @_;
  # openBDB('', $afile, 'r');
  my $totdid= 0;
  my $totgo= 0;
  my ($k, $v) = ("", "") ;

  my $cursor = $didb->db_cursor();
  $cursor->c_get($k, $v, BerkeleyDB::DB_LAST);
  print $fh "#DID Last rec: $k -> $v\n" ;
  $cursor->c_close();

  $cursor = $godb->db_cursor();
  $cursor->c_get($k, $v, BerkeleyDB::DB_LAST);
  print $fh "#GO Last rec: $k -> $v\n" ;
  $cursor->c_close();

  print $fh "# GO data\n";
  print $fh "# GOID\tTERM\n";
  $cursor = $godb->db_cursor();
  while ($cursor->c_get( $k, $v, BerkeleyDB::DB_NEXT) == 0) {
    print $fh "$k\t$v\n";
    $totgo++;
  }
  $cursor->c_close();

  print $fh "# DID data\n";
  print $fh "# DID\tGOID\n";
  $cursor = $didb->db_cursor();
  while ($cursor->c_get( $k, $v, BerkeleyDB::DB_NEXT) == 0) {
    print $fh "$k\t$v\n";
    $totdid++;
  }
  $cursor->c_close();

  print $fh "# GO total count: $totgo\n\n" ;
  print $fh "\n# DID total count: $totdid\n" ;
  return 0;
}

# printBDBstatus( $fh )
# Prints the db_stat() key/value pairs of both databases to $fh.
sub printBDBstatus {
  # my $self= shift;
  my $fh= shift;
  ## don't need -- db_stat works better
  my $ref;
  $ref= $godb->db_stat();
  if ($ref) {
    print $fh "\nGO db status\n";
    my %stath= %{$ref};
    foreach my $k (sort keys %stath) { print $fh "$k =\t $stath{$k}\n"; }
    print $fh "------------------------\n";
  }
  $ref= $didb->db_stat();
  if ($ref) {
    print $fh "\nDID db status\n";
    my %stath= %{$ref};
    foreach my $k (sort keys %stath) { print $fh "$k =\t $stath{$k}\n"; }
    print $fh "------------------------\n";
  }
}

#-------
# !version: $Revision: 1.201 $
# !date: $Date: 2000/11/23 09:15:24 $
# FB FBgn0004168 5-HT1A GO:0007198 FB:FBrf0055969 IDA P
# MGI MGI:108450 Adcy9 GO:0004016 96278831 TAS F adenylate cyclase 9
# SGD S0004660 AAC1 GO:0006854 SGD:gawaz_1990_aanio IDA P YMR056C
# db dbid sym BLANK goid re kind BLANK gokind BLANK ?gene_name?
## ! there are blank fields (2 tabs) in places above

# readassoc( $cvh )
# Reads a tab-separated gene_association file from handle $cvh and
# records a gene link (putGeneLink) for every data line.  Lines that
# start with '!' are header comments and go to checkComment.
sub readassoc {
  my($cvh)= @_;
  my ($db,$dbid,$sym,$goid,$ngo,$bl1,@xtras);
  while (<$cvh>) {
    chomp();
    # anchored: only a leading '!' marks a comment line
    if (/^\s*\!/) { checkComment( $_); }
    else {
      next unless(/\S/);
      # ($db,$dbid,$sym,$bl1,$goid,$ref,$rkind,$bl2,$gokind,$bl3,$xtras)= split(/\t/,$_,11);
      ($db,$dbid,$sym,$bl1,$goid,@xtras)= split(/\t/);
      $db = $dbnames{$db} || $db; # rename $db according to eugenes names?
      $dbid= "$db:$dbid" unless ($dbid =~ /:/);
      # $genelinks{$goid} .= "$dbid\t$sym\t$gokind\n";
      putGeneLink($goid, $dbid, $sym);
      print STDERR "$ngo) $goid=$dbid\t$sym\n" if ($debug && (++$ngo % 1000) == 1);
    }
  }
}

# sub addlinks {
#   my ($goid,$mainrec) = @_;
#   my ($vals,$val);
#   $vals= $genelinks{$goid};
#   return unless ($vals);
#   my @vals= split(/\n/,$vals);
#   foreach $val (@vals) {
#     # my ($dbid,$sym,$gokind)= split(/\t/);
#     $val =~ tr/\t/\n/;
#     $acode->addField( $mainrec, 'GN', $val);
#   }
# }

# readcv( $cvh )
# Reads an ontology term file from handle $cvh and adds every term to
# the acode output via addone.
sub readcv {
  my($cvh )= @_;
  # require "Acodes.pl"; ## or Acodes.pm
  readcvTo($cvh, \&addone);
}

# readcvTo( $cvh, $adder )
# Reads an ontology term file from handle $cvh.  For each term line the
# leading-whitespace width becomes the level, the line is split with
# nextPart, and $adder is called as $adder->($lev, $cvid, \@parts).
# Lines that start with '!' are header comments and go to checkComment.
sub readcvTo {
  my($cvh, $adder )= @_;
  # require "Acodes.pl"; ## or Acodes.pm
  my @parts;
  while (<$cvh>) {
    chomp();
    # anchored: a '!' inside a term line is a part for nextPart, not a comment line
    if (/^\s*\!/) { checkComment( $_); }
    else {
      next if ($skipSection);
      next unless(/\S/);
      if (s/^(\s+)//) { $lev= length($1); } else { $lev= 0; }
      # $lev= 0 unless($lev);
      @parts = nextPart($_, undef, 1);
      $adder->($lev, $cvid++, \@parts ) if (@parts);
      # addone( $lev, $cvid++, \@parts ) if (@parts);
      undef @parts;
    }
  }
  print STDERR "total records processed: $total\n\n" ;
}

# nextPart( $s, \@parts, $first )
# Splits one ontology term line into parts.  Each part is [ $kind, $val ]
# where $kind is one of the $k... codes chosen from the lead character
# (< % ~ $ ! ;) and, for ';' parts, from the value text.  Recurses on the
# remainder of the line; returns the accumulated list of part refs.
sub nextPart {
  my($s, $refparts, $first)= @_;
  my @parts;
  my ($c, $val);
  if ($refparts) { @parts= @$refparts; } else { @parts= (); }

  # if (1) {
  if ($first) { $s =~ s/^\s*([\<\%\~\$]?)//; $c= $1; } ## no $c match for some sections top level
  else { $s =~ s/^\s+([\<\%\~\$\!\;])//; $c= $1; }
  # the value runs up to the next " <lead char> " separator
  if ($s =~ m/\s+[\<\%\~\$\!\;]\s/) { $val= $`; $s= $& . $'; }
  else { $val= $s; $s= ''; }
  $val =~ s/^\s*(.+)\s*$/$1/;
  # } else {
  #   $s =~ s/^\s*([\<\%\~\$\!\;])?\s*([^\<\%\~\$\!\;]+)//;
  #   $c= $1; $val= $2;
  #   if ($s =~ /^;/ && $val =~ /\&\w+$/) {
  #     $val .= ';'; $s =~ s/^;//; ## dang greeks '&blob;'
  #     $s =~ s/([^\<\%\~\$\!\;]+)//;
  #     $val .= $1;
  #   }
  #   $val =~ s/\s*$//;
  # }

  return @parts unless($c || $val);

  my $kind= 0;
  if ($c eq '<') { $kind= $kComponent; }
  elsif ($c eq '%') { $kind= $kInstance; }
  elsif ($c eq '~') { $kind= $kDerived; }
  elsif ($c eq '$') { $kind= $kToplevel; }
  elsif ($c eq '!') {
    $kind= $kComment;
    ## print STDERR "\n>>> Comment misplaced? :$s\n" if ($npart == 0);
  }
  elsif ($c eq ';') {
    if ($val =~ /GO:(\S+)/ ) { $kind= $kGoID; } # $val= "GO$1";
    # elsif ($val =~ /FBrf:(\S+)/) { $kind= $kReference; $val= "FBrf$1"; }
    elsif ($val =~ /^(EC|TC):/) { $kind= $kReference; }
    elsif ($val =~ /^(EC|TC).(.+)/) { $kind= $kReference; $val= "$1:$2"; } # malformed data
    elsif ($val =~ /synonym:\s*(.+)/) { $kind= $kSynonym; $val= $1; }
    elsif ($val =~ /abbrev:\s*(.+)/) { $kind= $kAbbrev; $val= $1; } ##?
    elsif ($val =~ /maybe:\s*(.+)/) { $kind= $kPossibleSynonym; $val= $1; }
    elsif ($val =~ /comment:\s*(.+)/) {
      $kind= $kComment;
      $val= $1;
      $kind= $kTermQuestionable if ($val eq '?');
      #?? is comment always to end-of-line ? - can have any charset
      $val .= $s;
      $s= '';
    }
    else { $kind= $kUnknownData; }
  }

  if ($val) {
    my @part= ( $kind, $val );
    push(@parts, \@part);
  }
  if ($s =~ /\S/) { @parts= nextPart($s, \@parts, 0); }
  return @parts;
}

# addone( $lev, $cvid, \@parts )
# Default term handler: turns one parsed term (parts from nextPart, the
# first being the term itself) into an acode record, stores it in the
# GO database, attaches the gene links collected in %genelinks, and
# maintains the parent/synonym tables used by dumpTree.  A level-1 term
# starts a new section.
sub addone {
  my($lev, $cvid, $refparts)= @_;
  my @parts= @{$refparts};
  my $rp= shift @parts; ## pull primary term
  my @part= @{$rp};
  my $pkind= $part[0];
  my $term= $part[1];

  if ($lev == 1) {
    if ($treecode && $section) { dumpTree($treecode, $section, $secid); }
    $section= $term;
    $secid++;
    $seccode= $gosections{$section} || $section;
  }

  ## $fterm= new CVterm($term, $kind, $lev);
  $mainrec= $acode->newRecord($GOkey);
  $acode->addField( $mainrec, 'SYM', $term);
  my $needid= 1;
  my $goid= undef;
  my @goid= (); # for 2ndary ids

  #? use GOid instead of LID ? ## GOid should be $parts[1]
  $acode->addField( $mainrec, 'LID', $cvid);
  # $acode->addField( $mainrec, 'ID', $goid);
  # $acode->setId($cvid, $mainrec);
  ## $acode->addField( $mainrec, 'ID', sprintf("FBcv%07d",$cvid));
  $acode->addField( $mainrec, 'LEV', $lev);
  # $acode->addField( $mainrec, 'SEC', $section);
  # $acode->addField( $mainrec, 'SID', $secid); ##?

  print STDERR " $cvid=$term\n" if ($debug && ($cvid % 100) == 1);

  ## $parent= $cvsec->getTreeWalker()->get();
  ## $fterm->addParent($parent, $pkind);
  $syns{$term} .= "$cvid,";
  $idterm{$cvid} = $term;

  # the parent is the last term seen one level up
  if ($lev<1) { $pars{$cvid} = ""; @lastid= (); $parid= ''; $rootid= $cvid; }
  ## elsif ($lev < $lastlev) { $parid= $lastid[$lev-1]; $pars{$cvid} .= "$parid,"; }
  else { $parid= $lastid[$lev-1]; $pars{$cvid} .= "$parid," if ($parid); }
  # if ($lastlev<$lev) { $parid= $lastid; }
  # elsif ($lastlev>$lev) { $parid= $lastid[$lev]; }
  # elsif ($lastlev == $lev) { $parid= $parid; }
  ## $parid= $cvid; ##! bad if $lastlev > $lev

  if ($parid) {
    my $parterm= $idterm{$parid};
    $acode->addField( $mainrec, 'PAR', "$parterm ; $pkind") if ($parterm);
  }
  $lastid[$lev]= $cvid;
  $lastlev= $lev;

  my ($rpart, $kind, $val, $sid);
  foreach $rpart (@parts) {
    @part= @{$rpart};
    $kind= $part[0];
    $val= $part[1];

    if ($kind == $kInstance || $kind == $kComponent) {
      ## $fterm->addParent($val, $kind);
      $acode->addField( $mainrec, 'PAR', "$val ; $kind");
      $sid= $syns{$val};
      if ($sid) {
        $sid =~ s/,.*//;
        #### my $pid= $pars{$ids}; ## ?? need to do this after all terms collected !
        $pars{$cvid} .= "$sid," if ($sid);
      }
      ## addParentOfTerm( $term, $val, 1);
    }
    elsif ($kind == $kGoID) {
      ## $fterm->addLink($val);
      if ($needid) {
        # $goid= $val; #! val can have two IDs == GO:0003695, GO:0016017
        if ($val =~ /, /) { @goid= split(/, /,$val); $goid= shift @goid; }
        else { $goid= $val; @goid= (); }
        $acode->addField( $mainrec, 'ID', $goid);
        $acode->setId($goid, $mainrec);
        $needid= 0;
        if (scalar(@goid)>0) {
          foreach my $g2 (@goid) { $acode->addField( $mainrec, 'ID2', $g2); }
        }
      }
    }
    elsif ($kind == $kDerived) {
      ## $fterm->addParent($val, $kind);
      $acode->addField( $mainrec, 'PAR', "$val ; $kind");
    }
    elsif ($kind == $kSynonym || $kind == $kPossibleSynonym) {
      ## $fterm->addSynonym($val);
      $acode->addField( $mainrec, 'SYN', "$val");
      ## $cvdoc->addSynonym($val);
      $syns{$val} .= "$cvid,"; ## keep syns only per section !?
    }
    elsif ($kind == $kTermQuestionable) {
      ## $fterm->setQuestionable(1);
      $acode->addField( $mainrec, 'QUEST', $val);
    }
    elsif ($kind == $kReference) {
      ## $fterm->addLink($val);
      $acode->addField( $mainrec, 'LNK', $val);
    }
    elsif ($kind == $kComment) {
      ## $fterm->addComment($val);
      $acode->addField( $mainrec, 'CMT', $val);
    }
    else {
      print STDERR "Unknown data: $kind == $val\n";
    }
  }
  undef @parts;

  $acode->packFields($mainrec);
  putGoDb($goid, \@goid, $term, $seccode);

  #
  # addlinks($goid,$mainrec); ## dont pack these fields
  $val= $genelinks{$goid};
  if ($val) {
    $ngo++;
    print STDERR "link $goid => $val\n" if ($debug && ($ngo % 500) == 1);
    @parts= split(/\n/,$val);
    foreach $val (@parts) {
      $val =~ tr/\t/\n/;
      # my ($dbid,$sym,$gokind)= split(/\t/);
      $acode->addField( $mainrec, 'GN', $val, $kDontPack);
    }
    undef @parts;
  }

  ## write $fterm data to acode lib
  ## and tree info to ???
  $acode->putRec( $mainrec);
  $total++;
}

# checkComment( $line )
# Copies the "!version:" / "!date:" header values of a data file into
# the acode output as comments (when $acode is open).  Returns ''.
sub checkComment {
  local( $_)= @_;
  if (/^\!version: \$Revision: ([\d\.]+)/) {
    $version= $1;
    $acode->putComment('version: '.$version) if ($acode);
  }
  elsif (/^\!date: \$Date: ([\d\/\: ]+)/) {
    $date= $1;
    $acode->putComment('date: '.$date) if ($acode);
  }
  return '';
}

# dumpTree( $treecode, $secname, $secid )
# Writes one 'VSECR' record for the section just finished to the acode
# library $treecode: section name, id, root id, a PTE field listing
# parents (p:) and children (c:) of every term, and a SYN field listing
# term/synonym => ids.  Then clears the per-section tables.
sub dumpTree {
  my($treecode, $secname, $secid)= @_;
  my ($fld, $first, $kid, $par, $k,$v);
  # return if ($notree);

  ##? create %kids list from %pars ??
  ## write line as lid=p:pid,pid,pid c:kid,kid,kid ... ?
  my $secrec= undef;
  if (scalar(%pars) || scalar(%syns)) {
    $secrec= $treecode->newRecord('VSECR');
    $treecode->addField( $secrec, 'SEC', $secname); #? SYM
    $treecode->setId($secid,$secrec);
    $treecode->addField( $secrec, 'ID', sprintf("FBvs%07d",$secid));
    $treecode->addField( $secrec, 'ROOT', $rootid); ## sprintf("FBcv%07d",$rootid)
  }

  if (scalar(%pars)) {
    # invert child => parents into parent => children
    my %kids= ();
    my @p;
    while (($k,$v) = each(%pars)) {
      @p= split(/,/,$v);
      foreach (@p) { $kids{$_} .= "$k," if (/\d/); }
    }

    # fields are appended straight to the record's 'vec' list
    my @rec= @{$secrec->{'vec'}};
    $first= 1;
    while (($k,$par) = each(%pars)) {
      ## $treecode->addField( $secrec, 'PTE', "$k=$v");
      if ($first) { $fld= "PTE|$k="; $first= 0; }
      else { $fld= "|$k="; }
      $par =~ s/,$//;
      $fld .= "p:$par " if ($par);
      $kid= $kids{$k};
      $kid =~ s/,$//;
      $fld .= "c:$kid" if ($kid);
      push( @rec, $fld);
    }
    $secrec->{'vec'}= \@rec;
  }

  if (scalar(%syns)) {
    my @rec= @{$secrec->{'vec'}};
    $first= 1;
    while (($k,$v) = each(%syns)) {
      ## $treecode->addField( $secrec, 'SYN', "$k=$v");
      $v =~ s/,$//;
      if ($first) { $fld= "SYN|$k=$v"; $first= 0; }
      else { $fld= "|$k=$v"; }
      push( @rec, $fld);
    }
    $secrec->{'vec'}= \@rec;
  }

  $treecode->putRec( $secrec) if ($secrec);

  %pars= ();
  %syns= ();
  %idterm= ();
  $parid= '';
}

1;

__END__

association format
# 14 fields:
# DB,DB_Object_ID,DB_Object_Symbol,[NOT],GOid,DB:Reference(|DB:Reference),Evidence,
# With,Aspect,DB_Object_Name(|Name),DB_Object_Synonym(|Synonym),
# DB_Object_Type,taxon(|taxon),Date
#example
# SGD,S0000296,PHO3,,GO:0015888,SGD:8789|PMID:2676709,IMP,,P,,YBR092C,gene,taxon:4932,20001122

$datapath= "MacHome:bio:flybase:fbjava:goreport:data:";
# $datapath= "/c6/tmp/geneont/data/";
$outpath= $datapath;

==> component.ontology <==
!version: $Revision: 1.106 $
!date: $Date: 2000/11/17 23:51:54 $
!editors: Michael Ashburner (FlyBase), Midori Harris (SGD), Judith Blake (MGD)
$Gene_Ontology ; GO:0003673
$cellular_component ; GO:0005575
%membrane ; GO:0016020

==> function.ontology <==
!version: $Revision: 1.190 $
!date: $Date: 2000/11/17 00:24:28 $
!editors: Michael Ashburner (FlyBase), Midori Harris (SGD), Judith Blake (MGD)
$Gene_Ontology ; GO:0003673
$molecular_function ; GO:0003674
%nucleic acid binding ; GO:0003676
%DNA binding ; GO:0003677
%DNA helicase ; GO:0003678, GO:0003679 % helicase ; GO:0004386
%AT DNA binding ; GO:0003680
%bent DNA binding ; GO:0003681

==> process.ontology <==
!version: $Revision: 1.158 $
!date: $Date: 2000/11/23 11:13:56 $
!editors: Michael Ashburner (FlyBase), Midori Harris (SGD), Judith Blake (MGD)
$Gene_Ontology ; GO:0003673
$biological_process ; GO:0008150
%cell growth and maintenance ; GO:0008151
%metabolism ; GO:0008152
%catabolism ; GO:0009056
%macromolecule catabolism ; GO:0009057
%biosynthesis ; GO:0009058 ; synonym:anabolism

724 4487 39665 component.ontology
3389 25352 238868 function.ontology
28862 202030 1674164 gene_association.fb
14766 181916 1399609 gene_association.mgi
3074 22115 196970 gene_association.pombase
9545 75355 668173 gene_association.sgd
2673 17576 162004 process.ontology

==> gene_association.fb
!version: $Revision: 1.23 $
!date: $Date: 2000/10/05 13:50:22 $
FB FBgn0004168 5-HT1A GO:0007198 FB:FBrf0055969 IDA P
FB FBgn0004168 5-HT1A GO:0007208 FB:FBrf0055969 IDA P
FB FBgn0004168 5-HT1A GO:0007165 FB:FBrf0126705 ISS P
FB FBgn0004168 5-HT1A GO:0004930 FB:FBrf0126705 ISS F

==> gene_association.mgd
!version: $Revision: 1.10 $
!date: $Date: 2000/11/17 15:23:03 $
!
! from Mouse Genome Database (MGD) & Gene Expression Database (GXD)
!
MGI MGI:108450 Adcy9 GO:0004016 96278831 TAS F adenylate cyclase 9
MGI MGI:108450 Adcy9 GO:0005886 96278831 TAS C adenylate cyclase 9
MGI MGI:108450 Adcy9 GO:0006171 96278831 TAS P adenylate cyclase 9
db dbid sym goid ref kind gokind ?gene_name?

==> gene_association.sgd
!version: $Revision: 1.201 $
!date: $Date: 2000/11/23 09:15:24 $
SGD S0004660 AAC1 GO:0006854 SGD:gawaz_1990_aanio IDA P YMR056C
SGD S0004660 AAC1 GO:0005349 SGD:gawaz_1990_aanio IDA F YMR056C
SGD S0004660 AAC1 GO:0005743 SGD:gawaz_1990_aanio TAS C YMR056C
SGD S0000289 AAC3 GO:0006854 SGD:drgon_1991_aaixw IMP P YBR085W|ANC3
...

==== 14may01
.............new BerkeleyDB::Hash( /bio/work/meow/tmp/work//fbobs/go-did.bdb, 1)
new BerkeleyDB::Hash( /bio/work/meow/tmp/work//fbobs/go-go.bdb, 1)
finished
GeneOntol::checkstats()
Field Differences(new,old)
Field New Old
DE 438 408
GNA 4196
GNM 15382 15204
GNY 16768 16763
GOR 7635 7562
LEV 7635 7562
LID 7635 7562
LNK 7635 7562
NGNA 7635
NGNF 7635 7562
NGNM 7635 7562
NGNY 7635 7562
PAR 7634 7561
RF 77 74
SEC 7635 7562
SYM 7635 7562
SYN 555 551
All GO IDs are in /bio/work/meow/tmp/work//fbobs/FBgo.acode

## wormbase go table
db dbid sym goid ref kind gokind ?gene_name?
! Gene assignments for Caenorhabditis elegans, Wormbase.
! IEA annotations were generated by www.sanger.ac.uk.
! WB_Date: Sun Jul 1 00:59:25 PDT 2001
!
!version: $Revision: 1.2 $
!date: $Date: 2001/07/01 17:07:57 $
!
WB WP:CE20433 2L52.1 GO:0003700 PUBMED:11159333 IEA F 2L52.1|2L52.1
WB WP:CE20433 2L52.1 GO:0005634 PUBMED:11159333 IEA C 2L52.1|2L52.1
## jul01 - need fix for odd wormbase database:WP:id thing, see others

==> gene_association.compugen.Genbank <==
!version: $Revision: 1.1 $
!date: $Date: 2001/04/26 14:59:58 $
CGEN PrID397959 AAHCHITA GO:0006030 CGEN:ProdVersion 0.3.1 IEA P
CGEN PrID397959 AAHCHITA GO:0005615 CGEN:ProdVersion 0.3.1 IEA C
CGEN PrID397959 AAHCHITA GO:0004568 CGEN:ProdVersion 0.3.1 IEA F

==> gene_association.compugen.Swissprot <==
!version: $Revision: 1.1 $
!date: $Date: 2001/04/26 15:01:16 $
CGEN PrID517972 O00058 GO:0008152 CGEN:ProdVersion0.3.1 IEA P
CGEN PrID517972 O00058 GO:0016491 CGEN:ProdVersion0.3.1 IEA F
CGEN PrID152905 O00060 GO:0006955 CGEN:ProdVersion0.3.1 IEA P

==> gene_association.fb <==
!version: $Revision: 1.28 $
!date: $Date: 2001/06/01 15:20:26 $
FB FBgn0015567 &agr;-Adaptin GO:0005886 FB:FBrf0093110 IDA C
FB FBgn0015567 &agr;-Adaptin GO:0007269 FB:FBrf0108281 NAS P

!version: $Revision: 1.33 $
!date: $Date: 2001/06/26 12:44:22 $
!
! from Mouse Genome Database (MGD) & Gene Expression Database (GXD)
!
MGI MGI:108450 Adcy9 GO:0004016 MGI:80863 TAS F adenylate cyclase 9 MGI MGI:108450 Adcy9 GO:0005886 MGI:80863 TAS C adenylate cyclase 9 ==> gene_association.pombase <== !version: $Revision: 1.6 $ !data: $Date: 2001/05/01 00:15:22 $ ! ! from Valerie Wood (val@sanger.ac.uk) ! Pombase SPAC3A12.18 ZWF1 GO:0005975 SWP:O00091 ISS P ZWF1 SPAC9.01 Pombase SPAC3A12.18 ZWF1 GO:0008152 SWP:O00091 ISS P ZWF1 SPAC9.01 ==> gene_association.sgd <== !version: $Revision: 1.302 $ !date: $Date: 2001/06/12 08:15:26 $ SGD S0004660 AAC1 GO:0005471 SGD:12031 IDA F YMR056C SGD S0004660 AAC1 GO:0005743 SGD:12031 TAS C YMR056C SGD S0004660 AAC1 GO:0006854 SGD:12031 IDA P YMR056C ==> gene_association.tair <== !version: $Revision: 1.3 $ !date: $Date: 2001/04/29 17:36:03 $ ! ! from The Arabidopsis Information Resource (TAIR) ! www.arabidopsis.org ! TAIR TIGR_AT1g01050 GO:0016462 IEA F AT1g01050 TAIR TIGR_AT1g01080 GO:0003733 IEA F AT1g01080 ==> gene_association.wb <== ! Gene assignments for Caenorhabditis elegans, Wormbase. ! IEA annotations were generated by www.sanger.ac.uk. ! Processed for GO by Erich Schwarz . ! Processed for GO on root@mokelumne.caltech.edu account. ! Machine specs: Linux mokelumne 2.4.1-0.1.9 #1 Wed Feb 14 22:15:15 EST 2001 i686 unknown ! WB_Date: Sun Jul 1 00:59:25 PDT 2001 ! ! CVS Repository Version Information: !version: $Revision: 1.2 $ !date: $Date: 2001/07/01 17:07:57 $ ! WB WP:CE20433 2L52.1 GO:0003700 PUBMED:11159333 IEA F 2L52.1|2L52.1 WB WP:CE20433 2L52.1 GO:0005634 PUBMED:11159333 IEA C 2L52.1|2L52.1 === may02 -- java ODI persistence failure >> due to java memory limit too low (mx90 -> mx200 ok) Aats-met NAS.FB:FBgn0025966 anon-EST:Liang-2.15 ISS.FB:FBgn0034401 CG15100 ISS.FB:FBg... committing ldb store...171000COM.odi.FatalInternalException: An exception occurred during a commit. 
Stack trace at point of original failure: ***************************************** COM.odi.ObjectNotPersistentException: A persistent object was required in this context, but the object: "LL:17477 Mpg IEA^LLL:24561 Mpg IEA^LMGI:97073 Mpg IEA" was not persistent. cricket cmdline for go app set ej=$eg/.etc/jlib set rungo=($eg/.totop/java-local/bin/java \ -ms10M -mx90M \ -classpath $ej/flycvodi2.zip:$ej/gostore2.zip:$ej/gnu-regexp-1.1.3.jar:$ej/pse.zip:$ej/djgl_3_1_0.jar:$ej/jgl3.1.0.jar:$eg/.totop/bin-local/java/lib/classes.zip\ flybase.geneont.GOMain)