#!/usr/local/bin/perl # Meow.pm # make euGenes/Meow server data library parts from various sources # from flybase/fbdatagen.pl, nov'99, dgg # package main; ## tests # $Meow::debug= 1; # Meow::initEnv(); # Meow::usage(0); # Meow::perldoc(); # Meow::datagen(); # Meow::editDocs(); =head1 NAME Meow - manage euGenes Eukaryotic Genome information system data =head1 DESCRIPTION Manages biology databanks, including transport from Internet source and databank updates for euGenes. Originally euGenes was called MEOW, a prototype service developed for the model eukaryote organism workshop group. =head1 AUTHOR D.G. Gilbert, Nov. 1999, software@bio.indiana.edu =head1 SOURCE http://iubio.bio.indiana.edu/eugenes/ =head1 TODO Add version/date collected comments to data files. Archive old data before replacing w/ new. Checksum each record and add date changed for searches? (most don't have date updated) Start history file for each data, including fixed MEOW id? (now id is transient for several orgs) rewrite flybase data extraction in perl? Links: improve srs parsing of links to other dbs add seq accessions for linking to seq dbs? add reciprocal homolog info - if A is homolog of B, B should have homolog to A link. =head1 METHODS =cut package Meow; use Meow::Data; use Meow::Doc; ##? or do some auto-loading of these case we add more? use Meow::Fish; use Meow::Fly; use Meow::Man; use Meow::Mouse; use Meow::Mosquito; use Meow::Weed; use Meow::Worm; use Meow::Yeast; use GeneOntol; #? use BioMirror::SRS; use POSIX; use flybase::Datalib; BEGIN { $VERSION = "1.0"; @perlmods= qw( Meow.pm Data.pm Fish.pm Fly.pm Man.pm Mouse.pm Weed.pm Worm.pm Yeast.pm Acodes.pm ); # $debug= 0; $debug= $main::debug if ($main::debug); $dorun= 0; $view= 0; ## show commands w/o doing them (0 by default) $force= 0; ## force make, otherwise check file dates $dotime= 0; ##? show runtime $dohelp = 0; $dodoc= 0; $doliveupdate= 0; # mainly this flags movedata to do it not print it ## hash or array (ordered) ? %cmdset = ( datalib => 'make data libs specified by name or all', makeids => 'make id db for current data', printids => 'list id db for current data', headlines => 'make headline files', countkeys => 'count data lib keys', fieldhelp => 'make field help docs', editdocs => 'edit & update html pages', eugenes => 'work on EUGENES_SERVER instead of MEOW_SERVER', # make default meows => 'work on MEOW_SERVER instead of EUGENES_SERVER', # drop /meow/ path set makexml => 'make XML dump of data libs', srsupdate => 'do SRS check/update', movedata => 'move new data to library location', updateftp => 'copy/update data to FTP archive location', goread => 'read gene ontology data', fixgotair => 'one time fix go tair botch', ); # perldoc => 'print perl module docs', unshift(@INC, $ENV{BIOMIRROR_HOME}) if ($ENV{BIOMIRROR_HOME}); ## some extra tools %fileDisposition= (); ## can we assume path to this module is in @INC ? # $appdir= $ENV{'MO_APP_PATH'}; # if (! $appdir) { $appdir= "$ENV{MEOW_SERVER}/tools/perls/"; } # unshift(@INC, $appdir) if ( -d $appdir); eval { require "Meow/local-env.pm"; }; warn $@ if ($@); } sub Version { $VERSION; } ## === get Meow::Data classes sub getDataclass( $) { my $classname= shift; my %hash= datahash(); my $classob= $hash{$classname}; if (!defined $classob) { ## try caseless foreach (keys %hash) { if (/$classname/i) { return $hash{$_}; } } } return $classob; } sub datakeys() { my %hash= datahash(); return (sort keys %hash); } sub datalist() { if (!defined @datalist) { @datalist = Meow::Data::elements(); } return @datalist; } sub datahash() { if (!defined %datahash) { my @list= datalist(); foreach $obj (@list) { my $classname= ref($obj); $datahash{$classname}= $obj; } } return %datahash; } sub getDataFromTag( $) { my $name = uc(shift); my @list= datalist(); foreach $obj (@list) { my $tag= uc($obj->tag()); return $obj if ($tag eq $name); } return undef; } #---- sub doclist() { if (!defined @doclist) { @doclist = Meow::Doc::elements(); } return @doclist; } #---- =head1 Meow::datagen() Generate and update Meow data from sources. =cut sub datagen { %dbclasses= (); %cmds= (); $tstart= $tat= (times)[0]; usage($dodoc) if ($#main::ARGV < 0); readArgs(); %dbclasses= datahash() unless scalar(%dbclasses); initEnv(); usage($dodoc) if ($dohelp || $dodoc ); if ( $cmds{perldoc} ) { perldoc(); } if ($dorun) { $cmds{datalib}= 1; $cmds{headlines}= 1; $cmds{countkeys}= 1; $cmds{editdocs}= 1; # $cmds{fieldhelp}= 1; ## not ready # $cmds{makexml}= 1; # makeids $cmds{srsupdate}= 1; $cmds{movedata}= 1; # $cmds{updateftp}= 1; } libToWorkLink(0); ## remove any symlinks if ( $cmds{goread} ) { makeGeneOntoLib(); } if ( $cmds{fixgotair} ) { fixGOTAIR(); } if ( $cmds{srsupdate} ) { $dosrsup= 1; } if ( $cmds{datalib} ) { makeDataLib(\%dbclasses); $cmds{movedata}= 1; } if ( $cmds{makeids} ) { makeIdDb(\%dbclasses); $cmds{movedata}= 1; } if ( $cmds{printids} ) { printIdDb(\%dbclasses); } if ( $cmds{headlines} ) { makeDataHeadlines(\%dbclasses); $cmds{movedata}= 1; } if ( $cmds{countkeys} ) { countDataFieldKeys(\%dbclasses); } if ( $cmds{fieldhelp} ) { makeFieldhelp(\%dbclasses); $cmds{movedata}= 1; } if ( $cmds{editdocs} ) { editDocs(); $cmds{movedata}= 1; } if ( $cmds{makexml} ) { makeXml(\%dbclasses); $cmds{movedata}= 1; } libToWorkLink(0); ## remove any symlinks if ( $cmds{movedata} ) { moveNewData(); } ##? add call to meow/srs/etc/srscheck ? if ( $cmds{updateftp} ) { updateFtpData(); } &timeit($tstart); print STDERR "finished\n"; } ## ## subs ## sub readArgs { startSub("readArgs"); print STDERR "ARGV= ( @ARGV )\n"; #? %cmds= (); require Getopt::Long; ## Getopt::Long::config( qw( autoabbrev ignorecase permute ) ); $Getopt::Long::autoabbrev= 1; $Getopt::Long::ignorecase= 1; $Getopt::Long::permute= $Getopt::Long::PERMUTE; @optlist= ( 'env=s@' => \@argenv, 'debug!' => \$debug, 'force!' => \$force, 'help!' => \$dohelp, 'doc!' => \$dodoc, 'view!' => \$view, 'run!' => \$dorun, 'liveupdate!' => \$doliveupdate, 'time!' => \$dotime, ); foreach (keys %cmdset) { push( @optlist, ($_, \$cmds{$_}) ); } $optokay= Getopt::Long::GetOptions( @optlist); usage($dodoc) if ($dohelp || $dodoc || !$optokay); foreach (@argenv) { my ($ekey,$eval)= split(/=/,$_,2); $ENV{$ekey}= $eval; } my @dbs= @main::ARGV; ## whatever remains is databanks? while (@dbs) { $_= lc( shift(@dbs) ); if ($_ eq 'all') { %dbclasses= datahash(); last; } else { my $obj= getDataclass($_); if ($obj) { $dbclasses{$_}= $obj; } } } } sub usage { local($dodetails) = @_; $cmdhelp=''; foreach (sort keys %cmdset) { $cmdhelp .= "\t -$_ \t== $cmdset{$_}\n"; } my @dbs= datakeys(); for my $i (0..$#dbs) { $dbs[$i] =~ s/^\w+:://; } print <sourcedata(); print join(" :\t", $klass, $obj->name(), $obj->makeflags(), $obj->targetdata(), $obj->sourcedb(), join(', ', @$data) ) ."\n"; } print < $eval\n" if ($debug); } return $eval; } sub initEnv { startSub("initEnv"); $ENV{'JAVA_HOME'}= "/usr/java/" unless ($ENV{'JAVA_HOME'}); $ENV{'CLASSPATH'}= "$ENV{JAVA_HOME}/lib/classes.zip" unless ($ENV{'CLASSPATH'}); # if ( $cmds{eugenes} && $ENV{'EUGENES_SERVER'}) { # $ENV{'MEOW_SERVER'}= $ENV{'EUGENES_SERVER'} # } # make eugenes default, add meows cmd ? or drop meow if ( $cmds{meows} || !$ENV{'EUGENES_SERVER'}) { $ENV{'EUGENES_SERVER'}= $ENV{'MEOW_SERVER'}; # = $ENV{'EUGENES_SERVER'}; } else { $ENV{'MEOW_SERVER'}= $ENV{'EUGENES_SERVER'}; } if ($ENV{'MEOW_SERVER'}) { $ENV{'SERVER_PATH'}= $SERVER_PATH= $ENV{'MEOW_SERVER'}; } else { $ENV{'SERVER_PATH'}= "/bio/meow-pub/meow/server/" unless ($ENV{'SERVER_PATH'}); $SERVER_PATH= $ENV{'SERVER_PATH'}; } $ENV{'EUGENES_FTP'}= 'ftp://iubio.bio.indiana.edu/eugenes/' unless($ENV{'EUGENES_FTP'}); $ENV{'FTP_SITE'}= $FTP_SITE= $ENV{'EUGENES_FTP'}; # if ($SERVER_PATH =~ /eugenes/) { $MAIN_TITLE= 'euGenes'; } # else { $MAIN_TITLE= 'MEOW Genes'; } $MAIN_TITLE= 'euGenes'; $fbobs= "$SERVER_PATH/.etc/jdata/fbobs/"; # $fbdata= "$SERVER_PATH/.data/"; $headlinedir= "$SERVER_PATH/.etc/jdata/headlines/"; ##? $fbreslist::headlinedir $SRSRoot= "$SERVER_PATH/.srs"; $workpath= $ENV{'MO_WORK_PATH'}; if (! $workpath) { $workpath= "$SERVER_PATH/../data-work/"; } if (! -d $workpath) { print STDERR "mkdir $workpath\n"; mkdir($workpath, 0777) || warn "mkdir $workpath: $!"; } $ENV{'MO_WORK_PATH'}= $workpath; if (! -d "$workpath/fbobs") { mkdir( "$workpath/fbobs", 0777); } $archivepath= $ENV{'MO_ARCHIVE_PATH'}; if (! $archivepath) { $archivepath= "$SERVER_PATH/../data-archive/"; } if (! -d $archivepath) { print STDERR "mkdir $archivepath\n"; mkdir($archivepath, 0777) || warn "mkdir $archivepath: $!"; } $ENV{'MO_ARCHIVE_PATH'}= $archivepath; $updatepath= $ENV{'MO_UPDATE_PATH'}; if (! $updatepath) { $updatepath= "$SERVER_PATH/../data-update/"; } if (! -d $updatepath) { warn "no $updatepath "; } $ENV{'MO_UPDATE_PATH'}= $updatepath; $gnomap= $ENV{'GNOMAP_DATA'} || "$workpath/genomes/"; $godata= $ENV{'GO_DATA'} || "$workpath/geneont/"; $ENV{'GO_DATA'}= $godata; $jpath= $ENV{'MO_JLIB_PATH'}; if (! -d $jpath) { $jpath= "$SERVER_PATH/.etc/jlib"; } if (! -d $jpath) { die "Can't locate java library, MO_JLIB_PATH "; } $ENV{'MO_JLIB_PATH'}= $jpath; $jbin= 'java'; ## jre ? $sysclasses = $ENV{'CLASSPATH'}; $jars= "$jpath/jgl3.1.0.jar"; $jflags= ''; $appdir= $ENV{'MO_APP_PATH'}; if (! $appdir) { $appdir= "$SERVER_PATH/tools/perls/"; } if (! -d $appdir) { warn "no app dir at $appdir "; $appdir= $ENV{'PWD'}; ## this can be bogus, peg to $SERVER_PATH ? } unshift(@INC,$appdir); ## before chdir, need to $appdir in @INC for requires chdir($workpath) || die "chdir $workpath: $!"; ## may not need, but do for safety ## &dumpEnv() if ($dorun && $debug); } sub callJava { local( $app, $classpath, $jflags, $args)= @_; $result= 0; $classpath .= ":$sysclasses"; ## or use jre ? $ENV{'CLASSPATH'}= $classpath; ## set env or pass as -classpath $cp ? $args = "env=SERVER_PATH=$SERVER_PATH " . $args; #?? or dump env to file and env=$envfile #? need to dump ENV to file and pass as param !? - for some calls, not all? ##&dumpEnv() if ($debug); my $jdebug= ($debug) ? 'debug' : ''; if ($view||$debug) { print STDERR "system( $jbin $jflags $app $jdebug $args )\n"; } if (!$view) { $result= system("$jbin $jflags $app $jdebug $args"); } return $result; } sub callSystem { local( $cmd)= @_; $result= 0; if ($view||$debug) { print STDERR "system( $cmd )\n"; } if (!$view) { $result= system("$cmd"); } return $result; } sub libToWorkLink { # startSub("libToWorkLink"); local($makelink, $taghash, $sufhash)= @_; $result= 0; ## for after datalib/$fbobs is updated, to update others chdir($workpath); ## ?? my $outpath= "$workpath/fbobs"; ##? want in $fbobs not $workpath ? my $sufs; if ($sufhash) { my %sh= %{$sufhash}; $sufs .= 'acode|' if ($sh{acode} > 0); $sufs .= 'count|' if ($sh{count} > 0); $sufs .= 'bdb|' if ($sh{bdb} > 0); $sufs =~ s/\|$//; $sufs = "\.($sufs)"; } else { $sufs= '\.(acode|count|bdb)'; } return $result if (!opendir( DIR, $fbobs)); my @ffiles= grep( /$sufs/, readdir(DIR)); ## ?? index| closedir(DIR); foreach $ff (@ffiles) { my ($tag,$suf)= split(/\./, $ff); next if ($taghash && ${$taghash}{$tag} < 0 ); # next if ($taghash && ${$taghash}{$tag} != 1 ); ## next if ($sufhash && $$sufhash{$suf} != 1 ); ## ? unlink -l if $makelink but not $sufhash ???? if ( $makelink && !(-e "$outpath/$ff")) { symlink( "$fbobs/$ff", "$outpath/$ff"); } if (!$makelink && -l "$outpath/$ff") { unlink( "$outpath/$ff"); } } return $result; } ## add feature table updates to ftp loc... sub updateFtpData { startSub("updateFtpData"); $result= 0; ## $archivepath= $ENV{'MO_ARCHIVE_PATH'}; my $srcpath= "$SERVER_PATH/"; my $featpath= "$workpath/genomes/"; ## also archive these folders - ?.etc/gnomap? .etc/jlib? # $SERVER_PATH/.srs/icarus/db/ ?-> srs/v5/, need v6/ # $SERVER_PATH/tools/data-perls/ # $SERVER_PATH/docs/field-help/ docs/? #? drop gziping of ind. files; add gtar -zcf of each org folder ? #?? add fbobs/FBgo.acode to export? fbgo.odb, fbgo.odt?? my ($trgpath, $orgpath, $cmd, $src, $trg); my @ffiles; # $orgpath = "$SERVER_PATH/.etc/jdata/fbobs"; # $trgpath = "$archivepath/"; # @ffiles= qw(FBgo.acode FBgo.acode.idx); # foreach my $ff (@ffiles) { # $src= "$orgpath/$ff"; # $trg= "$trgpath/$ff"; # next unless($force || isOldTarget("$src","$trg")); # $cmd= "/bin/cp -p $src $trg"; # $result = &callSystem($cmd); # } foreach my $klass (sort keys %dbclasses) { my $obj= $dbclasses{$klass}; my $tag= $obj->tag(); my $org= $obj->orgpath(); next unless($org); $trgpath = "$archivepath/$org"; unless (-d $trgpath) { print STDERR "mkdir $trgpath\n"; mkdir($trgpath, 0777); } $orgpath = "$SERVER_PATH/$org"; return "can't open $orgpath" if (!opendir( DIR, $orgpath)); @ffiles= grep( /^(acode|hgtable|refprot|refseq)/, readdir(DIR)); closedir(DIR); foreach my $ff (@ffiles) { ## copy if targetIsOld $src= "$orgpath/$ff"; $trg= "$trgpath/$tag.$ff"; ## add $tag. to file name? next unless($force || isOldTarget("$src","$trg")); $cmd= "/bin/cp -p $src $trg"; # $cmd .= "; gzip -c $trg > $trg.gz; touch -r $trg $trg.gz"; $result = &callSystem($cmd); } my $hasfeats= ($obj->csomes() ? 1 : 0); if ($hasfeats) { $orgpath = "$featpath/$org"; $trgpath = "$archivepath/$org/features/"; warn "can't open $orgpath" if (!opendir( DIR, $orgpath)); @ffiles= grep(/(features\-|idmap|dna\-)/, readdir(DIR)); closedir(DIR); foreach my $ff (@ffiles) { $src= "$orgpath/$ff"; $trg= "$trgpath/$ff"; next unless($force || isOldTarget("$src","$trg")); $cmd= "/bin/cp -p $src $trg"; # $cmd .= "; gzip -c $trg > $trg.gz; touch -r $trg $trg.gz"; $result = &callSystem($cmd); } } my $mainpath= $archivepath; $orgpath = "$org/"; $trg= "$archivepath/$org.tar.gz"; ## gtar needs full path for --file= if ($mainpath =~ s|/(eugenes.*)$||) { my $subp= $1; $orgpath = "$subp/$orgpath"; # $trg= "$subp/$trg"; } if ($force || isOldTarget("$mainpath/$orgpath","$trg")) { ## first archive any old $trg - with date !? if (-r $trg) { my @tm= localtime( $^T - 24*60*60*(-M $trg) ); my $date= POSIX::strftime("%Y-%m-%d",@tm); $cmd= "/bin/mv $trg $archivepath/old/$org-$date.tar.gz"; $result = &callSystem($cmd); } $cmd= "gtar --directory=$mainpath --gzip --exclude='*.gz' --create --file=$trg $orgpath"; $result = &callSystem($cmd); } } return $result; } sub disposeFiles { my ($file,$cmd)= @_; my ($tcmd); my @fs= `/bin/ls -1 $file`; foreach my $ff (@fs) { chomp($ff); ($tcmd = $cmd) =~ s/\$_/$ff/g; $fileDisposition{$ff}= $tcmd; } } sub moveNewData() { startSub("moveNewData"); ## need now/also to move newly made data into public locations .. ## !FIX for MEOW - move to server/organism/, strip $tag. file name &libToWorkLink(0); ## remove any symlinks print "#!/bin/sh"; if ($doliveupdate) { print "\n## --- These commands are being run ---\n"; } else { print "\n## --- Please run these commands if new data is okay ---\n"; } # # add commands to archive old data/list/count/idx files -- tar.gz to $archivepath # my ($cmd, $ff); foreach $ff (sort keys %fileDisposition) { $cmd= $fileDisposition{$ff}; if ($cmd) { # $_= $ff; $cmd= eval($cmd); ## this eval isn't what we want... $cmd= replaceVars($cmd); print "$cmd\n" if ($cmd); ## $result = &callSystem($cmd) if ($doliveupdate); } } my $outpath= "$workpath"; return $result if (!opendir( DIR, $outpath)); my @ffiles= grep( !/^\./, readdir(DIR)); closedir(DIR); foreach $ff (sort @ffiles) { next if ($ff =~ /\.keys$/); next if (-l $ff || -z $ff); next if ($fileDisposition{"$outpath/$ff"}); if ($ff =~ /fbsrs/) { $cmd= "/bin/mv $outpath/$ff $SERVER_PATH/.srs/icarus/db/"; } elsif ($ff =~ /refman.+html/) { $cmd= "/bin/mv $outpath/$ff $SERVER_PATH/docs/"; } elsif ($ff =~ /^fbgo\./) { $cmd= "/bin/mv $outpath/$ff $SERVER_PATH/.etc/jdata/"; } elsif ($ff =~ /\.dtd$/) { $cmd= "/bin/cp $outpath/$ff $archivepath/xml/"; $cmd .= "\n/bin/mv $outpath/$ff $fbobs/../"; } else { next; } print "$cmd\n" if ($cmd); ## leave to person till tested $result = &callSystem($cmd) if ($doliveupdate); } # what to do w/ fbobs/FBgo-orig.acode* -- need for flybase, others - public ftp? # drop fbobs/FBgo.links.acode # drop fbgo-links.odb, .odt $outpath= "$workpath/fbobs"; return $result if (!opendir( DIR, $outpath)); @ffiles= grep( !/^\./, readdir(DIR)); closedir(DIR); foreach $ff (sort @ffiles) { my($tag)= $ff =~ m/^(\w+)\./; my $obj= getDataFromTag($tag); my $orgpath; $orgpath= $obj->orgpath() if ($obj); $orgpath= "Missing" unless($orgpath); $orgpath = "$SERVER_PATH/" . $orgpath; next if ($ff =~ /\.keys$|tmp$/); next if (-l $ff || -z $ff); next if ($fileDisposition{"$outpath/$ff"}); # elsif (-z $ff) { next; } ## don't move empty files! == error if (-d $ff) { $cmd= "echo ## what is in $ff ##"; } elsif ($ff =~ /\.acode(.*)/) { $cmd= "/bin/mv $outpath/$ff $orgpath/acode$1"; } elsif ($ff =~ /\.list(.*)/) { $cmd= "/bin/mv $outpath/$ff $orgpath/list$1"; } elsif ($ff =~ /\.count/) { $cmd= "/bin/mv $outpath/$ff $orgpath/count"; } elsif ($ff =~ /\.xml/) { $cmd= "/bin/mv $outpath/$ff $archivepath/xml/"; } else { $cmd= "/bin/mv $outpath/$ff $fbobs/"; } #? what is it print "$cmd\n" if ($cmd); ## leave to person till tested $result = &callSystem($cmd) if ($doliveupdate); } $outpath= "$workpath/docs"; return $result if (!opendir( DIR, $outpath)); @ffiles= grep( !/^\./, readdir(DIR)); closedir(DIR); foreach $ff (sort @ffiles) { my($tag)= $ff =~ m/^(\w+)\./; # my $obj= getDocFromTag($tag); ## FIX! if ($tag =~ /^main/) { $cmd= "/bin/mv $outpath/$ff $SERVER_PATH/index.html"; } else { $cmd= "/bin/mv $outpath/$ff $SERVER_PATH/$tag/index.html"; } print "$cmd\n" if ($cmd); ## leave to person till tested $result = &callSystem($cmd) if ($doliveupdate); } unless($ENV{'MEOW_TEST_SERVER'}) { ##$cmd= "./datagen -updateftp"; ## need $self-path - from $0 ? $cmd = "perl -I/bio/work/meow -MMeow -e 'Meow::datagen;' -- -updateftp"; $cmd .= " -debug" if ($debug); $cmd .= " -view" if ($view); print "$cmd\n"; } $result = &callSystem($cmd) if ($doliveupdate); srsupdate($dosrsup,$doliveupdate); print "\n## ------------------------------------\n"; } sub srsupdate { my ($forceit, $doliveupdate)= @_; if ($forceit || scalar(%dataMade)) { my $cmd= <isMadeFromData()); $tagset{$klass}= $dbclasses{$klass}->tag; } } else { return 0; } ## need to call srscheck on each lib due to dang srscheck bug (v5.x) foreach my $klass (sort keys %tagset) { my $tag= $tagset{$klass}; next unless ($dbclasses{$klass}->isMadeFromData()); $cmd .= "srscheck -l $tag -o meowup.$tag;\n"; $cmd .= "./meowup.$tag;\n"; } $cmd .= "srscheck -o meowup.all;\n"; ## do all after for links, doublecheck $cmd .= "./meowup.all;\n"; print "$cmd\n" if ($cmd); $result = &callSystem($cmd) if ($doliveupdate); } return $result; } sub countDataFieldKeys { startSub("countDataFieldKeys"); &libToWorkLink(1); ## need to count keys in all acode files even if not new - for doc generators local($refdclass)= @_; my %dclass= %$refdclass; $result= 0; my $dokeys= ! scalar(%dclass); if (!$dokeys) { foreach $klass (keys %dclass) { my $makeflags= $dclass{$klass}{makeflags}; $dokeys= 1 if (($makeflags & $Meow::Data::kFromprog) != 0); } } return $result unless ($dokeys); return $result if (!opendir( DIR, $fbobs)); my @ffiles= grep( /\.acode$/, readdir(DIR)); closedir(DIR); my $outpath= "$workpath/fbobs"; ##? want in $fbobs not $workpath ? foreach $ff (@ffiles) { next unless($force || isOldTarget("$fbobs/$ff","$outpath/$ff.keys")); my $cmd= "egrep '^[A-z]' $fbobs/$ff |sed 's/\|.*//' |sort |uniq -c > $outpath/$ff.keys"; $result = &callSystem($cmd); } return $result; } # egrep -h '^[A-z]' *.acode |sed 's/\|.*//' |sort |uniq -c > all.keys sub makeDataHeadlines { startSub("makeDataHeadlines"); local($refdclass)= @_; my %dclass= %$refdclass; $result= 0; require "fbreslist.pm"; &fbreslist::init(); libToWorkLink(1, undef, { acode => 1 } ); my $workobs= "$workpath/fbobs"; foreach my $klass (keys %dclass) { my $dataobj= $dclass{$klass}; my $tag= $dataobj->tag; my $obsfile= "$workobs/". $dataobj->targetdata; next if (! -f $obsfile); next unless($force || isOldTarget( $obsfile,"$headlinedir/$tag.list")); if ($view||$debug) { print STDERR "fbreslist::makeHeadlines( $workobs, $workobs, $tag)\n"; } if (!$view) { ## $result= &fbreslist::makeHeadlines( $workobs, $workobs, $tag); my $name= $dataobj->name(); my $headlist= "$workobs/$tag.list"; my $countfile= "$workobs/$tag.count"; print STDERR "makeHeadlines $obsfile --> $headlist\n" if $debug; unlink $headlist if ( -f $headlist); $result = &callSystem("grep '^RETE' $obsfile > $headlist"); my ($count) = `wc $headlist` =~ /^\s*(\d+)/; $result = &callSystem("echo $count > $countfile") if (!$result); $result = &callSystem("echo 'DATANAME|$name' >> $headlist") if (!$result); $result = &fbreslist::makeReslistIndex( $workobs, "$tag.list") if (!$result); return $result if testErr($result,0); } } return $result; } sub makeGeneOntoLib { startSub("makeGeneOntoLib"); ## apr01 - convert to make using gostore.zip java parser... &libToWorkLink(0); # remove any links to active data; also does chdir($workpath) my $workobs= "$workpath/fbobs/"; $GeneOntol::debug= $debug; print STDERR "\@ goargs= GeneOntol::getDefaults($godata, $fbobs, $fbobs, $workobs); \n" if ($view || $debug ); my @goargs= GeneOntol::getDefaults($godata, $fbobs, $fbobs, $workobs); print STDERR "GeneOntol::isNewData(\@goargs); \n" if ($view || $debug ); my $isnew; #$isnew= GeneOntol::isNewData( @goargs); ($isnew, $godata, $refgoargs) = GeneOntol::isNewData( @goargs); # may update data path if copied .gz from archive or remote @goargs= @$refgoargs; print STDERR "GeneOntol::javaParser(\@goargs);\n" if ($isnew & ($view || $debug )); if ($isnew && !$view) { $result= 0; @goargs= GeneOntol::getDefaults($godata, $workobs, $fbobs); my $noparse= 1; # debug test # # push (@goargs, golinks => 'no', golink2acode => 'no', cv => 'no'); # debug test # push (@goargs, golinks => 'no', golink2acode => 'yes', cv => 'no', countlinks => 'no'); # debug test if (0 && !$noparse) { $result= GeneOntol::javaParser( @goargs ); # make $workobs/FBgo.acode } if (0 && $result == 0) { @goargs= GeneOntol::getDefaults($godata, $workobs, $fbobs); $result= GeneOntol::checkstats(@goargs); warn $result if ($result); # $result= 1; ## test } # return $result; # debug golink2acode # changeGOtoEugenesIDs was here ! if (0 && $result == 0) { @goargs= GeneOntol::getDefaults($godata, $workobs, $workobs); # data in workobs/FBgo.acode GeneOntol::acode2bdb(@goargs); # make go.bdb #?? do this BEFORE changeGOtoEugenesIDs() so go-did.db has original database ids ? #.. org toAcode() methods look for orig. db id } if ($result == 0) { #! do go2eugenes here... need to save orig. FBgo.acode for flybase! changeGOtoEugenesIDs($workobs); } #? if result == 0, move $workobs/FBgo.acode* and go.bdb to public path? # drop fbobs/FBgo.links.acode # drop fbgo-links.odb, .odt # do some check statistics of old FBgo and new # $fbobs= "$SERVER_PATH/.etc/jdata/fbobs/"; # push(@tempfiles, ); # in $workobs, $workobs/.. disposeFiles("$workobs/FBgo-orig.acode*", '/bin/mv $_ $archivepath/'); disposeFiles("$workobs/FBgo.links.acode*", '/bin/rm $_'); disposeFiles("$workpath/fbgo-links.od?", '/bin/rm $_'); } return $result; } sub fixGOTAIR { startSub("fixGOTAIR"); ## apr01 - convert to make using gostore.zip java parser... &libToWorkLink(0); # remove any links to active data; also does chdir($workpath) my $workobs= "$workpath/fbobs/"; my $newname= $workobs . 'FBgo.acode'; #? do we always have one here? my $oldname= $workobs . 'FBgo-tairx.acode'; unless (-e $newname) { warn "changeGOtoEugenesIDs: missing $newname"; return; } rename($newname, $oldname); rename($newname.'.idx', $oldname.'.idx'); libToWorkLink(1, { FBgo => -1 }, undef ); unlink($newname) if (-l $newname); # don't need... my $weedob= Meow::getDataclass('weed'); $data= $workobs . $weedob->targetdata; $weedob->openIdDb($data, 'r'); # my $fixtairsym= 1; my $oldlib= flybase::Datalib->new(); my $oldfile= $oldlib->open($oldname); my $newlib= flybase::Datalib->new(); $newlib->setIdtag( 'FBgo'); # should get from fname my $newfile= $newlib->create($newname,1); GeneOntol::changeToEugenesID( $oldlib, $newlib, '', '', $weedob, 0, '' ); $newlib->close(); $oldlib->close(); $weedob->closeIdDb(); &libToWorkLink(0); } sub changeGOtoEugenesIDs { startSub("changeGOtoEugenesIDs"); # go2eugenes.pl my $workobs= shift; my $newname= $workobs . 'FBgo.acode'; #? do we always have one here? my $oldname= $workobs . 'FBgo-orig.acode'; if (-e $oldname && ! -e $newname) { } else { unless (-e $newname) { warn "changeGOtoEugenesIDs: missing $newname"; return; } rename($newname, $oldname); rename($newname.'.idx', $oldname.'.idx'); } libToWorkLink(1, { FBgo => -1 }, undef ); unlink($newname) if (-l $newname); # don't need... my ($data, $mouseob, $manob, $yeastob, $wormob, $weedob, $fixtairsym, $oldlib, $newlib); if (1) { $mouseob= Meow::getDataclass('mouse'); $data= $workobs . $mouseob->targetdata; # warn 'mouseob='.$mouseob->toString()."\n" if $debug; # warn 'mouse.targetdata='.$data."\n" if $debug; $mouseob->openIdDb($data, 'r'); } if (1) { $yeastob= Meow::getDataclass('yeast'); $data= $workobs . $yeastob->targetdata; $yeastob->openIdDb($data, 'r'); } if (1) { $wormob= Meow::getDataclass('worm'); $data= $workobs . $wormob->targetdata; $wormob->openIdDb($data, 'r'); } # $manob= Meow::getDataclass('man'); # $data= $workobs . $manob->targetdata; # $manob->openIdDb($data, 'r'); if (1) { $weedob= Meow::getDataclass('weed'); $data= $workobs . $weedob->targetdata; $weedob->openIdDb($data, 'r'); $fixtairsym= 1; } $oldlib= flybase::Datalib->new(); my $oldfile= $oldlib->open($oldname); $newlib= flybase::Datalib->new(); $newlib->setIdtag( 'FBgo'); # should get from fname my $newfile= $newlib->create($newname,1); ##old## GeneOntol::changeToEugenesID( $oldlib, $newlib, $yeastob, $mouseob, $weedob, $fixtairsym, $wormob ); GeneOntol::changeToEugenesID( oldlib => $oldlib, newlib => $newlib, yeastob => $yeastob, mousob => $mouseob, wormob => $wormob, weedob => $weedob, fixtairsym => $fixtairsym, ); $newlib->close(); $oldlib->close(); $mouseob->closeIdDb() if $mouseob; $yeastob->closeIdDb() if $yeastob; $weedob->closeIdDb() if $weedob; $wormob->closeIdDb() if $wormob; &libToWorkLink(0); } # sub makeGeneOntoLibOld { # startSub("makeGeneOntoLibOld"); # # $GeneOntol::debug= $debug; # print STDERR "\@ goargs= GeneOntol::getDefaults($godata, $fbobs); \n" if ($view || $debug ); # my @goargs= GeneOntol::getDefaults($godata, $fbobs); # print STDERR " GeneOntol::isNewData(\@goargs); \n" if ($view || $debug ); # my $isnew= GeneOntol::isNewData( @goargs); # print STDERR "GeneOntol::readData(\@goargs);\n" if ($isnew & ($view || $debug )); # if ($isnew && !$view) { # $result= GeneOntol::readData(@goargs); # } # return $result; # } sub makeDataLib { startSub("makeDataLib"); local($refdclass)= @_; my %dclass= %$refdclass; %dataMade= (); libToWorkLink(1, undef, { bdb => 1} ); my $workobs= "$workpath/fbobs/"; foreach my $klass (keys %dclass) { my $dataobj= $dclass{$klass}; next unless ($dataobj->isMadeFromData()); my $isold= $dataobj->isOldTarget( $force, $workobs, $fbobs); next unless($isold); print STDERR "$dataobj \->toMeow( $updatepath, $workobs)\n" if ($debug||$view); unless($view) { $result= $dataobj->toMeow( $updatepath, $workobs); } return $result if testErr($result,0); $dataMade{$klass}= $dataobj->tag; ## flag new data? } return $result; } sub makeIdDb { startSub("makeIdDb"); local($refdclass)= @_; my %dclass= %$refdclass; # %dataMade= (); libToWorkLink(1, undef, { acode => 1 } ); ##! need data links my $workobs= "$workpath/fbobs/"; foreach my $klass (keys %dclass) { my $dataobj= $dclass{$klass}; next unless ($dataobj->isMadeFromData() && $dataobj->needIdDb); ## my $isold= $dataobj->isOldTarget( $force, $workobs, $fbobs); ## next unless($isold); print STDERR "$dataobj \->makeIdDb( $workobs)\n" if ($debug||$view); unless($view) { $result= $dataobj->makeIdDb( $workobs); } ## return $result if testErr($result,0); ## $dataMade{$klass}= $dataobj->tag; ## flag new data? } return $result; } sub printIdDb { startSub("printIdDb"); local($refdclass)= @_; my %dclass= %$refdclass; libToWorkLink(1); ##! need data links my $workobs= "$workpath/fbobs/"; foreach my $klass (keys %dclass) { my $dataobj= $dclass{$klass}; next unless ($dataobj->isMadeFromData() && $dataobj->needIdDb); print STDERR "$dataobj \->printIdDb( $workobs)\n" if ($debug||$view); unless($view) { $result= $dataobj->printIdDb( $workobs); } } return $result; } sub editDocs { &libToWorkLink(1, undef, { acode => 1, count => 1 } ); my @docs= doclist(); foreach my $docob (@docs) { print STDERR "\$docob->edit($workpath/docs/)\n" if ($debug||$view); unless($view) { $result= $docob->edit("$workpath/fbobs/","$workpath/docs/"); } return $result if testErr($result,0); } return $result; } sub makeXml { startSub("makeXml"); local($refdclass)= @_; my %dclass= %$refdclass; warn("makeXml: Not working till meow.report jar can dump xml\n"); return 1; &libToWorkLink(1); $result= 0; my $app='meow.report.Report'; my $jflags="-ms20m -mx90m"; # was mx50m my $jars= "$jpath/pse.zip:$jpath/jgl3.1.0.jar"; my $cp = "$jpath/meow.jar:$jars"; ## need to do if force or if .xml date < .acode date my $args= "kind=acode/rewrite readall mime=text/xml"; $args .= " env=FB_NORETE=true env=URL_FBACODE_DTD=http://flybase.bio.indiana.edu/.etc/jdata/fbacode.dtd"; ## possible sections: ab, gn, rf, pp, tr, ti, tp, ??? ## update sections?: pfgn, pfab ?? ## skip these: ba, al, ? cv foreach my $klass (keys %dclass) { my $dataobj= $dclass{$klass}; my $tag= $dataobj->tag; my $makeflags= $dataobj->{makeflags}; next if (($makeflags & $Meow::Data::kDumpxml) == 0); ##! note here new .acode may be in $workpath or $fbobs ? next unless($force || isOldTarget("$workpath/fbobs/$tag.acode","$archivepath/xml/$tag.xml")); my $kargs= $args ." id=$tag"; $result= &callJava( $app, $cp, $jflags, $kargs); return $result if testErr($result,0); $xmlMade{$klass}= 1; ## flag new data? } return $result; } sub makeFieldhelp { ## also do makeDtd ? startSub("makeFieldhelp"); local($refdclass)= @_; my %dclass= %$refdclass; ## my $app='meow.report.Report'; ## my $cp = "$jpath/meow.jar:$jars"; &libToWorkLink(1); foreach $klass (keys %dclass) { my $dataobj= $dclass{$klass}; my $tag= $dataobj->tag; my $makeflags= $dataobj->{makeflags}; next if (($makeflags & $Meow::Data::kFromprog) == 0); my $srcprog= $dataobj->{srcprog}; $srcprog= &replaceVars($srcprog); my $out= $dataobj->{outfiles}->[0]; $out= &replaceVars($out); my $args= $dataobj->{makecmd} . " out=$out"; my $app= $dataobj->{japp}; my $cp = "$srcprog:$jars"; next unless($force || isOldTarget( $srcprog, $out)); $result= &callJava( $app, $cp, $jflags, $args); return $result if testErr($result,0); # $dataMade{$klass}= $tag; ## flag new data? } return $result; } sub getLinkOriginal($) { local($source) = @_; my $rsource= readlink($source); return $source unless ($rsource); if ($rsource =~ m/^\.\./) { my $at= rindex( $source,'/'); $rsource= substr($source,0,$at) . '/' . $rsource; } return $rsource; } sub isOldTarget( $$) { # usage: if (&isOldTarget( $sourcefile, $targetfile)) { blah; } local($source,$target) = @_; print STDERR "isOldTarget: $target older than $source? " if $debug; my $res= 0; ## check existing $target $target= getLinkOriginal($target) if ( -l $target ); if (! -f $target) { $res= 1; } else { my $targtime= -M $target; ## -M is file age in days.hrs before now ## if data is a directory if ( -d $source) { print STDERR "dir=$source\n" if $debug; return 0 if (!opendir( DIR, $source)); my @ffiles= grep( !/^\./, readdir(DIR)); closedir(DIR); foreach $ff (@ffiles) { if ((-M "$source/$ff") < $targtime) { $res= 1; last; } } } elsif ( $source =~ s|/([^/]*\*[^/]*)$||) { my $pat= $1; $pat =~ s|\*|.*|; print STDERR "dir=$source match=$pat\n" if $debug; return 0 if (!opendir( DIR, $source)); my @ffiles= grep( /$pat/, readdir(DIR)); closedir(DIR); foreach $ff (@ffiles) { if ((-M "$source/$ff") < $targtime) { $res= 1; last; } } } elsif ( -l $source ) { $source= getLinkOriginal($source); $res= (-M $source) < $targtime; } elsif ( -f $source ) { $res= (-M $source) < $targtime; } else { $res= 0; } } print STDERR (($res) ? "yes\n" : "no\n") if $debug; ## " = $res\n" if $debug; return $res; } 1; __END__