#!/usr/local/bin/perl
# go2eugenes.pl - recode FBgo.acode database gene ids to eugenes ids
# add to Meow.pm ?
#
# Reads the acode library $meowpath$acode record by record, rewrites the
# GN* cross-reference lines (SGD:, MGI:, FB: ids) and writes every record
# to a new library $newpath$acode.  Without -run the script only opens the
# files and prints a few test id lookups.

use strict;
use warnings;

use Getopt::Long ();    # readArgs() calls Getopt::Long::GetOptions by full name

use Meow;
use Meow::Data;
use Meow::Mouse;
use Meow::Yeast;
# use Acodes;
use flybase::Datalib;

my $fbobs    = 'fbobs';         # currently unused
my $acode    = 'FBgo.acode';
my $meowpath = 'fbobs/';
my $newpath  = 'fbobnew/';
my $idtag    = 'FBgo';

# Kept as package globals, exactly as in the original (which had no
# declarations at all); 'our' only makes them legal under 'use strict'.
our ($mouseob, $yeastob);                   # id databases, opened in main()
our ($debug, $dohelp, $view, $dorun);       # command line flags, see readArgs()

main();

# main - open the mouse and yeast id databases plus the old and new acode
# libraries, then either convert all records (-run) or print a few test
# lookups, and finally close everything again.
sub main {
    # $ENV{MO_WORK_PATH}=`pwd`; # for initEnv's chdir($workpath), def='/bio/work/meow/tmp/work/'
    Meow::initEnv();
    readArgs();

    $mouseob = Meow::getDataclass('mouse');
    my $data = $meowpath . $mouseob->targetdata;
    $mouseob->openIdDb($data, 'r');

    $yeastob = Meow::getDataclass('yeast');
    $data = $meowpath . $yeastob->targetdata;
    $yeastob->openIdDb($data, 'r');

    my $oldname = $meowpath . $acode;
    my $oldlib  = flybase::Datalib->new();
    my $oldfile = $oldlib->open($oldname);

    my $newname = $newpath . $acode;
    my $newlib  = flybase::Datalib->new();
    $newlib->setIdtag($idtag);    # should get from fname
    my $newfile = $newlib->create($newname, 1);

    if ($dorun) {
        processrecs($oldlib, $newlib);
    }
    else {
        print STDERR "opened $oldfile\n";
        print STDERR "created $newfile\n";

        # A few known ids as a smoke test of the id databases.
        my @probes = (
            [ 'yeast', $yeastob, 'S0003302' ],
            [ 'yeast', $yeastob, 'S0004363' ],
            [ 'mouse', $mouseob, 'MGI:1353510' ],
            [ 'mouse', $mouseob, 'MGI:1858961' ],
            [ 'mouse', $mouseob, 'MGI:95640' ],
        );
        for my $probe (@probes) {
            my ($label, $ob, $did) = @$probe;
            my $egid = $ob->hasDID($did);
            $egid = '' unless defined $egid;    # no 'uninitialized' warning on a miss
            print STDERR "$label id $did=$egid\n";
        }
    }

    $newlib->close();
    $oldlib->close();
    $mouseob->closeIdDb();
    $yeastob->closeIdDb();
    return;
}

# Sample of the lines processRec rewrites:
# GNM|MGI:1353510
# |Arhgef1
# GNY|SGD:S0000039
# |CDC24
# GNY|SGD:S0003302
# |ROM1
# GNY|SGD:S0004363
# |ROM2

# processRec - rewrite the gene id lines of one record.
#   $rdata : reference to the record text (not modified)
# Returns the record text with every line re-terminated by "\n".  Lines of
# the form 'GN<x>|<db>:<id>' are changed as follows:
#   SGD : replaced by tag + $yeastob->hasDID(id) when that lookup is true
#   MGI : replaced by tag + $mouseob->hasDID('MGI:'.id) when that is true
#   FB  : the 'FB:' prefix is dropped
# Lines whose id is not found, and all other lines, are copied unchanged.
sub processRec {
    my $rdata = shift;

    my $newdat = '';
    for my $line (split /\n/, $$rdata) {
        if ($line =~ /^(GN\w*\|)(\w+):(\w+)/) {
            my ($tag, $db, $did) = ($1, $2, $3);
            if ($db eq 'SGD') {
                my $egid = $yeastob->hasDID($did);
                $line = $tag . $egid if ($egid);
            }
            elsif ($db eq 'MGI') {
                # the mouse id database is keyed with the 'MGI:' prefix included
                my $egid = $mouseob->hasDID('MGI:' . $did);
                $line = $tag . $egid if ($egid);
            }
            elsif ($db eq 'FB') {
                #? drop the FB: of FB:FBgn...
                $line = $tag . $did;
            }
        }
        $newdat .= "$line\n";
    }
    return $newdat;
}

## this is bad;
# while ($$rdata =~ s/\nGN(M|Y)\|(\w+):(\w+)/GN$1|xxxx/g) {
#   $db= $2; $did= $3; $tag= "GN$1";
#   $egid= '';
#   if ($db eq 'SGD') {
#     $egid= $yeastob->hasDID($did);
#     }
#   elsif ($db eq 'MGI') {
#     $egid= $mouseob->hasDID('MGI:'.$did);
#     }
#   unless($egid) {
#     warn "Cannot find eugenes id for $db:$dbid\n";
#     }
#   else { $$rdata =~ s/$tag\|xxxx/$tag|$egid/; $changed= 1; }
#   }

# processrecs - copy every record of $oldlib into $newlib, recoding ids.
#   $oldlib : library object; its ->inlib handle is read in "# EOR\n" chunks
#   $newlib : library object; receives ->addRecord($id, $text) per record
# The record id is $idtag plus the digits of the record's 'LID|' line;
# records without such a line are reported with warn and skipped.
# Progress dots (and the current id every 100 records) go to STDERR.
sub processrecs {
    my ($oldlib, $newlib) = @_;

    my $oldfh = $oldlib->inlib;
    my $i     = 0;

    # Records end with "# EOR\n"; 'local' restores $/ however we leave the sub.
    local $/ = "# EOR\n";

    while (my $data = <$oldfh>) {
        my $id = '';
        if ($data =~ /\nLID\|(\d+)/) {
            $id = $idtag . $1;
        }
        else {
            warn "No id for $data\n";
            next;
        }

        my $newdata = processRec(\$data);
        $newlib->addRecord($id, $newdata);

        print STDERR '.';
        if ((++$i % 100) == 0) { print STDERR " $id\n"; }

        # The original tested a $changed flag that nothing ever set, so this
        # dump could never run.  Derive it from the text instead and, as the
        # original '# $debug' note suggests, only dump when -debug is given.
        my $changed = ($newdata ne $data);
        if ($debug && $changed && $i > 1 && $i < 10) {
            print STDERR "\nOld $id\n$data\n";
            print STDERR "\nNew $id\n$newdata\n";
        }
    }
    return;
}

# usage - print a short help text and exit.
# NOTE(review): the original heredoc text (and whatever followed it in this
# sub) was lost from the source; the message and the 'exit 1' below are a
# reconstruction - confirm against an intact copy of the script.
sub usage {
    my @dbs = Meow::datakeys();
    s/^\w+::// for @dbs;    # strip the package prefix from each key

    print <<"USAGE";
Usage: $0 [-debug] [-view] [-run] [-help]
  Recode gene ids in $meowpath$acode to eugenes ids, writing $newpath$acode.
  -run    process all records (without it, only open the files and
          print a few test id lookups)
  -debug  print old/new text of the first few changed records
  -view   accepted, but not used by this script
  -help   show this message
  Meow data classes: @dbs
USAGE
    exit 1;
}

# readArgs - parse the command line into the $debug, $dohelp, $view and
# $dorun flags (each negatable, e.g. -norun); calls usage() on -help or
# when option parsing fails.
# NOTE(review): the start of this sub was lost together with the usage
# heredoc; 'debug!' as the first option name is inferred from its \$debug
# target.
sub readArgs {
    my @optlist = (
        'debug!' => \$debug,
        'help!'  => \$dohelp,
        'view!'  => \$view,
        'run!'   => \$dorun,
    );
    my $optokay = Getopt::Long::GetOptions(@optlist);
    usage() if ($dohelp || !$optokay);
    return;
}