#!/usr/local/bin/perl # flyfeatures.pl # for meow & flybase ## flybase specific parts of genomefeat.pl ## for flybase data, add cytoloc map pos from fly.acode or cytodb, ... ## add other cytoloc mapped data (cytodb set) using cyto->base mapping of sorsa.txt ## ?! also pull features from flybase-data/seqs/args.seq (FBgn) [and/not? darts.seq (FBtr)] ## in genbank format, for given FBgn curated sequence - won't necessarily ## match celera seq or gadfly seq but can add relative to FBgn seqrange ## for display purposes package flyfeatures; use Exporter(); @ISA = qw(Exporter); @EXPORT = qw( flymain rewriteCytos chopCytos readFlyCytomap sortflybasemap readflysorsa makeflysorsa getflycsomerange cytosearchall catScaffolds2Arm getunmappedgenes ); ## not exported right # flydata flycsomes flyscaffolds flycytomap flysym ## don't need these ? # initFlydata readFlyFeatures readFlyCytomap2 # flycytofile flyCytomap2Bases catScaffolds2Arm use Getopt::Std; use POSIX; # some data accessors - export doesn't seem to work on data values sub getDatapath { return $flydata; } sub getCsomes { return @flycsomes; } sub getScaffold { return $flyscaffolds{$_[0]}; } sub getCytomap { return $flycytomap{$_[0]}; } sub getSym { return $flysym{$_[0]}; } sub getMap2Bases { return flyCytomap2Bases($_[0]); } #------- sub flymain { @orgs=( 'fly' ); $orgs= join(',',@orgs); $flycsomes= join(',', @flycsomes); %opt=(); Getopt::Std::getopts('ADSC:O:cjuwsx',\%opt); $usage= < "${datapath}features-X.tsv", '2L' => "${datapath}features-2L.tsv", '2R' => "${datapath}features-2R.tsv", '3L' => "${datapath}features-3L.tsv", '3R' => "${datapath}features-3R.tsv", '4' => "${datapath}features-4.tsv", ); } sub initFlydata { ## BEGIN/ initFlydata $debug= 1; ##$main::debug; # $macHome= 1; push(@ARGV,"-DS"); $macHome= $main::macHome; ## paths for iubio/kalo $flydata= '/c7/eugenes/genomew/fly/'; # '/c5/tmp/meow/work/genomes/fly/'; #was '/c3/tmp/fly/cytodb/' $flycyto= "${flydata}cyto/"; $org= 'fly'; ## on macHome if ($macHome) { $flydata= 'MacHome:bio:flybase:fbjava:fbgrmap2:csomes:fly:'; $flycyto= "${flydata}cyto:"; } $sorsa= "${flydata}sorsa.txt"; ## $cytourl= "http://cricket.bio.indiana.edu:7081/.bin/cytosearchpl.pl?mimetype=text%2Ftab-separated-value&sort_list=Yes&limit=2&max_dp=6&max_df=2&"; ## chop limit to 0 so stay w/in arms $cytourl= "http://cricket.bio.indiana.edu:7081/.bin/cytosearchpl.pl?mimetype=text%2Ftab-separated-value&sort_list=Yes&limit=0&max_dp=6&max_df=2&"; $kMissingValue= -999999999; setFlyFeatureFiles($flydata); %flycytofiles = ( 'X' => "${flycyto}cytogene-X.tsv", '2L' => "${flycyto}cytogene-2L.tsv", '2R' => "${flycyto}cytogene-2R.tsv", '3L' => "${flycyto}cytogene-3L.tsv", '3R' => "${flycyto}cytogene-3R.tsv", '4' => "${flycyto}cytogene-4.tsv", ## add other data classes ); @flycytoclass= qw( cytogene cytodef cytodup cytoinv cytotloc cytoabs cytoins cytowalk ); # cytogene cytodef cytodup cytoinv cytotloc cytoabs cytoins cytowalk # cytoclone @flycsomes= ( 'X', '2L', '2R', '3L', '3R', '4' ); ## @flycsomes= ( '3R' ); ## cytosearch search keys %flycytosearchkey = ( cytogene => 'Genes', cytodef => 'Deletions', cytodup => 'Duplications', cytoinv => '2-break_Inversions', cytotloc => '2-break_Translocations', cytoabs => 'All_other_aberrations', cytoins => 'Transposon_insertions', cytowalk => 'Molecular_maps', cytoclone => '_clones\tClones', ); %flycsomebands = ( 'X' => [ '1A1','20F4'], '2L' => ['21A1','40F7'], '2R' => ['41A1','60F5'], '3L' => ['61A1','80F9'], '3R' => ['81F1','100F5'], '4' => ['101F1','102F8'], ); %cytobases= (); # for weaving fly scaffolds into csome arm data %flyscaffolds = ( # csome X bounds #1A1 -132153 -98239 #20F4 22893594 22982331 small => { name => 'small', csome => 'X', cytob => '1A1', ## approx cytoe => '1A3', ## approx baseb => -132153, bsize => 84373, ## reversed => 0, }, AE002566 => { name => 'AE002566', csome => 'X', cytob => '1A5', ## approx cytoe => '9F6', ## approx baseb => -16161, ## basee => 10562104, ## != -16161+10564115 != 10547954 ## bsize => 10564115, reversed => 0, }, AE002593 => { name => 'AE002593', csome => 'X', cytob => '9F7', ## approx cytoe => '19A3', ## approx baseb => 10562104, bsize => 9080116, reversed => 0, }, AE002620 => { name => 'AE002620', csome => 'X', cytob => '19C2', ## approx cytoe => '19F5', ## approx baseb => 19863897, bsize => 1265848, reversed => 1, }, AE002629 => { name => 'AE002629', csome => 'X', cytob => '19F6', ## approx cytoe => '20B1', ## approx baseb => 21129585, bsize => 536527, reversed => 1, }, # csome 2L bounds # 21A1 -111914 -69002 # 40F7 22527176 22558827 AE002638 => { name => 'AE002638', csome => '2L', cytob => '21A3', ## approx cytoe => '25C1', ## approx baseb => -25008, bsize => 4861285, ## reversed => 1, }, AE002690 => { name => 'AE002690', csome => '2L', cytob => '25C1', ## approx cytoe => '39D3', ## approx baseb => 4842283, bsize => 16346801, ## reversed => 0, }, AE002725 => { name => 'AE002725', csome => '2L', cytob => '39D5', ## approx cytoe => '40C1', ## approx baseb => 21249452, bsize => 705960, ## reversed => 0, }, # csome 2R bounds # 41A1 0 131746 # 60F5 20239113 20290734 AE002575 => { name => 'AE002575', csome => '2R', cytob => '57B4', ## approx cytoe => '60F3', ## approx baseb => 15717117, bsize => 4515887, ## reversed => 0, }, AE002769 => { name => 'AE002769', csome => '2R', cytob => '41B1', ## approx cytoe => '42B2', ## approx baseb => 186416, bsize => 1361891, ## reversed => 1, }, AE002778 => { name => 'AE002778', csome => '2R', cytob => '42B3', ## approx cytoe => '42E2', ## approx baseb => 1544190, bsize => 509295, ## reversed => 0, }, AE002787 => { name => 'AE002787', csome => '2R', cytob => '42E2', ## approx cytoe => '57B4', ## approx baseb => 2034466, bsize => 13672780, ## reversed => 1, }, # csome 3L bounds # 61A1 4423 29373 # 80F9 23197312 23224828 AE002584 => { name => 'AE002584', csome => '3L', cytob => '61A1', ## approx cytoe => '64C12', ## approx baseb => 4423, bsize => 5077262, ## reversed => 0, }, AE002602 => { name => 'AE002602', csome => '3L', cytob => '64D1', ## approx cytoe => '77B4', ## approx baseb => 5051536, bsize => 15101630, ## reversed => 1, }, AE002647 => { name => 'AE002647', csome => '3L', cytob => '77B4', ## approx cytoe => '80D5', ## approx baseb => 20169323, bsize => 2692893, ## reversed => 0, }, # csome 3R bounds # 81F1 -347091 -302208 3R # 100F5 27721052 27768793 AE002681 => { name => 'AE002681', csome => '3R', cytob => '82A3', ## -> CG12581 base loc cytoe => '83E4', baseb => -173, basee => 2164513,## end of 83E4 -> CG1169 base loc bsize => 2159491, ##! basee - baseb = 2164686 reversed => 1, }, AE002699 => { name => 'AE002699', csome => '3R', cytob => '83E4', cytoe => '84E3', baseb => 2164513, ## this aligns to end of AE002681, but is small mismatch due to ## -> CG15189 base loc, last item in 83E4 # basee => 3524824, bsize => 1363555, ##! basee - baseb = 1360311 reversed => 1, }, AE002708 => { name => 'AE002708', csome => '3R', cytob => '84E4', cytoe => '100F5', baseb => 3524824, # basee => 27768793, bsize => 24259494, ##! basee - baseb = 24243969 reversed => 0, }, # csome 4 bounds # 101F1 0 174562 # 102F8 965416 1154095 AE002796 => { name => 'AE002796', csome => '4', cytob => '101F1', ## approx cytoe => '102E1', ## approx baseb => 0, bsize => 751204, ## reversed => 1, }, AE002804 => { name => 'AE002804', csome => '4', cytob => '102E1', ## approx cytoe => '102F8', ## approx baseb => 729374, bsize => 403964, ## reversed => 0, }, ); } ## initFlydata / BEGIN sub BEGIN { initFlydata(); } sub flycytofile { my($csome, $kind)= @_; $kind= 'cytogene' unless($kind); $cytofile= "${flycyto}$kind$csome.tsv"; return $cytofile; } sub rewriteCytos { my($csome)= @_; readflysorsa(); foreach my $kind (@flycytoclass) { print STDERR "rewriteCyto = $csome, $kind\n"; readFlyCytomap($csome, $kind); } } sub catScaffolds2Arm { my($csome)= @_; my $file= $flyfeaturefiles{$csome}; local(*F,*O,$n); print STDERR "catScaffolds2Arm to $file\n"; unless (scalar(%flyscaffolds)) { die "%flyscaffolds is MISSING! "; } ## test for scaff files ... foreach my $scaf (values %flyscaffolds) { if ($scaf->{csome} eq $csome) { my $sfile= "$main::mework/$org/features-" . $scaf->{name} . ".tsv"; die "Can't read $sfile" unless (-r $sfile); } } ## need to make source line for joint file !! from flycsomebands & sorsa getflycsomerange($csome); return "Can't write $file" unless (open(O,">$file")); print O &main::feattabheader("$org/features-$csome.tsv",$org,""); print O "source\t$org\tChr $csome\t$csomerange[0]..$csomerange[1]\t-\n"; foreach my $scaf (values %flyscaffolds) { if ($scaf->{csome} eq $csome) { my $sfile= "$main::mework/$org/features-" . $scaf->{name} . ".tsv"; print STDERR " features for Chr $csome from $sfile\n" if ($debug); die "Can't read $sfile" unless (open(F,"$sfile")); while () { print O $_ if(/^(\w|# Features for)/); } close(F); } } close(O); } ## remove cytoxxx but for cytogene for meow/eugenes sub chopCytos { my($csome)= @_; my $infile= $flyfeaturefiles{$csome}; my $outfile= $infile.".eugenes"; #? or inpath/eugenes/infile? my $cytopat; foreach my $kind (@flycytoclass) { $cytopat .= $kind.'|' unless($kind eq 'cytogene'); } chop($cytopat); ## last | local(*F,*O,$n); print STDERR "chopCytos to $outfile\n"; die "Can't read $file" unless (open(F,"$infile")); die "Can't write $file" unless (open(O,">$outfile")); while () { print O $_ unless(/^($cytopat)/); } close(F); close(O); } sub sortflybasemap { my($csome)= @_; %hashmap= (); local(*F,*O,$n); my $file= $flyfeaturefiles{$csome} . '.sorted'; return "Can't write $file" unless (open(O,">$file")); $sourceline= ''; print &readFlyFeatures($csome,'hashmap', *O); print &readFlyCytomap2($csome, &flycytofile($csome,'cytogene').'.unmapped', 'hashmap');#sorthashmap ## add in other cytos, ! don't add range field - let gnomap do better job foreach my $kind (@flycytoclass) { next if ($kind eq 'cytogene'); print &readFlyCytomap2($csome, &flycytofile($csome,$kind), 'hashmap'); #sorthashmap } print O $sourceline if ($sourceline); foreach (sort {$a <=> $b} keys %hashmap) { my $val= $hashmap{$_}; print O $val; $n += ($val =~ tr/\n\r/\n\r/); # $n++; } close(O); print "sortflybasemap = $n\n"; } sub readflysorsa { local(*F,*O,$n); return if (scalar(%cytobases)); return "Can't read $sorsa" unless (open(F,$sorsa)); %cytobases= (); @sorsalist= (); while () { next unless(/^\d/); chomp(); my($cyto,$bb,$be)= split(); ### /\t/ ## (' ') was, now \t separated! $cytobases{$cyto}= "$bb\t$be"; push(@sorsalist, $cyto); } close(F); } sub cysort { my($a1,$al,$a2)= ($a =~ /^(\d+)([A-Z]?)(\d*)/); my($b1,$bl,$b2)= ($b =~ /^(\d+)([A-Z]?)(\d*)/); my $x= ($a1 <=> $b1); if ($x==0) { $x= ($al cmp $bl); if ($x==0) { $x= ($a2 <=> $b2); } } return $x; } sub cysteps { my($a,$b)= @_; for my $i (0..$#sorsalist) { if ($sorsalist[$i] eq $a) { for my $j ($i..$#sorsalist) { return ($i, $j-$i) if ($sorsalist[$j] eq $b ); } } } return (-1,-1); } sub gadbcytosort { my ($infile, $outfile)= @_; # for cytomap values extracted from gadb mysql # sorsa.txt - cyto-phys-map.table #use gadb1_mar01; # #select name,global_start,global_end # into outfile '/c6/eugenes/genomew/fly/gadb1_mar01.cytotab' # from seq_feature where type like 'cyto band' ; # #? $outfile = $sorsa= "${flydata}sorsa.txt"; $gadb= 'GadFly database, rel. 1,'; ## FIXME local(*IN,*OUT); open (IN,$infile) or die "opening $infile"; open (OUT,">$outfile") or die "writing $outfile"; my @tm= localtime( $^T - 24*60*60*(-M $infile) ); my $date= POSIX::strftime("%d-%B-%Y",@tm); print OUT "# flybase/maps/sorsa.txt $date\n"; print OUT "# Generated from $gadb & FlyBase cytosearch data\n"; print OUT "#\n"; print OUT "# Table of cytogenetic map position and base values (start, end)\n"; print OUT "#\n"; print OUT "#cyto start end base\n"; my $aord= ord('A'); my %ha= (); my ($k,$n,$l,$s); while(){ my($c,$b,$e)= split; my($n,$l,$s); #? skip any '10A' w/ no sub band values, as per old format $c =~ m/^(\d+)(\D*)(\d*)/; next unless($3); $n= $1; $l= $2 || 'A'; $s= $3 || 0; $k= $n * 1000 + (ord($l) - $aord)*100 + $s; $ha{$k}= $_; } close(IN); foreach $k (sort { $a <=> $b } keys %ha) { print OUT $ha{$k}; } } sub makeflysorsa { local(*F,*O,$n); return "Can't write $sorsa.new" unless (open(F,">$sorsa.new")); readflysorsa(); # @sorsalist= (sort cysort keys %cytobases); my $file= $flyfeaturefiles{'X'}; my @tm= localtime( $^T - 24*60*60*(-M $file) ); my $date= POSIX::strftime("%d-%B-%Y",@tm); print F "# flybase/maps/sorsa.txt $date\n"; print F "# Generated from $sourceName & FlyBase cytosearch data\n"; print F "#\n"; print F "# Table of cytogenetic map position and base values (start, end)\n"; print F "# from genome sequence data\n"; print F "#\n"; print F "#cyto start end base\n"; foreach $csome (@flycsomes) { %cytobases= (); print &readFlyFeatures($csome,'sorsa'); my @last; my @newlist; my $first= 1; foreach my $ca (sort cysort keys %cytobases) { ## need fixes - average last stop - this start to match ## if missing cytosteps, interpolate values for them ## missing end points - need other fix my($start,$stop) = split(/\t/, $cytobases{$ca}); if ($first) { $first= 0; } elsif ($start > $last[2]) { my ($index, $nsteps)= cysteps( $last[0], $ca); if ($nsteps==1) { $start= int(($last[2] + $start)/2); $last[2]= $start; # $cytobases{$last[0]}= "$last[1]\t$last[2]"; $newlist[$#newlist]= "$last[0]\t$last[1]\t$last[2]"; } elsif ($nsteps>1) { my($bstart,$bstop,$bstep); $bstart= $last[2]; $bstop= $bstart; $nsteps--; $bstep= int( ($start - $bstart)/($nsteps)); for my $k ($index+1..$index+$nsteps) { my $cb= $sorsalist[$k]; $bstop += $bstep; push(@newlist, "$cb\t$bstart\t$bstop"); $bstart= $bstop; } # $start= int(($last[2] + $start)/2); } } push(@newlist, "$ca\t$start\t$stop"); @last= ($ca, $start, $stop); } print F "# chromosome $csome\n"; print F join("\n",@newlist)."\n"; } close(F); } sub getflycsomerange { my($csome)= @_; print STDERR "no sorsa data\n" unless (scalar(%cytobases)); my $ca= $flycsomebands{$csome}[0]; my $cb= $flycsomebands{$csome}[1]; my($start,$bstop) = split(/\t/, $cytobases{$ca}); my($estart,$stop) = split(/\t/, $cytobases{$cb}); @csomerange= ($start, $stop); print STDERR "csome $csome baserange= $start..$stop\n" if ($debug); } sub maxrange { my($range)= @_; my ($pre, $suf,$start,$stop, $b, $u); $start= $kMissingValue; $stop= $start; $range =~ s/^([^\d<>-]*)//; $pre= $1; $range =~ s/(\D*)$//; $suf= $1; if ($range =~ m/^([<>]*)([\d-]+)/) { $u= $1; $start= $2; $start-- if ($u eq '<'); } if ($range =~ m/([<>]*)([\d-]+)$/) { $u= $1; $stop= $2; $stop++ if ($u eq '>'); } return ($start,$stop); } sub cytosearchall { my($csome)= @_; my $ca= $flycsomebands{$csome}[0]; my $cb= $flycsomebands{$csome}[1]; my $query= "$ca-$cb"; foreach my $klass (@flycytoclass) { my $outf= &flycytofile($csome,$klass); my $cent= $flycytosearchkey{$klass}; if ($klass eq 'cytoclone') { $cent= '_clones='.$cent; } else { $cent= '_chroment='.$cent; } my $url= $cytourl; $url .= "query=${query}&_chromabs=xother&${cent}"; print STDERR ("lynx -source '$url' > $outf\n") if $debug; system("lynx -source '$url' > $outf"); } # lynx -source 'http://cricket.bio.indiana.edu:7081/.bin/cytosearchpl.pl? # query=${query}&_chromabs=xother&_chroment=Genes # &mimetype=text%2Ftab-separated-value&sort_list=Yes&limit=2&max_dp=6&max_df=2' > $outf # http://cricket.bio.indiana.edu:7081/.bin/cytosearchpl.pl?query=21 # &_chromabs=xother&_chromabs=2-break_Inversions&sort_list=Yes&mimetype=text%2Ftab-separated-values&limit=2&max_dp=6&max_df=2 } sub getunmappedgenes { my($csome)= @_; readflysorsa(); ## for NCBI genomes, not flyxml: catScaffolds2Arm($csome); print &readFlyFeatures($csome,'ids'); print &readFlyCytomap2($csome, &flycytofile($csome,'cytogene'), 'unmapped'); } sub readFlyFeatures { my($csome, $kind, $outh)= @_; my $file= $flyfeaturefiles{$csome}; local(*F,*O,$n); die "Can't read $file" unless (open(F,$file)); my $bindex= 0; #mRNA BcDNA:LD21503 83E2 join(2060173..2060394,2061023..2062224) FLYBASE:FBan0001151,FLYBASE:FBgn0027527, while () { my $blength= length($_); if ($outh && /^\W/) { print $outh $_; } next unless(/^\w/); $line= $_; chomp(); my($class,$sym,$map,$range,$ids,$dbx)= split(/\t/); if ($kind eq 'hashmap') { my ($start,$stop)= maxrange($range); ## $start= -99999999 unless($start); ## @csomerange= ($start, $stop) if ($class eq 'source') ; if ($class eq 'source') { $sourceline .= $line; } else { $hashmap{$start} .= $line; } } elsif ($kind eq 'ids') { my $id= $1 if ($ids =~ /(FBgn\d+)/); $genemapped{$id}= 1 if ($id); } elsif ($kind eq 'index') { my ($start,$stop)= maxrange($range); if ($class eq 'source') { @csomerange= ($start, $stop); $stepsize= ($stop - $start) / 100; } if ($class eq 'source' || $class eq 'scaffold') { print $outh "$class\t$range\t$bindex\n"; } elsif ($start >= $nextstep) { print $outh "$nextstep\t$bindex\n"; } } elsif ($kind eq 'sorsa' && $range ne '-' && $map ne '-') { my ($start,$stop)= maxrange($range); if ($map !~ /[;-]/) { ## take min,max of all for $map/cyto if ($cytobases{$map}) { my($bstart,$bstop) = split(/\t/, $cytobases{$map}); $start= $bstart if ($bstart<$start); $stop= $bstop if ($bstop>$stop); } $cytobases{$map} = "$start\t$stop"; } } $n++; } close(F); return "readFlyFeatures = $n\n"; } sub getCytorange { my($ca,$cx)= @_; $ca =~ s/^\s*//; $ca =~ s/[\s*+-]$//; return () unless ($ca =~ /^\d/); #? don't have conversion info for hXXX if ($ca !~ /[A-G]/) { if ($cx) { $cx =~ s/\d*$//; $ca= $cx . $ca; } else { $ca .= 'A1'; } } elsif ($ca !~ /\d$/) { $ca .= '1'; } my ($start1,$stop1)= split(/\t/, $cytobases{$ca}); return ($start1,$stop1,$ca); } sub flyCytomap2Bases { my ($map)= @_; my($stop,$start)= ($kMissingValue,$kMissingValue); my @maps= split(/;/, $map); foreach my $mp (@maps) { my($ca, $cb)= split(/-/, $mp); my($start1,$stop1,$bstart,$bstop); ($start1,$stop1,$ca)= getCytorange($ca); if ($cb) { ($bstart,$bstop,$cb)= getCytorange($cb,$ca); $stop1= $bstop if ($bstop); } $start= $start1 if ($start==$kMissingValue && $start1 >= $csomerange[0] && $start1 <= $csomerange[1]); $stop = $stop1 if ($stop1 >= $csomerange[0] && $stop1 <= $csomerange[1]); } # debug - getting missing when should get real range !?? return ($start, $stop); } sub readFlyCytomap2 { my($csome, $cytofile, $kind)= @_; local(*F,*O,*UM,$n,$um); print STDERR "readFlyCytomap2 $cytofile\n" if $debug; return "Can't read $cytofile" unless (open(F,$cytofile)); if ($kind eq 'unmapped') { return "Can't write $cytofile" unless (open(O,">$cytofile.unmapped")); } if ($kind eq 'hashmap') { return "Can't write $cytofile" unless (open(UM,">$cytofile.norange")); } while () { next unless(/^\w/); $line= $_; chomp(); my($class,$sym,$map,$range,$id)= split(/\t/); if ($kind eq 'hashmap' || $kind eq 'sorthashmap') { my ($start,$stop)= ($kMissingValue,$kMissingValue); if ($range =~ /\d+/) { ($start,$stop)= maxrange($range); } else { ($start, $stop)= flyCytomap2Bases($map); if ($kind eq 'sorthashmap') { $range= "$start..$stop"; } else { $range= "-"; } ## "<$start..>$stop"; # ! let gnomap do this better $line = "$class\t$sym\t$map\t$range\t$id\n"; } if ($start!=$kMissingValue) { $hashmap{$start} .= $line; $n++; } elsif ($kind eq 'hashmap') { print UM "$line"; $um++; } } elsif ($kind eq 'unmapped') { unless ($genemapped{$id}) { $n++; print O "$_\n"; } } } close(F); if ($kind eq 'hashmap') { close(UM); unlink "$cytofile.norange" unless($um); } close(O) if ($kind eq 'unmapped'); return "readFlyCytomap2 in $cytofile = $n\n"; } # 83E3-4 FBgn0037423 CG1155 20 Mar 2000 # 83E4 FBgn0037424 CG1157 20 Mar 2000 # and: make consistant w/ features.tsv # 83E3-4 FBgn0037423 CG1155 20 Mar 2000 >> # gene CG1155 83E3-4 - FBgn0037423 sub readFlyCytomap { my($csome,$kind)= @_; $kind= 'cytogene' unless($kind); $cytofile= &flycytofile($csome, $kind); getflycsomerange($csome); local(*F,*O); %flycytomap= (); %flysym= (); $cytomap= $csome; print STDERR "readFlyCytomap $cytofile\n" if $debug; die "can't read $cytofile" unless (open(F,$cytofile)); my $topline= ; if ($topline =~ /^cyto\w+\t/) { # don't make .new if current one is new fields chomp($topline); my($kind,$sym,$map,$range,$id) = split(/\t/,$topline); $flycytomap{$id}= $map; $flysym{$id}= $sym; while () { chomp(); ($kind,$sym,$map,$range,$id) = split(/\t/); $flycytomap{$id}= $map; $flysym{$id}= $sym; } close(F); } else { die "can't make $cytofile.new" unless (open(O,">$cytofile.new")); while () { next unless(/^\d/); chomp(); my($map,$id,$sym,$date)= split(/\t/); ##?! fix for cytowalk ID == gene ID here ? FBgn000 -> FBwk000 for /.bin/molmap.html?query=FBgn0010113 if ($kind eq 'cytowalk') { $id =~ s/FBgn//; } $flycytomap{$id}= $map; $flysym{$id}= $sym; print O "$kind\t$sym\t$map\t-\t$id\n"; # $flymore{$id}= "$kind\t$sym\t$map\t-\t$id\n"; } close(F); close(O); rename ($cytofile, "$cytofile.old"); rename ("$cytofile.new", $cytofile); } } 1; __END__