#!/usr/local/bin/perl # Acodes.pl/.pm # - perl module for manipulating flybase acode data # - from fbacodelib.pl, see also java flybase libs # - should be Acodes.pm but stupid Solaris mailer insists that is an image file # dgg, feb'99 ##### package Acodes; ## package flybase::Acodes; require 5.001; use strict; use vars qw($debug $recsize $RETE_ID $RETE_VALUE $RETE_COUNT); BEGIN { $Acodes::debug = 0; $Acodes::RETE_ID = 3; $Acodes::RETE_VALUE = 2; $Acodes::RETE_COUNT = 1; $Acodes::recsize = length(pack("LL", 1, 500)); # store as unsigned long, unsigned long # $Acodes::fbobs= "fbobs"; # acode data path # $Acodes::fbidtag = "FBgn"; # test, do for each data class # $Acodes::endOfRecord= "# EOR\n"; # for non-acode readers (srs) to simplify # $Acodes::bigsize= 300000; # $Acodes::asuffix= ".acode"; # $Acodes::create= 0; # $Acodes::doidx= 0; } sub new { my($class, $filename, $create) = @_; # print STDERR "new Acodes( " . join(',',@_) . " )\n" if $debug; my $self = {}; bless $self; my %db2a = (); $self->{'db2a'}= \%db2a; my %retef= (); $self->{'retef'}= \%retef; my %fldkeys= (); $self->{'fldkeys'}= \%fldkeys; my %subkeys= (); $self->{'subkeys'}= \%subkeys; ## open(OUTF,">&STDOUT"); ##? ## $self->open($filename,$create) if ($filename); return $self; } sub open { my($self,$alib,$create)= @_; return undef unless($alib); my $aidx= $alib . ".idx"; if ($create && $create !~ /^[ro]/) { local(*ALIB, *AIDX); CORE::open(ALIB,">$alib") || die "Can't create $alib"; CORE::open(AIDX,">$aidx") || die "Can't create $aidx"; $self->{'alib'}= *ALIB; $self->{'aidx'}= *AIDX; $self->{'libname'}= $alib; } else { local(*INLIB, *INIDX); CORE::open(INLIB,$alib) || die "Can't open $alib"; CORE::open(INIDX,$aidx) || warn "Can't open $aidx"; $self->{'inlib'}= *INLIB; $self->{'inidx'}= *INIDX; $self->{'libname'}= $alib; } # return ($alib, $aidx); return wantarray ? ($alib, $aidx) : $alib; } sub getLibname { my($self) = @_; my $alib= $self->{'libname'}; return wantarray ? ($alib, $alib.".idx") : $alib; } sub create { my($self,$alib)= @_; return $self->open($alib,"w"); } sub close { my($self) = @_; CORE::close($self->{'alib'}) if $self->{'alib'}; CORE::close($self->{'aidx'}) if $self->{'aidx'}; CORE::close($self->{'inlib'}) if $self->{'inlib'}; CORE::close($self->{'inidx'}) if $self->{'inidx'}; } sub openout { my($self,$outname)= @_; $outname= '&STDOUT' if (!$outname); CORE::open(OUTF,">$outname") || die "Can't write to $outname"; $self->{'outname'}= $outname; return ($outname); } sub closeout { my($self) = @_; ## need to deal with filehandle variables! CORE::close(OUTF) if $self->{'outname'}; $self->{'outname'}= undef; } ## registerDbKeys sub registerKeys { my($self, $redb2acode) = @_; # my %db2a= %{$self->{'db2a'}}; foreach (keys(%{$redb2acode})) { my $val= ${$redb2acode}{$_}; ${$self->{'db2a'}}{$_}= $val if ($val); } } ## removeDbKeys sub removeKeys { my($self, $redb2acode) = @_; # my %db2a= %{$self->{'db2a'}}; foreach (keys(%{$redb2acode})) { delete ${$self->{'db2a'}}{$_}; } } sub fldkey { my($self,$dbkey)= @_; # my %db2a= %{$self->{'db2a'}}; my $akey= ${$self->{'db2a'}}{$dbkey}; if (!$akey) { print STDERR "dbkey with no acode equivalent: $dbkey\n"; ${$self->{'db2a'}}{$dbkey}= $dbkey; $akey= $dbkey; } return $akey; } sub setId { my($self, $id, $rec) = @_; $self->{'id'}= $id; $rec->{'id'}= $id if ($rec); ## add 'id' to record %rech ? } sub newRecord { my($self, $reckey) = @_; my @vec= (); ## ! this doesn't make array ref !???? my %fhash= (); my %rech= ( 'key' => $reckey, 'vec' => \@vec, 'fhash' => \%fhash ); # $self->{'atrec'}= \%rech; ## need to distinguish mainrec, subrecs, subsubrecs... return \%rech; } sub addSubrec { my($self, $mainr, $subr) = @_; ## get mainr from $self ? my @mainr= @{$mainr->{'vec'} || []}; my $subkey= $subr->{'key'} ; my @subr= @{$subr->{'vec'} || []}; push(@mainr,$subkey); push(@mainr,"{"); push(@mainr, @subr); push(@mainr,"}"); $mainr->{'vec'}= \@mainr; ##?? make sure its there? } sub getFields { my($self, $vecr, $keepFlds) = @_; my %kos= %{$keepFlds}; my @newflds= (); my @vecr= @{$vecr->{'vec'} || []}; foreach my $fld (@vecr) { my ($key)= $fld =~ /([^|]+)/; push(@newflds, $fld) if ($kos{$key}); } return @newflds; } sub addFieldsFrom { my($self, $mainr, $vecr, $keepFlds) = @_; my @mainr= @{$mainr->{'vec'} || []}; my %kos= %{$keepFlds}; my @vecr= @{$vecr->{'vec'} || []}; if (scalar(%kos)) { foreach my $fld (@vecr) { my ($key)= $fld =~ /([^|]+)/; push(@mainr, $fld) if ($kos{$key}); } } else { push(@mainr, @vecr); } $mainr->{'vec'}= \@mainr; ##?? make sure its there? } sub addField { my($self, $rec, $key, $val, $dontpack) = @_; ## get rec from $self my @rec= @{$rec->{'vec'} || []}; my $fld = $key if ($key); my $lastfld= $rec[$#rec]; if ($val) { my($v1, $vs); if (!$dontpack && $lastfld && $lastfld =~ /^$key\|/) { $fld= $self->appendField( $rec[$#rec], $val); ## isn't changing original $fld! $rec->{'vec'}= \@rec; ##?? make sure its there? return $fld; } if ($val =~ /\n/) { ($v1, $vs)= split(/\n/, $val, 2); $val= $v1; } $fld .= '|' . $val; $self->appendField( $fld, $vs) if ($vs); } push( @rec, $fld); $rec->{'vec'}= \@rec; ##?? make sure its there? return $fld; ##!? return ref of fld for appending? } sub addFieldHash { my($self, $rec, $key, $val) = @_; return $self->addField($rec,$key,$val); ## not working -- apparently fhash becomes new, local copy !??? ## get rec from $self # my @rec= @{$rec->{'vec'}}; # my %fhash= %{$rec->{'fhash'}}; my $rvec= $rec->{'vec'}; my $rhash= $rec->{'fhash'}; if ($rhash->{$key}) { ## return $self->appendField( $rhash->{$key}, $val); ## doesn't work } my $fld = $key if ($key); if ($val) { my($v1, $vs); if ($val =~ /\n/) { ($v1, $vs)= split(/\n/, $val, 2); $val= $v1; } $fld .= '|' . $val; $self->appendField( $fld, $vs) if ($vs); } push( @$rvec, $fld); # $rec->{'vec'}= \@rec; ##?? make sure its there? $rhash->{$key}= $rvec->[$#$rvec]; ## $fld; # $rec->{'fhash'}= \%fhash; ##?? make sure its there? return $fld; ##!? return ref of fld for appending? } sub appendField { my($self, $fld, $val) = @_; ## !?! must use $_[1] instead of $fld to change original ! if ($val) { if ($val =~ /\n/) { my @vs= split(/\n/,$val); foreach $val (@vs) { $_[1] .= "\n|" . $val; } } else { $_[1] .= "\n|" . $val; } } return $_[1]; } sub toString { my($self, $mainr) = @_; my $reckey= $mainr->{'key'}; my @mainr= @{$mainr->{'vec'} || []}; my $doc = "$reckey\n{\n"; foreach (@mainr) { $doc .= $_ . "\n" if ($_); } $doc .= "}\n"; return $doc; } sub packFields { my($self, $rec) = @_; my @rec= @{$rec->{'vec'} || []}; my %vals= (); foreach my $i (0..$#rec) { my($k,$v) = split(/\|/,$rec[$i],2); my $ki= $vals{$k}; if ($ki) { $rec[$ki] .= "\n|" . $v; $rec[$i]= ''; } else { $vals{$k} = $i; } } $rec->{'vec'}= \@rec; ##?? make sure its there? } sub putComment { my $self= shift; # my @comments= @_; my $alib= $self->{alib}; # my $aidx= $self->{aidx}; foreach (@_) { print $alib '#' . $_ . "\n"; } } sub putRec { my($self, $mainr, $id, $retefld) = @_; ## get $reckey, mainr from $self my $reckey= $mainr->{'key'}; my @mainr= @{$mainr->{'vec'} || []}; my $alib= $self->{alib}; my $aidx= $self->{aidx}; $id= $self->{'id'} if !defined($id); ## or $mainr->{'id'} ?? $retefld= $self->getRecordTableEntry($mainr) if (!$retefld); my $at= tell($alib); print $alib "$reckey\n{\n"; print $alib $retefld . "\n" if ($retefld); foreach (@mainr) { ## missing subrecs here?? print $alib $_ . "\n" if ($_); } print $alib "}\n# EOR\n"; ## EOR comment isn't required ## my $nf= scalar(@mainr); ## print STDERR "Acodes.putRec( $reckey, $id ) = $nf\n" if $debug; ## print STDERR " $retefld\n" if $debug; if ($id =~ /(\d+)/) { $id= $1; } if ($id && $id !~ /\D/) { my $size= tell($alib) - $at; my $record= pack("LL", $at, $size); my $idloc = $id * length($record); seek($aidx, $idloc, 0); print $aidx $record; } } sub addTableEntryKey { my($self, $key, $val) = @_; # %retef= %{$self->{'retef'}}; ${$self->{'retef'}}{$key}= $val; } sub addTableEntryKeys { my($self, $retehash) = @_; my($key,$val); # %retef= %{$self->{'retef'}}; while (($key,$val) = each (%{$retehash})) { ${$self->{'retef'}}{$key}= $val; ## print STDERR "add RETE $key=$val\n" if $debug; } } sub getRecordTableEntry { my($self, $rec) = @_; my $retefld= $rec->{'retefld'}; return $retefld if ($retefld); # %retef= %{$self->{'retef'}}; ## print STDERR "get RETE hash ".join(',',keys %retef)."\n" if ($debug && $firsth<5); $firsth++; return undef unless(scalar(%{$self->{'retef'}})); $retefld= "RETE|"; my($k,$v); my %nc= (); my %nv= (); my %av= (); my $nf= 0; my @rec= @{$rec->{'vec'} || []}; foreach (@rec) { ($k,$v) = split(/\|/,$_,2); my $iste= ${$self->{'retef'}}{$k}; next unless ($iste); if ($iste == $Acodes::RETE_COUNT) { $nc{$k}++; } elsif ($iste == $Acodes::RETE_ID) { $v =~ s/\n.*//g; ## drop newlines & continuation lines $retefld .= "\t" if ($nf>0); $retefld .= "$k 1 $v"; $nf++; } elsif ($iste == $Acodes::RETE_VALUE) { $v =~ s/\n.*//g; ## drop continuation lines $nv{$k}++; $av{$k}= $v if ($nv{$k} < 2); } } foreach $k (sort keys(%nv)) { $retefld .= "\t" if ($nf>0); $retefld .= "$k $nv{$k} $av{$k}"; $nf++; } foreach $k (sort keys(%nc)) { $retefld .= "\t" if ($nf>0); $retefld .= "$k $nc{$k}"; $nf++; } ## print STDERR "got RETE = $retefld\n" if ($debug && $firstr<5); $firstr++; return undef if ($nf==0); $rec->{'retefld'}= $retefld; return $retefld; } sub fieldKeyProcessing { my($self, $key, $val) = @_; # %fldkeys= %{$self->{'fldkeys'}}; ${$self->{'fldkeys'}}{$key}= $val; ##? $val or 1 ?? this is on/off flag } sub fieldKeysProcessing { my($self, $rekvhash) = @_; my($key,$val); # %fldkeys= %{$self->{'fldkeys'}}; ##if ( !scalar(%{$rekvhash}) ) { $self->{'fldkeys'}= undef; } else while (($key,$val) = each (%{$rekvhash})) { ${$self->{'fldkeys'}}{$key}= $val; } # $self->{'fldkeys'}= \%fldkeys; ##? need to reset it? } sub subrecKeysProcessing { my($self, $rekvhash) = @_; my($key,$val); # %subkeys= %{$self->{'subkeys'}}; while (($key,$val) = each (%{$rekvhash})) { ${$self->{'subkeys'}}{$key}= $val; } # $self->{'subkeys'}= \%subkeys; ##? need to reset it? } sub dataProcessing { my($self, $refproc) = @_; $self->{'testdata'}= $refproc; } sub printrec { my($self, $data, $size) = @_; my($lastkey,$isendrec); my($lev)= 0; my($putout)= 1; my($infld) = 1; my($subkeylev) = -1; return if ($size<=0); ## HACK! drop non-Dmelanogaster, and lethals? # return if ( $data =~ /GSYM\|[A-z]+\\/ ); ## GSYM|Dsim\&bgr # return if ( $data =~ /GSYM\|l\(/ ); ## GSYM|l( # %fldkeys= %{$self->{'fldkeys'}}; # %subkeys= %{$self->{'subkeys'}}; my(@d) = split(/\n/,$data); foreach (@d) { if (/^\s*\#/) { ## comment line print OUTF "$_\n" if ($putout>0); if ($lev!=0 && /# EOR/) { print STDERR "Error nesting records: EOR , lev=$lev\n"; $lev= 0; } } elsif ( /^\{/ ) { $lev++; print OUTF "$_\n" if ($putout>0); } elsif ( /^\}/ ) { $lev--; $isendrec=1 if ($lev==0); print OUTF "$_\n" if ($putout>0); if ($lev == $subkeylev && $putout<0) { $putout= 1; $subkeylev= -1; } } ##elsif (/^\S/ && $lev == 0) { ## start of new main record ! ## print STDERR "Start of new main record\n"; ## } elsif (!/\S/) { } elsif (/^\s*\|/) { ## fld continuation line print OUTF "$_\n" if ($putout>0 && $infld); } else { my($key)= $_; $key =~ s=\|.*==; $lastkey= $key; my $ok= 0; $infld= 1; ##?? if ( scalar(%{$self->{'subkeys'}}) ) { if (${$self->{'subkeys'}}{$key} > 0) { ## print STDERR "$key = ".$subkey{$key}."\n" if $debug; $putout= 1; $ok= 1; $infld= 1; } elsif ($putout > 0 && ${$self->{'subkeys'}}{$key} < 0) { ## print STDERR "$key = ".$subkey{$key}."\n" if $debug; $putout= -1; $ok= -1; $subkeylev= $lev; } } if ($ok==0 && scalar(%{$self->{'fldkeys'}}) ) { if (${$self->{'fldkeys'}}{$key} > 0) { $ok= 1; $infld= 1; } elsif (${$self->{'fldkeys'}}{$key} < 0) { $ok= -1; $infld= 0; } elsif (${$self->{'fldkeys'}}{all} > 0) { $ok= 1; $infld= 1; } elsif (${$self->{'fldkeys'}}{all} < 0) { $ok= -1; $infld= 0; } } print OUTF "$_\n" if ($ok>=0 && $putout>0); } } } sub buildrec { my($self, $data, $size, $dontpack) = @_; my($lastkey,$lastuncodedkey,$lastfld,$isendrec,$subr,$fldmeth); my($lev)= 0; my($putout)= 1; my($infld) = 1; my($subkeylev) = -1; my $mainrec= undef; # my ($ok); return $mainrec if ($size<=0); my $testproc= $self->{'testdata'}; if (ref($testproc) =~ /CODE/) { my $result= &{$testproc}($data); return $mainrec if ($result<0); } my @recs= (); # %fldkeys= %{$self->{'fldkeys'}}; # %subkeys= %{$self->{'subkeys'}}; my(@d) = split(/\n/,$data); foreach (@d) { if (/^\s*\#/) { ## comment line if ($lev!=0 && /# EOR/) { print STDERR "Error nesting records: EOR , lev=$lev\n"; $lev= 0; } } elsif ( /^\{/ ) { $lev++; my $subflag= ${$self->{'subkeys'}}{$lastkey}; if ($subflag) { if ($subflag>0) { $putout= 1; } elsif ($putout>0) { $putout= -1; $subkeylev= $lev-1; } } elsif ($putout>0) { $putout= -1; $subkeylev= $lev-1; } if ($putout>0) { my $newr= $self->newRecord($lastkey); if ($subr) { ## $self->addSubrec($subr, $newr); ##? bad - need to add after $newr is full? push(@recs,$subr); $subr= $newr; } else { $mainrec= $newr; $subr= $mainrec; #? push(@recs,$subr); # not for main - see pop() } } } elsif ( /^\}/ ) { $lev--; $isendrec=1 if ($lev==0); if ($putout>0 && !$isendrec) { if (scalar(@recs)) { my $newr= $subr; $subr= pop(@recs) ; $self->addSubrec($subr, $newr); ## see above } } if ($lev == $subkeylev && $putout<0) { $putout= 1; $subkeylev= -1; } if ($lev == 1) { $putout= 1; } # bug fix !? } ##elsif (/^\S/ && $lev == 0) { ## start of new main record ! ## print STDERR "Start of new main record\n"; ## } elsif (!/\S/) { } elsif (/^\s*\|/) { ## fld continuation line if ($infld && $putout>0) { my $key= $lastuncodedkey; ## DANG - lastkey IS RECODED !!!!!!!! my $val= $_; $val =~ s/^\s*\|//; my $fldflag= ${$self->{'fldkeys'}}{$key}; if (ref( $fldflag) =~ m/CODE/) { ($key,$val)= &{$fldflag}($key,$val,2); # $ok= 1 if ($key); } if ($key) { my @rec= @{$subr->{'vec'} || []}; ## $lastfld is local var !! $self->appendField( $rec[$#rec], $val); $subr->{'vec'}= \@rec; ##?? make sure its there? } } } else { my($key,$val) = split(/\|/,$_,2); my $ok= 1; # default print $infld= 0; ## 1; ##?? $lastuncodedkey= $key; ## BEFORE RECODE !!!!!!! my $subflag= ${$self->{'subkeys'}}{$key}; if ($subflag) { $ok= -1; # never add as field? do this tag in addSubrec if ($subflag>0) { $putout= 1; } #$ok= 1; ## elsif ($subflag<0) { $putout= -1; $ok= -1; $subkeylev= $lev; } else { $putout= -1; $ok= -1; $subkeylev= $lev; } } else { my $fldflag= ${$self->{'fldkeys'}}{$key}; if ($fldflag) { if (ref( $fldflag) =~ m/CODE/) { ($key,$val)= &{$fldflag}($key,$val); $ok= ($key) ? 1 : 0 ; } elsif ($fldflag<0) { $ok= -1; } else { $ok= 1; } } else { my $fldall= ${$self->{'fldkeys'}}{all}; if ($fldall) { if ($fldall > 0) { $ok= 1; $infld= 1; } elsif ($fldall < 0) { $ok= -1; $infld= 0; } } } } if ($ok>0 && $putout>0) { $lastfld= $self->addField( $subr, $key, $val, $dontpack); $self->setId( $val, $subr) if ($key eq 'ID'); $infld= 1; } $lastkey= $key; } } return $mainrec; } sub readId { my($self, $id, $idtag) = @_; print "reading $id\n" if $debug; $idtag= '[A-Za-z]{4}' unless($idtag); my $idnum= $1 if ($id =~ m/$idtag(\d+)/); my $alib= $self->{inlib}; my $aidx= $self->{inidx}; my( $record); my $idloc = $idnum * $recsize; seek($aidx, $idloc, 0); read($aidx, $record, $recsize); my($at, $size) = unpack("LL", $record); # print "index of $id is $at: $size >> seek $idloc\n" if $debug; my $data; seek($alib, $at, 0); read($alib, $data, $size); my $outf= $self->{alib}; my $oidx= $self->{aidx}; if ($outf) { $at= tell($outf); print $outf $data; my $e2= tell($outf); $size= $e2 - $at; ## test size == size2 !? ##print $outf $endOfRecord; ##< is this now part of rec size? if ($oidx) { $record= pack("LL", $at, $size); $idloc = $idnum * $recsize; # print "new index of $id is $at2: $size2 >> seek $idloc\n" if $debug; seek($oidx, $idloc, 0); print $oidx $record; } } return $data; #? } # get field value(s) given $data string, field tag, and optional $dindex into $data sub getFieldValue { my($self, $data, $fldtag, $dindex) = @_; my ($at, $e, $x); ## get any more values my $pat= "\n$fldtag\|"; $dindex= 0 unless($dindex>0); $at= index($data, $pat, $dindex); my $val= ''; if ($at>=0) { $e= $at + length($pat); $at= $e; ## skip fldtag| while (($x= index($data,"\n", $e)) > 0) { if (substr($data,$x+1,1) eq '|') { $e = $x+1; } else { $e= $x; last; } } $val= substr($data, $at, $e-$at); $val =~ s/\n\|/\n/g; $dindex= $e; } else { $dindex= -1; } return ($val, $dindex); } # extract given ID's to new file sub extractrecs { my($self, $alib, $ridlist, $outfile, $fbidtag) = @_; my $aidx= $alib . '.idx'; my @ids= @$ridlist; unless(scalar(@ids)) { warn "No id's specified to extract"; return -1; } my($inlib,$inidx)= $self->open( $alib); unless($inlib && $inidx) { warn "Can't read from $alib"; return -1; } $outfile= $inlib.".2" unless($outfile); my($olib,$oidx)= $self->create($outfile); foreach (@ids) { $self->readId($_, $fbidtag); } $self->close(); } # read thru acode lib for random ID's # and process them sub randomrecs { my($self, $processproc, $maxn, $maxid, $minid) = @_; $maxn= 100 unless defined($maxn); $minid= 1 unless defined($minid); $maxid= 90000 unless defined($maxid); ## ? is FBgn max id ~ 92395 my(%gotid); my $alib= $self->{inlib}; my $aidx= $self->{inidx}; my $serr= seek($aidx,0,2); my $aidxlen= tell($aidx); ## print STDERR "Acodes::randomrecs: aidxlen=$aidxlen aidx=$aidx seekerr=$serr processrec=".ref($processproc)."\n" ## if ($debug); return unless ($aidxlen>0); my $recsize = length(pack("LL", 1, 500)); # store as unsigned long, unsigned long my ($data, $record, $nred); my($at, $size)= (0,0); my $nerr; for (my $i= 0; $i < $maxn; ) { my $id= int(rand($maxid)) + $minid; ## $id= sprintf("%07d",$id); next if ($gotid{$id}); $gotid{$id}= 1; $record= ''; my $idloc = $id * $recsize; next if ($idloc>$aidxlen); seek($aidx, $idloc, 0); $nred= read($aidx, $record, $recsize); if ($nred==0) { ## print STDERR "error reading index of $id is $at: $size >> seek $idloc\n"; $nerr++; return if ($nerr>100); next; } ($at, $size) = unpack("LL", $record); next if ($size <= 0 || $at < 0); ## print STDERR "index of $id is $at: $size >> seek $idloc\n" if $debug; seek($alib, $at, 0); read($alib, $data, $size); if ($processproc) { &{$processproc}($self,$data,$size); } else { $self->printrec($data,$size); } $i++; $nerr= 0; } } sub allrecs { my($self, $processproc) = @_; my ($data, $record, $size); my $alib= $self->{inlib}; my $aidx= $self->{inidx}; my($at, $i)= (0,0); $/= "# EOR\n"; while ($data= <$alib>) { $size= length($data); if ($processproc) { &{$processproc}($self,$data,$size); } else { $self->printrec($data,$size); } $i++; } $/= "\n"; ## this re-orders records according to ID# - change? to read recs in data file order? # my $recsize = length(pack("LL", 1, 500)); # store as unsigned long, unsigned long # while (read($aidx, $record, $recsize)) { # ($at, $size) = unpack("LL", $record); # next if ($size <= 0 || $at < 0); # seek($alib, $at, 0); # read($alib, $data, $size); # if ($processproc) { &{$processproc}($self,$data,$size); } # else { $self->printrec($data,$size); } # $i++; # } } sub indexLib { my($alib,$fbidtag) = @_; my $aidx= $alib . '.idx'; print STDERR "index acode library $alib\n"; local(*ALIB,*AIDX); CORE::open(ALIB,"<$alib") || die "Can't read $alib"; CORE::open(AIDX,">$aidx") || die "Can't write $aidx"; my $recid= undef; my $lev= 0; my $recstart= 0; my $curbyteindex= 0; $recsize= length(pack("LL", 1, 1)); ## 8 * 2 = 16 my ($fbid, $size, $record, $lastkey, $idloc); my @id2= (); while () { chomp(); ## newline my $isendrec= 0; my($key,$val) = split(/\|/,$_,2); if (/^\|/) { $key= $lastkey; } if (/^\s*\#/) { ## comment line if ($lev!=0 && /# EOR/) { print STDERR "Error nesting records: EOR $fbid, lev=$lev\n"; $lev= 0; } } elsif (/^\{/) { $lev++; } elsif (/^\}/) { $lev--; $isendrec=1 if ($lev==0); } elsif (/^\S/ && $lev == 0) { ## start of new main record ? $recstart= $curbyteindex; } elsif ($lev==1 && $key =~ /^[A-Z]*ID$/ && $val =~ /^$fbidtag(\d+)/ ) { $fbid= $val; $recid= $1; } elsif ($lev==1 && $key =~ /^[A-Z]*ID2$/ && $val =~ /^$fbidtag/ ) { if( $val =~ /$fbidtag.+$fbidtag/ ) { push(@id2, split(/\W+/,$val)); } else { push(@id2, $val); } } if ($isendrec && defined($recid)) { $size= tell(ALIB) - $recstart; $record= pack("LL", $recstart, $size); # store as unsigned long, unsigned long if (scalar(@id2)) { foreach my $d2 (@id2) { if ($d2 =~ /^$fbidtag(\d+)/) { $idloc = $1 * $recsize; seek(AIDX, $idloc, 0); # do not overwrite - if already present from primary ID my $data= undef; read(AIDX, $data, $recsize); print AIDX $record if( $data<=0 || $data eq undef ); print STDERR "ID2 $d2 index for $fbid/$recid\n" if $debug; } } @id2= (); } $idloc = $recid * $recsize; print STDERR "index of $fbid/$recid is $recstart: $size >> seek $idloc\n" if $debug; seek(AIDX, $idloc, 0); print AIDX $record; $recid= undef; } $lastkey= $key; $curbyteindex= tell(ALIB); } CORE::close(ALIB); CORE::close(AIDX); print STDERR "done indexing data\n"; } sub splitsubr { my($self, $data, $mainacode, $subacode) = @_; my($lastkey,$submain,$lastfld,$lastsubkey,$isendrec,$subr,$fldmeth); my($lev)= 0; my($infld) = 1; my($subkeylev) = -1; my $mainrec= undef; my $allok= 1; # $self->{keepallflds} || 1; ##? always 1? return $mainrec unless (length($data)>2); my @recs= (); # %fldkeys= %{$self->{'fldkeys'}}; # %subkeys= %{$self->{'subkeys'}}; ##? is this bad if ($debug) { print STDERR "subkeys:\n"; while ( my($key,$val)= each(%{$self->{'subkeys'}}) ) { print STDERR "$key => $val\n";} } my $mycode= $mainacode; my(@d) = split(/\n/,$data); foreach (@d) { if (/^\s*\#/) { ## comment line if ($lev!=0 && /# EOR/) { print STDERR "Error nesting records: EOR , lev=$lev\n"; $lev= 0; } } elsif ( /^\{/ ) { $lev++; $isendrec= 0; my $subflag= ${$self->{'subkeys'}}{$lastkey}; ##? $lastsubkey if ($subflag eq 'split') { if ($subr) { push(@recs,$subr);} ## save current $mycode= $subacode; my $newr= $mycode->newRecord($lastkey); $submain= $newr; $subr= $newr; $subkeylev= $lev-1; } else { my $newr= $mycode->newRecord($lastkey); if ($subr) { ## $mycode->addSubrec($subr, $newr); ## not yet! push(@recs,$subr); $subr= $newr; } else { $mainrec= $newr; $subr= $mainrec; push(@recs,$subr); } } } elsif ( /^\}/ ) { $lev--; if (($lev == $subkeylev) && $submain) { ## ($mycode eq $subacode) $mycode->putRec($submain); ## $id?, $retefld? print STDERR "put subrec ".$subacode->{id}."\n" if $debug; $mycode= $mainacode; $subr= ''; $submain= ''; $subkeylev= -1; } if (scalar(@recs)) { my $newr= $subr; $subr= pop(@recs) ; $mycode->addSubrec($subr, $newr) if ($newr); ## see above } $isendrec=1 if ($lev==0); if ($isendrec) { $mainacode->putRec($mainrec); $mainrec= undef; $subr= undef; @recs= (); print STDERR "put1 mainrec ".$mainacode->{id}."\n" if $debug; } } elsif (!/\S/) { } elsif (/^\s*\|/) { ## fld continuation line if ($infld) { my $key= $lastkey; my $val= $_; $val =~ s/^\s*\|//; # if (ref( $fldmeth) =~ m/CODE/) { # ($key,$val)= &{$fldmeth}($lastkey,$val,2); # $ok= 1 if ($key); # } if ($key) { my @rec= @{$subr->{'vec'} || []}; ## $lastfld is local var !! $mycode->appendField( $rec[$#rec], $val); $subr->{'vec'}= \@rec; ##?? make sure its there? } } } else { my($key,$val) = split(/\|/,$_,2); my $ok= $allok; $infld= 0; ## 1; ##?? # $fldmeth= ${$self->{'fldkeys'}}{$key}; # if ($fldmeth) { # if (ref( $fldmeth) =~ m/CODE/) { # ($key,$val)= &{$fldmeth}($key,$val); # $ok= 1 if ($key); # } # elsif ($fldmeth<0) { $ok= -1; } # else { $ok= 1; } # } # else { if (1) { ## dang, have REF subrec and REF field !?? my $subflag= ${$self->{'subkeys'}}{$key}; if ($subflag) { $ok= 0; ## put separately ! $lastsubkey= $key; } } if ($ok>0) { $lastfld= $mycode->addField( $subr, $key, $val, 1); $mycode->setId( $val, $subr) if ($key eq 'ID'); $infld= 1; } $lastkey= $key; } } unless ($isendrec) { $mainacode->putRec($mainrec); $mainrec= undef; print STDERR "put2 mainrec ".$mainacode->{id}."\n" if $debug; } ## return $mainrec; ##? } 1; __END__