#!/bin/sh # dgblast-ll.sh # blast a bunch of genomes # sample script for bioinformatics data-grid methods (parallelize by data splitting) # using ibm LoadLeveler blbin=$HOME/bio/blast/bin ncbibin=$HOME/bio/ncbi/bin mpiblast=$HOME/bio/iuparblast/ncbi/bin SHARED=$HOME/scratch ## huge dyak results, 10x others : all due to chrU repetitive parts? #ssplist="dana dere dgri dpse dvir " #ssplist="dpse" #ssplist="dmoj" #ssplist="dwil" #ssplist="dmel" #ssplist="dsim" #ssplist="dsec" #ssplist="dper" ssplist="dyak" qsplist="dana dere dgri dmel dmoj dper dpse dsec dsim dvir dwil dyak" ## redo ones with bad-blast errors; or all? #qsplist="dvir dsec dgri" for sspp in $ssplist { for qspp in $qsplist { [ $qspp == $sspp ] && continue; $blbin/dgfasplit.pl -n 10 -in $SHARED/chrs/$qspp/${qspp}_*.fa for qpart in $SHARED/chrs/$qspp/${qspp}_*.fa-?? { qp=`echo $qpart| sed -e's/.*.fa.//;'` jobn=run-${qspp}-${sspp}.$qp outn=$SHARED/out/chrs/${qspp}-${sspp}c.$qp cat > $jobn <