This file describes how we made the browser database on the
Dec 12, 2000 freeze.



CREATING DATABASE AND STORING mRNA/EST SEQUENCE AND AUXILIARY INFO

o - ln -s /projects/hg3/gs.7/oo.29 ~/oo
o - Create the database.
     - ssh cc94
     - Enter mysql via:
           mysql -u hguser -phguserstuff
     - At mysql prompt type:
	create database hg7;
	quit
     - make a semi-permanent alias:
        alias hg7 mysql -u hguser -phguserstuff -A hg7
o - Make sure there is at least 5 gig free on cc94:/usr/local/mysql 
o - Store the mRNA (non-alignment) info in database.
     - hgLoadRna new hg7
     - hgLoadRna add hg7 /projects/hg2/mrna.123/mrna.fa /projects/hg2/mrna.123/mrna.ra
     - hgLoadRna add hg7 /projects/hg2/mrna.123/est.fa /projects/hg2/mrna.123/est.ra
    The last line will take quite some time to complete.  It will count up to
    about 3,200,000 before it is done.

REPEAT MASKING 

o - Lift up the repeat masker coordinates as so:
      - ssh kks00
      - cd ~/gs/oo.29
      - edit jkStuff/liftOut.sh and update ooGreedy version number
      - source jkStuff/liftOut.sh

o - Copy over RepeatMasking for chromosomes 21 and 22:
      - cp /projects/cc/hg/gs.4/oo.18/21/chr21.fa.out ~/gs/oo.29/21

o - Load RepeatMasker output into database:
      - cd /projects/hg3/gs.7/oo.29
      - hgLoadOut hg7 ?/*.fa.out ??/*.fa.out
        (Ignore the "Strange perc. field" warnings.  Maybe mention them
	 to Arian someday.)

This will keep things going until the sensitive RepeatMasker run comes in
from Victor or elsewhere.  When it does:

o - Cd ~/oo/bed/rmskVictor
o - Unzip and process Victor's stuff with doMerge.sh and victor_merge_rm_out1.pl.
o - Remove fragments as so:
       cd out_oo29
       rm */ctg*/ctg*.*.fa.out

o - Copy in Victor's repeat masking runs over the contig runs in ~/oo/*/ctg*
       rm ~/oo.29/contigOut.zip
       zip ~/oo.29/contigOut.zip */ctg*/ctg*.fa.out
       cd ~/oo.29
       unzip contigOut.zip

o - Lift these up as so:
      - ssh kks00
      - cd ~/gs/oo.29
      - source jkStuff/liftOut2.sh

o - Load RepeatMasker output into database again:
      - ssh cc94
      - cd /projects/hg3/gs.7/oo.29
      - hgLoadOut hg7 ?/*.fa.out ??/*.fa.out
        (Ignore the "Strange perc. field" warnings.  Maybe mention them
	 to Arian someday.)


STORING O+O SEQUENCE AND ASSEMBLY INFORMATION

o - Create packed chromosome sequence files and put in database
     - hg7 < ~/src/hg/lib/chromInfo.sql
     - cd /projects/hg3/gs.7/oo.29
     - hgNibSeq hg7 /projects/hg3/gs.7/oo.29/nib ?/chr*.fa ??/chr*.fa


o - Store o+o info in database.
     - cd /projects/hg3/gs.7/oo.29
     - jkStuff/liftGl.sh ooGreedy.99.gl
     - hgGoldGapGl hg7 /projects/hg3/gs.7 oo.29 
     - cd /projects/hg3/gs.7
     - hgClonePos hg7 oo.29 ffa/sequence.inf /projects/hg3/gs.7
       (Ignore warnings about missing clones - these are in chromosomes 21 and 22)
       (Had to patch sequence.inf and fin/fa from Genbank for
       missing AL109841, AL132768, AL161945 in chromosome 20)
     - hgCtgPos hg7 oo.29

o - Make and load GC percent table
     - login to cc94
     - cd /projects/hg3/gs.7/oo.29/bed
     - mkdir gcPercent
     - cd gcPercent
     - hg7 < ~/src/hg/lib/gcPercent.sql
     - hgGcPercent hg7 ../../nib



MAKING AND STORING mRNA AND EST ALIGNMENTS

o - Load up the local disks of the cluster with refSeq.fa, mrna.fa and est.fa
    from /projects/hg2/mrna.123  into /var/tmp/hg/h/mrna

o - Use BLAT to generate refSeq, mRNA and EST alignments as so:
      ssh cc01
      cd ~/oo/jkStuff
      ls -1S /projects/hg3/gs.7/oo.29/?/ctg*/*.fa /projects/hg3/gs.7/oo.29/??/ctg*/*.fa > ctg.lst
      source setupPostBlat.sh
    check with checkPostBlat.sh.  Try uncommenting some of it next time....
      
o - Process refSeq mRNA and EST alignments into near best in genome.
      ssh kks00
      cd /projects/hg3/gs.7/oo.29/psl
      source ../jkStuff/postSort.sh
    (If the est alignments exceed 2 gig use ../jkStuff/postSortBig.sh instead)

o - Load mRNA alignments into database.
      - ssh cc94
      cd /projects/hg3/gs.7/oo.29/psl/mrna
      foreach i (*.psl)
          mv $i $i:r_mrna.psl
      end
      hgLoadPsl hg7 *.psl
      cd ..
      hgLoadPsl hg7 all_mrna.psl

o - Load EST alignments into database.
      ssh cc94
      cd /projects/hg3/gs.7/oo.29/psl/est
      foreach i (*.psl)
            mv $i $i:r_est.psl
      end
      hgLoadPsl hg7 *.psl
      cd ..
      hgLoadPsl hg7 all_est.psl

o - Create subset of ESTs with introns and load into database.
      - ssh kks00
      cd ~/oo
      source jkStuff/makeIntronEst.sh
      - ssh cc94
      cd ~/oo/psl/intronEst
      hgLoadPsl hg7 *.psl

PRODUCING KNOWN GENES (RefSeq) (done)

o - Go to the Entrez browser at http://www.ncbi.nlm.nih.gov/Entrez/
    and select Nucleotide in the search box.  Go to Limits and select
    'exclude all of the above', Molecule: mRNA, Only from: RefSeq,
    and then put "Homo sapiens" [organism] in the search box and hit 
    go.  Tweak things so you can save it to /projects/hg3/refseq.123/refseq.gb
    in GenBank flat file format.
o - Download ftp://ncbi.nlm.nih.gov/refseq/LocusLink/loc2acc and
    mim2loc to /projects/hg3/refseq.123
o - Similarly download refSeq proteins in fasta format to refSeq.pep
o - Align these by processes described under mRNA/EST alignments above.
    pslSortAcc head chrom temp rsVsOo27Mixed.psl
    cd ~/oo/psl
    pslCat -nohead refSeq/*.psl | grep NM_ >/projects/hg3/refseq.123/known.oo29.psl
o - Produce refGene, refPep, refMrna, and refLink tables as so:
     ssh cc94
     cd /projects/hg3/refseq.123
     hgRefSeqMrna hg7 refSeq.fa refSeq.ra known.oo29.psl loc2ref refSeqPep.fa mim2loc
    



SIMPLE REPEAT TRACK

o - Execute the following (which will take about 
    8 hours).
	mkdir ~/oo/bed/simpleRepeat
	cd ~/oo/nib
	foreach i (*.nib)
	    echo processing $i
	    trfBig $i ~/oo/bed/simpleRepeat/$i -bed
	end
	cd ~/oo/bed/simpleRepeat
	rm *.nib
	cat c*.bed > all.bed
    Then log onto cc94 and
        cd ~/oo/bed/simpleRepeat 
	hg7 < ~/src/hg/lib/simpleRepeat.sql
    At the mysql> prompt type
        load data local infile 'all.bed' into table simpleRepeat
	quit


PRODUCING MOUSE ALIGNMENTS

o - Make sure that contig/ctg*.fa.masked files exist already.
    (May need to go to ~/oo and unzip contigFaMasked.zip.)
o - Mask simple repeats in addition to normal repeats with:
        mkdir ~/oo/jkStuff/trfCon
	cd ~/oo/allctgs
	/bin/ls -1 | grep -v '\.' > ~/oo/jkStuff/trfCon/ctg.lst
        cd ~/oo/jkStuff/trfCon
	mkdir err log out
    edit ~/oo/jkStuff/trf.gsub to update gs and oo version
	gensub ctg.lst ~/oo/jkStuff/trf.gsub
	mv gensub.out trf.con
	condor_submit trf.con
    wait for this to finish.  Check as so
        cd ~/oo
	source jkStuff/checkTrf.sh
    there should be no output.
o - Load up the cluster with masked copies of the genome.
       ssh kks00
       cd ~/oo/allctgs
       zip -j -1 ../ctgFaTrf.zip */ctg*.fa.trf
       ssh cc01
    Do the following to unpack
       cd ~/cluster
    edit parOne to read
       mkdir /var/tmp/hg/gs.7/oo.29
       mkdir /var/tmp/hg/gs.7/oo.29/tanMaskNib
    and then
       parAll
    unpack as so:
       cd ~/oo
       ccCp ctgFaTrf.zip  /var/tmp/hg/gs.7/oo.29/ctgFaTrf.zip
    edit parOne to read
       cd /var/tmp/hg/gs.7/oo.29/tanMaskNib
       unzip ../ctgFaTrf.zip >/dev/null
    and then
       parAll
    Double check that all data is in place.
o - Do mouse/human alignments.
       ssh cc01
       cd ~/mouse/vsOo29
       mkdir err out log psl
       ls -1S /var/tmp/hg/gs.7/oo.29/tanMaskNib/*.fa.trf > human.lst
       ls -1S /projects/hg3/mouse/ncbiTrimmed/Mm*.fa > mouse.lst
     Copy over aliMouse.gsub from previous version and edit paths to
     point to current assembly.
       gensub2 mouse.lst human.lst aliMouse.gsub allCon
     Break into many files by moving the header part of allCon to a file
     called headCon and then
       splitFile allCon 21000 m -head=headCon
       foreach i (m??)
           mv $i $i.con
	end
     Submit a couple of the resulting mNN.con files  a day - trying to 
     keep condor queue size between 1000 and 10000.  There's 3000 jobs
     in each file.
     [This part went well.  As far as I can tell all 50,000 jobs finished
     the first time without a glitch.]
o - Check alignments as so:
       ssh kks00
       cd ~/mouse/vsOo29
       gensub2 mouse.lst human.lst fixMouse.gsub fixMouse.sh
       chmod a+x fixMouse.sh
       fixMouse.sh | tee fixMouse.out
o - If any output in fixMouse.out then resubmit jobs as so:
       ssh cc01
       cd ~/mouse/vsOo29
       mv log log.old; mkdir log
       mv err err.old; mkdir err
       mv out out.old; mkdir out
       condor_submit fix.con
o - Sort alignments as so 
       cd ~/mouse/vsOo29
       pslCat -dir -check psl | liftUp -type=.psl stdout ~/oo/jkStuff/liftAll.lft warn stdin | pslSortAcc nohead blatMouse temp stdin
       [This did not go well, though for oo.27 it was a breeze.  This
       time I got errors at fairly random lines.  One run it would
       report an error at line 200,000 and the next at line 2,000,000.
       I didn't see a bug anywhere.  I resorted to:
           ssh kk002
	   cd /cluster/work/mouse/vsOo29
	   pslCat -out=rawCtg.psl -dir -check ~/mouse/vsOo29/psl
	   pslSortAcc nohead ctg /projects/hg2/temp rawCtg.psl
	   cd ctg
	   foreach i (*.psl)
	       echo $i
	       mv $i xxx
	       pslUnpile -maxPile=150 xxx $i
	   rm xxx
	   cd ..
	   liftUp chrom.psl ~/oo/jkStuff/liftAll.lft warn ctg/*.psl
	   end
       which went ...]
o - Get rid of big pile-ups due to contamination as so:
       cd blatMouse
       foreach i (*.psl)
           echo $i
           mv $i xxx
           pslUnpile -maxPile=150 xxx $i
       rm xxx
       end
o - Remove long redundant bits from read names by making a file
    called subs.in with the following line:
        gnl|ti^ti
    and running the commands
        cd ~/mouse/vsOo29/blatMouse
	subs -e -c ^ *.psl > /dev/null
o - Rename to correspond with tables as so and load into database:
       ssh cc94
       cd ~/mouse/vsOo29/blatMouse
       foreach i (*.psl)
	   set r = $i:r
           mv $i ${r}_blatMouse.psl
       end
       hgLoadPsl hg7 *.psl
o - load sequence into database as so:
	ssh cc94
	hgLoadRna addSeq '-abbr=gnl|' hg7 /projects/hg3/mouse/ncbiTrimmed/fasta.*.fa
    This will take quite some time.  Perhaps an hour.
o - Produce 'best in genome' filtered version:
        ssh kks00
	cd ~/mouse/vsOo29
	pslSort dirs blatMouseAll.psl temp blatMouse
	pslReps blatMouseAll.psl bestMouseAll.psl /dev/null -singleHit -minCover=0.3
	pslSortAcc nohead bestMouse temp bestMouseAll.psl
	cd bestMouse
        foreach i (*.psl)
	   set r = $i:r
           mv $i ${r}_bestMouse.psl
        end
o - Load best in genome into database as so:
	ssh cc94
	cd ~/mouse/vsOo29/bestMouse
        hgLoadPsl hg7 *.psl

PRODUCING VERTEBRATE mRNA ALIGNMENTS

Here you align vertebrate mRNAs against the masked genome on the
cluster you set up during the previous step.

o - Make sure that gbpri, gbmam, gbrod, and gbvert are downloaded from Genbank into
    /projects/hg2/genbank.125
o - Process these out of genbank flat files as so:
       ssh kks00
       cd /projects/hg2/genbank.125
       gunzip -c gbpri*.gz gbmam*.gz gbrod*.gz gbver*.gz | gbToFaRa ~/hg/h/xenoRna.fil ../mrna.125/xenoRna.fa ../mrna.125/xenoRna.ra ../mrna.125/xenoRna.ta stdin
       cd ../mrna.125
       compress -c xenoRna.fa > xenoRna.fa.Z
o - Copy onto the cluster as so:
       ssh cc01
       ccCp /projects/hg2/mrna.125/xenoRna.fa.Z /var/tmp/hg/h/xenoRna.fa.Z
       cd ~/cluster
    edit parOne to read
       uncompress /var/tmp/hg/h/xenoRna.fa.Z
    and then execute
       parAll
    Double check by editing serOne to read
       ls -l /var/tmp/hg/h/xenoRna.fa
    and do
       serAll | tee foo
    and inspect result for consistency.
o - Start the cluster run as so:
       ssh cc01
       cd ~/oo/jkStuff
       mkdir post.xenoRna
       cd post.xenoRna
       mkdir err log out psl
    Copy over all.gsub from previous version and then
       ls -1 /var/tmp/hg/h/xenoRna.fa > xeno.lst
       ls -1 /var/tmp/hg/gs.7/oo.29/tanMaskNib/*.fa.trf > homo.lst
       gensub2 xeno.lst homo.lst all.gsub all.con
       condor_submit all.con
    Wait for this to finish (take just a little over 24 hours) and
    check results, restarting jobs as needed.
o - Sort alignments as so:
       ssh kks00
       cd ~/oo/jkStuff/post.xenoRna
       pslSort dirs raw.psl /projects/hg2/temp psl
       pslReps raw.psl cooked.psl /dev/null -minAli=0.25
       liftUp contig.psl ../liftAll.lft warn cooked.psl
       pslSortAcc nohead chrom /projects/hg2/temp contig.psl
       pslCat -dir chrom > xenoMrna.psl
       rm -r chrom raw.psl cooked.psl contig.psl
o - Load into database as so:
       ssh cc94
       cd ~/oo/jkStuff/post.xenoRna
       hgLoadPsl hg7 xenoMrna.psl -tNameIx
       cd /projects/hg2/mrna.125
       hgLoadRna add hg7 /projects/hg2/mrna.125/xenoRna.fa xenoRna.ra





PRODUCING FISH ALIGNMENTS

o - Download sequence from ... and put it in 
	ssh kks00
       /projects/hg3/fish/seq15jun2001/bqcnstn_0106151510.fa
    then
	ln -s /projects/hg3/fish ~/fish
    split this into multiple files and compress original with
	cd ~/fish/seq15jun2001
        faSplit sequence bq* 100 fish
	compress bq*
o - Do fish/human alignments.
       ssh cc01
       cd ~/fish
       mkdir vsOo29
       cd vsOo29
       mkdir err out log psl
       ls -1S /var/tmp/hg/gs.7/oo.29/tanMaskNib/*.fa.trf > human.lst
       ls -1S /projects/hg3/fish/seq15jun2001/*.fa > fish.lst
     Copy over aliFish.gsub from previous version and edit paths to
     point to current assembly.
       gensub2 fish.lst human.lst aliFish.gsub allCon
     Break into many files by moving the header part of allCon to a file
     called headCon and then
       splitFile allCon 21000 m -head=headCon
       foreach i (m??)
           mv $i $i.con
	end
     Submit several of the resulting mNN.con files a day - trying to
     keep condor queue size between 1000 and 10000.  There's 3000 jobs
     in each file.
o - Check alignments as so:
       ssh kks00
       cd ~/fish/vsOo29
       edit fixFish.sh as necessary to update versions.
       gensub2 fish.lst human.lst fixFish.gsub fixFish.sh
       chmod a+x fixFish.sh
       fixFish.sh | tee fixFish.out
o - Sort alignments as so 
       cd ~/fish/vsOo29
       pslCat -dir -check psl | liftUp -type=.psl stdout ~/oo/jkStuff/liftAll.lft warn stdin | pslSortAcc nohead chrom temp stdin
o - Rename to correspond with tables as so and load into database:
       ssh cc94
       cd ~/fish/vsOo29/chrom
       foreach i (*.psl)
	   set r = $i:r
           mv $i ${r}_blatFish.psl
       end
       hgLoadPsl hg7 *.psl


PRODUCING GENSCAN PREDICTIONS
    
o - Produce contig genscan.gtf genscan.pep and genscanExtra.bed files like so:
     ssh roar
     cd ~/oo.29
     source jkStuff/gsBig.sh &
    Wait about 4 days 1 hour for these to finish.

o - Convert these to chromosome level files as so:
     cd ~/oo.29
     mkdir bed/genscan
     liftUp bed/genscan/genscan.gtf jkStuff/liftAll.lft warn ?/ctg*/genscan.gtf ??/ctg*/genscan.gtf
     liftUp bed/genscan/genscanSubopt.bed jkStuff/liftAll.lft warn ?/ctg*/genscanSub.bed ??/ctg*/genscanSub.bed
     cat ?/ctg*/genscan.pep ??/ctg*/genscan.pep > bed/genscan/genscan.pep

o - Load into the database as so:
     ssh cc94
     cd ~/oo.29/bed/genscan
     ldHgGene hg7 genscan genscan.gtf
     hgPepPred hg7 generic genscanPep genscan.pep
     hgLoadBed hg7 genscanSubopt genscanSubopt.bed


      
LOADING IN EXTERNAL FILES

o - Load chromosome bands: (done)
      - login to cc94
      cd /projects/hg3/gs.7/oo.29/bed
      mkdir cytoBands
      cp /projects/cc/hg/mapplots/data/tracks/oo.29/cytobands.bed cytoBands
      hg7 < ~/src/hg/lib/cytoBand.sql
      Enter database with "hg7" command.
      - At mysql> prompt type in:
          load data local infile 'cytobands.bed' into table cytoBand;
      - At mysql> prompt type
          quit

o - Load STSs (done)
     - login to cc94
      cd ~/oo/bed
      hg7 < ~/src/hg/lib/stsMap.sql
      mkdir stsMap
      cd stsMap
      bedSort /projects/cc/hg/mapplots/data/tracks/oo.29/stsMap.bed stsMap.bed
      - Enter database with "hg7" command.
      - At mysql> prompt type in:
          load data local infile 'stsMap.bed' into table stsMap;
      - At mysql> prompt type
          quit

o - Load in Exonerate alignments (done)
      ssh cc94
      cd ~/oo/bed
      mkdir exonerate
      cd exonerate
      wget ftp.ebi.ac.uk/pub/software/ensembl/mouse_vs_ens110.tar.gz
      tar -zxvf mouse*.gz
      cat mouse_gff/*.gff | hgExonerate -elia hg7 exoMouse stdin
       
o - Load in Fiberglass alignments (done)
      ssh cc94
      cd ~/oo/bed
      mkdir fiberglass
      cd fiberglass
      wget http://waldo.wi.mit.edu/~danb/Chr22/all_hits
      hgFiberglass hg7 all_hits

o - Load SNPs into database.
      - ssh cc94
      - cd ~/oo/bed
      - mkdir snp
      - cd snp
      - Download SNPs from ftp://ftp.ncbi.nlm.nih.gov/pub/sherry/gp.oo29.gz
      - Unpack.
        grep RANDOM gp.oo29 > snpTsc.txt
        grep MIXED  gp.oo29 >> snpTsc.txt
        grep BAC_OVERLAP  gp.oo29 > snpNih.txt
        grep OTHER  gp.oo29 >> snpNih.txt
        awk -f filter.awk snpTsc.txt > snpTsc.contig.bed
        awk -f filter.awk snpNih.txt > snpNih.contig.bed
        liftUp snpTsc.bed ../../jkStuff/liftAll.lft warn snpTsc.contig.bed
        liftUp snpNih.bed ../../jkStuff/liftAll.lft warn snpNih.contig.bed
	hgLoadBed hg7 snpTsc snpTsc.bed
	hgLoadBed hg7 snpNih snpNih.bed

o - Load cpgIslands
     - login to cc94
     - cd /projects/hg3/gs.7/oo.29/bed
     - mkdir cpgIsland
     - cd cpgIsland
     - hg7 < ~kent/src/hg/lib/cpgIsland.sql
     - wget http://genome.wustl.edu:8021/pub/gsc1/achinwal/MapAccessions/cpg_dec12.oo27.tar
     - tar -xf cpg*.tar
     - awk -f filter.awk */ctg*/*.cpg.out > contig.bed
     - liftUp cpgIsland.bed ../../jkStuff/liftAll.lft warn contig.bed
     - Enter database with "hg7" command.
     - At mysql> prompt type in:
          load data local infile 'cpgIsland.bed' into table cpgIsland

o - Load Ensembl genes:
     cd ~/oo/bed
     mkdir ensembl
     cd ensembl
     wget http://www.sanger.ac.uk/~birney/all_april_ctg.gtf.gz
     gunzip all*.gz
     liftUp ensembl110.gtf ~/oo/jkStuff/liftAll.lft warn all*.gtf
     ldHgGene hg7 ensGene en*.gtf
o - Load Ensembl peptides:
     (poke around ensembl to get their peptide files as ensembl.pep)
     (substitute ENST for ENSP in ensembl.pep with subs)
     wget ftp://ftp.ensembl.org/pub/current/data/fasta/pep/ensembl.pep.gz
     gunzip ensembl.pep.gz
   edit subs.in to read: ENSP|ENST
     subs -e ensembl.pep
     hgPepPred hg7 generic ensPep ensembl.pep

o - Load Sanger22 genes:
      cd ~/oo/bed
      mkdir sanger22
      cd sanger22
      wget http://www.cs.columbia.edu/~vic/sanger2gbd/data/All_genes_plus.gene+cds.frameV2.noPseudogenes.GBD.gtf.txt
      wget http://www.cs.columbia.edu/~vic/sanger2gbd/data/All_genes_plus.gene+cds.frameV2.GBD.gtf.txt
      ldHgGene hg7 sanger22 *.gtf.txt

o - Load rnaGene table
      - login to cc94
      - cd ~kent/src/hg/lib
      - hg7 < rnaGene.sql
      - cd /projects/hg3/gs.7/oo.29/bed
      - mkdir rnaGene
      - cd rnaGene
      - download data from ftp.genetics.wustl.edu/pub/eddy/pickup/ncrna-oo27.gff.gz
      - gunzip *.gz
      - liftUp chrom.gff ../../jkStuff/liftAll.lft carry ncrna-oo27.gff
      - hgRnaGenes hg7 chrom.gff

o - Load exoFish table
     - login to cc94
     - cd /projects/hg3/gs.7/oo.29/bed
     - mkdir exoFish
     - cd exoFish
     - hg7 < ~kent/src/hg/lib/exoFish.sql
     - Put email attachment from Olivier Jaillon (ojaaillon@genoscope.cns.fr)
       into /projects/hg3/gs.7/oo.29/bed/exoFish/oo27.ecore1.4jimkent
     - cp oo27.ecore1.4jimkent foo
     - Substitute "chr" for "CHR" in that file by
          subs -e foo >/dev/null
     - Copy in filter.awk from previous version's bed/exoFish dir.
     - Add dummy name column and convert to tab separated with
       awk -f filter.awk foo > exoFish.bed
     - Enter database with "hg7" command.
     - At mysql> prompt type in:
          load data local infile 'exoFish.bed' into table exoFish

o - Mouse synteny track (done)
     - login to cc94.
     - cd ~/kent/src/hg/lib
     - hg7 < mouseSyn.sql
     - mkdir ~/oo/bed/mouseSyn
     - cd ~/oo/bed/mouseSyn
     - Put Dianna Church's (church@ncbi.nlm.nih.gov) email attachment as
       mouseSyn.txt
     - awk -f format.awk *.txt > mouseSyn.bed
     - delete first line of mouseSyn.bed
     - Enter database with "hg7" command.
     - At mysql> prompt type in:
          load data local infile 'mouseSyn.bed' into table mouseSyn


o - Load Genie predicted genes and associated peptides.
     - cat */ctg*/ctg*.affymetrix.gtf > predContigs.gtf
     - liftUp predChrom.gtf ../../jkStuff/liftAll.lft warn predContigs.gtf
     - ldHgGene hg7 genieAlt predChrom.gtf

     - cat */ctg*/ctg*.affymetrix.aa > pred.aa
     - hgPepPred hg7 genie pred.aa 

     - hg7
         mysql> delete * from genieAlt where name like 'RS.%';
         mysql> delete * from genieAlt where name like 'C.%';

o - Load Softberry genes and associated peptides:
     - cd ~/oo/bed
     - mkdir softberry
     - cd softberry
     - get ftp://www.softberry.com/pub/GP_SCOCT/*
     - ldHgGene hg7 softberryGene chr*.sgp.gff
     - hgPepPred hg7 softberry *.pro
     - hgSoftberryHom hg7 *.pro


FAKING DATA FROM PREVIOUS VERSION
(This is just a stopgap until the proper track arrives.  Rescues about
97% of data.  Just an experiment, not really followed through on.)

o - Rescuing STS track:
     - log onto cc94
     - mkdir ~/oo/rescue
     - cd !$
     - mkdir sts
     - cd sts
     - bedDown hg3 mapGenethon sts.fa sts.tab
     - echo ~/oo/sts.fa > fa.lst
     - pslOoJobs ~/oo ~/oo/rescue/sts/fa.lst ~/oo/rescue/sts g2g
     - log onto cc01
     - cd ~/oo/rescue/sts
     - split all.con into 3 parts and condor_submit each part
     - wait for assembly to finish
     - cd psl
     - mkdir all
     - ln ?/*.psl ??/*.psl *.psl all
     - pslSort dirs raw.psl temp all
     - pslReps raw.psl contig.psl /dev/null
     - rm raw.psl
     - liftUp chrom.psl ../../../jkStuff/liftAll.lft carry contig.psl
     - rm contig.psl
     - mv chrom.psl ../convert.psl

LOADING PENN STATE MOUSE REFERENCE ALIGNMENTS
  o Load mouseRef track
     - cd /projects/hg3/mouse/penn
     - zcat alignments.gz | hgRefAlign webb hg7 mouseRef -

  o Load mouseRefHCons track
     - awk -f class-track.awk highly_conserved \
         | hgRefAlign webb hg7 mouseRefHCons -

  o Load mouseRefMDup track
     - awk -f class-track.awk moderately_duplicated \
         | hgRefAlign webb hg7 mouseRefMDup -
     
PATTERN HUNTER MOUSE ALIGNMENTS
  o Load phMouseAli track
     - cd /projects/hg3/mouse/patternHunter
     - zcat chr22.ph.gz | hgRefAlign -n tab hg7 phMouseAli -

TIGR GENE INDEX

  o - Download ftp://ftp.tigr.org/private/HGI_ren/*.tgz into
      ~/oo.29/bed/tgi and then execute the following commands:
          cd ~/oo.29/bed/tgi
	  mv cattleTCs.tgz cowTCs.tgz
	  foreach i (mouse cow human pig rat)
	      mkdir $i
	      cd $i
	      gtar -zxf ../${i}*.tgz
	      gawk -v animal=$i -f ../filter.awk * > ../$i.gff
	      cd ..
	  end
	  mv human.gff human.bak
	  sed s/THCs/TCs/ human.bak > human.gff
	  ldHgGene -exon=TCs hg7 tigrGeneIndex *.gff


PRODUCING PSEUDO_GENE INFO

o - Align with translated blat all human and non-human mRNAs into
    jkStuff/post.xenoRna and jkStuff/post.transRna.  
o - ssh kks00
    cd ~/oo.29/jkStuff/post.transRna/psl
    liftUp ../chrom.psl ../../liftAll.lft warn *.psl
    cd ../../post.xenoRna/psl
    liftUp ../chrom.psl ../../liftAll.lft warn *.psl
    cd ~/oo.29/psl
    pslSortAcc nohead psuedo /projects/hg2/temp ../jkStuff/post.transRna/chrom.psl ../jkStuff/post.xenoRna/chrom.psl
    cd psuedo
    foreach i (*.psl)
        mv $i $i:r_psl.psl
    end
    ssh cc94
    cd /projects/hg3/gs.7/oo.29/psl/psuedo
    hgLoadPsl hg7 *.psl

