# for emacs: -*- mode: sh; -*-

##############################################################################
# hg38 patch 2 build
##############################################################################

mkdir /hive/data/genomes/hg38/bed/hg38Patch2
cd /hive/data/genomes/hg38/bed/hg38Patch2
mkdir genbank
cd genbank
rsync -L -a -P \
rsync://ftp.ncbi.nlm.nih.gov/genomes/genbank/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCA_000001405.17_GRCh38.p2/ ./

# appears to be the entire assembly:
faSize GCA_000001405.17_GRCh38.p2_genomic.fna.gz
# 3221487035 bases (160028440 N's 3061458595 real 1898327994 upper 1163130601 lower) in 486 sequences in 1 files
# Total size: mean 6628574.1 sd 30567681.1 min 970 (KI270394.1) max 248956422 (CM000663.2) median 162429
# %36.11 masked total, %37.99 masked real

# so the question is, what is new here compared to what we have in hg38

cd /hive/data/genomes/hg38/bed/hg38Patch2
time faCount genbank/GCA_000001405.17_GRCh38.p2_genomic.fna.gz \
   > faCount.GRCH38.p2.txt
#  real    1m7.182s

~/kent/src/hg/makeDb/doc/hg38/scanAssemblyReport.pl ../../chrom.sizes \
  faCount.GRCH38.p2.txt genbank/GCA_000001405.17_GRCh38.p2_assembly_report.txt \
   | grep new | sed -e 's/^/# /'

# chr1_KN196472v1_fix   186494  KN196472.1      new
# chr1_KN196473v1_fix   166200  KN196473.1      new
# chr1_KN196474v1_fix   122022  KN196474.1      new
# chr1_KN538360v1_fix   460100  KN538360.1      new
# chr1_KN538361v1_fix   305542  KN538361.1      new
# chr2_KN538362v1_fix   208149  KN538362.1      new
# chr2_KN538363v1_fix   365499  KN538363.1      new
# chr3_KN196475v1_fix   451168  KN196475.1      new
# chr3_KN196476v1_fix   305979  KN196476.1      new
# chr3_KN538364v1_fix   415308  KN538364.1      new
# chr5_KN196477v1_alt   139087  KN196477.1      new
# chr6_KN196478v1_fix   268330  KN196478.1      new
# chr9_KN196479v1_fix   330164  KN196479.1      new
# chr10_KN196480v1_fix  277797  KN196480.1      new
# chr10_KN538365v1_fix  14347   KN538365.1      new
# chr10_KN538366v1_fix  85284   KN538366.1      new
# chr10_KN538367v1_fix  420164  KN538367.1      new
# chr11_KN538368v1_alt  203552  KN538368.1      new
# chr11_KN196481v1_fix  108875  KN196481.1      new
# chr12_KN196482v1_fix  211377  KN196482.1      new
# chr12_KN538369v1_fix  541038  KN538369.1      new
# chr12_KN538370v1_fix  86533   KN538370.1      new
# chr13_KN196483v1_fix  35455   KN196483.1      new
# chr13_KN538371v1_fix  206320  KN538371.1      new
# chr13_KN538372v1_fix  356766  KN538372.1      new
# chr13_KN538373v1_fix  148762  KN538373.1      new
# chr15_KN538374v1_fix  4998962 KN538374.1      new
# chr19_KN196484v1_fix  370917  KN196484.1      new
# chr22_KN196485v1_alt  156562  KN196485.1      new
# chr22_KN196486v1_alt  153027  KN196486.1      new
# chrY_KN196487v1_fix   101150  KN196487.1      new

# how much sequence:
~/kent/src/hg/makeDb/doc/hg38/scanAssemblyReport.pl ../../chrom.sizes \
  faCount.GRCH38.p2.txt genbank/GCA_000001405.17_GRCh38.p2_assembly_report.txt \
     | grep new | awk '{sum += $2; printf "%d\t%s\n", sum, $0}' | tail
#  12200930        chrY_KN196487v1_fix     101150  KN196487.1      new

~/kent/src/hg/makeDb/doc/hg38/scanAssemblyReport.pl ../../chrom.sizes \
  faCount.GRCH38.p2.txt genbank/GCA_000001405.17_GRCh38.p2_assembly_report.txt \
     | grep new > new.sequences.list

cut -f3 new.sequences.list > extract.new.list
awk '{printf "s/%s/%s/; ", $3,$1}' new.sequences.list > genbankToUCSC.sed

~/kent/src/hg/makeDb/doc/hg38/scanAssemblyReport.pl ../../chrom.sizes \
  faCount.GRCH38.p2.txt genbank/GCA_000001405.17_GRCh38.p2_assembly_report.txt \
     | grep -v new > existing.sequences.list

cut -f3 existing.sequences.list > extract.exist.list

faSomeRecords genbank/GCA_000001405.17_GRCh38.p2_genomic.fna.gz \
    extract.new.list stdout | sed -e 's/ .*//;' | \
      sed -f genbankToUCSC.sed | gzip -c > hg38Patch2.fa.gz

faSomeRecords genbank/GCA_000001405.17_GRCh38.p2_genomic.fna.gz \
  extract.exist.list stdout | sed -e 's/ .*//;' | gzip -c > existing.fa.gz

# verify same amount of sequence here as hg38:
faSize existing.fa.gz
# 3209286105 bases (159970322 N's 3049315783 real 1890811945 upper 1158503838 lower) in 455 sequences in 1 files
# check it is the same as hg38:  (masking is different, that is expected)
head -1 ../../faSize.hg38.2bit.txt 
# 3209286105 bases (159970322 N's 3049315783 real 1460684798 upper 1588630985 lower) in 455 sequences in 1 files

# verify correct amount of patch2 sequence here:
faSize hg38Patch2.fa.gz
# 12200930 bases (58118 N's 12142812 real 7516049 upper 4626763 lower) in 31 sequences in 1 files
# this is what was measured above:
#  12200930        chrY_KN196487v1_fix     101150  KN196487.1      new

# construct locations file:
~/kent/src/hg/makeDb/doc/hg38/regionScan.pl extract.new.list \
   genbank/GCA_000001405.17_GRCh38.p2_assembly_regions.txt \
     > patchLocations.bed

# separate haplotypes from fix patches for two tracks:
grep -v fix patchLocations.bed | sed -e 's/_alt//;' \
   | sed -e 's/\tchr.*_/\t/;' | sed -e 's/v/./;' > hg38Patch2Haplotypes.bed

hgLoadBed -type=bed4 hg38 hg38Patch2Haplotypes hg38Patch2Haplotypes.bed 
# Read 4 elements of size 4 from hg38Patch2Haplotypes.bed

grep fix patchLocations.bed | sed -e 's/_fix//;' \
   | sed -e 's/\tchr.*_/\t/;' | sed -e 's/v\([0-9]\)$/.\1/;' \
     > hg38Patch2Patches.bed

hgLoadBed -type=bed4 hg38 hg38Patch2Patches hg38Patch2Patches.bed 
# Read 27 elements of size 4 from hg38Patch2Patches.bed

# construct 2bit file:
faToTwoBit hg38Patch2.fa.gz hg38Patch2.2bit
twoBitInfo hg38Patch2.2bit stdout | sort -k2nr > hg38Patch2.chrom.sizes
# take a look at that to verify it looks OK:
cat hg38Patch2.chrom.sizes | sed -e 's/^/# /;'
# chr15_KN538374v1_fix  4998962
# chr12_KN538369v1_fix  541038
# chr1_KN538360v1_fix   460100
# chr3_KN196475v1_fix   451168
# chr10_KN538367v1_fix  420164
# chr3_KN538364v1_fix   415308
# chr19_KN196484v1_fix  370917
# chr2_KN538363v1_fix   365499
# chr13_KN538372v1_fix  356766
# chr9_KN196479v1_fix   330164
# chr3_KN196476v1_fix   305979
# chr1_KN538361v1_fix   305542
# chr10_KN196480v1_fix  277797
# chr6_KN196478v1_fix   268330
# chr12_KN196482v1_fix  211377
# chr2_KN538362v1_fix   208149
# chr13_KN538371v1_fix  206320
# chr11_KN538368v1_alt  203552
# chr1_KN196472v1_fix   186494
# chr1_KN196473v1_fix   166200
# chr22_KN196485v1_alt  156562
# chr22_KN196486v1_alt  153027
# chr13_KN538373v1_fix  148762
# chr5_KN196477v1_alt   139087
# chr1_KN196474v1_fix   122022
# chr11_KN196481v1_fix  108875
# chrY_KN196487v1_fix   101150
# chr12_KN538370v1_fix  86533
# chr10_KN538366v1_fix  85284
# chr13_KN196483v1_fix  35455
# chr10_KN538365v1_fix  14347

zcat genbank/GCA_000001405.17_GRCh38.p2_assembly_structure/PATCHES/alt_scaffolds/AGP/alt.scaf.agp.gz \
   | sed -f genbankToUCSC.sed > hg38Patch2.agp

checkAgpAndFa hg38Patch2.agp hg38Patch2.2bit | tail -1

# All AGP and FASTA entries agree - both files are valid

#############################################################################
# lastz alignments to hg38 (DONE - 2015-01-13 - Hiram)
#############################################################################

mkdir /hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38.2015-01-13
cd /hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38.2015-01-13

cat << '_EOF_' > DEF
# human vs human
BLASTZ=lastz
# maximum M allowed with lastz is only 254
BLASTZ_M=254
# lastz does not like the O= and E= lines in the matrix file
BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
BLASTZ_O=600
BLASTZ_E=150
# other parameters from hg18 vs venter1 lastz on advice from Webb
BLASTZ_K=10000
BLASTZ_Y=15000
BLASTZ_T=2

# TARGET: Human Hg19Patch10
SEQ1_DIR=/hive/data/genomes/hg38/bed/hg38Patch2/hg38Patch2.2bit
SEQ1_LEN=/hive/data/genomes/hg38/bed/hg38Patch2/hg38Patch2.chrom.sizes
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_IN_CONTIGS=0
SEQ1_LIMIT=1

# QUERY: Human Hg19
SEQ2_DIR=/scratch/data/hg38/hg38.2bit
SEQ2_LEN=/scratch/data/hg38/chrom.sizes
SEQ2_CTGDIR=/hive/data/genomes/hg38/bed/hg38Patch2/hg38Bits.2bit
SEQ2_CTGLEN=/hive/data/genomes/hg38/bed/hg38Patch2/hg38Bits.chrom.sizes
SEQ2_LIFT=/hive/data/genomes/hg38/bed/hg38Patch2/hg38Bits.lift
SEQ2_CHUNK=5000000
SEQ2_LAP=0
SEQ2_IN_CONTIGS=0
SEQ2_LIMIT=1

BASE=/hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38.2015-01-13
TMPDIR=/dev/shm
'_EOF_'
    # << happy emacs

# prepare bits of hg38 sequence to lastz align to the patches,
# this is selecting out the specific section of hg38 where the patch
# is supposed to match, and setting up lastz parameters

rm -fr hg38Bits run.blastz run.blastz/tParts run.blastz/qParts psl
mkdir -p hg38Bits run.blastz run.blastz/tParts run.blastz/qParts psl
rm -f ../hg38Bits.lift
cut -f4 ../patchLocations.bed | while read FIX
do
  chr=`grep "${FIX}" ../patchLocations.bed | cut -f1`
  start=`grep "${FIX}" ../patchLocations.bed | cut -f2`
  end=`grep "${FIX}" ../patchLocations.bed | cut -f3`
  bitSize=`echo ${end} ${start} | awk '{printf "%d", $1-$2}'`
  chrSize=`grep -w "${chr}" ../../../chrom.sizes | cut -f2`
  fixSize=`grep "${FIX}" ../hg38Patch2.chrom.sizes | cut -f2`
  echo ${chr}:${start}-${end} vs. ${FIX}:0-${fixSize}
  twoBitToFa /gbdb/hg38/hg38.2bit:${chr}:${start}-${end} stdout \
    | sed -e "s/${chr}:/${FIX}_/g" > hg38Bits/${FIX}.fa
  echo -e "${start}\t${FIX}_${start}-${end}\t${chr}\t${chrSize}" >> ../hg38Bits.lift
  echo -e "/hive/data/genomes/hg38/bed/hg38Patch2/hg38Patch2.2bit:${FIX}:0-${fixSize}" > run.blastz/tParts/${FIX}.lst
  echo -e "/hive/data/genomes/hg38/bed/hg38Patch2/hg38Bits.2bit:${FIX}_${start}-${end}:0-${bitSize}" > run.blastz/qParts/${FIX}.lst
  echo -e "/cluster/bin/scripts/blastz-run-ucsc -outFormat psl tParts/${FIX}.lst qParts/${FIX}.lst ../DEF {check out exists ../psl/${FIX}.psl}"  >> run.blastz/jobList
done

faToTwoBit hg38Bits/*.fa ../hg38Bits.2bit
twoBitInfo ../hg38Bits.2bit stdout | sort -k2n > ../hg38Bits.chrom.sizes

ssh ku
cd /hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38.2015-01-13/run.blastz
para create jobList
para try ... check ... push ... etc
para time > run.time
# Completed: 31 of 31 jobs
# CPU time in finished jobs:         31s       0.51m     0.01h    0.00d  0.000 y
# IO & Wait Time:                    86s       1.44m     0.02h    0.00d  0.000 y
# Average job time:                   4s       0.06m     0.00h    0.00d
# Longest finished job:              18s       0.30m     0.01h    0.00d
# Submission to last job:            42s       0.70m     0.01h    0.00d

# put together the individual results
mkdir pslParts
cat psl/chr*.psl | gzip -c > pslParts/hg38Patch2.hg38.psl.gz

#   constructing a chain from those results
mkdir -p /hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38.2015-01-13/axtChain/run
cd /hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38.2015-01-13/axtChain/run
time zcat ../../pslParts/hg38Patch2.hg38.psl.gz \
| axtChain -psl -verbose=0 -scoreScheme=/scratch/data/blastz/human_chimp.v2.q -minScore=2000 -linearGap=medium stdin \
    /hive/data/genomes/hg38/bed/hg38Patch2/hg38Patch2.2bit \
    /hive/data/genomes/hg38/bed/hg38Patch2/hg38Bits.2bit \
    stdout \
| chainAntiRepeat /hive/data/genomes/hg38/bed/hg38Patch2/hg38Patch2.2bit \
    /hive/data/genomes/hg38/bed/hg38Patch2/hg38Bits.2bit  \
    stdin hg38Patch2.hg38.preLift.chain
    #   real    0m44.175s
liftUp -chainQ hg38Patch2.hg38.lifted.chain \
    ../../../hg38Bits.lift carry hg38Patch2.hg38.preLift.chain

# constructing the net files:
cd /hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38.2015-01-13/axtChain

chainMergeSort run/hg38Patch2.hg38.lifted.chain \
        | gzip -c > hg38Patch2.hg38.all.chain.gz
chainSplit chain hg38Patch2.hg38.all.chain.gz
# Make nets ("noClass", i.e. without rmsk/class stats which are added later):
time chainPreNet  hg38Patch2.hg38.all.chain.gz \
    ../../hg38Patch2.chrom.sizes \
    /hive/data/genomes/hg38/chrom.sizes stdout \
    | chainNet  stdin -minSpace=1 ../../hg38Patch2.chrom.sizes \
        /hive/data/genomes/hg38/chrom.sizes stdout /dev/null \
    | netSyntenic stdin noClass.net
    #   real    0m1.338s

# Make liftOver chains:
netChainSubset -verbose=0 noClass.net hg38Patch2.hg38.all.chain.gz stdout \
    | chainStitchId stdin stdout | gzip -c > hg38Patch2.hg38.over.chain.gz

# Make axtNet for download: one .axt per hg38Patch2 seq.
netSplit noClass.net net
cd ..
mkdir -p axtNet
foreach f (axtChain/net/*.net)
netToAxt $f axtChain/chain/$f:t:r.chain \
  ../hg38Patch2.2bit \
        /hive/data/genomes/hg38/hg38.2bit stdout \
  | axtSort stdin stdout \
  | gzip -c > axtNet/$f:t:r.hg38Patch2.hg38.net.axt.gz
end

# Make mafNet for multiz: one .maf per hg38Patch2 seq.
mkdir -p mafNet
foreach f (axtNet/*.hg38Patch2.hg38.net.axt.gz)
  axtToMaf -tPrefix=hg38Patch2. -qPrefix=hg38. $f \
        ../hg38Patch2.chrom.sizes \
        /hive/data/genomes/hg38/chrom.sizes \
        stdout \
  | gzip -c > mafNet/$f:t:r:r:r:r:r.maf.gz
end

#############################################################################
# run this same business with hg38 as target, Patch2 sequence as query
mkdir /hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38Patch2.2015-01-16
cd /hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38Patch2.2015-01-16

cat << '_EOF_' > DEF
# human vs human
BLASTZ=lastz
# maximum M allowed with lastz is only 254
BLASTZ_M=254
# lastz does not like the O= and E= lines in the matrix file
BLASTZ_Q=/scratch/data/blastz/human_chimp.v2.q
BLASTZ_O=600
BLASTZ_E=150
# other parameters from hg18 vs venter1 lastz on advice from Webb
BLASTZ_K=10000
BLASTZ_Y=15000
BLASTZ_T=2

# TARGET: Human Hg38
SEQ1_DIR=/scratch/data/hg38/hg38.2bit
SEQ1_LEN=/scratch/data/hg38/chrom.sizes
SEQ1_CTGDIR=/hive/data/genomes/hg38/bed/hg38Patch2/hg38Bits.2bit
SEQ1_CTGLEN=/hive/data/genomes/hg38/bed/hg38Patch2/hg38Bits.chrom.sizes
SEQ1_LIFT=/hive/data/genomes/hg38/bed/hg38Patch2/hg38Bits.lift
SEQ1_CHUNK=5000000
SEQ1_LAP=10000
SEQ1_IN_CONTIGS=0
SEQ1_LIMIT=1

# QUERY: Human Hg38Patch2
SEQ2_DIR=/hive/data/genomes/hg38/bed/hg38Patch2/hg38Patch2.2bit
SEQ2_LEN=/hive/data/genomes/hg38/bed/hg38Patch2/hg38Patch2.chrom.sizes
SEQ2_CHUNK=5000000
SEQ2_LAP=0
SEQ2_IN_CONTIGS=0
SEQ2_LIMIT=1

BASE=/hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38Patch2.2015-01-16
TMPDIR=/dev/shm
'_EOF_'
    # << hapy emacs


rm -f ../hg38Bits.lift
rm -fr hg38Bits run.blastz
mkdir -p hg38Bits run.blastz/tParts run.blastz/qParts
cut -f4 ../patchLocations.bed | while read FIX
do
  chr=`grep "${FIX}" ../patchLocations.bed | cut -f1`
  start=`grep "${FIX}" ../patchLocations.bed | cut -f2`
  end=`grep "${FIX}" ../patchLocations.bed | cut -f3`
  bitSize=`echo ${end} ${start} | awk '{printf "%d", $1-$2}'`
  chrSize=`grep -w "${chr}" ../../../chrom.sizes | cut -f2`
  fixSize=`grep "${FIX}" ../hg38Patch2.chrom.sizes | cut -f2`
  echo ${chr}:${start}-${end} vs. ${FIX}:0-${fixSize} 1>&2
  twoBitToFa /gbdb/hg38/hg38.2bit:${chr}:${start}-${end} stdout \
    | sed -e "s/${chr}\(:${start}-${end}\)*/${FIX}_${start}-${end}/g"  \
      > hg38Bits/${FIX}.fa
  echo -e "${start}\t${FIX}_${start}-${end}\t${chr}\t${chrSize}" >> ../hg38Bits.lift
  echo -e "/hive/data/genomes/hg38/bed/hg38Patch2/hg38Bits.2bit:${FIX}_${start}-${end}:0-${bitSize}" > run.blastz/tParts/${FIX}.lst
  echo -e "/hive/data/genomes/hg38/bed/hg38Patch2/hg38Patch2.2bit:${FIX}:0-${fixSize}" > run.blastz/qParts/${FIX}.lst
  echo -e "/cluster/bin/scripts/blastz-run-ucsc -outFormat psl tParts/${FIX}.lst qParts/${FIX}.lst ../DEF {check out exists ../psl/${FIX}.psl}" >> run.blastz/jobList
done

ssh ku
cd /hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38Patch2.2015-01-16/run.blastz
para create jobList
para try ... check ... push ... etc
para time
# Completed: 31 of 31 jobs
# CPU time in finished jobs:         31s       0.51m     0.01h    0.00d  0.000 y
# IO & Wait Time:                    73s       1.22m     0.02h    0.00d  0.000 y
# Average job time:                   3s       0.06m     0.00h    0.00d
# Longest finished job:              17s       0.28m     0.00h    0.00d
# Submission to last job:            22s       0.37m     0.01h    0.00d

# put together the individual results
mkdir pslParts
cat psl/chr*.psl | gzip -c > pslParts/hg38.hg38Patch2.psl.gz

#   constructing a chain from those results
mkdir -p /hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38Patch2.2015-01-16/axtChain/run
cd /hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38Patch2.2015-01-16/axtChain/run

time zcat ../../pslParts/hg38.hg38Patch2.psl.gz \
| axtChain -psl -verbose=0 -scoreScheme=/scratch/data/blastz/human_chimp.v2.q -minScore=2000 -linearGap=medium stdin \
    /hive/data/genomes/hg38/bed/hg38Patch2/hg38Bits.2bit \
    /hive/data/genomes/hg38/bed/hg38Patch2/hg38Patch2.2bit \
    stdout \
     | chainAntiRepeat /hive/data/genomes/hg38/bed/hg38Patch2/hg38Bits.2bit \
     /hive/data/genomes/hg38/bed/hg38Patch2/hg38Patch2.2bit \
    stdin hg38.hg38Patch2.preLift.chain
    #   real    0m44.175s
liftUp hg38.hg38Patch2.lifted.chain \
    ../../../hg38Bits.lift carry hg38.hg38Patch2.preLift.chain

# constructing the net files:
cd /hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38Patch2.2015-01-16/axtChain

chainMergeSort run/hg38.hg38Patch2.lifted.chain \
        | gzip -c > hg38.hg38Patch2.all.chain.gz
chainSplit chain hg38.hg38Patch2.all.chain.gz
# Make nets ("noClass", i.e. without rmsk/class stats which are added later):
time chainPreNet hg38.hg38Patch2.all.chain.gz \
    /hive/data/genomes/hg38/chrom.sizes \
    ../../hg38Patch2.chrom.sizes stdout \
    | chainNet  stdin -minSpace=1 /hive/data/genomes/hg38/chrom.sizes \
        ../../hg38Patch2.chrom.sizes stdout /dev/null \
    | netSyntenic stdin noClass.net
    #   real    0m0.424s

# Make liftOver chains:
netChainSubset -verbose=0 noClass.net hg38.hg38Patch2.all.chain.gz stdout \
    | chainStitchId stdin stdout | gzip -c > hg38.hg38Patch2.over.chain.gz

# Make axtNet for download: one .axt per hg38Patch2 seq.
netSplit noClass.net net
cd ..
mkdir -p axtNet
foreach f (axtChain/net/*.net)
netToAxt $f axtChain/chain/$f:t:r.chain \
  /hive/data/genomes/hg38/hg38.2bit \
        ../hg38Patch2.2bit stdout \
  | axtSort stdin stdout \
  | gzip -c > axtNet/$f:t:r.hg38.hg38Patch2.net.axt.gz
end

# Make mafNet for multiz: one .maf per hg38Patch2 seq.
mkdir -p mafNet
foreach f (axtNet/*.hg38.hg38Patch2.net.axt.gz)
  axtToMaf -tPrefix=hg38. -qPrefix=hg38Patch2. $f \
        /hive/data/genomes/hg38/chrom.sizes \
        ../hg38Patch2.chrom.sizes \
        stdout \
  | gzip -c > mafNet/$f:t:r:r:r:r:r.maf.gz
end

cd /hive/data/genomes/hg38/bed/hg38Patch2/lastzHg38Patch2.2015-01-16/axtChain
mkdir -p queryChains
chainSplit -q queryChains hg38.hg38Patch2.all.chain.gz

# then run a 'lift over' chain/net on each single one
mkdir -p singleLiftOver

for F in queryChains/*.chain
do
    C=`basename ${F}`
    B=`echo ${C} | sed -e "s/.chain//"`
    chainPreNet -inclHap ${F} /hive/data/genomes/hg38/chrom.sizes \
        ../../hg38Patch2.chrom.sizes stdout \
    | chainNet -inclHap stdin -minSpace=1 /hive/data/genomes/hg38/chrom.sizes \
        ../../hg38Patch2.chrom.sizes singleLiftOver/${B}.raw.net \
        /dev/null
    netSyntenic singleLiftOver/${B}.raw.net singleLiftOver/${B}.noClass.net
    netFilter -chimpSyn singleLiftOver/${B}.noClass.net > singleLiftOver/${B}.chimpSyn.net
    netChainSubset -verbose=0 singleLiftOver/${B}.noClass.net \
        ${F} stdout \
    | chainStitchId stdin stdout > singleLiftOver/${C}
    echo "${F} -> singleLiftOver/${C}"
done

# put the chains together into one file
chainMergeSort singleLiftOver/chr*.chain | gzip -c \
        > hg38.hg38Patch2.single.over.chain.gz

 # construct psl files from those chains
chainToPsl hg38.hg38Patch2.single.over.chain.gz \
   /hive/data/genomes/hg38/chrom.sizes \
    ../../hg38Patch2.chrom.sizes \
     /hive/data/genomes/hg38/hg38.2bit \
       ../../hg38Patch2.2bit \
        hg38.hg38Patch2.over.psl
# chainToPsl appears to have a problem, note errors from pslCheck:
pslCheck -db=hg38 hg38.hg38Patch2.over.psl
# checked: 38 failed: 0 errors: 0

pslRecalcMatch hg38.hg38Patch2.over.psl \
  /hive/data/genomes/hg38/hg38.2bit \
    ../..//hg38Patch2.2bit \
      fixup.hg38.hg38Patch2.over.psl
pslCheck -db=hg38 fixup.hg38.hg38Patch2.over.psl
# checked: 38 failed: 0 errors: 0

# load this PSL track
#     this table name prefix altSeqLiftOverPsl is recognized in hgc clicks
hgLoadPsl hg38 -table=altSeqLiftOverPslP2 fixup.hg38.hg38Patch2.over.psl

mkdir /hive/data/genomes/hg38/bed/hg38Patch2/seqExt
cd /hive/data/genomes/hg38/bed/hg38Patch2/seqExt
twoBitToFa ../hg38Patch2.2bit hg38Patch2.fa
mkdir -p /gbdb/hg38/hg38Patch2 hg38Patch2
faSplit byname hg38Patch2.fa ./hg38Patch2/
ln -s `pwd`/hg38Patch2/*.fa /gbdb/hg38/hg38Patch2
hgLoadSeq -drop -seqTbl=seqHg38Patch2 -extFileTbl=extHg38Patch2 hg38 \
        /gbdb/hg38/hg38Patch2/*.fa

#############################################################################
