

# Create the GDC working directory and work from inside it.
# -p keeps a re-run from failing when the directory already exists.
mkdir -p /cluster/data/hg38/bed/gdc
# Every later step reads and writes relative paths, so stop here rather than
# scatter output into whatever directory we happened to start in.
cd /cluster/data/hg38/bed/gdc || exit 1

# NOTE(review): data/ is not referenced again in the visible part of this
# file -- confirm whether it is still needed.
mkdir -p data
# GDC data portal repository query. Its filters select open-access files in
# MAF format, data type "Masked Somatic Mutation", produced by the workflow
# "MuTect2 Variant Aggregation and Masking". Presumably this is the query
# used to obtain the tarball extracted below.
#https://portal.gdc.cancer.gov/repository?filters=%7B%22op%22%3A%22and%22%2C%22content%22%3A%5B%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22files.access%22%2C%22value%22%3A%5B%22open%22%5D%7D%7D%2C%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22files.analysis.workflow_type%22%2C%22value%22%3A%5B%22MuTect2%20Variant%20Aggregation%20and%20Masking%22%5D%7D%7D%2C%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22files.data_format%22%2C%22value%22%3A%5B%22MAF%22%5D%7D%7D%2C%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22files.data_type%22%2C%22value%22%3A%5B%22Masked%20Somatic%20Mutation%22%5D%7D%7D%5D%7D

# Unpack the GDC download bundle into the current directory, listing each
# member as it is extracted. The loop further down expects the result to
# contain MANIFEST.txt plus one sub-directory per *.maf.gz file.
tar -x -v -f gdc_download_20190130_190717.328881.tar.gz

# Start trackDb.txt from scratch with three stanzas separated by blank lines:
# the hub description, the genome line, and the gdcComp composite track that
# the per-project subtracks name as their parent.
# Each argument below becomes exactly one output line; '' yields a blank line.
printf '%s\n' \
  'hub hub1' \
  'shortLabel GDC Cancers' \
  'longLabel GDC Cancers' \
  'useOneFile on' \
  'email genome-www@soe.ucsc.edu' \
  '' \
  'genome hg38' \
  '' \
  'track gdcComp' \
  'shortLabel NCI-GDC ' \
  'longLabel NCI Genomic Data Commons (GDC).' \
  'type bigBed 4 1 .' \
  'compositeTrack on' \
  '' \
  > trackDb.txt

# For every downloaded MAF file: derive a project name, convert the MAF to a
# sorted BED, build a bigBed from it, and append a subtrack stanza (child of
# gdcComp) to trackDb.txt.
#
# Inputs read from the current directory:
#   */*.maf.gz            one MAF per download sub-directory
#   MANIFEST.txt          maps the sub-directory name to a file name; the
#                         project name is the text between "TCGA" plus one
#                         character and the next "."
#   project2LongLabel.txt second column is used as the long label
#   bigFreq.as            autoSql description passed to bedToBigBed
# Outputs: <name>.bed, <name>.bb, and stanzas appended to trackDb.txt.
#
# tawk is an external helper; assumed to be awk with tab as the field
# separator for input and output -- TODO confirm.
#
# NOTE(review): columns 5/6/7 look like the MAF Chromosome / Start_Position /
# End_Position fields. MAF starts are normally 1-based while BED starts are
# 0-based, so $6 may need to be $6-1 -- confirm before changing, since that
# alters the published coordinates.
# NOTE(review): the project2LongLabel.txt lookup is a substring grep, so a
# name contained in another entry yields several lines -- confirm the file
# layout before tightening it.
for i in */*.maf.gz; do
  # An unmatched glob is left as the literal pattern; do not process it.
  [ -e "$i" ] || continue

  id=$(dirname -- "$i")
  name=$(grep -- "$id" MANIFEST.txt \
    | sed -e 's/.*TCGA.//' -e 's/\..*//' \
    | head -n 1)
  # Without a name we would write ".bed"/".bb" and grep "" would match every
  # long label, so skip the file and report it instead.
  if [ -z "$name" ]; then
    echo "warning: no project name in MANIFEST.txt for $i; skipping" >&2
    continue
  fi

  # Drop '#' comment lines and the column-header line, keep rows whose column
  # 102 is non-empty, and emit columns 5, 6, 7, 1, 102 as a bed4+1 record.
  # LC_ALL=C gives the byte-order chromosome sort that bedToBigBed checks for.
  zcat "$i" \
    | sed '/^#/d' \
    | tail -n +2 \
    | tawk '$102 != "" {print $5,$6,$7,$1,$102}' \
    | LC_ALL=C sort -k1,1 -k2,2n > "$name.bed"

  # Do not advertise a track whose bigBed could not be built.
  if ! bedToBigBed -as=bigFreq.as -type=bed4+1 "$name.bed" \
      /cluster/data/hg38/chrom.sizes "$name.bb"; then
    echo "warning: bedToBigBed failed for $name ($i); skipping" >&2
    continue
  fi

  longName=$(grep -- "$name" project2LongLabel.txt | tawk '{print $2}')

  # Subtrack stanza, followed by the blank line that separates stanzas.
  printf '%s\n' \
    "track $name" \
    "shortLabel $name" \
    "longLabel $longName" \
    "bigDataUrl $name.bb" \
    "type bigBed 4 1 ." \
    "lollipopField 4" \
    "parent gdcComp" \
    ""
done >> trackDb.txt

