
2012-05-17: import and UCSC GENCODE group processing of GENCODE V12 (markd)
    # Because the UCSC Genome Browser uses the NC_001807 mitochondrial genome
    # sequence (chrM) while GENCODE annotates the NC_012920 mitochondrial
    # sequence, the GENCODE mitochondrial sequences are lifted to UCSC chrM.

    # download files
    mkdir -p /hive/groups/encode/dcc/data/gencodeV12/release
    cd /hive/groups/encode/dcc/data/gencodeV12/release

    wget -nv ftp://ftp.sanger.ac.uk/pub/gencode/release_12/gencode.v12.2wayconspseudos.gtf.gz
    wget -nv ftp://ftp.sanger.ac.uk/pub/gencode/release_12/gencode.v12.annotation.gtf.gz
    wget -nv ftp://ftp.sanger.ac.uk/pub/gencode/release_12/gencode.v12.long_noncoding_RNAs.gtf.gz
    wget -nv ftp://ftp.sanger.ac.uk/pub/gencode/release_12/gencode.v12.pc_transcripts.fa.gz
    wget -nv ftp://ftp.sanger.ac.uk/pub/gencode/release_12/gencode.v12.pc_translations.fa.gz
    wget -nv ftp://ftp.sanger.ac.uk/pub/gencode/release_12/gencode.v12.polyAs.gtf.gz
    wget -nv ftp://ftp.sanger.ac.uk/pub/gencode/release_12/gencode.v12.tRNAs.gtf.gz
    wget -nv ftp://ftp.sanger.ac.uk/pub/gencode/release_12/gencode12_GRCh37.tgz

    # silly sanity check:
    for f in * ; do zcat $f >/dev/null ; done

    # untar main distribution
    tar -zxf gencode12_GRCh37.tgz

    # obtain transcription support level analysis from UCSC GENCODE group (markd/rachel)
    mkdir -p data
    cp /cluster/home/markd/compbio/ccds/branches/transSupV12.1/modules/gencodeTransSupport/exprs/classDev/runs/2012-05-18/results/gencode.v12.transcriptionSupportLevel.{tab,tsv} data/

    # create Makefile from previous one
    cd /hive/groups/encode/dcc/data/gencodeV12/
    cp ../gencodeV11/Makefile .
    # edit as needed
   
    # the build depends on code in the CCDS subversion tree:
    #   svn+ssh://hgwdev.soe.ucsc.edu/projects/compbio/svnroot/hausslerlab/ccds/trunk
    # and markd's python library (it will be moved to the hausslerlab
    # repository soon)
    # may need to update
        ccds2/modules/gencode/src/lib/gencode/data/gencodeGenes.py
    # to add new biotypes.  Use this command to verify and update as needed:
    make checkAttrs

    # build and load tables
    (time nice make -j 10) >&all.out&

    ## copy and update trackDb files from previous release
    ## Important to make sure filter attrs.transcriptType matches current set
    ## figured out with:
    select distinct transcriptType from wgEncodeGencodeAttrsV12 order by transcriptType;
    cd kent/src/hg/makeDb/trackDb
    cp human/hg19/wgEncodeGencodeV10.ra human/hg19/wgEncodeGencodeV12.ra
    cp human/hg19/wgEncodeGencodeV10.html human/hg19/wgEncodeGencodeV12.html

    ### IMPORTANT: make sure that hgTracks/simpleTracks.c registers a
    ### track handler for this version of GENCODE:
    registerTrackHandlerOnFamily("wgEncodeGencodeV12", gencodeGeneMethods);

------------------------------------------------------------------------------
