#!/usr/bin/env python2.7

import sys, os, shutil, argparse, urllib2, re
from ucscGb.gbData.ra import raFile
from ucscGb.encode import track
from ucscGb.encode import styles
from ucscGb.externalData.geo import submission

def filesize(val):
    """Format a byte count as a short human-readable string.

    The value is scaled to the largest binary unit (KB, MB, GB, TB) that it
    reaches and rounded to two decimal places; anything below 1024 is
    returned unscaled with a 'B' suffix.

    val -- number of bytes.

    Returns a string such as '512B', '1.5KB' or '2.0GB'.
    """
    # Largest unit first, so the first threshold that is reached wins.
    units = (
        (1099511627776, 'TB'),
        (1073741824, 'GB'),
        (1048576, 'MB'),
        (1024, 'KB'),
    )
    for factor, suffix in units:
        # '>=' so an exact multiple (e.g. 1024) reads '1.0KB', not '1024B'.
        if val >= factor:
            return str(round(float(val) / factor, 2)) + suffix
    return str(val) + 'B'


def printTrackRefLine(args):
    print 'Total expIds: ' + str(args.totalsubmitted) + '/' + str(args.totalexps) + ' out of tracks: ' + str(args.submittedtracks) + '/' + str(args.totaltracks)
    print 'Track'.ljust(25) + 'GEO Status'.ljust(12) + '# Files'.ljust(10) + 'Data Type'.ljust(16) + 'Releases'
        
def _countExperiments(args, t):
    """Tally the experiments of track t and how many count as submitted.

    Stanzas carrying an 'objStatus' key, and stanzas whose 'metaObject'
    value contains 'Aln', are left out of the per-experiment sample count.
    An experiment counts as submitted when at least one counted sample has a
    'geoSampleAccession' or, with args.pessimistic, only when all of them do.

    Each counted experiment is also added to args.totalexps, and each
    submitted one to args.totalsubmitted.

    Returns a (submitted, total) pair; experiments without any counted
    sample appear in neither number.
    """
    sub = 0
    tot = 0
    for exp in t.alphaMetaDb.experiments.iterkeys():
        submitted = 0
        samples = 0
        for stanza in t.alphaMetaDb.experiments[exp]:
            if 'objStatus' in stanza:
                continue
            if 'Aln' not in stanza['metaObject']:
                if 'geoSampleAccession' in stanza:
                    submitted += 1
                samples += 1
        if submitted > 0 and (not args.pessimistic or submitted == samples):
            sub += 1
            args.totalsubmitted += 1
        if samples > 0:
            tot += 1
            args.totalexps += 1
    return sub, tot


def _dataTypeColumn(args, t, tot):
    """Build the styled, 16-wide 'Data Type' column for track t.

    MicroArray and valid data types each add one to args.totaltracks.  A data
    type that is neither valid nor meant to be submitted takes this track's
    tot experiments back out of args.totalexps.
    """
    dataType = t.alphaMetaDb.dataType
    if dataType is None:
        datatype = styles.style('Not Found'.ljust(16), 'red')
    else:
        label = dataType.name.ljust(16)
        if dataType.type == 'MicroArray':
            datatype = styles.style(label, 'yellow')
            args.totaltracks += 1
        elif dataType.valid:
            datatype = styles.style(label, 'green')
            args.totaltracks += 1
        elif dataType.shouldSubmit:
            datatype = styles.style(label, 'red')
        else:
            datatype = styles.style(label, 'blue')
            args.totalexps -= tot

    # NOTE(review): datatype is always a padded, styled string by now, so this
    # condition can never hold and 'no expIds' is never shown.  Kept as-is
    # until the intended trigger (dataType missing? no experiments?) is
    # confirmed.
    if datatype == '' and len(t.alphaMetaDb.experiments) == 0:
        datatype = styles.style('no expIds'.ljust(16), 'red')
    return datatype


def _geoSummary(t, geo):
    """Compare the experiments of track t with the accessions found on GEO.

    Experiments are matched to geo.accessions by title and sorted into:
      local    -- title not present in geo.accessions
      mismatch -- some stanza records a different geoSampleAccession
      on geo   -- listed there, but some stanza has no geoSampleAccession
      match    -- every stanza records the listed accession

    Returns the one-line summary string.
    """
    experiments = t.alphaMetaDb.experiments
    local = 0
    offsite = 0
    mismatch = 0
    matched = 0
    for expId in experiments.iterkeys():
        exp = experiments[expId]
        title = exp.title
        # Direct membership test instead of scanning iterkeys() every time.
        if title not in geo.accessions:
            local += 1
            continue
        accession = geo.accessions[title]
        if any('geoSampleAccession' in stanza and stanza['geoSampleAccession'] != accession
               for stanza in exp):
            mismatch += 1
        elif any('geoSampleAccession' not in stanza for stanza in exp):
            offsite += 1
        else:
            matched += 1
    return '%d GSMs, %d expIds, %d match, %d local, %d on geo, %d mismatch' % (
        len(geo.accessions), len(experiments.keys()), matched, local, offsite, mismatch)


def trackLine(args, t):
    """Return the one-line status summary for track t.

    The line holds the track name, its GEO series accession (or
    'Unsubmitted'), the submitted/total experiment counts, the data type and
    one styled index per release.  With args.geo set and GEO data attached to
    the track as t.geo, submitted tracks also get a comparison against GEO.

    Side effects: updates the running counters args.totalsubmitted,
    args.totalexps, args.totaltracks and args.submittedtracks, which must
    already exist on args.

    A KeyError raised while building the line is reported in the returned
    line instead of propagating; counters updated before the error keep
    their new values.
    """
    nametext = t.name.ljust(25)
    try:
        sub, tot = _countExperiments(args, t)

        # red: nothing submitted, yellow: partially, green: everything.
        if sub == 0:
            color = 'red'
        elif sub < tot:
            color = 'yellow'
        else:
            color = 'green'
        subtext = styles.style(('%d/%d' % (sub, tot)).ljust(10), color)

        # TODO: show the public/unreleased status of the track once it is read
        # from trackDb.wgEncode.ra; the earlier placeholder (derived from
        # tot % 2) was never displayed and has been dropped.

        datatype = _dataTypeColumn(args, t, tot)

        geouptodate = ''
        # t.geo is only attached by callers that fetched GEO data; treat a
        # missing attribute like None rather than raising AttributeError.
        geo = getattr(t, 'geo', None) if args.geo else None
        if geo is not None:
            geouptodate = _geoSummary(t, geo)

        # One index per release: green when on the public site, red otherwise.
        releases = ''.join(
            styles.style(str(release.index), 'green' if release.onPublic else 'red')
            for release in t.releaseObjects)

        if 'geoSeriesAccession' in t.alphaMetaDb.compositeStanza:
            args.submittedtracks += 1
            accession = t.alphaMetaDb.compositeStanza['geoSeriesAccession']
            return nametext + styles.style(accession.ljust(12), 'green') + subtext + datatype + releases + ' ' + geouptodate
        return nametext + styles.style('Unsubmitted'.ljust(12), 'blue') + subtext + datatype + releases
    except KeyError as e:
        return styles.style(nametext, 'red') + styles.style('Error ' + str(e), 'red')
        
def expIdLine(args, expId, t):
    """Return the one-line summary for a single experiment of track t.

    args  -- parsed command-line namespace (not used here; kept so the
             signature matches the other line builders).
    expId -- the experiment: an iterable of stanzas with .name and .title.
    t     -- the track; t.files maps file names to objects with a .size.

    The line shows the experiment name, its title ('Inconsistent' when the
    experiment has no single title), and the combined size and count of the
    stanza files that are present in t.files.
    """
    samplefiles = 0
    samplesize = 0
    for stanza in expId:
        # A stanza without any file simply contributes nothing.
        if 'fileName' not in stanza:
            continue
        # 'fileName' may hold several comma-separated names.
        for fname in stanza['fileName'].split(','):
            if fname in t.files:
                samplesize += t.files[fname].size
                samplefiles += 1

    if expId.title is None:
        title = styles.style('Inconsistent', 'red')
    else:
        title = expId.title

    return expId.name.rjust(12) + ' ' + title + ' [%s]' % filesize(samplesize) + ' - %d files' % samplefiles
        
def displayAll(args, tracks):
    
    args.totalsubmitted = 0
    args.totalexps = 0
    args.totaltracks = 0
    args.submittedtracks = 0
    display = dict()
    for t in tracks.itervalues():
        display[t.name] = trackLine(args, t)
        
    keys = display.keys()
    keys.sort()
    
    nongeo = list()
    marray = list()
    unsub = list()
    subm = list()
    err = list()
    
    printTrackRefLine(args)
    for k in keys:
        try:
            if not tracks[k].alphaMetaDb.dataType.shouldSubmit:
                nongeo.append(display[k])
            elif tracks[k].alphaMetaDb.dataType.type == 'MicroArray':
                marray.append(display[k])
            elif tracks[k].alphaMetaDb.dataType.valid:
                if 'geoSeriesAccession' in tracks[k].alphaMetaDb.compositeStanza:
                    subm.append(display[k])
                else:
                    unsub.append(display[k])
            else:
                err.append(display[k])
        except AttributeError as e:
            err.append(display[k])
            
    for i in unsub:
        print i
    for i in subm:
        print i
    for i in marray:
        print i
    for i in nongeo:
        print i
    for i in err:
        print i
        
def displayTrack(args, t, expIds=None):
    print trackLine(args, t)
    
    if expIds == None:
        expIds = map(int, t.alphaMetaDb.experiments.keys())
        expIds.sort()
        expIds = map(str, expIds)
        
    for expId in expIds:
        print expIdLine(args, t.alphaMetaDb.experiments[expId], t)
    
def _parseExpIds(tokens):
    """Expand command-line expId tokens into a flat list of expId strings.

    Each token is either a single integer ('140') or an inclusive range
    written with a hyphen ('160-170').

    Raises ValueError when a token is not made of integers.
    """
    ids = list()
    for token in tokens:
        if '-' in token:
            start, end = token.split('-', 1)
            ids.extend(str(i) for i in range(int(start), int(end) + 1))
        else:
            ids.append(str(int(token)))
    return ids


def _loadGeo(t):
    """Attach the GEO submission of track t as t.geo (None if unsubmitted)."""
    t.geo = None
    try:
        stanza = t.alphaMetaDb.compositeStanza
        if 'geoSeriesAccession' in stanza:
            t.geo = submission.Submission(stanza['geoSeriesAccession'])
    except KeyError:
        # Best effort: a track whose metadata cannot be resolved is shown
        # without GEO details rather than aborting the whole report.
        pass


def main():
    """Command-line entry point: report tracks and their state on GEO.

    Without a composite every track of the database is summarised; with a
    composite that track is shown together with its experiments, optionally
    limited to the given expIds.
    """
    parser = argparse.ArgumentParser(description = 'Provides information about tracks and their state in relation to GEO')
    parser.add_argument('-p', '--pessimistic', action='store_true', default=False, help='Only count completed samples')
    parser.add_argument('-g', '--geo', action='store_true', default=False, help='Shows additional information crawled from the GEO submission page. WARNING: this takes significantly longer, so if used without a composite, this could take a few minutes')
    parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/')
    parser.add_argument('database', help='The database, typically hg19 or mm9')
    parser.add_argument('composite', nargs='?', help='The composite name, wgEncodeCshlLongRnaSeq for instance')
    parser.add_argument('expIds', nargs='*', help='Any number of expIds separated by spaces, you can also specify a range by using a hyphen, "140 150 160-170" for instance, or leave blank to specify the entire file')

    if len(sys.argv) == 1:
        parser.print_usage()
        return

    args = parser.parse_args(sys.argv[1:])

    # NOTE(review): args.trackPath is parsed but never handed on, so the
    # option currently has no effect -- confirm how TrackCollection expects it.
    tracks = track.TrackCollection(args.database)

    if args.composite is None:
        if args.geo:
            for t in tracks.itervalues():
                _loadGeo(t)
        displayAll(args, tracks)
        return

    try:
        t = tracks[args.composite]
    except KeyError:
        parser.error('composite %s not found in %s' % (args.composite, args.database))

    ids = None
    if args.expIds:
        # Validate before the (slow) GEO crawl so typos fail fast.
        try:
            ids = _parseExpIds(args.expIds)
        except ValueError:
            parser.error('expIds must be integers or ranges such as 160-170')

    # Load GEO data whether or not expIds were given; the track line reads
    # t.geo in both cases.
    if args.geo:
        _loadGeo(t)

    displayTrack(args, t, ids)
                    
                    
                    
                    
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()
                    