#!/hive/groups/encode/dcc/bin/python
import sys, os, re, argparse, subprocess, httplib

def main():
    parser = argparse.ArgumentParser(
    prog='qaTableCheck',
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description='Compares table on the RR against a list of table',
    epilog=
"""Example:
qaTableCheck hg19 tables.list
qaTableCheck hg18 tables.list
"""
    )
    parser.add_argument('database', help='The database, typically hg19 or mm9')
    parser.add_argument('tables', help='The list of tables')


    if len(sys.argv) == 1:
        parser.print_help()
        return

    args = parser.parse_args(sys.argv[1:])

    dates = sorted(os.listdir("/cluster/data/genbank/var/tblstats/hgnfs1"))
    #print sorted(dates)
    recent = dates[-1]
    tablefile = "/cluster/data/genbank/var/tblstats/hgnfs1/%s/%s.tbls" % (recent, args.database)
    f = open(tablefile, "r")
    lines = f.readlines()
    f.close()
    lines.pop()
    RRTables = set()
    DevTables = set()
    for i in lines:
        words = i.split("\t")
        RRTables.add(words[0])
    f = open(args.tables, "r")
    lines = f.readlines()
    f.close()
    for i in lines:
        DevTables.add(i.rstrip("\n"))

    missingFromRR = DevTables - RRTables
    if missingFromRR:
        print "Tables missing from RR (%s):" % len(missingFromRR)
        for i in missingFromRR:
            print i
    else:
        print "No Tables missing from RR"

# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

