# Mouse Development Timecourse bulk RNA-seq (2026-04-10 Gerardo)
# Converting track hub from Wold Lab (Caltech) into native tracks.
# Hub URL: http://woldlab.caltech.edu/~diane/mouse_development_bulk/mouse_development.hub.txt
# Publication: https://www.encodeproject.org/publications/e0d01543-9965-4edb-933c-778a40575cd9/
# mm39 data are liftOver'd from mm10 GENCODE M21 alignments.
# 17 tissues, up to 8 timepoints (e10.5-e16.5, P0), two replicates per condition.
# bigBarChart tracks with FPKM and TPM from RSEM.

# Download data files from hub to /hive/data/outside/
# --fail makes curl exit non-zero on an HTTP error (e.g. 404) instead of
# saving the server's error page under the data file's name; --location
# follows redirects. A failed download is reported on stderr so that it is
# not overlooked before the files are symlinked into /gbdb.
mkdir -p /hive/data/outside/woldlab/mouseDevTimecourse/mm39
cd /hive/data/outside/woldlab/mouseDevTimecourse/mm39

for f in mouse_development_TPM_M21_mm39.bb mouse_development_FPKM_M21_mm39.bb \
    mouse_development_M21.categories mouse_development_M21.facets; do
    curl --fail --location -o "$f" \
        "http://woldlab.caltech.edu/~diane/mouse_development_bulk/$f" \
        || echo "ERROR: download failed: $f" >&2
done

# Expose the downloaded files under /gbdb/mm39/ through symlinks
mkdir -p /gbdb/mm39/mouseDevTimecourse
cd /gbdb/mm39/mouseDevTimecourse
# A single ln call: with "." as the last argument, ln creates one link per
# source file, each named after the file it points to.
ln -s /hive/data/outside/woldlab/mouseDevTimecourse/mm39/* .

# Add trackDb entry. The steps listed below were manual edits made in the
# mm39 trackDb directory; only the cd is an actual command.
cd ~/kent/src/hg/makeDb/trackDb/mouse/mm39
# Created mouseDevTimecourse.ra (superTrack with 2 bigBarChart subtracks)
# Created HTML description files:
#   mouseDevTimecourse.html (supertrack)
#   developmentTimecourseM21mm39TPM.html
#   developmentTimecourseM21mm39FPKM.html
# Added "include mouseDevTimecourse.ra" to trackDb.ra

# Load trackDb: make is run from the top-level trackDb directory with
# DBS=mm39 (presumably limiting the build to the mm39 database).
cd ~/kent/src/hg/makeDb/trackDb
make DBS=mm39

# 2026-05-13 (Gerardo): Reorder tissue rows in the .facets file per author
# request. Redmine #36998 note-43. Peng (lead author) asked for the tissues
# to be displayed in his biological order instead of the alphabetical order
# from the hub. The reorder is made only to the local copy in
# /hive/data/outside/woldlab/mouseDevTimecourse/mm39/; the hub at
# woldlab.caltech.edu still has the alphabetical order, so re-running the
# curl above will discard the reordering and require re-running the snippet
# below.

# Rewrite the .facets file in place so its data rows are grouped by tissue
# in the author's requested order. The header line stays first and the
# relative order of rows within each tissue is kept.
# "cd ... &&" ensures the in-place rewrite never runs from another directory
# if the cd fails.
cd /hive/data/outside/woldlab/mouseDevTimecourse/mm39 &&
python3 <<'EOF'
import sys

# Display order requested by the author (tissue name is column 3 of .facets).
TISSUE_ORDER = [
    "thymus",
    "spleen",
    "liver",
    "heart",
    "skeletal muscle tissue",
    "urinary bladder",
    "adrenal gland",
    "kidney",
    "lung",
    "stomach",
    "intestine",
    "limb",
    "embryonic facial prominence",
    "forebrain",
    "midbrain",
    "hindbrain",
    "neural tube",
]
FILES = ["mouse_development_M21.facets"]

for path in FILES:
    with open(path) as f:
        lines = f.readlines()
    if not lines:
        sys.exit("error: " + path + " is empty; nothing to reorder")

    header = lines[0]
    if not header.endswith("\n"):
        header += "\n"

    # Group the non-blank data rows by tissue, preserving file order.
    groups = {}
    row_count = 0
    for ln in lines[1:]:
        if not ln.strip():
            continue
        # A final row without a newline would otherwise be glued to the row
        # written after it once it is moved away from the end of the file.
        if not ln.endswith("\n"):
            ln += "\n"
        cols = ln.rstrip("\n").split("\t")
        if len(cols) < 3:
            sys.exit("error: " + path + ": row has fewer than 3 columns: "
                     + repr(ln))
        groups.setdefault(cols[2], []).append(ln)
        row_count += 1

    # Validate before touching the file: a tissue absent from TISSUE_ORDER
    # would have its rows silently dropped by the rewrite, and a tissue
    # absent from the file means the list and the data are out of sync.
    unknown = sorted(set(groups) - set(TISSUE_ORDER))
    missing = [t for t in TISSUE_ORDER if t not in groups]
    if unknown or missing:
        sys.exit("error: " + path + " not rewritten;"
                 + " tissues not in TISSUE_ORDER: " + repr(unknown)
                 + "; tissues not in file: " + repr(missing))

    out = [header]
    for tissue in TISSUE_ORDER:
        out.extend(groups[tissue])
    with open(path, "w") as f:
        f.writelines(out)
    print("reordered: " + path + "  (" + str(row_count) + " rows)")
EOF

# 2026-05-21 (Gerardo): Update colors in .facets and .categories files per
# author request via the Cell Browser team (Brittney Wick). Redmine #37001
# note-23. Peng asked that the bar colors match the per-sample colors used in
# the Cell Browser. Brittney provided a tab-separated file with the colors at
# /hive/data/inside/cells/datasets/mouse-encode-rna/sample_colors.tsv (78 rows,
# one color per (tissue, timepoint) pair, encoded as gradients within each
# tissue from light at the earliest time point to dark at P0). The update is
# made to the files in /hive/data/outside/woldlab/mouseDevTimecourse/mm39/
# only; the hub at woldlab.caltech.edu still has the old per-tissue colors,
# so re-running the curl above will overwrite the colors and require re-running
# the snippet below.

cd /hive/data/outside/woldlab/mouseDevTimecourse/mm39

python3 <<'EOF'
# Apply the Cell Browser per-sample colors to the local .facets file, then
# copy each label's new color into the .categories file.

COLORS_TSV = "/hive/data/inside/cells/datasets/mouse-encode-rna/sample_colors.tsv"
FACETS_FILE = "mouse_development_M21.facets"
CATEGORIES_FILE = "mouse_development_M21.categories"

# Tissue prefix used in the color TSV sample names -> tissue name used in
# the .facets file.
TISSUE_MAP = {
    "adrenal": "adrenal gland",
    "bladder": "urinary bladder",
    "face": "embryonic facial prominence",
    "forebrain": "forebrain",
    "heart": "heart",
    "hindbrain": "hindbrain",
    "intestine": "intestine",
    "kidney": "kidney",
    "limbs": "limb",
    "liver": "liver",
    "lung": "lung",
    "midbrain": "midbrain",
    "muscle": "skeletal muscle tissue",
    "neuraltube": "neural tube",
    "spleen": "spleen",
    "stomach": "stomach",
    "thymus": "thymus",
}


def fields(row):
    """Return the tab-separated fields of one line, without its newline."""
    return row.rstrip("\n").split("\t")


# Step 1: (tissue, timepoint) -> color, read from the TSV. The first line is
# skipped as a header; sample names look like <tissue>_<timepoint>.
palette = {}
with open(COLORS_TSV) as src:
    src.readline()
    for raw in src:
        if raw.rstrip("\n") == "":
            continue
        sample, color = fields(raw)
        prefix, _sep, stage = sample.rpartition("_")
        tissue = TISSUE_MAP[prefix]
        if stage == "p0":
            # The TSV writes the timepoint as lowercase "p0"; it is looked up
            # below as "P0".
            stage = "P0"
        palette[(tissue, stage)] = color

# Step 2: rewrite the .facets file with column 5 (the color) replaced by the
# palette entry for that row's (column 3, column 4) pair. Blank lines are
# dropped; the header line is kept as is.
with open(FACETS_FILE) as src:
    rows = src.readlines()
recolored = [rows[0]]
for raw in rows[1:]:
    if raw.strip():
        cells = fields(raw)
        cells[4] = palette[(cells[2], cells[3])]
        recolored.append("\t".join(cells) + "\n")
with open(FACETS_FILE, "w") as dst:
    dst.write("".join(recolored))
print("updated .facets: " + FACETS_FILE)

# Step 3: rewrite the .categories file with column 2 (the color) taken from
# the freshly updated .facets file, matched on the label in column 1. Blank
# lines are passed through unchanged.
label_to_color = {}
with open(FACETS_FILE) as src:
    src.readline()
    for raw in src:
        if raw.strip():
            cells = fields(raw)
            label_to_color[cells[0]] = cells[4]
with open(CATEGORIES_FILE) as src:
    rows = src.readlines()
updated = []
for raw in rows:
    if raw.strip():
        cells = fields(raw)
        cells[1] = label_to_color[cells[0]]
        updated.append("\t".join(cells) + "\n")
    else:
        updated.append(raw)
with open(CATEGORIES_FILE, "w") as dst:
    dst.write("".join(updated))
print("updated .categories: " + CATEGORIES_FILE)
EOF

##############################################################################
