#!/usr/bin/perl -w
# DO NOT EDIT the /cluster/bin/scripts copy of this file -- 
# edit ~/kent/src/utils/lodToBedScore instead.

# $Id: lodToBedScore,v 1.4 2004/10/11 00:18:03 acs Exp $

use strict;

# This script does Adam's log transform of input scores into the bed score 
# "spectrum" for browser display: 0-1000.  The transform is 
# f(x) = a * log x + b, s.t. f(x_med) = 300 and f(x_max) = 1000.  
# where x_max is the max input score and x_med is the median input score.
# Solving for a and b, you obtain 
# b = (300 log x_max - 1000 log x_med) / (log x_max - log x_med)
# a = (1000 - b) / log x_max.  

sub usage {
  die "
Usage: $0 [input.bed]
    Transform scores in input.bed (which must have at least 5 columns,
    whitespace-delimited) by the function
    f(x) = a * log x + b,  where f(x_med) = 300 and f(x_max) = 1000
    to stdout.
    Inputs must be positive, and max must be greater than median,
    in order for this transform to work.
\n";
}

if (grep /^-h/, @ARGV) {
  &usage();
}

# slurp input into memory (duplicate scores so they can be sorted separately):
my @lines = ();
my @scores = ();
while (<>) {
  next if (/^#/);
  chomp;
  my @words = split();
  if (scalar(@words) < 5) {
    die "\nError: input must have at least 5 columns. (input line $.)\n\n";
  }
  if ($words[4] <= 0) {
    printf STDERR "\nWarning: original scores must be positive; value of %f will be transformed to 0 (input line $.)\n\n", $words[4];
  }
  else {			# don't push non-positive scores
    push @scores, $words[4];
  }
  push @lines, \@words;
}

# sort scores; extract and check max and median.
@scores = sort {$a <=> $b} @scores;
my $n = scalar(@scores);
my $max = $scores[$n-1];
my $med = $scores[int($n / 2)];

if ($max <= $med) {
  die "
Error: max score ($max) must be greater than median ($med) 
in order for this transform to apply.
\n";
}

# calculate coefficients a and b.
my $b = ( ((300 * log($max)) - (1000 * log($med))) /
	  (log($max) - log($med)) );
my $a = (1000 - $b) / log($max);

# Do the transform on the score column of saved input:
foreach my $wRef (@lines) {
  my @words = @{$wRef};
  if ($words[4] > 0) { 
    $words[4] = int(($a * log($words[4])) + $b);
  }
  if ($words[4] < 0) { $words[4] = 0 };
				# applies either if $words[4] was
				# originally negative or if it is made
				# negative by the transformation
  print join("\t", @words) . "\n";
}

