#!/usr/bin/env perl

# DO NOT EDIT the /cluster/bin/scripts copy of this file --
# edit ~/kent/src/hg/stsMarkers/sorttbl instead.

use strict;
use warnings;

# Reduce $0 to the bare program name (drop everything up to the last '/')
# so that help and error messages show "sorttbl" rather than a full path.
$0 =~ s{.*/}{};

# Modified by markd@soe.ucsc.edu to use gsort -g
# Modified by karplus@soe.ucsc.edu to look for gsort then sort

use English;
use File::Basename;

# Usage text, printed when the script is run without arguments or with -help.
# $0 is interpolated, so the program name shown is whatever $0 holds here.
my $HelpInfo = <<EOH ;

	    RDB operator: $0

Usage:  $0  [options]  [-r]  column  [[-r]  column]  ...

Options:
    -c       Check that the rdbtable is sorted on the selected columns.
    -d       Dictionary order. Only letters, digits, and SPACE are significant
	     in comparisons.
    -f       Fold in lower case. Treat upper- and lower-case letters equally.
    -help    Print this help info.
    -i       Ignore characters outside the ASCII range 040-0176 in non-numeric
	     comparisons.
    -r       Reverse order. Applies to the following column only.
    -T dir   Place temporary files in directory 'dir'.
    -u       Make rows unique on selected columns.

Sorts an rdbtable on one or more columns. Each column may be sorted in normal
(ascending) or reverse (descending) order. Note that the '-r' on the command
line applies only to the next column; the absence of a '-r' means the next
column will sort in normal order.

Also a column of monthnames (Jan, Apr, ...) in any case letters, may be sorted.

This operator reads an rdbtable via STDIN and writes an rdbtable via STDOUT.
Options may be abbreviated.

NOTE: This version is modified to use gsort with the -g option for numerics
to support scientific notation.

EOH

# ---- Command-line option parsing ------------------------------------------
# Leading arguments that start with '-' are options; whatever remains in
# @ARGV afterwards is the list of column names (possibly mixed with further
# '-r' flags, which are handled when the definition line is read).
my $CHK  = 0;		# -c seen: only check the sort order
my $sarg = "";		# option string handed to the external sort command
my $XBUG = 0;		# -x seen: print the sort arguments on STDERR
my $cf   = "";		# per-column flag letters pending for the next sort key
my $reverse_opts = 0;	# how many -r options were seen in the leading options

if (scalar(@ARGV) < 1) {
    print $HelpInfo; exit 255;
}

# Test @ARGV first so that consuming the last argument does not evaluate an
# undefined $ARGV[0].  Patterns are anchored so that only the option's own
# leading letter selects it ("Options may be abbreviated").
while ( @ARGV && $ARGV[0] =~ /^-/ ){			# Get args
    my $opt = shift @ARGV ;
    if( $opt =~ /^-c/ ){		# check sort order
	$CHK++ ;
	$sarg .= '-c ' ;
	next ; }
    if( $opt =~ /^-d/ ){		# Dictionary order
	$sarg .= '-d ' ; next ; }
    if( $opt =~ /^-f/ ){		# fold up/low case
	$sarg .= '-f ' ; next ; }
    if( $opt =~ /^-h/ ){
	print $HelpInfo ; exit 1 ; }
    if( $opt =~ /^-i/ ){		# Ignore bad characters
	$sarg .= '-i ' ; next ; }
    if( $opt =~ /^-r/ ){		# descending order
	# Remember the flag for the first column; whether it also becomes a
	# global sort option is decided after the loop.
	$cf .= 'r' ;
	$reverse_opts++ ;
	next ; }
    if( $opt =~ /^-T/ ){		# temp dir
	my $dir = shift @ARGV ;
	die "\n$0: option -T requires a directory\n",
	    "For help type \"$0 -help\".\n"
	    unless defined $dir && length $dir ;
	# $sarg is later interpolated into a shell command line, so quote the
	# directory to survive spaces and shell metacharacters.
	( my $quoted = $dir ) =~ s/'/'\\''/g ;
	$sarg .= "-T '$quoted' " ;
	next ; }
    if( $opt =~ /^-u/ ){		# unique
	$sarg .= '-u ' ; next ; }
    if( $opt =~ /^-x/ ){ $XBUG++ ; next ; }
    die "\nBad arg: $opt\n", "For help type \"$0 -help\".\n" ;
}

# When column names follow, one leading -r belongs to the first column (it is
# carried in $cf) and must not reverse the whole sort; without columns every
# -r is a global option.  Counting the flags instead of editing $sarg with a
# substitution avoids damaging a -T directory that happens to contain "-r".
my $global_reverse = $reverse_opts - ( @ARGV ? 1 : 0 ) ;
$sarg .= '-r ' x $global_reverse if $global_reverse > 0 ;

$|++ ;		# to force out 1st 2 lines (in pipe)

#  global variables
my @H;			# fields of the header/definition line just split;
			# get_col_x reads it while it holds the header
my @F;			# column names, copied from the header line
my $lln = 0;		# number of non-comment leading lines consumed so far
my $ADDCOL = 0;		# set when each data row must get a trailing line-number
			# column (-c together with column keys).  Declared
			# outside the loop so the setting made on the
			# definition line is still in effect for data rows.
my $sort_fh;		# pipe to the external sort, opened after the
			# definition line has been processed

# Main loop: the first two non-comment lines (header, definition) are echoed
# and used to build the sort keys; every later line is fed to the sort pipe.
while(<STDIN>){
    if( $lln < 2 ){
	print unless $CHK ;
	next if /^\s*#/ ;		# comment
	chomp ;
	@H = split( /\t/, $_ );
	if( ++$lln == 1 ){		# header line
	    @F = @H ;
	    get_col_x() ;		# chk column names
	    next ;
	}

	# definition line: translate the column arguments into sort keys
	$| = 0 ;
	my $have_keys = 0 ;
	for my $arg ( @ARGV ){
	    if( $arg =~ /^-r.*/ ){
		$cf .= 'r' ;
		next ; }
	    for my $f ( 0 .. $#F ){
		next unless $arg eq $F[$f] ;
		my $g = $f + 1 ;
		# First whitespace-delimited token of this column's
		# definition; empty if the definition line has no such field.
		my ($type) = defined( $H[$f] ) ? ( $H[$f] =~ /(\S+)/ ) : () ;
		$type = '' unless defined $type ;
		$cf .= 'g' if $type =~ /N/i ;	# numeric
		$cf .= 'M' if $type =~ /M/i ;	# month
		$sarg .= "+$f -$g$cf " ;
		$cf = '' ;
		$have_keys = 1 ;
		last ;
	    }
	}
	if( $CHK && $have_keys ){
	    # Extra numeric key on a column that will hold the input line
	    # number of each row.
	    $ADDCOL = 1 ;
	    my $f = @H ;
	    my $g = $f + 1 ;
	    $sarg .= "+$f -${g}g " ;
	}
	print STDERR "sort arg:  -t\"\t\" $sarg\n" if $XBUG ;

	# try gsort first for non-linux systems without GNU sort
	my $arch = `uname -m` ;
	$arch = '' unless defined $arch ;
	chomp $arch ;
	my $gsort = "/projects/compbio/bin/$arch/gsort" ;
	my $sortCmd = ( -e $gsort ) ? $gsort : "sort" ;
	open( $sort_fh, '|-', "$sortCmd -t\"\t\" $sarg" )
	    or die "\nCan't open pipe to gsort or sort: $!\n" ;
	next ;
    }
    if( $ADDCOL ){	# add dummy column for check option
	chomp ;
	print {$sort_fh} $_, "\t", $., "\n" ;
    }
    else{		# normal case
	print {$sort_fh} $_ ;
    }
}

# The pipe only exists once both leading lines were read; report that case
# explicitly instead of closing a handle that was never opened.
die "\n$0: input is missing the rdbtable header and/or definition line\n"
    unless defined $sort_fh ;
close( $sort_fh ) || die("sort failed");
sub get_col_x {		# check the column names given on the command line
			# uses @ARGV and @H (the split header line); dies
			# with "Bad column name" for the first argument that
			# is not an option and matches no entry of @H.
			# modified for sorttbl: only validates, returns nothing.
    for my $arg (@ARGV){
	next if $arg =~ /^-/ ;		# option such as -r, not a column
	my $found = grep { $arg eq $_ } @H ;
	die "\n$0: Bad column name: $arg\n" unless $found ;
    }
    return ;
}
