#!/usr/bin/perl
########################################################################
#
# measure -- get the raw numerical data (nuclear F1 and F2, offglide
#            F1 and F2) from the formant tracks.
#
#
# Invocation:  perl measure FILE 'SPEAKER-CODE' BW
#
# Where FILE is the .wav filename and BW is the widest permitted bandwidth
# for a formant ("formants" with smaller BW will be ignored).  SPEAKER CODE
# is the speaker code that will be put on every line.
#
# Input:  FILE.TextGrid, File.FormantTier
# where the Text Grid tiers are
#  1.  Words  (interval tier, word boundaries, labelled with words)
#  2.  Diphthongs (interval tier, diphthong boundaries, labelled with words)
#  3.  Overrides (point tier, points marked "F1max" or whatever, to make
#      the pgm put F1max where you want it)
#  4.  Landmarks (point tier; the pgm's guesses for F1max, F2max, F2min)
#
# Output format (FILE.dat)
#
# SPEAKER-CODE Word Nuc NucF1 NucF2 F2max F2maxF1 F2maxF2
#
# E. Moreton  *  UNC-Chapel Hill Linguistics  *  Last update 2004 July 6
########################################################################

($File, $Spkr, $CRAZY_BW) = @ARGV;

{    
    ($Frame1Time, $FrameLength) = getFrameLength();

    open (TEXTGRID, "<./$File.TextGrid") || 
	die "measure: Can't open $File.TextGrid";
    getDiphthongs ();
    getOverrides();
    close (TEXTGRID);

    open (FORMANTS, "<./$File.FormantTier") ||
	die "measure: Can't open $File.FormantTier";
    open (DAT, ">./$File.dat") ||
	die "measure: Can't open $File.dat";

    open (CHECK, ">./$File.Check.TextGrid") ||
	die "writeCheck:  Couldn't create $File.Check.TextGrid";
    copyTextGrid ();         # Copy input text grid to .Check.TextGrid;

    $WordNum = 0;
    foreach $Diphthong (@Diphthongs) {
	($Word, $StartFrame, $EndFrame) = split / /, $Diphthong;
	$DataRef = getData ($StartFrame, $EndFrame);
	writeData ($WordNum++, $Word, $DataRef);
    }

    close (FORMANTS);
    close (DAT);
}


# Get frame-time correspondences
sub getFrameLength {
    my $FrameLength, $Frame1Time, $Frame2Time;
    open (FRAMES, "cat $File.FormantTier | egrep '^[ ]+time = [0-9]+\.[0-9]+'|") ||
	die "measure: getFrameLength: Can't open $File.FormantTier";

    $_ = <FRAMES>;
    ($foo, $Frame1Time) = split / = /, $_;
    $_ = <FRAMES>;
    ($foo, $Frame2Time) = split / = /, $_;
    $FrameLength = $Frame2Time - $Frame1Time;

    ($FrameLength > 0) ||
	die "measure: getFrameLength: Formant Tier frames appear to have zero length.  Is the FT file corrupt?";
    
    ($Frame1Time + 0, $FrameLength);
}



sub TimeToFrame {
    my $Time = shift (@_);
    int ( ($Time - $Frame1Time) / $FrameLength);
}


sub FrameToTime {
    my $Frame = shift (@_);
    $Frame * $FrameLength + $Frame1Time;
}



sub getDiphthongs {

    # Diphthong tier (interval tier, beg. and end of each diphthong, labelled
    # with word) gets stored as array of "word first_frame last_frame".

    # Scan to beginning of Diphthong tier in file
    while ($_ = <TEXTGRID>) {
	if (eof (TEXTGRID)) {
	    die "measure: getDiphthongs: Couldn't find Diphthong tier in $File.TextGrid";
	}
	last if (/Diphthong/);
    }
    
    # Read it in
    my $i = 0;
    while ($_ = <TEXTGRID>) {

	# Quit when we get to the Overrides tier
	last if (/item \[3\]/);
	
	# Find next interval specification
	next unless (/intervals \[([0-9]*)\]:/);
	my %Buf = {};
	
	# Get beginning, end, and label
	foreach my $var ('xmin', 'xmax', 'text') {
	    $_ = <TEXTGRID>;
	    s/\"//g; s/\n//g; tr/A-Z/a-z/;
	    my ($foo, $varname, $value) = split /\s+=*\s*/;
	    if ($var ne $varname) {
		die "measure:getDiphthongs:$File:Expected $var, found $varname in interval $i";
	    }
	    $Buf {$var} = $value;
	}
	
	# Non-empties only
	if ($Buf {"text"}) {
	    push @Diphthongs, $Buf {text} . ' ' . 
		TimeToFrame ($Buf {xmin}) . ' ' .
		TimeToFrame ($Buf {xmax});
	}
    }
}

sub getOverrides {

    # Override tier (point tier, each pt. labelled "F1max" or "F2max"
    # or "F2min") gets stored as an array where the index is the frame
    # number and the contents are a space-delimited concatenation of 
    # the labels
    
    # Scan to beginning of Overrides tier in file
    while ($_ = <TEXTGRID>) {
	if (eof (TEXTGRID)) {
	    die "measure: getOverrides: Couldn't find Overrides tier in $File.TextGrid";
	}
	last if (/Overrides/);
    }
    
    # Read it in
    my $i = 0;
    while ($_ = <TEXTGRID>) {

	last if (/Landmarks/);
	
	# Find next point specification
	next unless (/points \[([0-9]*)\]:/);
	my %Buf = {};
	
	# Get beginning, end, and label
	foreach my $var ('time', 'mark') {
	    $_ = <TEXTGRID>;
	    s/\"//g; s/\n//g; tr/A-Z/a-z/;
	    my ($foo, $varname, @values) = split /\s+=*\s*/;
	    if ($var ne $varname) {
		die "measure:getOverrides:$File:Expected $var, found $varname in interval $i";
	    }
	    $Buf {$var} = join ' ', @values;
	}
	
        # Non-empties only
        if ($Buf {"mark"}) {
	    $Overrides [TimeToFrame ($Buf {time})] .= $Buf {mark} . ' ';
        }
    }
}

sub getData {
    my ($StartFrame, $EndFrame) = @_;
    my %Data;

    @Tracks = getFormantTracks ($StartFrame, $EndFrame);

    foreach my $i (1, 2) {
	my $FormantPointName = "F$i" . 'max';
	my $Frame;

	# Locate F1max and F2max (frame numbers); process time overrides
	($Frame = 
	 getOverrideFrame ($i, 'max', $StartFrame, $EndFrame)) ||
	 ($Frame  =
	  getExtremeFrame ($i, 'max', @Tracks) + $StartFrame);    

	$Data {$FormantPointName} = $Frame;

	# Get relevant formant value at point
	foreach my $j (1, 2) {
	    $Data {$FormantPointName . "F$j"} = 
		getFormant ($j, $Tracks [$Frame - $StartFrame]);

	    # Process formant-value overrides, e.g., "F1=451"
	    if ($Overrides [$Frame] =~ /f$j\=[0-9\.]+/) {
		my ($fore, $aft) = split /f$j\=/, $Overrides [$Frame];
		my ($fval, $foo) = split /\s+/, $aft;
		$Data {$FormantPointName . "F$j"} = $fval;
	    }
	}
    }

    \%Data;
}


sub getFormantTracks {

    # Get formant tier between specified frames, inclusive.
    # Returns an array originating at 0, so you'll need to add $StartFrame
    # to any subscript to recover the actual formant-tier frame number.

    my ($StartFrame, $EndFrame) = @_;
    my @Tracks, @Formants, $Frame;

    for ($Frame = $StartFrame; $Frame <= $EndFrame; $Frame++) {
	push @Tracks, readFormantFrame ($Frame);
    }

    @Tracks;
}


sub readFormantFrame {
    my $Frame = $_[0];

    while ($_ = <FORMANTS>) {
	last if (/^points \[$Frame\]:/);
    }

    $_ = <FORMANTS>;
    $_ = <FORMANTS>;  /numberOfFormants/ ||
	die "measure:getNextFrame:$File: Expected numberOfFormants but found $_ at time $time";
    my ($foo, $Fmts) = split /\s+=\s* /;
    
    # Read all formants and associated bandwidths
    my @Fs = getFormantTier ('formant', $Fmts);
    my @Bs = getFormantTier ('bandwidth', $Fmts);
    
    # Eliminate formants with crazy bandwidths
    my @Formants = ();
    my $F; my $B;
    while (@Fs) {
	$F = shift (@Fs); $B = shift (@Bs);
	next if ($B > $CRAZY_BW);
	push @Formants, "$F $B";
    }
    
    \@Formants;
}


sub getFormantTier {
# Read one point (formants and bandwidths) from the Formant Tier

    my ($type, $Fmts) = @_;
    my @vals = (), $foo;

    $_ = <FORMANTS>; /$type \[\]/ ||
	die "measure:getFormantTier:$File: Expected $type but found $_ at time $time";

    for (my $i = 0; $i < $Fmts; $i++) {
	$_ = <FORMANTS>; /$type \[$i\] = / ||
	    die "measure:getFormantTier:$File: Expected $type but found $_ at time $time";
        ($foo, $vals [$i]) = split /=/;
    }

    @vals;
}


sub getExtremeFrame {
    my ($Formant, $Extremum, @Tracks) = @_;

    if ($Formant !~ /[1-6]/ || $Extremum !~ /(min|max)/) {
	die "measure: getExtremeFrame: What kind of a formant extremum is F$Formant$Extremum ?!";
    }

    my $extFrame = 0;  my $extFormant = ${$Tracks [0]}[$Formant -1];
    my $currFormant;
    for (my $Frame = 0; $Frame <= $#Tracks; $Frame++) {
        $currFormant = ${$Tracks [$Frame]}[$Formant -1];
	if ($Extremum =~ /min/) {
	    next if ($currFormant >= $extFormant);
        } else {
	    next if ($currFormant <= $extFormant);
        }
	$extFrame = $Frame;
        $extFormant = $currFormant;
    }

    $extFrame;
}


sub getOverrideFrame () {
    my ($Formant, $Extremum, $StartFrame, $EndFrame) = @_;
    my $Frame = 0;

    for (my $i = $StartFrame; $i <= $EndFrame; $i++) {
	if ($Overrides [$i] =~ /f$Formant$Extremum/) {
	    $Frame = $i;
	    last;
	}
    }
    $Frame;
}


sub getFormant {
    # Extract a formant value from the specified frame of @Tracks

    my ($Fnum, $TrackFrame) = @_;
    my ($F, $B) = split ' ', $ {$TrackFrame} [$Fnum - 1];

    $F;
}





# Write the results
sub writeData {
    my ($WordNum, $Word, $DataRef) = @_;

    print DAT "$Spkr ";
    printf DAT "%-10s ", $Word;

    my $ORs = '';              # Indicate overrides in output
    foreach my $i (1, 2) {
	my $maxFrame = $ {$DataRef} {"F$i".'max'};
	my $maxTime = FrameToTime ($maxFrame);
	my $F1 = $ {$DataRef} {"F$i".'max'.'F1'};
	my $F2 = $ {$DataRef} {"F$i".'max'.'F2'};

	printf DAT "%7.3f ", $maxTime;
	printf DAT "%4d %4d ", $F1, $F2;

	$ORs .= $Overrides [$maxFrame];

	writeLandmarksTier ($WordNum, $i, $maxFrame, $maxTime, $F1, $F2);
    }

    print DAT "$ORs\n";

}

# Make a TextGrid object for checking the results
sub copyTextGrid {

    # Copy the original TextGrid, but without its Landmarks tier

    open (TEXTGRID, "<./$File.TextGrid") ||
	die "writeCheck:  Couldn't reopen $File.TextGrid";
    while ($_ = <TEXTGRID>) {
	print CHECK $_;
	last if (/points: size/);    # Start of Overrides tier
    }
    while ($_ = <TEXTGRID>) {
	last if (/points: size/);    # Start of Landmarks tier
	print CHECK $_;
    }

    print CHECK "        points: size = ", 2*(1+$#Diphthongs), "\n";

    close (TEXTGRID);
}


sub writeLandmarksTier {

    # Print info on Formant $i max to .Check.TextGrid

    my ($WordNum, $i, $maxFrame, $maxTime, $F1, $F2) = @_;
    my $pointNum = 2 * $WordNum + $i - 1;

    print CHECK "        points [$pointNum]:\n";	
    printf CHECK "            time = %7.3f\n", $maxTime;
    printf CHECK "            mark = " . '"F%1dmax %4d %4d"'."\n", $i, $F1, $F2;
}    
