2 # fit.c - experiments in curve fitting
3 # for pazpar'2 ranking normalizing
5 # We have a number of data points ( position, score) from
6 # different sources. The task is to normalize them so that
7 # they all fall near the curve y=1/p, where p is the position
8 # This is done by adjusting the ranks R so that Rn = aR+b
9 # We need to find parameters a,b so as to minimize the chi-
10 # squared difference from y=1/p
13 my $plotnr = 1; # number the tmp files for plotting
14 my $plotcmd = ""; # the plot commands for gnuplot
16 # Calculate the (squared) difference from the normalized rank to the 1/n function
19 # r = rank, not normalized
20 # a,b normalizing params
22 my ( $p, $r, $a, $b ) = @_;
23 my $rn = $r * $a + $b;
24 my $f = 1.0 / $p; # target value
29 # Read and process one data file
30 # Just one float number per line, nothing else
34 open F, $fn or die "Could not open $fn: $!\n";
35 my $n = 1; # number of data points
41 $title = $_ unless defined($title);
42 next unless /^[0-9]/; # skip comments etc
44 $first = $v unless defined($first);
46 #print "Data $n is $v\n";
49 $title =~ s/^[# ]+//; # clean the '#' and leading space
50 print "$fn: '$title' $n points: $first - $last \n";
51 # Initial guess Rn = a*R + b
54 # step sizes for a and b
61 # 5 sums: at (a,b) (a+,b), (a-,b), (a,b+), (a,b-)
62 my $sab = 0.0; # at a,b
63 my $sap = 0.0; # at a+da,b
64 my $sam = 0.0; # at a-da,n
65 my $sbp = 0.0; # at a, b+db
66 my $sbm = 0.0; # at a, b-db
67 for ( my $p = 1 ; $p < $n; $p++ ) {
68 $sab += diff( $p, $d[$p], $a, $b );
69 $sap += diff( $p, $d[$p], $a+$da, $b );
70 $sam += diff( $p, $d[$p], $a-$da, $b );
71 $sbp += diff( $p, $d[$p], $a, $b+$db );
72 $sbm += diff( $p, $d[$p], $a, $b-$db );
74 my $dif = $sab - $prev;
75 #print "iteration $iteration: a=$a +- $da b=$b +- $db chisq=$sab dif=$dif\n";
76 if ( (abs($da) < abs($a)/100.0 && abs($db) < abs($b)/100.0) ||
77 ($iteration >= 100 ) ||
78 (abs($dif) < 0.00001 ) ) {
79 print "it-$iteration: a=$a +- $da b=$b +- $db chisq=$sab dif=$dif\n";
84 if ( $sap < $sab && $sap < $sam ) {
86 } elsif ( $sam < $sab && $sam < $sap ) {
93 if ( $sbp < $sab && $sbp < $sbm ) {
95 } elsif ( $sbm < $sab && $sbm < $sbp ) {
104 my $pf = "/tmp/plot.$plotnr.data";
106 open PF, ">$pf" or die "Could not open plot file $pf: $!\n";
107 for ( my $p = 1 ; $p < $n; $p++ ) {
108 my $rn = $d[$p] * $a + $b;
112 $plotcmd .= "," if ($plotcmd);
113 $plotcmd .= "\"$pf\" using 1:2 with points title \"$title\"";
121 if ( !defined($ARGV[0]) ) {
122 die "Need at least one file to plot\n";
130 "set out \"plot.png\" \n" .
135 open GP, "| gnuplot" or die "Could not open a pipe to gnuplot: $!\n";