#!/usr/bin/perl
# This is a test of a wincvs commit under NT
# Parse the command line into the global %settings hash, then print the
# usage text and exit when help was requested.  check_args() itself dies
# when no output method (-c, -x or -b) was selected.
%settings = check_args(@ARGV);
if($settings{help}) {
    # Single-quoted heredoc: the text is printed verbatim, so the "\s" in the
    # separator default is not treated as an escape.
    print << 'EOH';
restructure.pl
restructures a ascii text file which is delineated into columns
separated by spaces or columns.
at least one output method (-c, -x or -b) has to be selected.
-r "[colnum[:transform]]+" -> restructure
colnum = column number for the output file
transform = factor to transform the output by
exe: -r "3:10 4 5 1:.01 3" would generate an output with the 3rd, 4th,
5th, 1st and 3rd columns of the input with each element of the first
3rd column multiplied by 10 and each element of the original 1st
column (the 4th in the output) multiplied by .01.
-c -> column output
exe: -c would print the columns as text, joined by the output column
separator
-p -> pad
exe: -p would generate output columns padded to the length of the
longest element in the column
-d "separator" -> output column separator; default: " "
-i "filename" -> input filename
-s "regex" -> input separator regex; default: ",\s*"
-x -> xmlize
exe: -> -x would generate output of the form:
123456
123456
-n "dataname" -> title for the data
exe: -n "2001Earnings" would put the string 2001Earnings as the name
property for the xml output or would use the table 2001Earnings as
the table to insert into in the database.
-b "[user][:pass]@[hostname][/dbname]" -> insert columns into database url
exe: will:mypass@localhost/samples will insert the columns into the
table specified by -n; the table must already be created and have
column names matching the columns of the input.
-v -> verbose
-h -> help
EOH
    exit 0;
}
# When an input file was named with -i, re-open STDIN onto it so the rest of
# the script can read its input from STDIN in either case.
if($settings{filename}) {
    if($settings{verbose}) {
        print "Opening file: $settings{filename}\n";
    }
    # A failed open leaves nothing to read, so stop here and report the
    # operating-system reason instead of carrying on with no input.
    open(STDIN, '<', $settings{filename})
        or die "Error opening \"$settings{filename}\": $!\n";
}
# Read the input line by line from STDIN into the global @output, one array
# reference per line.  Each line has its trailing newline and leading
# whitespace removed and is split on $settings{input_separator}.  When a
# restructuring spec exists ($settings{columns}), the output row is built
# from the selected input columns, and cells that look like numbers are
# multiplied by that column's magnitude; other cells are copied unchanged.
$line_count = 0;
# Read from STDIN explicitly: an empty while() condition never reads a line
# and never terminates.
while(<STDIN>) {
    chomp;
    s/^\s+//;
    my @fields = split /$settings{input_separator}/;
    if($settings{columns}) {
        my $position = 0;
        for my $spec (@{$settings{columns}}) {
            my $value = $fields[$spec->{index}];
            if($value =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/) {
                $output[$line_count][$position] = $value * $spec->{magnitude};
            } else {
                $output[$line_count][$position] = $value;
            }
            $position++;
        }
    } else {
        $output[$line_count] = [ @fields ];
    }
    $line_count++;
}
if($settings{verbose}) {
    print "$line_count line" . ($line_count != 1 ? "s" : "") . " read\n\n";
}
# Column output (-c): print every row of @output as text, cells joined by
# $settings{output_separator} and each row ended with a newline.
#
# With -p the cells are padded: a first pass records, per column, the longest
# integer part, the longest fractional part and the longest cell overall, and
# from those builds one printf format for numeric cells and one for strings.
# Without -p every cell is printed with "%s".
if($settings{column}) {
    my $number = qr/^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/;
    my @print_directive;

    if($settings{pad}) {
        my @max_length;
        for my $row (@output) {
            for my $j (0 .. $#{$row}) {
                my $cell = $row->[$j];
                if($cell =~ $number) {
                    my @parts = split /\./, $cell;
                    if(@parts > 2) {
                        warn "\"$cell\" split into " . scalar(@parts) . " parts\n";
                    }
                    $max_length[$j]{int} = max($max_length[$j]{int}, length($parts[0]));
                    $max_length[$j]{float} = max($max_length[$j]{float}, length($parts[1]));
                }
                # The "+ 1" leaves room for the decimal point.
                $max_length[$j]{whole} = max($max_length[$j]{whole},
                                             $max_length[$j]{int} + $max_length[$j]{float} + 1,
                                             length($cell));
            }
        }
        for my $j (0 .. $#max_length) {
            $print_directive[$j]{float} = "%" . $max_length[$j]{whole} . "." . $max_length[$j]{float} . "f";
            $print_directive[$j]{string} = "%" . $max_length[$j]{whole} . "s";
        }
        if($settings{verbose}) {
            print "Max lengths of columns:\n";
            for my $j (0 .. $#max_length) {
                printf " %20s : %2d %6s : %2d.%02d %9s\n",
                    $output[0][$j],
                    $max_length[$j]{whole},
                    "($print_directive[$j]{string})",
                    $max_length[$j]{int},
                    $max_length[$j]{float},
                    "($print_directive[$j]{float})";
            }
        }
    }

    for my $row (@output) {
        for my $j (0 .. $#{$row}) {
            my $cell = $row->[$j];
            my $kind = ($cell =~ $number) ? "float" : "string";
            # Fall back to "%s" for any column without a prepared format, so
            # rows longer than the first one still have all cells printed.
            my $format = $print_directive[$j] ? $print_directive[$j]{$kind} : "%s";
            printf($format, $cell);
            if($j != $#{$row}) {
                print $settings{output_separator};
            } else {
                print "\n";
            }
        }
    }
}
# XML output (-x): walk @output column by column.  Row 0 is skipped in the
# inner loop (it starts at row 1); each remaining cell of the column is
# printed on its own line, and a blank line is printed for a missing cell.
#
# NOTE(review): the strings printed here contain no markup at all, only
# whitespace and the cell value.  Presumably the element tags were lost from
# this copy of the script - confirm against the original before relying on
# the -x output.
if($settings{xml}) {
    print "\n";
    for my $column (0 .. $#{$output[0]}) {
        print " \n";
        for my $row (1 .. $#output) {
            my $value = $output[$row][$column];
            # Test for presence, not truth: a cell holding 0 is real data and
            # must not be reported as empty.
            if(defined $value && length $value) {
                print " $value\n";
            } else {
                print " \n";
            }
        }
        print " \n";
    }
    print "\n";
}
# Database output (-b): connect to PostgreSQL through the Pg module using the
# host/user/password/dbname entries of $settings{database}, optionally create
# the table named by -n with one numeric column per header cell (row 0 of
# @output), then insert every following row.  Any failed connection or
# command ends the script with the database's error message.
if($settings{database}) {
    if(!$settings{name}) {
        warn "In order to insert into a postgresql database -n must be set\n";
    } else {
        # Load Pg at run time.  A "use" statement is executed at compile time
        # even inside a conditional, which made Pg mandatory for every
        # output mode.
        # NOTE(review): the status constants are called fully qualified
        # because nothing is imported at compile time any more - confirm
        # against the installed Pg module.
        require Pg;

        my $number = qr/^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/;

        my $conninfo = "";
        foreach my $option ("host", "user", "password", "dbname") {
            $conninfo .= "$option=$settings{database}{$option} " if $settings{database}{$option};
        }
        if($settings{verbose}) {
            print "Database: $conninfo\n";
        }
        my $db = Pg::connectdb($conninfo);
        die $db->errorMessage unless Pg::PGRES_CONNECTION_OK() eq $db->status;

        if($settings{database}{create}) {
            my @definitions;
            for my $header (@{$output[0]}) {
                # Double any embedded double quote so the header stays a
                # single quoted identifier.
                (my $identifier = $header) =~ s/"/""/g;
                push @definitions, "\"$identifier\" numeric";
            }
            my $statement = "create table $settings{name} (" . join(", ", @definitions) . ");";
            if($settings{verbose}) {
                print " creating table with command: \"$statement\"\n";
            }
            my $result = $db->exec($statement);
            die $db->errorMessage unless Pg::PGRES_COMMAND_OK() eq $result->resultStatus;
        }

        for my $i (1 .. $#output) {
            my @values;
            for my $cell (@{$output[$i]}) {
                if(defined $cell && $cell =~ $number) {
                    push @values, $cell;
                } else {
                    # Anything that is not a plain number goes in as a quoted
                    # literal with quotes and backslashes doubled, so file
                    # content cannot change the shape of the statement.
                    my $literal = defined $cell ? $cell : "";
                    $literal =~ s/(['\\])/$1$1/g;
                    push @values, "'$literal'";
                }
            }
            my $statement = "insert into $settings{name} values (" . join(", ", @values) . ");";
            if($settings{verbose}) {
                print " inserting values with command: \"$statement\"\n";
            }
            my $result = $db->exec($statement);
            die $db->errorMessage unless Pg::PGRES_COMMAND_OK() eq $result->resultStatus;
        }
    }
}
# max(LIST) - return the numerically largest argument.
# The first argument is the starting candidate; each later argument replaces
# it only when it compares strictly greater.  With no arguments the result is
# undef.
sub max {
    my ($largest, @others) = @_;
    for my $candidate (@others) {
        next unless $largest < $candidate;
        $largest = $candidate;
    }
    return $largest;
}
# check_args(@arguments) - turn the command-line arguments into settings.
#
# An option is recognised, case-insensitively, by the first letter after its
# leading dash(es).  Flags: -h help, -v verbose, -x xml, -e exp, -p pad,
# -c column.  Options that take the following argument as their value:
#   -n name, -i filename, -d output_separator, -s input_separator (compiled
#   as a regex), -r restructuring spec, -b database.
# The -r spec is a whitespace-separated list of "colnum[:factor]" items; each
# becomes { index => colnum - 1, magnitude => factor or 1 } in
# $settings{columns}.
#
# Returns the settings as a flat key/value list.
# Dies when neither help nor an output method (-c, -x, -b) was requested.
sub check_args {
    my @arguments = @_;
    my %settings = (
        output_separator => " ",
        input_separator  => qr(,\s*),
    );

    for(my $i = 0; $i <= $#arguments; $i++) {
        my $option = $arguments[$i];
        # Patterns are anchored at the start so that only real options match,
        # never a dash that happens to occur inside some other argument.
        if($option =~ /^--?h/i) {
            $settings{help} = 1;
        } elsif($option =~ /^--?v/i) {
            $settings{verbose} = 1;
        } elsif($option =~ /^--?x/i) {
            $settings{xml} = 1;
        } elsif($option =~ /^--?e/i) {
            $settings{exp} = 1;
        } elsif($option =~ /^--?p/i) {
            $settings{pad} = 1;
        } elsif($option =~ /^--?b/i) {
            # NOTE(review): the value following -b is skipped but not parsed;
            # the connection parameters below are fixed.  The usage text
            # describes a user:pass@host/dbname value - confirm which is
            # intended.
            $settings{database}{create} = 1;
            $settings{database}{dbname} = "graph";
            $settings{database}{user} = "will";
            $settings{database}{password} = "";
            $settings{database}{host} = "localhost";
            $i++;
        } elsif($option =~ /^--?c/i) {
            $settings{column} = 1;
        } elsif($option =~ /^--?n/i) {
            $settings{name} = $arguments[$i + 1];
            $i++;
        } elsif($option =~ /^--?i/i) {
            $settings{filename} = $arguments[$i + 1];
            # Skip the filename so it is not examined as an option itself.
            $i++;
        } elsif($option =~ /^--?d/i) {
            $settings{output_separator} = $arguments[$i + 1];
            $i++;
        } elsif($option =~ /^--?s/i) {
            my $pattern = $arguments[$i + 1];
            $settings{input_separator} = qr($pattern);
            $i++;
        } elsif($option =~ /^--?r/i) {
            # split ' ' ignores leading and repeated whitespace, so extra
            # blanks in the spec do not produce empty column entries.
            my @cols = split ' ', $arguments[$i + 1];
            for my $j (0 .. $#cols) {
                my @parts = split /:/, $cols[$j];
                if(@parts > 2) {
                    warn "\"$cols[$j]\" split into parts: (@parts). More than one :? \n";
                }
                $settings{columns}[$j]{index} = $parts[0] - 1;
                $settings{columns}[$j]{magnitude} = (($parts[1]) ? $parts[1] : 1);
            }
            $i++;
        }
    }

    if($settings{verbose} && $settings{columns}) {
        print "Setting column restructuring information; output columns:\n";
        for my $j (0 .. $#{$settings{columns}}) {
            print " output $j = input " . ($settings{columns}[$j]{index} + 1) . " * " . $settings{columns}[$j]{magnitude} . "\n";
        }
    }

    # A help request needs no output method; without this exemption "-h" on
    # its own died here before the usage text could be shown.
    if(!$settings{help} && !($settings{column} || $settings{xml} || $settings{database})) {
        die "At least one output method has to be selected (-c, -x or -b)\n";
    }
    return %settings;
}