#!/usr/bin/perl

=begin metadata

Name: sum
Description: display file checksums and block counts
Author: Theo Van Dinter, felicity@kluge.net
License:

=end metadata

=cut


#
# An implementation of the 'cksum' utility in Perl.  Written for the Perl
# Power Tools (PPT) project by Theo Van Dinter (felicity@kluge.net).

use strict;
use integer;

use File::Basename qw(basename);
use Getopt::Std qw(getopts);

use constant BUFLEN => 4096;
use constant EX_SUCCESS => 0;
use constant EX_FAILURE => 1;
use constant MASK32 => 0xffffffff;
use constant MASK16 => 0xffff;
use constant MASK8  => 0xff;

my $Program = basename($0);

my %opt;
help() unless getopts('a:o:', \%opt);

# crc default for cksum; bsd default for sum
my $alg;
if ($Program =~ m/cksum/) {
	$alg = \&crc32;
} else {
	$alg = \&sum1;
}
if (defined $opt{'a'}) {
	help() if defined $opt{'o'};
	my %codetab = (
		'blake224' => \&do_blake,
		'blake256' => \&do_blake,
		'blake384' => \&do_blake,
		'blake512' => \&do_blake,
		'crc'    => \&crc32,
		'jh224' => \&do_jh,
		'jh256' => \&do_jh,
		'jh384' => \&do_jh,
		'jh512' => \&do_jh,
		'haval256' => \&do_haval256,
		'md2'    => \&do_md2,
		'md4'    => \&do_md4,
		'md5'    => \&do_md5,
		'sha1'   => \&sha_all,
		'sha224' => \&sha_all,
		'sha256' => \&sha_all,
		'sha384' => \&sha_all,
		'sha512' => \&sha_all,
		'sha3-224' => \&do_sha3,
		'sha3-256' => \&do_sha3,
		'sha3-384' => \&do_sha3,
		'sha3-512' => \&do_sha3,
		'whirlpool' => \&do_whirlpool,
	);
	if (!exists($codetab{$opt{'a'}})) {
		warn "$Program: invalid algorithm name\n";
		help();
	}
	$alg = $codetab{$opt{'a'}};
}
if (defined $opt{'o'}) {
	if ($opt{'o'} ne '1' && $opt{'o'} ne '2') {
		warn "$Program: -o expects 1 or 2\n";
		exit EX_FAILURE;
	}
	if ($opt{'o'} eq '1') {
		$alg = \&sum1;
	} else {
		$alg = \&sum2;
	}
}

@ARGV = ( "-" ) unless ( @ARGV ); # STDIN if no files specified.

my $exitval = EX_SUCCESS;

foreach (@ARGV) {
	my $fh;
	if ($_ eq '-') {
		$fh = *STDIN;
	} elsif (-d $_) {
		warn "$Program: '$_' is a directory\n";
		$exitval = EX_FAILURE;
		next;
	} elsif (!open($fh, '<', $_)) {
		warn "$Program: failed to open '$_': $!\n";
		$exitval = EX_FAILURE;
		next;
	}
	my ($rval, $crc, $len) = $alg->($fh);
	unless ( defined($rval) && ($rval == 0) ) {
		warn "$Program: '$_': $!\n";
		$exitval = EX_FAILURE;
		next;
	}

	# Display output information
	if (defined $len) {
		printf "%lu %lu", $crc, $len;
	} else {
		print $crc, ' '; # for md5
	}
	if ($_ ne '-') {
		print " $_";
		close $fh;
	}
	print "\n";
}

exit $exitval;

sub sum1 {
	my($fh) = shift;
	my($crc) = my($len) = 0;
	my($buf,$num,$i);
	my @chars;

	while($num = sysread $fh, $buf, BUFLEN) {
		$len += $num;
		@chars = unpack 'C*', $buf;
		foreach $i (@chars) {
			if ($crc & 1) {
				$crc |= 0x10000; # get ready for rotating the 1 below
			}
			$crc = ($crc >> 1) + $i;
			$crc &= MASK16; # keep to 16-bit
		}
	}

	return $num,$crc,($len+1023)/1024; # round # of blocks up ...
}

sub sum2 {
	my($fh) = shift;
	my($crc) = my($len) = 0;
	my($buf,$num);

	while($num = sysread $fh, $buf, BUFLEN) {
		$len += $num;
		$crc += unpack("%32C*", $buf);
	}

	# crc = s (total of bytes)
	$crc = ($crc & MASK16) + ($crc & MASK32) / 0x10000; # r
	$crc = ($crc & MASK16) + ($crc / 0x10000); # cksum

	return $num,$crc,($len+511)/512; # round # of blocks up ...
}

sub do_md2 {
	my $fh = shift;

	eval {
		require Digest::MD2;
		1;
	} or do {
		die "The md2 algorithm is not available on your system\n";
	};
	my $ctx = Digest::MD2->new;
	$ctx->addfile($fh);
	return (0, $ctx->hexdigest, undef);
}

sub do_md4 {
	my $fh = shift;

	eval {
		require Digest::MD4;
		1;
	} or do {
		die "The md4 algorithm is not available on your system\n";
	};
	my $ctx = Digest::MD4->new;
	$ctx->addfile($fh);
	return (0, $ctx->hexdigest, undef);
}

sub do_md5 {
	my $fh = shift;

	require Digest::MD5;
	my $ctx = Digest::MD5->new;
	$ctx->addfile($fh);
	return (0, $ctx->hexdigest, undef);
}

sub do_haval256 {
	my $fh = shift;

	eval {
		require Digest::Haval256;
		1;
	} or do {
		die "The haval256 algorithm is not available on your system\n";
	};
	my $ctx = Digest::Haval256->new;
	$ctx->addfile($fh);
	return (0, $ctx->hexdigest, undef);
}

sub do_blake {
	my $fh = shift;

	eval {
		require Digest::BLAKE;
		1;
	} or do {
		die "The $opt{a} algorithm is not available on your system\n";
	};
	my $sz;
	if ($opt{'a'} =~ m/\Ablake([0-9]+)\Z/) {
		$sz = $1;
	} else {
		die "Unknown digest size: " . $opt{'a'};
	}
	my $ctx = Digest::BLAKE->new($sz);
	$ctx->addfile($fh);
	return (0, $ctx->hexdigest, undef);
}

sub do_sha3 {
	my $fh = shift;

	eval {
		require Digest::SHA3;
		1;
	} or do {
		die "The $opt{a} algorithm is not available on your system\n";
	};
	my $sz;
	if ($opt{'a'} =~ m/\Asha3\-([0-9]+)\Z/) {
		$sz = $1;
	} else {
		die "Unknown digest size: " . $opt{'a'};
	}
	my $ctx = Digest::SHA3->new($sz);
	$ctx->addfile($fh);
	return (0, $ctx->hexdigest, undef);
}

sub do_jh {
	my $fh = shift;

	eval {
		require Digest::JH;
		1;
	} or do {
		die "The $opt{a} algorithm is not available on your system\n";
	};
	my $sz;
	if ($opt{'a'} =~ m/\Ajh([0-9]+)\Z/) {
		$sz = $1;
	} else {
		die "Unknown digest size: " . $opt{'a'};
	}
	my $ctx = Digest::JH->new($sz);
	$ctx->addfile($fh);
	return (0, $ctx->hexdigest, undef);
}

sub do_whirlpool {
	my $fh = shift;

	eval {
		require Digest::Whirlpool;
		1;
	} or do {
		die "The whirlpool algorithm is not available on your system\n";
	};
	my $ctx = Digest::Whirlpool->new;
	$ctx->addfile($fh);
	return (0, $ctx->hexdigest, undef);
}

sub sha_all {
	my $fh = shift;

	require Digest::SHA;
	my $ctx = Digest::SHA->new($opt{'a'});
	$ctx->addfile($fh);
	return (0, $ctx->hexdigest, undef);
}

# does a bunch of ands to keep the answers within 32-bits
sub crc32 {
	my($fh) = shift;
	my($crc) = my($len) = 0;
	my($buf,$num,$i);
	my @chars;

	# crctable/crc32 alg converted from openbsd's cksum program ...
	my(@crctable) = (
		0x0,
		0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b,
		0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6,
		0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd,
		0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac,
		0x5bd4b01b, 0x569796c2, 0x52568b75, 0x6a1936c8, 0x6ed82b7f,
		0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a,
		0x745e66cd, 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039,
		0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58,
		0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033,
		0xa4ad16ea, 0xa06c0b5d, 0xd4326d90, 0xd0f37027, 0xddb056fe,
		0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95,
		0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4,
		0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077, 0x30476dc0,
		0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5,
		0x2ac12072, 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16,
		0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, 0x7897ab07,
		0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c,
		0x6211e6b5, 0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1,
		0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
		0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b,
		0xbb60adfc, 0xb6238b25, 0xb2e29692, 0x8aad2b2f, 0x8e6c3698,
		0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044, 0x902b669d,
		0x94ea7b2a, 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e,
		0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, 0xc6bcf05f,
		0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34,
		0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59, 0x608edb80,
		0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb,
		0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a,
		0x58c1663d, 0x558240e4, 0x51435d53, 0x251d3b9e, 0x21dc2629,
		0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5, 0x3f9b762c,
		0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff,
		0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, 0xf12f560e,
		0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65,
		0xeba91bbc, 0xef68060b, 0xd727bbb6, 0xd3e6a601, 0xdea580d8,
		0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3,
		0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2,
		0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6, 0x9ff77d71,
		0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74,
		0x857130c3, 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640,
		0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, 0x7b827d21,
		0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd, 0x6c47164a,
		0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e, 0x18197087,
		0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec,
		0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d,
		0x2056cd3a, 0x2d15ebe3, 0x29d4f654, 0xc5a92679, 0xc1683bce,
		0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb,
		0xdbee767c, 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18,
		0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, 0x89b8fd09,
		0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662,
		0x933eb0bb, 0x97ffad0c, 0xafb010b1, 0xab710d06, 0xa6322bdf,
		0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4
	);

	while($num = sysread $fh, $buf, BUFLEN) {
		$len = ($len + $num) & MASK32;
		@chars = unpack 'C*', $buf;
		foreach $i (@chars) {
			my $j = $crc >> 24 ^ $i;
			$crc = $crc << 8 ^ $crctable[$j];
			$crc &= MASK32;
		}
	}

	my($rlen) = $len; # for reporting ...
	for(;$len!=0;$len>>=8) { # MSB first
		$i = ($crc >> 24) ^ ($len & MASK8);
		$crc = ($crc << 8) ^ $crctable[$i];
		$crc &= MASK32;
	}
	$crc = ~$crc;
	$crc &= MASK32;
	return $num, $crc, $rlen;
}

sub help {
	print "
usage: $Program [-a alg] [-o 1|2] [file ...]

 -a alg    Select algorithm: crc, md5, sha1, sha224, sha256, sha384, sha512
 -o alg    Select historic algorithm: 1 (BSD), 2 (SYSV)

Optional alorithms: blake224 blake256 blake384 blake512 jh224 jh256
  jh384 jh512 haval256 md2 md4 sha3-224 sha3-256 sha3-384 sha3-512
  whirlpool
";
	exit EX_FAILURE;
}

=head1 NAME

sum - display file checksums and block counts

=head1 SYNOPSIS

    sum [-a alg] [-o 1|2] [file ...]

=head1 DESCRIPTION

sum outputs three space separated values:  file checksum, file size, and
file name.  The output can be useful for indicating errors in transmitted files.
You should not use sum for security checks as they are easily fooled.
If no file names are specified, the standard input will be used.

=head1 OPTIONS

=over 4

=item -a blake224

BLAKE/224 algorithm (requires Digest::BLAKE)

=item -a blake256

BLAKE/256 algorithm (requires Digest::BLAKE)

=item -a blake384

BLAKE/384 algorithm (requires Digest::BLAKE)

=item -a blake512

BLAKE/512 algorithm (requires Digest::BLAKE)

=item -a crc

CRC32 algorithm

=item -a jh224

JH/224 algorithm (requires Digest::JH)

=item -a jh256

JH/256 algorithm (requires Digest::JH)

=item -a jh384

JH/384 algorithm (requires Digest::JH)

=item -a jh512

JH/512 algorithm (requires Digest::JH)

=item -a haval256

Haval256 algorithm (requires Digest::Haval256)

=item -a md2

MD2 algorithm (requires Digest::MD2)

=item -a md4

MD4 algorithm (requires Digest::MD4)

=item -a md5

MD5 algorithm

=item -a sha1

SHA-1 algorithm

=item -a sha224

SHA-2/256 algorithm

=item -a sha256

SHA-2/256 algorithm

=item -a sha384

SHA-2/384 algorithm

=item -a sha512

SHA-2/512 algorithm

=item -a sha3-224

SHA-3/224 algorithm (requires Digest::SHA3)

=item -a sha3-256

SHA-3/256 algorithm (requires Digest::SHA3)

=item -a sha3-384

SHA-3/384 algorithm (requires Digest::SHA3)

=item -a sha3-512

SHA-3/512 algorithm (requires Digest::SHA3)

=item -a whirlpool

Whirlpool algorithm (requires Digest::Whirlpool)

=item -o 1

Historic BSD algorithm

=item -o 2

Historic SYSV algorithm

=back

=head1 NOTES

sum returns 0 on success or 1 if an error occurred.

If the program was executed as "cksum" the default algorithm is CRC32,
otherwise the default algorithm is BSD.

The BSD and SYSV algorithms round up to the next block count for partial blocks.

The CRC32 algorithm was ported directly from OpenBSD cksum C source code.

=head1 HISTORY

Perl version rewritten for the Perl Power Tools project from the
description of the cksum program in OpenBSD.

=head1 AUTHOR

Theo Van Dinter (felicity@kluge.net)

=head1 SEE ALSO

md5sum(1)
