#!/usr/bin/perl
use warnings ;
use strict ;

# IdDiff -WRSomsky <somsky@uw.edu> 2017

# === IdDiff ==================================================================
#
# This script compares a "before" and "after" IdDump looking for any
# errors/anomalies from the expected IdChange results.
#
# NOTE: This script may take a *lot* of memory and CPU cycles
# for large id dumps.
#
# Usage:
#
#   IdDiff old-iddump new-iddump [id-map-file]
#
# Arguments & Options:
#
#   old-iddump		Old, pre IdChange id-dumpfile
#
#   new-iddump		New, post IdChange id-dumpfile
#
#   id-map-file		File defining the old-to-new uid/gid mappings.
#                       Will be found automatically if named 'IdDiff.map'
#                       in the same directory as the IdChange script.
#			May be a symlink to IdChange.map

# --- Command Options & Arguments ---------------------------------------------

use File::Basename ;
my $prog = basename $0 ;

my $Usage = <<EOD ;
Usage: $prog [-vq] old-iddump new-iddump [id-map-file]
EOD

die $Usage unless @ARGV == 2 or @ARGV == 3 ;

use Cwd qw[abs_path] ;
my $old_ids = abs_path $ARGV[0] ;
my $new_ids = abs_path $ARGV[1] ;
my $mapfile = $ARGV[2] || "$0.map" ; ## mapfile defaults to "$0.map"

# --- Utility -----------------------------------------------------------------

sub printable
  {
  $_ = shift ;
  s/[[:^graph:]\\]/sprintf("\\%02X",unpack("c",$&))/eg ;
  return $_ ;
  }

sub comma
  {
  scalar reverse join ',', unpack '(A3)*', scalar reverse ($_[0]) ;
  }

sub HMS
  {
  my $s = int (shift) || 0 ;
  my $m = int ($s/60) ; $s %= 60 ;
  my $h = int ($m/60) ; $m %= 60 ;
  return sprintf "%d:%02d:%02d", $h, $m, $s ;
  }

sub timestamp
  {
  use POSIX ;
  my $time = shift || time ;
  strftime "%F %T", localtime $time ;
  }

# --- Load UID/GID Maps -------------------------------------------------------

my %uid_map ;
my %gid_map ;

open MAP, '<', $mapfile
  or die "Open error: $!: $mapfile\n" ;

while (<MAP>)
  {
  chomp ;
  next if /^\s*$/ or /^\s*#/ ;
  if (/^([UG])(\d+)\s+(\d+)\s*$/)
    {
    $uid_map{$2} = $3 if $1 eq 'U' ;
    $gid_map{$2} = $3 if $1 eq 'G' ;
    }
  else
    {
    die "Bad UID/GID map line:\nl.$.: $_\n" ;
    }
  }

# --- Load IdDump Files -------------------------------------------------------

my %ids ;

# --- Load Old IdDump ---------------------------------------------------------

my $old_cnt ;

open OLD_IDS, '<', $old_ids
  or die "Open error: $!: $old_ids\n" ;
sysread OLD_IDS, $_, 2 or die "Read error on $old_ids: $!\n" ;

if ($_ eq "\x1f\x8b") # is a gzip file
  {
  open OLD_IDS, "/bin/zcat $old_ids |"	## Should be vetted against shell chars
    or die "Zcat error: $!: $old_ids\n" ;
  }
else # is not a gzip file
  {
  open OLD_IDS, '<', $old_ids
    or die "Open error: $!: $old_ids\n" ;
  }

print STDERR "## Reading Old IdDump file:\n   " ;

while (<OLD_IDS>)
  {
  chomp ;
  next if /^\s*$/ or /^\s*#/ ;
  print STDERR '.' if ++$old_cnt % 10_000 == 0 ;
  print STDERR "[@{[comma $old_cnt]}]\n   " if $old_cnt % 500_000 == 0 ;
  my @fields = split /\s+/ ;
  die "\n!! Bad line from old IdDump ($old_ids:$.):\n$_\n" unless @fields == 6 ;
  my ($type,$perm,$uid,$gid,$nlink,$name) = @fields ;
  $ids{$name}->[0] = [ $type, oct $perm, int $uid, int $gid, int $nlink ] ;
  }

print STDERR "[@{[comma $old_cnt]}]\n" ;


# --- Load New IdDump ---------------------------------------------------------

my $new_cnt ;

open NEW_IDS, '<', $new_ids
  or die "Open error: $!: $new_ids\n" ;
sysread NEW_IDS, $_, 2 or die "Read error on $new_ids: $!\n" ;

if ($_ eq "\x1f\x8b") # is a gzip file
  {
  open NEW_IDS, "/bin/zcat $new_ids |"	## Should be vetted against shell chars
    or die "Zcat error: $!: $new_ids\n" ;
  }
else # is not a gzip file
  {
  open NEW_IDS, '<', $new_ids
    or die "Open error: $!: $new_ids\n" ;
  }

print STDERR "## Reading New IdDump file:\n   " ;

while (<NEW_IDS>)
  {
  chomp ;
  next if /^\s*$/ or /^\s*#/ ;
  print STDERR '.' if ++$new_cnt % 10_000 == 0 ;
  print STDERR "[@{[comma $new_cnt]}]\n   " if $new_cnt % 500_000 == 0 ;
  my @fields = split /\s+/ ;
  die "\n!! Bad line from new IdDump ($new_ids:$.):\n$_\n" unless @fields == 6 ;
  my ($type,$perm,$uid,$gid,$nlink,$name) = @fields ;
  $ids{$name}->[1] = [ $type, oct $perm, int $uid, int $gid, int $nlink ] ;
  }

print STDERR "[@{[comma $new_cnt]}]\n" ;

# --- Sort Names --------------------------------------------------------------

print STDERR "## Sorting filenames ... " ;
my @names = sort keys %ids ;
print STDERR "[@{[comma scalar @names]}] Done\n" ;

# --- Display Differences -----------------------------------------------------

my $add_cnt = 0 ;
my $del_cnt = 0 ;
my $diff_cnt = 0 ;

for my $name (@names)
  {
  my $ids = $ids{$name} ;

  if ($ids->[0] and not $ids->[1]) # missing file
    {
    $del_cnt++ ;
    printf "- %s  %04o      %10u  %10u  %3d  %s\n", @{$ids->[0]}, $name ;
    }

  elsif ($ids->[1] and not $ids->[0]) # new file
    {
    $add_cnt++ ;
    printf "+ %s  %04o      %10u  %10u  %3d  %s\n", @{$ids->[1]}, $name ;
    }

  else # examine for differences
    {
    my ($ot,$op,$ou,$og,$on) = @{$ids->[0]} ;	# Old values
    my ($nt,$np,$nu,$ng,$nn) = @{$ids->[1]} ;	# New values

    # --- Predicted
    my $pt = $ot ;
    my $pp = $op ;
    my $pu = $uid_map{$ou} // $ou ;
    my $pg = $gid_map{$og} // $og ;
    my $pn = $on ;

    $pp &= ~04000 if ($pu != $ou) and $nt ne "d" ;
    $pp &= ~02000 if ($pu != $ou or $pg != $og) and $op & 010 and $nt ne "d" ;

    # -- Mismatches
    my $xt = $nt ne $pt ;
    my $xp = $np != $pp ;
    my $xu = $nu != $pu ;
    my $xg = $ng != $pg ;
    my $xn = $nn != $pn ;

    if ($xt or $xp or $xu or $xg or $xn)
      {
      $diff_cnt++ ;
      printf "! %s%s %04o%s %10u%s %10u%s %3d%s %s\n",
	$nt, $xt ? "|$pt" : ' ', 
	$np, $xp ? "|@{[sprintf '%04o', $pp]}" : ' ', 
	$nu, $xu ? "|$pu" : ' ', 
	$ng, $xg ? "|$pg" : ' ', 
	$nn, $xn ? "|$pn" : ' ', 
	$name ;
      }
    }
  }

print "## ADDS: $add_cnt\n" ;
print "## DELS: $del_cnt\n" ;
print "## DIFFS: $diff_cnt\n" ;

__END__ # =====================================================================
