#!/usr/bin/perl
use warnings ;
use strict ;

# IdDump -WRSomsky <somsky@uw.edu> 2017

# === IdDump ==================================================================
#
# Given a starting directory, this script produces a (recursive) dump
# of the permission, owner, and group of all files contained beneath it,
# printing the (possibly compressed) results to standard out.
#
# Usage:
#
#   IdDump [-s] [-z] base-dir
#
# Arguments & Options:
#
#   base-dir	The base directory which to dump
#
#   -s		Process the contents of each directory in asciibetical
#		order.  (By default, uses unsorted, directory order.)
#
#   -z		Compress the output w/ gzip.  (Dumps can be quite large,
#		and are highly compressible.)
#

# --- Command Options & Arguments ---------------------------------------------

use File::Basename ;
my $prog = basename $0 ;

use Sys::Hostname ;
my $host = hostname ; $host =~ s/\.(phys|astro)\.washington\.edu$// ;

use Getopt::Long qw(:config gnu_getopt) ;

my $Usage = <<EOD ;
Usage: $prog [-s] [-z] base-dir
EOD

my $sort = 0 ;
my $gzip = 0 ;

GetOptions (
  's|sort' => \$sort,	# process sorted
  'z|gzip' => \$gzip,	# gzip the output
  ) or die $Usage ;

die $Usage unless @ARGV == 1 ;

use Cwd qw[abs_path] ;
my $basedir = abs_path $ARGV[0] ;

-d $basedir
  or die "Directory not found: $basedir\n" ;

# --- Utility -----------------------------------------------------------------

sub printable
  {
  $_ = shift ;
  s/[[:^graph:]\\]/sprintf("\\%02X",unpack("c",$&))/eg ;
  return $_ ;
  }

sub comma
  {
  scalar reverse join ',', unpack '(A3)*', scalar reverse ($_[0]) ;
  }

sub HMS
  {
  my $s = int (shift) || 0 ;
  my $m = int ($s/60) ; $s %= 60 ;
  my $h = int ($m/60) ; $m %= 60 ;
  return sprintf "%d:%02d:%02d", $h, $m, $s ;
  }

sub timestamp
  {
  use POSIX ;
  my $time = shift || time ;
  strftime "%F %T", localtime $time ;
  }

# --- Setup Output FileHandle -------------------------------------------------

my $out ;

if ($gzip)
  {
  open $out, '|/bin/gzip'
    or die "Cannot open gzip pipe: $!\n" ;
  ## ---
  ## use IO::Compress::Gzip qw[ $GzipError ] ;
  ## $out = new IO::Compress::Gzip "-",
  ##   or die "Cannot open gzip filter: $GzipError\n" ;
  }
else
  {
  open $out, '>&', STDOUT
    or die "Cannot create handle to STDOUT: $!\n" ;
  }

# --- Walk File Tree ----------------------------------------------------------

my $count ;

use File::Find ;
use File::stat ;

sub file_handler
  {
  $count++ ;
  my $path  = $File::Find::name  ;
  my $stat  = lstat $path
    or return print $out "#! Stat failed: $!: @{[printable $path]}\n" ; 

  my $perm  = $stat->mode & 07777 ;
  my $type  = $stat->mode >> 12 ;
  my $nlink = $stat->nlink ;
  my $uid   = $stat->uid ;
  my $gid   = $stat->gid ;

  if    ($type == 010) { $type = 'f' ; }
  elsif ($type == 012) { $type = 'l' ; }
  elsif ($type == 004) { $type = 'd' ; }
  elsif ($type == 014) { $type = 's' ; }
  elsif ($type == 006) { $type = 'b' ; }
  elsif ($type == 002) { $type = 'c' ; }
  elsif ($type == 001) { $type = 'p' ; }
  else                 { $type = '?' ; }

  printf $out "%s %04o %10u %10u %3d %s\n",
    $type, $perm, $uid, $gid, $nlink, printable $path ;
  }

my $start = time ;

my $find_opts = { wanted => \&file_handler, no_chdir => 1 } ;
$find_opts->{preprocess} = sub {sort @_} if $sort ;

find ( $find_opts, $basedir) ;

my $end = time ;

printf $out "## %s DONE [$host:$basedir]  %s / %s (%s/s)\n", 
  timestamp,
  comma ($count),
  HMS ($end - $start),
  $end <= $start ? "-" : comma int ($count/($end - $start)) ;

close $out ;

__END__ # =====================================================================
