#! /usr/bin/perl
#
# "unp" runs the correct unpack program depending on the file extension
# of the given parameter.
# 
# Author: Eduard Bloch <blade@debian.org>, 2010
#
# UI modelled after unp versions 1.x by Eduard Bloch (2000-2009) and original
# unp by Andre Karwath (1997).
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 as published by
# the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# -----------------------------------------------------------------------
# If you make changes to this script, please feel free to forward the new 
# version to the author.
# -----------------------------------------------------------------------

require 5.002;

use Getopt::Long qw(:config no_ignore_case bundling pass_through);
use File::Basename;
use Cwd;

use strict;
use warnings;
#use diagnostics;

my @ifiles;
my @xargs;
my @tools;
my $retcode=0;

my $opt_quiet;
my $opt_debug;
my $opt_help;
my $opt_force;
my $opt_formats;
my $opt_umode;
my $opt_umode_smart;
my $opt_verbose;

BEGIN {
  eval 'use Locale::gettext';
  if ($@) {
    eval q{ sub gettext { return shift; } };
  } else {
    textdomain('unp');
  }
}

my $catmode = $0=~/(^|\/ucat)$/;

&init_formats;
&parse_cli;
&process_files;
print STDERR "WARNING: There were errors while processing files!\n" if $retcode;
print STDERR "retcode: $retcode\n" if $opt_debug;
# return something meaning, i.e. it may be single unpacker's only return code
# if the status is bad but the value is zero, return at least 1
exit ( $retcode ? ( $retcode < 256 ? $retcode : 1 ) : 0 );

sub show_help 
{
printf(gettext("
USAGE:
   %s [ options ] file [ files... ]
   file: compressed file(s) to expand/extract

   Use -- [ ARGUMENTS ] to pass arguments to external programs, eg. some tar options:
   unp fastgl.tgz xmnt.tgz -- -C /tmp

   Options:
   -f Continue even if program availability checks fail or directory collision occurs
   -u Special helper mode.
      For most archive types:
      - create directory <filename without suffix>/
      - extract contents there
      For Debian/Ubuntu packages:
      - extract data.tar.gz after each operation in local directory
      - extract control.tar.gz into control/<package_version_arch>/
   -U Smart mode, acts like -u (see above) if archive contains multiple
      elements but if there is only one file/directory element then it's stored 
      in the current directory.
   -s Show the list of supported formats
   -v More verbosity
   -h Show this help
"), $0) if !$catmode;
printf(gettext("
USAGE:
   %s [ options ] file [ files... ]
   Uncompress multiple files to STDOUT

   Options:
   -s  Show the list of supported formats
   -h  Show this help
   -v  More verbosity, to STDERR
"), $0) if $catmode;
exit 1;
}


sub init_formats
{
   my $sh="/bin/sh";

# Database format: 
# - providing packages (human readable)
# - filename suffix,
# - libmagic (file tool) pattern, 
# - process flags (bitfield), 
# - array of command sets (=arrays of command and arguments). If the first arg
#   of the command set is an array then it's interpreted as a list of
#   requirements which need to be checked.
#
# Flag values:
# 2:  retry other tools calls if the first candidate failed
# 4:  pass source and extra args in different order (src args...), good for
#     shell commands using $0 and $@
# 8:  stream filters, contents may be extracted if tar format is detected. Only
#     simple syntax allowed.
# 16: append suggested target name to calling arguments
use constant 
{
   TRY_OTHER_TOOLS => 2,
   SHELL_STYLE_ARGS => 4,
   IS_STREAM_FILTER => 8,
   APPEND_GEN_NAME => 16
};

   @tools = (
      [ "tar", "tar", "tar.archive", 0,
      [ "tar", "-x", "-v", "-f"]
      ],

      # shortcuts, no extra scripting needed
      [ gettext("tar with gzip"), "tgz|tar.gz", undef, 0,
      [[ "gzip" ], "tar",  "-z", "-x", "-v", "-f"]
      ],

      [ gettext("tar with bzip2"), "tar.bz2|tbz2", undef, 0,
      [ [ "bzip2" ], "tar", "--bzip2", "-x", "-v", "-f"]
      ],

      [ gettext("tar with xz-utils"), "tar.xz|txz", undef, 0,
      
      [ [ "xz" ], "tar", "--xz", "-x", "-v", "-f"]
      ],

      [gettext( "tar with lzip"), "tar.lzip", undef, 0,
      
      [[ "lzip" ], "tar", "--lzip", "-x", "-v", "-f"]
      ],

      [gettext( "tar with lzop"), "tar.lzop|tzo", undef, 0,
      
      [[ "lzop" ], "tar", "--lzop", "-x", "-v", "-f"]
      ],

      [gettext( "tar with compress"), "tar.z", undef, 0,
      
      [ [ "compress" ], "tar", "-Z", "-x", "-v", "-f"]
      ],

      # XXX: that's ok for now but if support for other unpackers is needed
      # (like multithreaded implementations) than ucat code needs to be
      # extended to check them
      [ "gzip", "gz", "gzip.compressed.data", IS_STREAM_FILTER,
      [ "gzip", "-cd" ]
      ],

      [ "bzip2", "bz2", "bzip2.compressed", IS_STREAM_FILTER,
      [ "bzip2", "-cd" ]
      ],

      [ "lzop", "lzo", "lzop.compressed", IS_STREAM_FILTER,
      [ "lzop", "-cd" ]
      ],

      [ "xz-utils", "xz", "xz.compressed", IS_STREAM_FILTER,
      [ "xzcat" ]
      ],

      [ "lzip", "lz", "lzip.compressed", IS_STREAM_FILTER,
      [ "lzip", "-cd" ]
      ],

      [ gettext("xz-utils or lzma"), "lzma", "lzma.compressed", IS_STREAM_FILTER|TRY_OTHER_TOOLS,
      [ "xzcat" ],
      [ "lzcat" ]
      ],

      [ gettext("cpio or afio"), "cpio|afio",  "cpio", SHELL_STYLE_ARGS,
      [ "afio", "-Z", "-v", "-i" ],
      [ ["cpio"], $sh, "-c", 'cpio -i -d	--verbose "$@" < "$0"' ]
      ],

      [ gettext("rpm2cpio and cpio"), "rpm", 'PPM\ v', SHELL_STYLE_ARGS,
      [ ["rpm2cpio", "cpio"], $sh, "-c", 'rpm2cpio < "$0" | cpio -i -d	--verbose "$@"' ]
      ],

      [ gettext("formail and mpack"), "mbox", "(mail.text)|news", SHELL_STYLE_ARGS,
      [ ["formail","munpack"], $sh, "-c", 'formail -s munpack "$@" < "$0"' ]
      ],

      [ gettext("libchm-bin or archmage"), "chm", "Windows HtmlHelp Data", APPEND_GEN_NAME,
      [ 'extract_chmLib' ],
      [ 'archmage']
      ],

      [ gettext("rar or unrar or unrar-free"), "rar",  "RAR.*archive", 0,
      [ "rar", "x" ],
      [ "unrar", "x" ]
      ]
      ,
      [ "binutils", "ar|deb", "(Debian binary package|\ ar.*archive)", 0,
      [ "ar", "-x", "-v" ]
      ]
      ,
      [ "unzip", "zip|cbz|cbr|jar|war|ear|xpi|adf", "Zip.*archive", 0,
      [ "unzip" ]
      ]
      ,
      [ "lha", "lha|lzh", "LHa.*archive", 0,
      [ "lha", "x" ]
      ]
      ,
      [ "arj", "arj", "ARJ.*archive", 0,
      [ "arj", "x" ],
      [ "unarj", "x" ],
      ]
      ,
      [ "ppmd", "pmd", "PPmd.*archive", 0,
      [ "PPMd", "x" ]
      ]
      ,
      [ "zoo", "zoo", "Zoo.*archive", 0,
      [ "unzoo", "-x" ]
      ]
      ,
      [ "sharutils", "shar", "shell.*archive", 0,
      [ "unshar" ]
      ]
      ,
      [ "sharutils", "uu", "uuencoded", 0,
      [ "undecode" ]
      ]
      ,
      [ "tnef", "dat", "Transport Neutral Encapsulation Format", 0,
      [ "tnef", "-v" ]
      ]
      ,
      [ gettext("p7zip or p7zip-full"), "7z", "7-zip.*archive", 0,
      [ "7z", "x" ]
      ]
      ,
      [ "cabextract", "cab", "CAB file", 0,
      [ "cabextract" ]
      ]
      ,
      [ "unace", "ace", "ACE.*archive", 0,
      [ "unace", "e" ]
      ]
      ,
      [ "xdms", "dms", "DMS.*archive", 0,
      [ "xdms", "x" ]
      ]
      ,
      [ "unlzx", "lzx", "LZX.*archive", 0,
      [ "unace", "e" ]
      ]
      ,
      [ "macutils", "sea|sea\.bin", "SEA.*archive", 0,
      [ "macutils", "-v" ]
      ]
      ,
      [ "macutils", "hqx", "BinHex.binary", 0,
      [ "hexbin", "-v" ]
      ]
      ,
      [ "maybe orange or unzip or unrar or unarj or lha ", "exe", "executable", 3,
      [ "orange" ],
      [ "unzip" ],
      [ "unrar", "x" ],
      [ "rar", "x" ],
      [ "arj", "x" ],
      [ "lha", "x" ]
      ]

);
}

sub show_formats
{
   print "Known archive formats and tools:\n";
   my %t;
   my $len=0;
   foreach my $line (@tools)
   {
      my @dset = @$line;
      my $sux = $dset[1];
      $sux=~s/\|/,/g;
      $t{$sux}=$dset[0];
      $len=length($sux) if(length($sux)>$len && length($sux)<20);
      #print $sux . ":\t\t$dset[0]\n";
   }
   foreach (sort (keys %t))
   {
      print "$_:";
      my $diff=$len-length($_);
      print " "x$diff." $t{$_}\n";
   }
   exit 1;
}

sub parse_cli
{
   my %options = (
      "q|quiet"                => \$opt_quiet,
      "d|debug"                => \$opt_debug,
      "h|help"                => \$opt_help,
      "f|force"               => \$opt_force,
      "s|show-formats"        => \$opt_formats,
      "u|to-subdir"           => \$opt_umode,
      "U|smart-subdir"        => \$opt_umode_smart,
      "v|verbose"             => \$opt_verbose
   );
   &show_help unless ( GetOptions(%options));
   &show_help if ($opt_help);
   &show_formats if ($opt_formats);
   while(@ARGV)
   {
      if("--" eq $ARGV[0])
      {
         shift(@ARGV);
         @xargs = @ARGV;
         last;
      }
      push(@ifiles, shift(@ARGV));
   }
   print STDERR join(" ; ", "ifiles", @ifiles, "xargs", @xargs, "argv", @ARGV, "\n") if $opt_debug;
}

sub try_unarch
{
   my $ifile=shift;
   my $magicdata=shift;
   print STDERR "magic string: $magicdata\n" if $opt_debug && $magicdata;
   UNPIFILE: foreach my $line (@tools)
   {
      my ($name, $suxARG, $patARG, $cmdflags) = @$line;

      print STDERR "hm, $magicdata vs. $patARG\n" if ( $opt_debug && $magicdata);

      # needs magic data to test against
      next if(defined($magicdata) && !defined($patARG));

      next if($catmode && ! ($cmdflags & IS_STREAM_FILTER));

      if(
         (defined($magicdata) && $magicdata=~/$patARG/i)
         || ( !defined($magicdata) && $ifile =~ /.*\.($suxARG)$/i) )
      {
         print STDERR "got unpacker description for $ifile\n" if $opt_debug;
         my $misscount=0;
         my @dset = @$line;

         TOOL: foreach my $pArgs (@dset[4..$#dset])
         {
            my @args=@$pArgs;
            my @prqs = ($args[0]);

            # there is a list of prqs prepended, use that one and weed out the ref
            @prqs=@{shift(@args)} if( ref($args[0]) eq "ARRAY");

            my $misscountCur=0;
            foreach(@prqs)
            {
               if(!which($_))
               {
                  $misscountCur++;
                  $misscount++;
               }
            }

            # if all tools are here? Let's start...
            if(! $misscountCur)
            {
               my $rcodeprev=$retcode;
               $retcode += ($catmode ? &cat_one($ifile, $line, @args) : &unpack_one($ifile, $line, @args));
               
               return 1 if($rcodeprev == $retcode);

               # try other tools if hinted but never in cat mode in order to prevent data corruption
               return 0 if($catmode);
               next TOOL if($cmdflags & TRY_OTHER_TOOLS);
            }
         }
         if($misscount)
         {
            print STDERR gettext("Error, following packages must be installed in order to proceed:\n").$name."\n";
            exit 1;
         }
      }
   }
   return 0;
}

sub getmagic
{
   my $path=shift;
   print STDERR "getting magic value from $path\n" if $opt_debug;
   if(open(my $fd, "-|", "file", "-L", $path))
   {
      my $fileret=scalar <$fd>;
      #print STDERR "got: $fileret\n" if $opt_debug;
      close $fd;
      chomp $fileret;
      return $fileret;
   }
   return "G.N.D.N.";
}

sub cat_file
{
   my $file=shift;
   if(open(my $fd, $file) )
   {
      my $buf;
      while(my $len=sysread($fd, $buf, 1<<16, 0))
      {
         my $off=0;
         my $res;
         while($res=syswrite(STDOUT, $buf, $len, $off))
         {
            die "Failed to print: $!\n" if !defined($res);
            last if 0==$res;
            $off+=$res;
            last if $off>=$len;
         }
      }
      close $fd || $retcode++;
   }
   else { $retcode++; }
}

sub process_files
{
   IFILE:
   foreach my $ifile (@ifiles)
   {
      if(!-r $ifile)
      {
         printf STDERR gettext("Cannot read %s, skipping...\n"), $ifile;
         $retcode++;
         next IFILE;
      }

      next if (&try_unarch($ifile) or &try_unarch($ifile, getmagic($ifile)));

      printf STDERR gettext("Failed to detect file type of %s.\n"), $ifile;

      # print the file as is in cat mode, otherwise remember that problem
      $retcode += ($catmode ? system("cat", $ifile) : 1);
   }
}

sub which
{
   my $prog=shift;
   for(split(/:/,$ENV{"PATH"})) {
      if(-x "$_/$prog") {
         return 1;
      }
   }
   return undef;
}

sub cat_one
{
   print STDERR "\ncat_one: @_\n" if $opt_debug;
   my $file=shift;
   my $toolRef=shift;
   my $flags=$toolRef->[3];
   my @cmd = (SHELL_STYLE_ARGS & $flags) ? (@_, $file, @xargs) : (@_, @xargs, $file);
   print STDERR join(" ", "test cmd line: ", @cmd, "\n") if $opt_debug;
   return (system(@cmd) >> 8);
}

sub unpack_one
{
   print STDERR "unpack_one: @_\n" if $opt_debug;

   my $file=shift;
   my $toolRef=shift;
   my $cwd=getcwd;

   my $ret=1;
   my $flags=@{$toolRef}[3];

   my $sufpat=@{$toolRef}[1];
   my $tgtname=basename($file);
   $tgtname=~s/(.*)\.($sufpat)$/$1/i;

   return special_debmode($file) if($opt_umode && $file=~/\.deb$/i);

   # make sure that target file/directory for certain types is not occupied
   if( ( $opt_umode_smart || $opt_umode || ( $flags & (IS_STREAM_FILTER|APPEND_GEN_NAME)) ) && -e $tgtname)
   {
      printf STDERR (gettext(
            "Cannot create target %s: file already exists. Trying alternative targets...\n"),
         $tgtname);
      $tgtname.=".unp";
      if(-e $tgtname)
      {
         print STDERR sprintf(gettext(
         "Cannot create target %s: file already exists\n"), $tgtname);
         $tgtname.=".".rand;
      }
      if(-e $tgtname)
      {
         print STDERR sprintf(gettext(
               "Cannot create target %s: file already exists\n"), $tgtname);
         exit 1 if $opt_force;
      }
      print STDERR "Suggested target name: $tgtname\n";
   }
   print STDERR "tgtname: $tgtname\n" if $opt_debug;

   my $tmpdir;

   if($opt_umode || $opt_umode_smart)
   {
      $file=Cwd::abs_path($file);
      print STDERR "set abs.path to $file\n" if $opt_debug;
      $tmpdir="unp.".rand;
      mkdir $tmpdir;
      chdir($tmpdir) || return 23;
   }

   my @cmd = (SHELL_STYLE_ARGS & $flags) ? (@_, $file, @xargs) : (@_, @xargs, $file);


   print STDERR "temp.cmd: ".join("\t", @cmd, "\n") if $opt_debug;

   # filter commands... use shell to keep our code simple. Tar stream may be
   # inside, detect it and unpack it.
   if(IS_STREAM_FILTER & $flags)
   {
      my $magic="";
      @cmd=("sh", "-c", join(" ", @_).' "$0" | file -', $file);
      if(open(my $fh, "-|", @cmd))
      {
         $magic = join('', <$fh>);
         close($fh);
      }
      print STDERR "Internal magic: $magic\n" if $opt_debug;
      if($magic=~/tar.archive/)
      {
         @cmd=("sh", "-c", join(" ", @_).' "$0" | tar -v -x -f - "$@" ', $file, @xargs);
      }
      else
      {
         @cmd=("sh", "-c", join(" ", @_).' "$0" > "$1"', $file, $tgtname);
      }
   }

   push(@cmd, $tgtname) if(APPEND_GEN_NAME & $flags);

   print STDERR join(" ", "test cmd line: ", @cmd, "\n") if $opt_debug;
   
   $ret = (system(@cmd) >> 8);

   if( $opt_umode_smart)
   {
      chdir "..";
      my @cont=(<$tmpdir/*>, <$tmpdir/.*>);
      # . and .. and one element?
      if(3==@cont)
      {
         # use same name as target, fall back to checked tgtname if that is already occupied
         my $cand=basename($cont[0]);
         if (-e $cand)
         {
            print STDERR gettext("Cannot create target directory (already exists), using alternative name\n");
            $cand=$tgtname ;
         }
         return 44 if ! (rename($cont[0], $cand) && rmdir $tmpdir);
      }
      else
      {
         rename $tmpdir, $tgtname || return 43;
      }

   }
   elsif ($opt_umode)
   {
      chdir "..";
      rename($tmpdir, $tgtname) || return 42;
   }

   chdir $cwd;

   return $ret;
}

sub special_debmode
{
   my $file=shift;
   basename($file)=~/^(.*)\.deb$/;
   my $bname=$1;
   die "cannot recognice package name\n" if !$bname;

   mkdir "control";
   my $contgt="control/$bname";
   mkdir $contgt;

   return (system("ar", "x", $file) >> 8 ) +
   (system("tar", "-z", "-x", "-v", "-f", "control.tar.gz", "-C", 
         $contgt) >> 8 ) +
   ( system("tar", "-z", "-x", "-v", "-f", "data.tar.gz") >> 8 );
}