office_unzip.pl
use warnings;
use strict;
use Archive::Extract;
use File::Copy;
use File::Basename;
use File::Find;
use XML::Tidy;
use open ':encoding(utf8)';
# Unpack an Office file (a zip container) into $dest_dir and pretty-print
# the XML parts it contains so that they can be read and diffed.
#
# Arguments (command line):
#   1. the Office file to unpack
#   2. the destination directory; it must not exist yet
my $office_file = shift;
my $dest_dir = shift;
die "Usage: office_unzip.pl office-file dest-dir\n"
    unless defined $office_file and defined $dest_dir;

# Validate the arguments before anything is written to disk.
die "$office_file not found" unless -f $office_file;
die "$dest_dir already exists" if -e $dest_dir;

# The archive is opened through a temporary copy whose name ends in .zip;
# the copy is created in the current directory.
my $file_base = basename($office_file) . ".zip";
copy($office_file, $file_base)
    or die "Could not copy $office_file to $file_base: $!";

my $archive = Archive::Extract->new(archive => $file_base);
my $extracted = $archive && $archive->extract(to => $dest_dir);

# The temporary copy is not needed any more, whether or not the
# extraction succeeded.
unlink $file_base;
die "Could not extract $office_file to $dest_dir!" unless $extracted;

find(sub {
    my $file = $_;
    return unless -f $file;

    if ($file eq 'printerSettings1.bin') {
        print "deleting $file\n";
        unlink $file or warn "Could not delete $File::Find::name: $!\n";
        return;
    }

    if ($file =~ m!\.png$!) {
        print "not processing media file $file\n";
        return;
    }

    # print "$file\n";
    my $xml_tidy = XML::Tidy->new(filename => $file);
    $xml_tidy->tidy();
    $xml_tidy->write();

    # XML Tidy doesn't seem to write in utf8...
    # so the tidied file is read back as latin1 and rewritten as utf8.
    my $enc = 'latin1';
    open my $IN, "<:encoding($enc)", $file
        or die "Could not open $File::Find::name: $!";
    open my $OUT, '>:utf8', "$file.utf8"
        or die "Could not create $File::Find::name.utf8: $!";

    # A lexical line variable keeps the global $_ untouched inside the
    # File::Find callback.
    while (my $line = <$IN>) {
        print {$OUT} $line;
    }

    # Close the input before deleting it: an open file cannot be
    # unlinked on every platform.
    close $IN;
    close $OUT or die "Could not write $File::Find::name.utf8: $!";

    unlink $file;
    move("$file.utf8", $file)
        or die "Could not rename $File::Find::name.utf8: $!";
}, $dest_dir);
office_zip.pl
office_zip.pl is executed on the command line as follows:
office_zip.pl path/to/xml/root docx
The suffix (docx in the previous example) may also be xlsx.
#!/usr/bin/perl
use warnings;
use strict;
use File::Find;
use File::Spec::Functions qw(tmpdir abs2rel);
use Archive::Zip qw(:ERROR_CODES :CONSTANTS);
# Zip the directory tree $src_dir into an Office file named
# "<src_dir>.<suffix>" and then hand that file name to system().
#
# Arguments (command line):
#   1. the root directory of the unpacked Office file
#   2. the suffix of the file to create: docx or xlsx
my $src_dir = shift;
my $suffix = shift;
die "Usage: office_zip.pl path/to/xml/root docx|xlsx\n"
    unless defined $src_dir and defined $suffix;
die "$src_dir is not a directory\n" unless -d $src_dir;
die "Suffix must be 'docx' or 'xlsx', not '$suffix'\n"
    unless $suffix eq 'xlsx' or $suffix eq 'docx';

# Strip trailing path separators so that "root/" produces "root.docx"
# rather than "root/.docx".
(my $base = $src_dir) =~ s{[/\\]+$}{};
$base = $src_dir unless length $base;
my $office_file = "$base.$suffix";

my $zip = Archive::Zip->new;
$zip->addTree($src_dir) == AZ_OK
    or die "Could not add $src_dir to the archive";
$zip->writeToFileNamed($office_file) == AZ_OK
    or die "Could not write $office_file";

# The file name itself is run as the command; presumably this relies on
# the platform opening the document with its associated application
# (TODO confirm). The result is deliberately not checked.
system($office_file);
cmp_tree.pl
use warnings;
use strict;
use File::Find;
use List::Compare;
# Compare the files found below two directories: list the files that
# exist in only one of them, then print one "gvim -d" command line for
# every file that exists in both.
#
# Arguments (command line): the two directories to compare.
my $dir_1 = shift;
my $dir_2 = shift;
for my $dir ($dir_1, $dir_2) {
    die "Usage: cmp_tree.pl dir-1 dir-2\n"
        unless defined $dir and length $dir;
    die "$dir is not a directory\n" unless -d $dir;
}

my @files_1 = find_files($dir_1);
my @files_2 = find_files($dir_2);

my $cmp = List::Compare->new(\@files_1, \@files_2);
my @only_in_dir_1 = $cmp->get_unique;
my @only_in_dir_2 = $cmp->get_complement;
my @common = $cmp->get_intersection;

print_only_in($dir_1, @only_in_dir_1);
print_only_in($dir_2, @only_in_dir_2);
print "\n";

# One diff command per common file, written with backslash separators.
for my $file (@common) {
    print "gvim -d $dir_1\\$file $dir_2\\$file\n";
}
sub find_files { # {{{
    # Returns the plain files found below $dir, each with the leading
    # $dir removed from its path.
    #
    # Parameters:
    #   $dir       - the directory to search
    #   $files_ref - optional array reference the names are appended to;
    #                a fresh array is used when it is omitted
    #
    # Returns the list of names collected in @$files_ref, which is empty
    # when nothing was found.
    my $dir = shift;
    my $files_ref = shift || [];

    find(sub {
        return unless -f $_;
        # \Q...\E makes $dir match literally, and ^ restricts the
        # removal to the start of the path.
        (my $file = $File::Find::name) =~ s/^\Q$dir\E//;
        push @{$files_ref}, $file;
    }, $dir);

    return @{$files_ref};
} # }}}
sub print_only_in { # {{{
    # Prints an "Only in <dir>" heading followed by the given names, one
    # per line. Prints nothing when no names are given.
    #
    # Parameters:
    #   $dir  - the directory named in the heading
    #   @rest - the names to list
    my ($dir, @names) = @_;

    print "\nOnly in $dir\n " . join("\n ", @names) . "\n" if @names;
} # }}}