Office Open XML: helpers (Perl scripts)

office_unzip.pl

use warnings;
use strict;


use Archive::Extract;
use File::Copy;
use File::Basename;
use File::Find;
use XML::Tidy;

use open ':encoding(utf8)';
# office_unzip.pl: unpack an Office Open XML file into a directory and
# pretty-print the XML parts found in it.
#
# Usage: office_unzip.pl office-file dest-dir
#
#   office-file  existing document to unpack
#   dest-dir     directory to extract into; must not exist yet
#
# Dies if an argument is missing, if office-file is not a plain file, if
# dest-dir already exists, or if copying / extracting / rewriting fails.

my $office_file = shift;
my $dest_dir    = shift;

die "Usage: $0 office-file dest-dir\n"
  unless defined $office_file and defined $dest_dir;

# Validate the arguments before anything is written to disk.
die "$office_file not found" unless -f $office_file;
die "$dest_dir already exists"   if -e $dest_dir;

# The extraction works on a copy in the current directory whose name ends
# in .zip (presumably so that Archive::Extract recognises the archive type
# from the suffix -- confirm against its documentation).
my $file_base   = basename($office_file) . ".zip";

copy($office_file, $file_base)
  or die "Could not copy $office_file to $file_base: $!";

my $archive = Archive::Extract->new(archive => $file_base);
unless ($archive and $archive->extract(to => $dest_dir)) {
  unlink $file_base;    # do not leave the temporary copy behind
  die "Could not extract $office_file to $dest_dir!";
}


find(sub {

  # File::Find has chdir'ed into the containing directory, so the bare
  # name in $_ is usable for file operations.
  my $file = $_;
  return unless -f $file;

  if ($file eq 'printerSettings1.bin') {
    print "deleting $file\n";
    unlink $file or warn "Could not delete $File::Find::name: $!";
    return;
  }
  # Binary images are not XML and must not be handed to XML::Tidy.
  if ($file =~ m!\.(?:png|jpe?g|gif|bmp|tiff?|emf|wmf)$!i) {
    print "not processing media file $file\n";
    return;
  }

  my $xml_tidy = XML::Tidy->new(filename => $file);
  $xml_tidy->tidy();
  $xml_tidy->write();

# XML Tidy doesn't seem to write in utf8...
# so the tidied file is read back as latin1 and rewritten as utf8.

  my $enc = 'latin1';
  open my $IN,  "<:encoding($enc)", $file        or die "Could not read $File::Find::name: $!";
  open my $OUT, '>:utf8',          "$file.utf8"  or die "Could not write $File::Find::name.utf8: $!";
  # A named loop variable keeps File::Find's $_ intact.
  while (my $line = <$IN>) {
    print {$OUT} $line;
  }
  close $OUT or die "Could not close $File::Find::name.utf8: $!";
  # Close the input before replacing it: some platforms refuse to delete
  # or overwrite a file that is still open.
  close $IN;

  unlink $file               or die "Could not remove $File::Find::name: $!";
  move("$file.utf8", $file)  or die "Could not rename $File::Find::name.utf8: $!";

}, $dest_dir);

unlink $file_base or warn "Could not remove temporary $file_base: $!";

Github repository about-Office-Open-XML, path: /_helpers/office_unzip.pl

office_zip.pl

office_zip.pl creates an Office Open XML document from a set of XML documents that are stored in a specific directory.

office_zip.pl is invoked on the command line as follows:

office_zip.pl path/to/xml/root docx

The suffix (docx in the example above) can also be xlsx.

#!/usr/bin/perl
use warnings;
use strict;
use File::Find;
use File::Spec::Functions qw(tmpdir abs2rel);

use Archive::Zip qw(:ERROR_CODES :CONSTANTS);

# office_zip.pl: zip a directory of XML parts into an Office Open XML file.
#
# Usage: office_zip.pl path/to/xml/root (docx|xlsx)
#
# The result is written next to the source directory as
# "<path/to/xml/root>.<suffix>" and then handed to system().

my $src_dir = shift;
my $suffix  = shift or die "Usage: $0 path/to/xml/root (docx|xlsx)\n";
die "$src_dir is not a directory\n" unless -d $src_dir;
die "Suffix must be xlsx or docx, not '$suffix'\n"
  unless $suffix eq 'xlsx' or $suffix eq 'docx';

# Drop trailing path separators so that "root/" yields "root.docx" rather
# than a file named ".docx" inside the source directory.
(my $base_name = $src_dir) =~ s{(?<=[^/\\])[/\\]+\z}{};
my $office_file = "$base_name.$suffix";

my $zip = Archive::Zip->new;

$zip->addTree($src_dir) == AZ_OK
  or die "Could not add $src_dir to the archive";
$zip->writeToFileNamed($office_file) == AZ_OK
  or die "Could not write $office_file";

# NOTE(review): runs the document path as a command, presumably relying on
# the platform's file association to open it -- confirm. A single-string
# system() may be passed through the shell, so paths containing shell
# metacharacters are not safe here.
system("$office_file");

Github repository about-Office-Open-XML, path: /_helpers/office_zip.pl

cmp_tree.pl

use warnings;
use strict;

use File::Find;
use List::Compare;

# cmp_tree.pl: compare the sets of files found below two directories.
#
# Usage: cmp_tree.pl dir-1 dir-2
#
# Prints the files that exist below only one of the two directories,
# followed by one "gvim -d" command line per file present in both.

my $usage = "Usage: $0 dir-1 dir-2\n";

my $dir_1 = shift or die $usage;
my $dir_2 = shift or die $usage;

for my $dir ($dir_1, $dir_2) {
  die "$dir is not a directory\n" unless -d $dir;
}

# File names relative to each root, so that the two lists are comparable.
my @files_1 = find_files($dir_1);
my @files_2 = find_files($dir_2);

my $cmp = List::Compare->new(\@files_1, \@files_2);

my @only_in_dir_1 = $cmp->get_unique;
my @only_in_dir_2 = $cmp->get_complement;
my @common        = $cmp->get_intersection;

print_only_in($dir_1, @only_in_dir_1);
print_only_in($dir_2, @only_in_dir_2);

print "\n";
for my $file (@common) {
  # Emit a ready-to-paste diff command for each file present in both trees.
  print "gvim -d $dir_1\\$file $dir_2\\$file\n";
}

sub find_files { # {{{

  # Collect every plain file below $dir.
  #
  #   $dir        directory to scan
  #   $files_ref  optional array reference to append to; a fresh array is
  #               used when it is omitted
  #
  # Returns the collected names as a list. Each name is $File::Find::name
  # with the leading $dir removed, so it still starts with the separator
  # that followed $dir.

  my $dir       = shift;
  my $files_ref = shift;

  # Without this, a tree containing no files would dereference undef
  # in the return statement below and die under "use strict".
  $files_ref = [] unless defined $files_ref;

  find(sub {

      return unless -f $_;

      # \Q..\E: treat $dir literally (it may contain regex metacharacters
      # such as "+", "." or "\"); \A: strip it only from the front.
     (my $file = $File::Find::name) =~ s/\A\Q$dir\E//;
      push @{$files_ref}, $file;

  }, $dir);

  return @{$files_ref};

} # }}}

sub print_only_in { # {{{

  # Print a heading naming $dir followed by the given file names, one per
  # line and indented by two spaces. Prints nothing when no names are given.

  my ($dir, @only_in_dir) = @_;

  return unless @only_in_dir;

  my $listing = join "\n  ", @only_in_dir;
  print "\nOnly in $dir\n  " . $listing . "\n";

} # }}}

Github repository about-Office-Open-XML, path: /_helpers/cmp_tree.pl