//proc/self/root/usr/share/perl5/File/package File::GlobMapper;
use strict;
use warnings;
use Carp;
our ($CSH_GLOB);
BEGIN
{
if ($] < 5.006)
{
require File::BSDGlob; import File::BSDGlob qw(:glob) ;
$CSH_GLOB = File::BSDGlob::GLOB_CSH() ;
*globber = \&File::BSDGlob::csh_glob;
}
else
{
require File::Glob; import File::Glob qw(:glob) ;
$CSH_GLOB = File::Glob::GLOB_CSH() ;
#*globber = \&File::Glob::bsd_glob;
*globber = \&File::Glob::csh_glob;
}
}
our ($Error);
our ($VERSION, @EXPORT_OK);
$VERSION = '1.000';
@EXPORT_OK = qw( globmap );
our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount);
$noPreBS = '(? '([^/]*)',
'?' => '([^/])',
'.' => '\.',
'[' => '([',
'(' => '(',
')' => ')',
);
%wildCount = map { $_ => 1 } qw/ * ? . { ( [ /;
sub globmap ($$;)
{
my $inputGlob = shift ;
my $outputGlob = shift ;
my $obj = new File::GlobMapper($inputGlob, $outputGlob, @_)
or croak "globmap: $Error" ;
return $obj->getFileMap();
}
sub new
{
my $class = shift ;
my $inputGlob = shift ;
my $outputGlob = shift ;
# TODO -- flags needs to default to whatever File::Glob does
my $flags = shift || $CSH_GLOB ;
#my $flags = shift ;
$inputGlob =~ s/^\s*\<\s*//;
$inputGlob =~ s/\s*\>\s*$//;
$outputGlob =~ s/^\s*\<\s*//;
$outputGlob =~ s/\s*\>\s*$//;
my %object =
( InputGlob => $inputGlob,
OutputGlob => $outputGlob,
GlobFlags => $flags,
Braces => 0,
WildCount => 0,
Pairs => [],
Sigil => '#',
);
my $self = bless \%object, ref($class) || $class ;
$self->_parseInputGlob()
or return undef ;
$self->_parseOutputGlob()
or return undef ;
my @inputFiles = globber($self->{InputGlob}, $flags) ;
if (GLOB_ERROR)
{
$Error = $!;
return undef ;
}
#if (whatever)
{
my $missing = grep { ! -e $_ } @inputFiles ;
if ($missing)
{
$Error = "$missing input files do not exist";
return undef ;
}
}
$self->{InputFiles} = \@inputFiles ;
$self->_getFiles()
or return undef ;
return $self;
}
sub _retError
{
my $string = shift ;
$Error = "$string in input fileglob" ;
return undef ;
}
sub _unmatched
{
my $delimeter = shift ;
_retError("Unmatched $delimeter");
return undef ;
}
sub _parseBit
{
my $self = shift ;
my $string = shift ;
my $out = '';
my $depth = 0 ;
while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//)
{
$out .= quotemeta($1) ;
$out .= $mapping{$2} if defined $mapping{$2};
++ $self->{WildCount} if $wildCount{$2} ;
if ($2 eq ',')
{
return _unmatched "("
if $depth ;
$out .= '|';
}
elsif ($2 eq '(')
{
++ $depth ;
}
elsif ($2 eq ')')
{
return _unmatched ")"
if ! $depth ;
-- $depth ;
}
elsif ($2 eq '[')
{
# TODO -- quotemeta & check no '/'
# TODO -- check for \] & other \ within the []
$string =~ s#(.*?\])##
or return _unmatched "[" ;
$out .= "$1)" ;
}
elsif ($2 eq ']')
{
return _unmatched "]" ;
}
elsif ($2 eq '{' || $2 eq '}')
{
return _retError "Nested {} not allowed" ;
}
}
$out .= quotemeta $string;
return _unmatched "("
if $depth ;
return $out ;
}
sub _parseInputGlob
{
my $self = shift ;
my $string = $self->{InputGlob} ;
my $inGlob = '';
# Multiple concatenated *'s don't make sense
#$string =~ s#\*\*+#*# ;
# TODO -- Allow space to delimit patterns?
#my @strings = split /\s+/, $string ;
#for my $str (@strings)
my $out = '';
my $depth = 0 ;
while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//)
{
$out .= quotemeta($1) ;
$out .= $mapping{$2} if defined $mapping{$2};
++ $self->{WildCount} if $wildCount{$2} ;
if ($2 eq '(')
{
++ $depth ;
}
elsif ($2 eq ')')
{
return _unmatched ")"
if ! $depth ;
-- $depth ;
}
elsif ($2 eq '[')
{
# TODO -- quotemeta & check no '/' or '(' or ')'
# TODO -- check for \] & other \ within the []
$string =~ s#(.*?\])##
or return _unmatched "[";
$out .= "$1)" ;
}
elsif ($2 eq ']')
{
return _unmatched "]" ;
}
elsif ($2 eq '}')
{
return _unmatched "}" ;
}
elsif ($2 eq '{')
{
# TODO -- check no '/' within the {}
# TODO -- check for \} & other \ within the {}
my $tmp ;
unless ( $string =~ s/(.*?)$noPreBS\}//)
{
return _unmatched "{";
}
#$string =~ s#(.*?)\}##;
#my $alt = join '|',
# map { quotemeta $_ }
# split "$noPreBS,", $1 ;
my $alt = $self->_parseBit($1);
defined $alt or return 0 ;
$out .= "($alt)" ;
++ $self->{Braces} ;
}
}
return _unmatched "("
if $depth ;
$out .= quotemeta $string ;
$self->{InputGlob} =~ s/$noPreBS[\(\)]//g;
$self->{InputPattern} = $out ;
#print "# INPUT '$self->{InputGlob}' => '$out'\n";
return 1 ;
}
sub _parseOutputGlob
{
my $self = shift ;
my $string = $self->{OutputGlob} ;
my $maxwild = $self->{WildCount};
if ($self->{GlobFlags} & GLOB_TILDE)
#if (1)
{
$string =~ s{
^ ~ # find a leading tilde
( # save this in $1
[^/] # a non-slash character
* # repeated 0 or more times (0 means me)
)
}{
$1
? (getpwnam($1))[7]
: ( $ENV{HOME} || $ENV{LOGDIR} )
}ex;
}
# max #1 must be == to max no of '*' in input
while ( $string =~ m/#(\d)/g )
{
croak "Max wild is #$maxwild, you tried #$1"
if $1 > $maxwild ;
}
my $noPreBS = '(?{OutputGlob}' => '$string'\n";
$self->{OutputPattern} = $string ;
return 1 ;
}
sub _getFiles
{
my $self = shift ;
my %outInMapping = ();
my %inFiles = () ;
foreach my $inFile (@{ $self->{InputFiles} })
{
next if $inFiles{$inFile} ++ ;
my $outFile = $inFile ;
if ( $inFile =~ m/$self->{InputPattern}/ )
{
no warnings 'uninitialized';
eval "\$outFile = $self->{OutputPattern};" ;
if (defined $outInMapping{$outFile})
{
$Error = "multiple input files map to one output file";
return undef ;
}
$outInMapping{$outFile} = $inFile;
push @{ $self->{Pairs} }, [$inFile, $outFile];
}
}
return 1 ;
}
sub getFileMap
{
my $self = shift ;
return $self->{Pairs} ;
}
sub getHash
{
my $self = shift ;
return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ;
}
1;
__END__
=head1 NAME
File::GlobMapper - Extend File Glob to Allow Input and Output Files
=head1 SYNOPSIS
use File::GlobMapper qw( globmap );
my $aref = globmap $input => $output
or die $File::GlobMapper::Error ;
my $gm = new File::GlobMapper $input => $output
or die $File::GlobMapper::Error ;
=head1 DESCRIPTION
This module needs Perl5.005 or better.
This module takes the existing C module as a starting point and
extends it to allow new filenames to be derived from the files matched by
C.
This can be useful when carrying out batch operations on multiple files that
have both an input filename and output filename and the output file can be
derived from the input filename. Examples of operations where this can be
useful include, file renaming, file copying and file compression.
=head2 Behind The Scenes
To help explain what C does, consider what code you
would write if you wanted to rename all files in the current directory
that ended in C<.tar.gz> to C<.tgz>. So say these files are in the
current directory
alpha.tar.gz
beta.tar.gz
gamma.tar.gz
and they need renamed to this
alpha.tgz
beta.tgz
gamma.tgz
Below is a possible implementation of a script to carry out the rename
(error cases have been omitted)
foreach my $old ( glob "*.tar.gz" )
{
my $new = $old;
$new =~ s#(.*)\.tar\.gz$#$1.tgz# ;
rename $old => $new
or die "Cannot rename '$old' to '$new': $!\n;
}
Notice that a file glob pattern C<*.tar.gz> was used to match the
C<.tar.gz> files, then a fairly similar regular expression was used in
the substitute to allow the new filename to be created.
Given that the file glob is just a cut-down regular expression and that it
has already done a lot of the hard work in pattern matching the filenames,
wouldn't it be handy to be able to use the patterns in the fileglob to
drive the new filename?
Well, that's I what C does.
Here is same snippet of code rewritten using C
for my $pair (globmap '<*.tar.gz>' => '<#1.tgz>' )
{
my ($from, $to) = @$pair;
rename $from => $to
or die "Cannot rename '$old' to '$new': $!\n;
}
So how does it work?
Behind the scenes the C function does a combination of a
file glob to match existing filenames followed by a substitute
to create the new filenames.
Notice how both parameters to C are strings that are delimited by <>.
This is done to make them look more like file globs - it is just syntactic
sugar, but it can be handy when you want the strings to be visually
distinctive. The enclosing <> are optional, so you don't have to use them - in
fact the first thing globmap will do is remove these delimiters if they are
present.
The first parameter to C, C<*.tar.gz>, is an I.
Once the enclosing "< ... >" is removed, this is passed (more or
less) unchanged to C to carry out a file match.
Next the fileglob C<*.tar.gz> is transformed behind the scenes into a
full Perl regular expression, with the additional step of wrapping each
transformed wildcard metacharacter sequence in parenthesis.
In this case the input fileglob C<*.tar.gz> will be transformed into
this Perl regular expression
([^/]*)\.tar\.gz
Wrapping with parenthesis allows the wildcard parts of the Input File
Glob to be referenced by the second parameter to C, C<#1.tgz>,
the I