Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support hyperlinks in spreadsheets similar to Spreadsheet::ParseExcel #2

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Prev Previous commit
Next Next commit
Replace tabs with spaces
gquipster authored Jul 27, 2016
commit b9f423a388302a3fbebb8caf7575b32673d942a0
180 changes: 90 additions & 90 deletions lib/Spreadsheet/ParseXLSX.pm
Original file line number Diff line number Diff line change
@@ -187,14 +187,14 @@ sub _parse_workbook {
$sheet->{SheetHidden} = 1 if defined $_->att('state') and $_->att('state') eq 'hidden';
$self->_parse_sheet($sheet, $files->{sheets}{$idx});

# Do we have a rels file for this sheet?
if (
$files->{sheets_rels}
&& $files->{sheets_rels}{$idx}
) {
# Yes - now parse the rels to extract the hyperlinks
$self->_parse_sheet_links($sheet, $files->{sheets}{$idx}, $files->{sheets_rels}{$idx});
}
# Do we have a rels file for this sheet?
if (
$files->{sheets_rels}
&& $files->{sheets_rels}{$idx}
) {
# Yes - now parse the rels to extract the hyperlinks
$self->_parse_sheet_links($sheet, $files->{sheets}{$idx}, $files->{sheets_rels}{$idx});
}

($sheet)
} else {
@@ -486,70 +486,70 @@ sub _parse_sheet {
}

# Attach external hyperlinks to the cells of an already-parsed worksheet.
#
# Arguments:
#   $sheet      - worksheet object; its {Cells}[row][col] grid receives the links
#   $sheet_file - worksheet XML, passed to XML::Twig's parse()
#   $rels_file  - the worksheet's relationships (.rels) XML, passed to
#                 XML::Twig's parse()
#
# For every <hyperlink> element whose r:id resolves to an external hyperlink
# relationship, the matching cell (created empty if it does not exist yet)
# gets a {Hyperlink} array ref:
#   [ description, target, target frame, start row, end row, start col, end col ]
#
# TODO: only external links are handled here, not internal spreadsheet links.
sub _parse_sheet_links {
    my $self = shift;
    my ($sheet, $sheet_file, $rels_file) = @_;

    my $hyperlink_type
        = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink';

    # Relationship id => target URL, filled in from the rels XML
    my %target_for;

    my $rels_xml = XML::Twig->new(
        twig_roots => {
            'Relationships/Relationship' => sub {
                my ($twig, $relationship) = @_;

                # Any of these attributes may be missing; check definedness
                # first so we never compare or index with undef.
                my $type = $relationship->att('Type');
                my $mode = $relationship->att('TargetMode');
                my $id   = $relationship->att('Id');

                if (
                    defined $id
                    && defined $type && $type eq $hyperlink_type
                    && defined $mode && $mode eq 'External'
                ) {
                    # Store the target URL by relationship id
                    $target_for{$id} = $relationship->att('Target');
                }
            },
        },
    );

    # Run the parser over the relationships
    $rels_xml->parse($rels_file);

    # Now iterate over the sheet XML again, this time processing hyperlink entries
    my $sheet_xml = XML::Twig->new(
        twig_roots => {
            'hyperlinks/hyperlink' => sub {
                my ($twig, $hyperlink) = @_;

                # Hyperlinks without an r:id have no relationship to look up
                my $rel_id = $hyperlink->att('r:id');
                my $target = defined $rel_id ? $target_for{$rel_id} : undef;

                # Without a cell reference there is nowhere to attach the link
                my $ref = $hyperlink->att('ref');

                # Is this an external hyperlink we parsed from the rels?
                if ($target && defined $ref) {
                    my ($row, $col) = $self->_cell_to_row_col($ref);

                    # Get the cell, creating an empty one if it is missing
                    my $cell = $sheet->{Cells}[$row][$col];
                    unless ($cell) {
                        $cell = $sheet->{Cells}[$row][$col]
                            = Spreadsheet::ParseExcel::Cell->new();
                    }

                    # Add the hyperlink
                    $cell->{Hyperlink} = [
                        $hyperlink->att('display'),    # Description
                        $target,                       # Target
                        undef,                         # Target Frame
                        $row,                          # Start Row
                        $row,                          # End Row
                        $col,                          # Start Column
                        $col,                          # End Column
                    ];
                }
            },
        },
    );

    # Now parse the sheet XML
    $sheet_xml->parse($sheet_file);
}

@@ -1035,28 +1035,28 @@ sub _extract_files {
($_->att('Id') => $self->_zip_file_member($zip, $get_path->($_->att('Target'))))
} $wb_rels->find_nodes(qq<//packagerels:Relationship[\@Type="$type_base/worksheet"]>);

# If we have hyperlinks in cells we need the rels file to get the link details
my $worksheet_rels_xml;
# If we have hyperlinks in cells we need the rels file to get the link details
my $worksheet_rels_xml;

# Get each worksheet object
foreach my $worksheet ($wb_rels->find_nodes(qq<//Relationship[\@Type="$type_base/worksheet"]>)) {
# Split the worksheet XML path so we can insert the _rels directory
my @sheetname_parts = split('/', $worksheet->att('Target'));
# Get each worksheet object
foreach my $worksheet ($wb_rels->find_nodes(qq<//Relationship[\@Type="$type_base/worksheet"]>)) {
# Split the worksheet XML path so we can insert the _rels directory
my @sheetname_parts = split('/', $worksheet->att('Target'));

# Insert _rels before the sheetname, and amend the filename to have .rels on the end
my $sheetname = pop(@sheetname_parts);
push(@sheetname_parts, '_rels');
push(@sheetname_parts, $sheetname . '.rels');
# Insert _rels before the sheetname, and amend the filename to have .rels on the end
my $sheetname = pop(@sheetname_parts);
push(@sheetname_parts, '_rels');
push(@sheetname_parts, $sheetname . '.rels');

# Recreate the file path
my $rels_name = join('/', @sheetname_parts);
# Recreate the file path
my $rels_name = join('/', @sheetname_parts);

# Check if we have a rels file
if (my $relfile = $zip->memberNamed($get_path->($rels_name))->contents ) {
# Add the XML to our hash for access later on
$worksheet_rels_xml->{$worksheet->att('Id')} = $relfile;
}
}
# Check if we have a rels file
if (my $relfile = $zip->memberNamed($get_path->($rels_name)) ) {
# Add the XML to our hash for access later on
$worksheet_rels_xml->{$worksheet->att('Id')} = $relfile->contents;
}
}

my %themes_xml = map {
$_->att('Id') => $self->_parse_xml($zip, $get_path->($_->att('Target')))