From fbdb6938080f4aa55ca6dd40794c5a77cc4e6041 Mon Sep 17 00:00:00 2001 From: John McNamara Date: Sun, 17 Dec 2023 20:00:45 +0000 Subject: [PATCH] xmlwriter: fix issue with control characters in data elements --- lib/Excel/Writer/XLSX/Package/XMLwriter.pm | 24 ++++++ t/regression/escapes09.t | 90 +++++++++++++++++++++ t/regression/xlsx_files/escapes09.xlsx | Bin 0 -> 8102 bytes 3 files changed, 114 insertions(+) create mode 100644 t/regression/escapes09.t create mode 100644 t/regression/xlsx_files/escapes09.xlsx diff --git a/lib/Excel/Writer/XLSX/Package/XMLwriter.pm b/lib/Excel/Writer/XLSX/Package/XMLwriter.pm index c76bcbea..aa246972 100644 --- a/lib/Excel/Writer/XLSX/Package/XMLwriter.pm +++ b/lib/Excel/Writer/XLSX/Package/XMLwriter.pm @@ -232,6 +232,7 @@ sub xml_data_element { } $data = _escape_data( $data ); + $data = _escape_control_characters( $data ); local $\ = undef; print { $self->{_fh} } "<$tag>$data"; @@ -494,6 +495,29 @@ sub _escape_data { } +############################################################################### +# +# _escape_control_characters() +# +# Excel escapes control characters with _xHHHH_ and also escapes any +# literal strings of that type by encoding the leading underscore. So +# "\0" -> _x0000_ and "_x0000_" -> _x005F_x0000_. +# The following substitutions deal with those cases. +# +sub _escape_control_characters { + + my $str = $_[0]; + + # Escape the escape. + $str =~ s/(_x[0-9a-fA-F]{4}_)/_x005F$1/g; + + # Convert control character to the _xHHHH_ escape. + $str =~ s/([\x00-\x08\x0B-\x1F])/sprintf "_x%04X_", ord($1)/eg; + + return $str; +} + + 1; diff --git a/t/regression/escapes09.t b/t/regression/escapes09.t new file mode 100644 index 00000000..7ae159e4 --- /dev/null +++ b/t/regression/escapes09.t @@ -0,0 +1,90 @@ +############################################################################### +# +# Tests the output of Excel::Writer::XLSX against Excel generated files. 
+# +# Copyright 2000-2023, John McNamara, jmcnamara@cpan.org +# +# SPDX-License-Identifier: Artistic-1.0-Perl OR GPL-1.0-or-later +# + +use lib 't/lib'; +use TestFunctions qw(_compare_xlsx_files _is_deep_diff); +use strict; +use warnings; + +use Test::More tests => 1; + +############################################################################### +# +# Tests setup. +# +my $filename = 'escapes09.xlsx'; +my $dir = 't/regression/'; +my $got_filename = $dir . "ewx_$filename"; +my $exp_filename = $dir . 'xlsx_files/' . $filename; + +my $ignore_members = []; + +my $ignore_elements = {}; + + +############################################################################### +# +# Test the creation of a simple Excel::Writer::XLSX file. +# +use Excel::Writer::XLSX; + +my $workbook = Excel::Writer::XLSX->new( $got_filename ); +my $worksheet = $workbook->add_worksheet(); +my $chart = $workbook->add_chart( type => 'line', embedded => 1 ); + +# For testing, copy the randomly generated axis ids in the target xlsx file. +$chart->{_axis_ids} = [ 52721920, 53133312 ]; + + +$worksheet->write( 0, 0, "Data\x1b[32m1" ); +$worksheet->write( 1, 0, "Data\x1b[32m2" ); +$worksheet->write( 2, 0, "Data\x1b[32m3" ); +$worksheet->write( 3, 0, "Data\x1b[32m4" ); + +$worksheet->write( 0, 1, 10 ); +$worksheet->write( 1, 1, 20 ); +$worksheet->write( 2, 1, 10 ); +$worksheet->write( 3, 1, 30 ); + +$chart->add_series( + categories => '=Sheet1!$A$1:$A$4', + values => '=Sheet1!$B$1:$B$4' +); + +$worksheet->insert_chart( 'E9', $chart ); + +$workbook->close(); + + +############################################################################### +# +# Compare the generated and existing Excel files. +# + +my ( $got, $expected, $caption ) = _compare_xlsx_files( + + $got_filename, + $exp_filename, + $ignore_members, + $ignore_elements, +); + +_is_deep_diff( $got, $expected, $caption ); + + +############################################################################### +# +# Cleanup. 
+# +unlink $got_filename; + +__END__ + + + diff --git a/t/regression/xlsx_files/escapes09.xlsx b/t/regression/xlsx_files/escapes09.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c8895578010de2e76a906b4d1d4f04629b651ca3 GIT binary patch literal 8102 zcma)BWmuG3+Z`o`WJZ=@bQ}k(81y=}zg9kQhMa z8;8vSX}`C0x$po5&#uI zPtw8O)!g3IP~FSX+{J*y)6P~y9SA@JX-S&{^7}z^HB7CWw25EL}G{6Hz=0O?L0Y2mNH|mMwfv@Y_f-U6J#}s zl!a2ujqe%(wKw9czPy0;&3$~3Xhf$(#c<0Ub$m$J9c-O&_( zy@4oF7$b9k>+yJIUO#faS-Kl@lQ)QBE;Ur-nX>j(DZ%4d53Q&gKF32qq&1en$7(!8 z<@q3GD#5~3V0*Po_$#H<$C^Mp&V_p>D-Sn(HW=I(dazpD!MK)t$X127ZpK`%aJon|y*)v3pJ0epJ*y7)fDWeOS$7Sz zi`v82Vw_Dj3|&{t&q~{TL17wo-D%j6;p!K z0MvjI-{#G{u4CH`ukYcle(yDu`&!646+?TXzTd0ZuU%^`nB~qDX114n3t^S>W!{O5 zqJ}=4+<{@TB3+qO2d^LKfnDR@EexyLR+hb$YcA8gF1|YuRD(9kX_uoQ#Vn}np4rub z*XOM}?QhPBlhwhjQ;@9^HPPkHf@z1Q8b2V?Xo4ojgq)yS84(&?Envfx1SLU4{OO^g zHN0k5Xf`9m4hWY_@m@sZ=BvB$=rR4vHQ2`ZNuzx zZmVx#esGJ%cS@?sMtpk(d3Rska^P6p_e81S21j_iu+qHa+FJTThJ%g|;8zjXho0l` zqD0iViW2g&h^7wC=6_N#5^gBp%0=2fkI^LA<(*n_EiDTnW=^;y<|w(xPVb@j_Cykd zI70N*7Ee18ip;%m@jafAFWS?s>>}jnj4V+}Cysx%3Xy4eJ@oXh>;tAfKkH`QoGb|C zuGNgbp~j4=JypG+X)^{;B$OV(T+OE&XvF*>{f!uRhIDY$QVV~(1lue~??clB$WgqL zU6bg!k>4UaS0x%TBG#W!<_s}KeTzTnI_dg?Q~D?5I{rd@)5IX_9Yu!L4=vJL;#4y> zoLHvgZaP{=W6wo9Xoi!jeMZ$c5ykwO`JflhSZrkD>d13woq z$Q|!WLyK|T+o81U9Q)#4oHQ90BqOA`OvA!TvI4BweLKwW&Ss+SZ;4(V_33O1ZO3Pa z+-XD0+;GY84OqSLll=?dQ+}%0yoqu_1r+SSm2IJc4#EGXh{B+JID0N59VjC z0n0X;#Lwd0t@qWQ@$2?)UTpKjqhe~!f7hrhIuxK9<}BLw8i&CQtwE-KTyQ#Fcr}bJ zaDUi&pXpPw;LV|^iP%S@hsBDbX0e~S~iR=nX5(kP`Q8y54S>bR-)@KL z&zf}1_8t^&rYv>}ETWP_u^3zt<}rHzfxbBt3UV@Rt_)8HvXDK?hR zU5z&8JDIw7RTq!oA$yI!pKhLrFdKylozv$&P?3O);2{f)qrbJP6p8Kg2N%``JuO!- zYK`u8=$y(`JeDg5v$vQ%pOT}R8BopKu87lpo)rMj8BV{Gxbi-Tcqez4oiEJO2hor@ zQ{5QgrP3xYe3~aR=TM{&y*P)axSk_n5a-+CP79o%F)`@q%IGL0g zK+%4cAtUar#s?(>E6VuEe#+otWp3{3!ui)5_aF6uKU7*4!wcGxIirEN%qzkZF{EXQ zuxa^U2P8(Hft}#QUHtoa_-8v#`4yB2{zn!~i8Q_zHyogM?uzwi_LQf5*yRaT#Pl>RtVr)I(^JMi(#KuEAbH(mzV$7JAreBfH}W;EQC{SH!9CcP9wwFK|<}Ao~7{6X6<~z 
z6F1-bzNF&uXi4R`G=v`q$MN2}wU$$gW5y@?M`hgtYV)bLyGMe;6KG0b@fYri>To|E zkdNAjlXHqJFo-{FTwsshPvm{%{9eyzdVZ71fp3|e(DPOClTNjA1AxDw?;tSC+Ce$(@ZC;e3QS9c;ob!30*#v%uLa9X&CFa{eIL<1&MD>$Tb zt?%7-Wj#a-GCO^L7byGmV5bjKy3q2e*Sy~`d{ZEDTTujWOs3NdGa{nw30GQxyvx+K zUtkDi*$Q9wIkT8XiB@|uBw~Z<0EWxP%qX*N>WW3{ziHj_zL+i!K6VN;rGVa{Dq?k~ zuhOX^`9A&3*|JWXxraqBHoD9tua+wmwp zs{!y~x7GSY&sZ=5t?fA^grcNM_S`3*+fI;&3>xb_FzQm@vGsVi8|S6~S(b)>tIjiG zErm4+1j*NLl~7$1Gd2x@;)*4ph!bplW4X1ckJerRvy+&Kqz>GiSLvn@V$6^<((Brn z@aj4ReI-ZW{Vu<|QM#J+IQ|&ie2yupaF^7>YuhL5H*MiKfx}<=Y;*3_7d^Vh%6FiA zMfZq!SVPb-mX_-6K(-7FcgB1TXPwdD$ngfjgjMyx=?aVKp#}$D!C^Y=ga;`J4$Idt zyspBPC%lgE8_P4&%Y@U(lCPtC>y7N$sn2*>PINe7$*cgQxKvAG79PqrXA{NsL`}om zY!peVF!%|rUWZ?;1iJIOZEAiruCEL^yhW+ZBeL>F6o>aQv*5EDQ+n<3V4L*zwNAnt zb*Wt>Uv0(=j{Wx(%3&+rM{Ms|AJ1y><`v>Yh&)PKy?xwMW?Y5)8>E+$4hhwO9kX^c zwqN;9%#Z~egg-^SD02d-gZXk)nKS$>^Z$zaqsT8y`#+R<=|!1iE*R|IY(X#>M0tpwd+3h}uZGStw+-n4891NCYA8mJ$<(+O^igXo zSn(Up)V|&hs?pm8Q9f zd@?G|Y0Et^*03Uqv!&Uw!N9hP7V0Xj6~?5I)>67@<_%v_A@`5*`|{KBrt{F_lh0q$ zyq%}w*|h!C)pxnW2qZ`WO41N7Ziq+qjWa{#OzNezvI(RxW{}Rb^4Wcw*ZJdChGLTNI*gIz)%2YtF}}B$fS(NN zpDNXNx}>zcli32*xYL8kh9Flea}y*;)Trp{u{8rCiFd!%3ADtOnA8Dv!*uRZ1)VIseQdyi?xJX4(sH^-6azof z&ih2ziD1d-O#%p>R@|Enhpes*Vy;lTLL#K>*iRKk^o3R>J#6#P;!^usK5NFgWOqSH zf`+!gdw`N+8FVYxw4Q`Sr+0L4E-tU_b;%=FyuL2pr6$B}qm*^zgufj1@ih2s#aFJ| zhp9`h{3c;5hc~u)1;j7YqC4^7RPlb!E<48Z4dT62sx&#-nw*||vU+A3Z*j-@=OwT7 zfw8Q9;CUWP{;=rfbsMVB6LCzkaHgZQcYJt|W9u&zk#eCxrWsOY-scEmK{MGCK5JnF zKqh5JLZGvy)G9?ppIzHwb#Ps8F<1*HFTc;eWDY|lXhiqx)s38OdwHxXBQ;HAuPVFu z*28qx-Er!`ETwABnF$Zl3ArWFYeuuMy$7B0vb?NKEscJhayM;jT_c`;NhvTG;R$5z zrn$8x|FkJgPYW3qR&0ZLO8j1{!jaMtegjL?OD7AEZl?*ANL!!n2@lrp*^Mir>>q=L zx3-S!sg|r|R;sAzw1PssJUIgfZ_U=jua<l?v`9h=VtnpLs6gfN(d5Kcq$BWNj{cf$E?!g86Cp@3qX5ZSh|QsOz>2hLvi zw4R@}pD24zy9@CRg7f}^4a|VS_vTSXlN_;ZmgX3px^n;%%~qX~dG1ilH1HWZ9hc_5 z)QB%<*u;zXFFkj>#f~aO_@(Rd4T#SGavkw2wx!dy)Hnrr-L3 zL(!UH{zjMV%1z3t!v35x;7KeuWrW@PscLZ6JOcmhBM#Nf+)!Kz{eWQF_tupl3trQG z#=SSz$=~L+ova;BaG$WaN*b!it{QY@Y{*Vtqm6=6OW2|J$j(obyV%J$@)>H%P^pp{<&R_u 
zH8QwbncJEFc>U>d|B_Q6bRI@eOOZ9w?a@FQtD{JS`btuY?@s0hx=)YV>y9k9B~J`T z+#l;TtA3Yt6cP}?6oL!7M}S>8BB$T|Mu*A&_95r)DP5N*@0`W7G-;&(efr)PA7A9n zwf%%MV##1BneOGPPx`NQLfS-VhWM@YsbHMhQ z(aUmww=?zv-nEy0`#l29*rLTvX41<89Sk7Z-8^_CI}TG;xW)%~6RZNP=@Hm+GzPCdVUfRQNr7dt zXi~I%#4Q5u`kan!=a3@Cs7BIbFC+K(Gk3^c0vQETxt%>AEoNC=eB#$yd1x~O4XbNX zHbFil32$9FIkaR5dx?ja89R&P#NwD>)6v4DNs^#Ph6%e@fepy|ZjU9Mg^HZ-1MzU| zy|*|I<*0iD44rG#i8h{bMLnUrofo>A_&i~xAu5Z6ytT2%LWP`}H0^oJJ+3uD=U}B; z+9*&?KbXL6`+@US=CC1ZEeM0>JBg2cZC5%H78@l)1@(01^Tn+cV-EV&Ne${oeCa4M z#v;fA?Y}mX4=hgaTJa>bbzRpFJfpcgcT+g>7<=XPtnsXIr!gE_U6eSEsOrJA*>Z9E zz*$5E?}#r&<1<|e>eVer&wBu!=)F!pWWuNtv)Qo3>c1U8h*W&O>4?Umou^>0Q1uBzc0npMyJ}=$xX2Y9&^>(8htdJ zYX_cFm=YBK*#7R?QoT!vsvU#e(PM$KntF)`nF(6QY%9yTc9F(#{~o3*K0J0}&oP{) z_ce^WS;J3qn^!AbA17cYNzqFb-DGQh!BWTP>jvUHeB*7u8T^I&{b1zSaQ2Y%cS-i` zxGaWc?mdn6erKJTWi{zGaehGw$-CFi$PV1UY*VNJ2O>_N(q>A`-rnm@FjQ~u(gI7n z86^hPtjn^q9|;K8Jz9?`1|M$Y%o+~Ie-KDnYN*T4O~y5~F^#`jXoN>ZmM1V(RijKB z%n=_jvV%)T#iNsX!@o>MLxK&u_+nFzAPLI47JKjAE%h_7n7>4UTGlNyrEBhu{>f_N z8E;%5(iu91tZ`pGWC(zixueqxh-zj29mYU9o|&%9$^1dloD&_Ehxl>FFPj`HV0+BE>dtfbChHR=se*dPD)8Nk?sC-9)QRZ+;!_xVsz2-2QLI#Uj%Hms6LnC$F}~X)xVq@{jt?A zhBrS(g-235f!|>h-t0o`dGVd^0whZ8POnJl>M>T)S4`)AKMM=UCkPmXDVadFdttMm zHoe!|sY%?LSqsBJ=ub#&*Yq$lrmKT*5VcL|&03gl!m%wi&KaT8i`WgXXO&)3$T7;QS4| z64(jF;Fkm(FKp`v1hFoKitJ_n<;pw-0;w2R(?Y$KeQNxfoIzxs1$YL3Cq7Q$IInx3#54Xj+ylv**_@ z%akJC`X37$-#YfLWR~DD<#HUy+7oa%yonyj)V_LX+#5j97`wPmEbsVmeWRb7vLZnN&&R$ovxW#hR^gUA8ufaA(lYP5elmm)(^0RW)50Oa~zeEP>z zDvTeeRDYY5E(2dM`|H`sW#Gz-CiJ(H6cp+|oTB_4_17@}GU^jlTl-(spV9uGVSi7l zFT*0y0l#L}|J&XFn&Vyuyn_k&J=Oj5zr3LI*HrQ{XbEb-{ZCZ<4;kg3(SOg#E~EPZ z|Bn7IiP`_g&9BkZMuZhT?Z{zoH;qryt z@c+8dKST{EI4=#SU>wo*_7^tNO01%)ar>Iq(NOAG){{T<{E0X{K literal 0 HcmV?d00001