From c18110e282bb0d8c935407f32cf0ada5e1d823df Mon Sep 17 00:00:00 2001 From: Xpol Wan Date: Sat, 14 May 2016 16:24:21 +0800 Subject: [PATCH 1/2] add support for one cell with different formatted text nodes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E.g., in one cell the text is abcdef, where abc and def have different formatting (e.g., different colours or fonts). The sharedStrings.xml in the xlsx then has multiple r nodes in one si node: ```xml <si><r><t>abc</t></r><r><t>def</t></r></si> ``` Currently `shared_strings_serializer::read_shared_strings()` only reads the first r node, which should be considered a bug. This commit fixes the bug by reading all `r.t.text` values in one `si` node and concatenating the text into one single string. --- .../serialization/shared_strings_serializer.cpp | 13 +++++++++++-- source/serialization/tests/test_read.hpp | 14 ++++++++++++++ .../reader/shared_strings-multiple_r_nodes.xlsx | Bin 0 -> 8447 bytes 3 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 tests/data/reader/shared_strings-multiple_r_nodes.xlsx diff --git a/source/serialization/shared_strings_serializer.cpp b/source/serialization/shared_strings_serializer.cpp index 93591fe5..cb0e5d05 100644 --- a/source/serialization/shared_strings_serializer.cpp +++ b/source/serialization/shared_strings_serializer.cpp @@ -68,9 +68,18 @@ bool shared_strings_serializer::read_shared_strings(const xml_document &xml, std { strings.push_back(si_node.get_child("t").get_text()); } - else if (si_node.has_child("r")) + else if (si_node.has_child("r")) // possible multiple text entities. 
{ - strings.push_back(si_node.get_child("r").get_child("t").get_text()); + std::string text; + for (const auto& r_node : si_node.get_children()) + { + if (r_node.get_name() == "r" && r_node.has_child("t")) + { + std::cout << r_node.get_child("t").get_text() << std::endl; + text += r_node.get_child("t").get_text(); + } + } + strings.push_back(std::move(text)); } } diff --git a/source/serialization/tests/test_read.hpp b/source/serialization/tests/test_read.hpp index 5533a516..1b8b5acd 100644 --- a/source/serialization/tests/test_read.hpp +++ b/source/serialization/tests/test_read.hpp @@ -184,6 +184,20 @@ public: TS_ASSERT_EQUALS(val, "Donald"); } + void test_read_shared_strings_multiple_r_nodes() + { + auto path = PathHelper::GetDataDirectory("/reader/shared_strings-multiple_r_nodes.xlsx"); + + xlnt::workbook wb; + xlnt::excel_serializer serializer(wb); + + serializer.load_workbook(path); + + auto ws = wb["Sheet1"]; + auto val = ws.get_cell("A1").get_value(); + TS_ASSERT_EQUALS(val, "abcdef"); + } + xlnt::workbook date_mac_1904() { auto path = PathHelper::GetDataDirectory("/reader/date_1904.xlsx"); diff --git a/tests/data/reader/shared_strings-multiple_r_nodes.xlsx b/tests/data/reader/shared_strings-multiple_r_nodes.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d35130e51d1f7be64322c74c8bb28d8847726018 GIT binary patch literal 8447 zcmeHMhgVbC_6{A9P5|junkXGaK$;*$sT!I{?=27@AiY;FyoFf^2Uby%I*mCqc1eT(%{cnS1AE0-!TMLUVgC6K8 zNUz5W8%-5r?v=R>P;&7<(x9aS*gC{dzIonz#Ujn1n3nt{O%`#c+KWZsy+~({$c~A# z$Hk0yr#k#>pBx>QkLw8QAic&VBFJ~E!;d8KGR0w zd8u|!!u7a9mZ?8^%TMqM&AIDqEP%#e7^&f{)^Uu6*%LJ3ZlW1!wPK)Sw=<$yzG(?Cu3ilT&spQF}q`Y8vyCfv!16i99RbxA-7 z=G1!m!CkCLxy9v4kO$FDDQ?d~ID>c5@)o!afsrKWsS5L$1g~N@tsj|w|3I4T)4A`$$9~uoBqy`=;R*9pBzD;IYJA- z$MCS{`xi~1PA+z)PEK||^VaW_!9XW3w3q+xfrgB27e8U|K~PK3Cm-k0*8+-JFMD&R z?s@^t?l@bU(y4xj!E^d^pXRxyydca8Zad)2+uOII@22M14${$}yp4atF}(mKc$hxb z#7WvcYaoRkOTfWCJpmV+6QdX2P+$F~#MY(7las%7@L@_hRb(!&?P}XfriYV!IJ;X&?$o-=#x`-?SHAppk%BJ)GA=N 
zjxQ{+ojRQnP^(@cvCIf5p7mQ4iLNw!`Ct#{`^yXz1le-Bf=FUL?}%R0x8Oq1=J|7% zs#X%yygCO3YXfy3KT{3{@yc6Se)@WZ3hPnMK&kDR0RSH@i(M+S&FpP@*x%_OED1oT zvq~$>!g%&}T}n20dNYCd8DDGh-q0z2oSJcFym^(a-7r}f|KvSmH@__X2%NHN&!1z< zYl&T;uaLMTSwv>HacTc^z#Ey-i__dX772`+IWC$wlUEpb#YJYfTbeMrKEs+v=w5?? zUlUTIq^Nh_!KIkpu9B4eB4q-YCmmGwb<^^Iaz3U`nYAakSHBAw%{@p*~b7wst{;xjV_E2X&ttn%j_fe$}V z>Mx1ZJ}$Dm{3!3Dm+J?P*WjZMn$FgczBV{_S(Z47uZ3Nq5=eqhBg(XL1hKDt8P{RJ zH`?&SZdDx%u87>HVV|vfNs^9Rhc6W$yF*(K1cft@4@9eocM$Z%d%ZGl6D?<1OdW1V zC@KPch|mVVz?|SVY3L#s*~uif?;+i?a4Drjbb#%8+zI!{u>M&O!V!DeGc;TJ_7@LW zd7nup5Cu-MS2RhCLGfn+odx?*9tqvjM*B+NTNRUoO((S`4QIDTI%=}22rC+Pa!FSK zS)@>M|1ml_o6PkwVPe#CVdA&euwXHiGC5Von^eVG@2ufSYB{;CMe0oKMViUQl6{5% z_Z?BBNv#-GbPcjS$gS`6%xVq)!_md_1DnVA=Vzeh<$3)dXh8lQsv&GLg@69+l~R&m~Oq9Zefoh;d(AJ)$EH-BP0b&p_V;WKHoeKeQKoPfL5R*^6C!mNi($uDo0xls)eNzvPT<;Q${bf58^$y& zet8Z(o*YYjuyKDQ*g^{K+y!CC?6Fs@Q5g2QPc!wPvJ2;81$c zM}gYAGFQ<|x!D=`?<9+-JPCWk7mZajJK*92+U!drM&nlE2tX5eg_smyc~Q7|B`gm( zxR5cd9Gw**yv67k9m~~h88{G_x?VIYENaWOyIame<~_ugF0m1Q`oh|(_a4W2378?W z(Vs+WO54xz>d=ybAu*|VT3e?~4CH6gQq#FxUNSpKJ?8MXmFe8X&&~*=c)I8H>{~F< zagQ4iX|`$8p#Y~LLOH%u{obHJ*Sxp8}DhIsa7C#Y^~k4n($q=9mQ=+e1KkG4u9j%aQ- z;pw0r$LX4WWOlVTA>c;7QrsZf&h6~Cw`%UoB|d?y(8ClkBjAr{P#U`;E$^lRua7p$ zIM>Kr{o_FfN0=PmMM_7`9+|L$&G$xu&vk+%Uy46|!p?rr;JIQnEnKr<#IE_rT2q_f zx892#c#cuqd8_q_VD`>A3zEu+BC(|eW7A92Qn3+xjmih zne*mJE)KF9yej4%`mO*Pdqd#zmE20EvnVyvY=w~Y9l~6$P?7a{k|PYbQqw>dtU=c* z?zu{MtN!b&@ZqQIgoKaou5zZ*I!emj3DC;uzoAmHe`v-|PS-!?#<(U46NQxEX9sD0=>VT{5UlY<@|I5|4tIdyx6Mb?U|%m zU*(CQ*otj;pH?ax^{=B&uixzb*ii6V z)NS=GsT`*XZqk2SM(Krsc0A-v%#IZ4>3dNzBR;5HD*!AV3ZB(;6ax=nGY;$p1Q@I; zO*p%7w6d~_KOG^bIwh*VdT}^<5-6eCYtm8`IiYyVsHVc5sO%(oDALFFEz4PchzFRx z)P-+4xr);pm=s`hgC!X1^(JgjPtoj+cElLoU2CL#a61@MctHh{EIa z#b8^Co{BsEv!XoJ)-J{+6A-Mn>FX{Kfugf?}nJu}9F6C%N15xuRVqbBGS+k^3kT74)jzV=A$x!gOrZ<6Q8lA7@V~juPWHM3L zV92kQa7~3Du9mxuLUKH17h}_9pB+EwC|-wqS54mdR-rGAum4(eB+)Wz}MaqIkhbPj|r(?OW)XwN&x^ zlbJ8ex$E(n^CJ#tXI<7kISjRB2LpIYf?+RgWO>68hfhkl`=f=(Ki~AePUFOfH7cY= 
zoUkwlZkCnO=c^=k*HEGcCyA&OS+lYzwQOouWZZ=4<6WcX@u5pNs`Y|Aa6G_9iKG>G z%Ye)FQz@Cqt{2L78l^loY~J`I$gW+LQZYg2$8qiZAo_IpW;%G}d^$>A8QWovVhA5v znu*`L1ERUFfZ&*wd0tK(rO zBgcn6jThg(WZzE*6LxpqKe%1d{+&S*d#hOBc=w(wmpk<)yu3v^dizYOxI9(owM*(VGi&uK!fJ5;yi4v(mC>wgQxd zjH;pf$WJV!NO1cG6sNj=QxuS)h2(p2)`E00adKh56OZqetZ?hwbQ042W;4PeYKnlk zEq2oM%or>8(nGhdAA0Ur%ZS+)^UPl}XBE7wg1J z9Fde~SpMOwS+G&#Om|+aM63040mrmUd3S)&-0tzbaJ?I(&@&>7TFFZhqUG%MULyL? z_d3(EoP2D)B4t;92A8$-yN}n-2g+OX(-15Q0O*+w)iSi``0dt_@C{rFzlSu@#9JQx z48tiwU#ta;?!ii!&ZEv+-kmYYSJ5CfFuFbsf^WS>+-$h{#k494tbQN*T*`hH%Ub@m zL(t)=^y_o#e}s9WIuM|bhWUH+A?wz^VE*%v1+liUaE0*wHvLwdB_=Ma&+(J?EpqPp zRbN&YX>zXVY>hHM3RyBK=CSv3&r~%f+u)tcYd30(s-@z15TRge_kI82kA+xa2|vNR zUf3HV?l-g5$dS3)y4#g651Gz%@Pv@wafaLmyT%x$RY4(PXpIowM4(ari&Z^dq&gAZwdGJC?|v>0Wo|yBv*>uMjQ1^i zr%f&zxt^x!GTrYvDb7EL7G~pT*uV7m?=L@JD5EALu_+gK~6g`ry z#&pb15u2Fuh9m$^6CLD~B!v-<<2g@Gr!i;mf9V?=o0n(7 z3H7(kDahNxNkF84SVH}1N+g07B=B1-K>A?lw~)|C+T|qr-Y8I#`@`}Ndkta=?9QQT z+9fD3U!wwb=6FEuLf6Nwn0BdY}ALI|bX z22Jvs+aTXH>+36bnzjomq7o~3wTYc@&38m7!l?lDDP0XUtibUy2oK4$>IV+3(>pOz zmd+^I%|pU7x+#_>_J?w5qKNB?G7rTWFR*ecIhE zd@HZ_NP-^(;*rmX<>CXYf}iaW4q2hj$rL69ZtH4te;6U-rCmFNa!)1d;4VBZ=(vAF z^ho8SR;5s#vzm7F-9?#x`#zyWJe?%&wBrIjv!Qb_1FU_He$tesRO6Useg4S`)(6T2 z3o6?c+=2c}w+Ogo#iNwNmTpc3aPQ?XVY>A27D(7ikM-}@H!KlN*N}i3y03f}XU+nN z4sw>(Rvl;+Ul)|tuJ|8418?WwVOIaLwq@l0{Q~E+$X>%CK&<_R1#RwKN#=GJw)$f_ z+xCpmMfr~MjPs-l_V5j}*74_LcT-iqIF-N52*@g*%Az3Wa@dm!3#Yxq%qlJH!1{nK zYGGiNtGz=I0^J=K#6 z_>WgE8ehC=j~on4-pq9k`u$@ZExfpWKMRdpQ7iy}^v}380fYYsTlA9sdt_=4IHL1N zGxY~){N}#IQ9ObmQw?@*7i$eH(5R4V1urX587vl0(pkuQ;nt$x9kvQ9kYTbzrHK;Pe1$S3{Jh~Ys6=0XwLiku#^iJjDZ$_UccXml~ z(&d#x3#^UtL1aoD>J}*FnO;KXGQs6OHSe1pv=v+ytW41{j6RQhf<}Ae`&t|4?5YF# zY*-!YajwXT7|gW{8cfTQLNd&i+&{A7`(k!W71N(8riE)o*D=b%VsX-^fM=9&4h5-G}Yt(o5Y!wtM^?>Lf2){}YNaFz=!TtbcAb|GtI4 z&;QVP)=>ScgTHphemDF%C!l%qr#{)QhJS4r{MoPpU623&&4XY4{Hh`U;b{xqP5(tv z{?+(bRqqeu#GAhv|4rlj)x)o<*B>4l2>($7`_;j(r;tA!SfN!k^#6za=S=de>93XD zAEs(l|9SPl)p@^q`D;G@!v_GcfG!vRmYsh!|7%$Mvw1SzpUnRqBsElV(7Ou&5THN) 
M=mNlo@#nYy1DH&o4gdfE literal 0 HcmV?d00001 From 6500f71f703c979c75ad3f7feea645fea6db1f40 Mon Sep 17 00:00:00 2001 From: Xpol Wan Date: Sat, 14 May 2016 16:36:14 +0800 Subject: [PATCH 2/2] Remove log code. --- source/serialization/shared_strings_serializer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/source/serialization/shared_strings_serializer.cpp b/source/serialization/shared_strings_serializer.cpp index cb0e5d05..9afce1f5 100644 --- a/source/serialization/shared_strings_serializer.cpp +++ b/source/serialization/shared_strings_serializer.cpp @@ -75,7 +75,6 @@ bool shared_strings_serializer::read_shared_strings(const xml_document &xml, std { if (r_node.get_name() == "r" && r_node.has_child("t")) { - std::cout << r_node.get_child("t").get_text() << std::endl; text += r_node.get_child("t").get_text(); } }