diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp index ecdb33ec17bd8af12a789dd52cb920506986ce3d..6f4395818700856704adba1ed495766f524d88c6 100644 --- a/include/orcus/spreadsheet/import_interface.hpp +++ b/include/orcus/spreadsheet/import_interface.hpp @@ -952,6 +952,21 @@ public: */ virtual void set_string(row_t row, col_t col, string_id_t sindex) = 0; + /** + * Set a string value to a cell when the string is not in the string pool. + * + * Used in cases where the parser cannot guarantee that string pool indexes + * and in-document references stay consistent if the string is pushed to the pool. + * + * Implementing this allows the document backend to handle such strings in the best way possible, but + * primarily strings are pushed to the string pool. The default implementation does nothing. + * + * @param row row ID + * @param col column ID + * @param s inline string found while parsing. + */ + virtual void set_string(row_t row, col_t col, std::string_view s); + /** * Set numerical value to a cell. * diff --git a/src/include/mock_spreadsheet.hpp b/src/include/mock_spreadsheet.hpp index 07a72f7028f14f9c51a402666e71db2b52e34392..d6c32b2034e86023e9cb320c5db44ecbc77e01fd 100644 --- a/src/include/mock_spreadsheet.hpp +++ b/src/include/mock_spreadsheet.hpp @@ -135,6 +135,8 @@ public: virtual void set_string(orcus::spreadsheet::row_t row, orcus::spreadsheet::col_t col, orcus::spreadsheet::string_id_t sindex) override; + virtual void set_string(orcus::spreadsheet::row_t row, orcus::spreadsheet::col_t col, std::string_view s) override; + virtual void set_value(orcus::spreadsheet::row_t row, orcus::spreadsheet::col_t col, double value) override; virtual void set_bool(orcus::spreadsheet::row_t row, orcus::spreadsheet::col_t col, bool value) override; diff --git a/src/liborcus/spreadsheet_interface.cpp b/src/liborcus/spreadsheet_interface.cpp index 585644a7dedd60d31db95bcb43be4436038418d3..8e97e81d31d88a48ed836920ae85f240cebc825d 100644 --- a/src/liborcus/spreadsheet_interface.cpp +++ 
b/src/liborcus/spreadsheet_interface.cpp @@ -114,6 +114,8 @@ import_array_formula::~import_array_formula() {} import_sheet::~import_sheet() {} +void import_sheet::set_string(row_t /*row*/, col_t /*col*/, std::string_view /*s*/) {} + import_sheet_view* import_sheet::get_sheet_view() { return nullptr; diff --git a/src/liborcus/xlsx_sheet_context.cpp b/src/liborcus/xlsx_sheet_context.cpp index 2b6192aafde6cd938e56575c784f1482085f2bde..b61b5620bcbbff42d3ffe8b0ba429101b04cc550 100644 --- a/src/liborcus/xlsx_sheet_context.cpp +++ b/src/liborcus/xlsx_sheet_context.cpp @@ -244,6 +244,12 @@ void xlsx_sheet_context::start_element(xmlns_id_t ns, xml_token_t name, const xm case XML_v: xml_element_expected(parent, NS_ooxml_xlsx, XML_c); break; + case XML_is: + xml_element_expected(parent, NS_ooxml_xlsx, XML_c); + break; + case XML_t: + xml_element_expected(parent, NS_ooxml_xlsx, XML_is); + break; case XML_tableParts: xml_element_expected(parent, NS_ooxml_xlsx, XML_worksheet); break; @@ -278,6 +284,9 @@ bool xlsx_sheet_context::end_element(xmlns_id_t ns, xml_token_t name) case XML_c: end_element_cell(); break; + case XML_t: + m_cur_value = m_cur_str; + break; case XML_f: m_cur_formula.str = m_cur_str; break; @@ -791,29 +800,34 @@ void xlsx_sheet_context::push_raw_cell_value() switch (m_cur_cell_type) { - case xlsx_ct_shared_string: - { - // string cell - size_t str_id = to_long(m_cur_value); - m_sheet.set_string(m_cur_row, m_cur_col, str_id); - } - break; - case xlsx_ct_numeric: - { - // value cell - double val = to_double(m_cur_value); - m_sheet.set_value(m_cur_row, m_cur_col, val); - } + case xlsx_ct_inline_string: + // For the rare case of an inline string we have no context to tell whether it is safe to push to the pool. + // Hence, pass it to the document backend, which may handle it or not. 
+ m_sheet.set_string(m_cur_row, m_cur_col, m_cur_value); break; - case xlsx_ct_boolean: - { - // boolean cell - bool val = to_long(m_cur_value) != 0; - m_sheet.set_bool(m_cur_row, m_cur_col, val); - } - break; - default: - warn("unhanlded cell content type"); + case xlsx_ct_shared_string: + { + // string cell + size_t str_id = to_long(m_cur_value); + m_sheet.set_string(m_cur_row, m_cur_col, str_id); + } + break; + case xlsx_ct_numeric: + { + // value cell + double val = to_double(m_cur_value); + m_sheet.set_value(m_cur_row, m_cur_col, val); + } + break; + case xlsx_ct_boolean: + { + // boolean cell + bool val = to_long(m_cur_value) != 0; + m_sheet.set_bool(m_cur_row, m_cur_col, val); + } + break; + default: + warn("unhanlded cell content type"); } } diff --git a/src/liborcus/xlsx_sheet_context_test.cpp b/src/liborcus/xlsx_sheet_context_test.cpp index a116b78556daa23df5bae0e5959368b59a33d125..50b49ad872172c8c5e0152ca09da6fe408b8cafa 100644 --- a/src/liborcus/xlsx_sheet_context_test.cpp +++ b/src/liborcus/xlsx_sheet_context_test.cpp @@ -85,6 +85,14 @@ public: } }; +const string_view str_value = "Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque " + "faucibus ex sapien vitae pellentesque sem placerat. In id cursus" + " mi pretium tellus duis convallis. Tempus leo eu aenean sed diam" + " urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendu" + "m egestas. Iaculis massa nisl malesuada lacinia integer nunc pos" + "uere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad l"
+ "itora torquent per conubia nostra inceptos himenaeos."; + class mock_sheet : public import_sheet { mock_array_formula m_array_formula; @@ -104,6 +112,12 @@ public: assert(val == true); } + virtual void set_string(row_t row, col_t col, std::string_view s) override { + assert(row == -1); + assert(col == 0); + assert(s == str_value); + } + virtual iface::import_array_formula* get_array_formula() override { return &m_array_formula; @@ -227,6 +241,35 @@ void test_array_formula() context.end_element(ns, elem); } +void test_cell_string() +{ + mock_sheet sheet; + mock_ref_resolver resolver; + session_context cxt(std::make_unique()); + config opt(format_t::xlsx); + opt.structure_check = false; + + orcus::xlsx_sheet_context context(cxt, orcus::ooxml_tokens, 0, resolver, sheet); + context.set_config(opt); + + orcus::xmlns_id_t ns = NS_ooxml_xlsx; + + orcus::xml_token_attrs_t inline_attrs; + inline_attrs.push_back(xml_token_attr_t(ns, XML_t, "inlineStr", false)); + context.start_element(ns, XML_c, inline_attrs); + + { + xml_token_attrs_t val_attrs; + context.start_element(ns, XML_is, val_attrs); + context.start_element(ns, XML_t, val_attrs); + context.characters(str_value, false); + context.end_element(ns, XML_t); + context.end_element(ns, XML_is); + } + + context.end_element(ns, XML_c); +} + void test_hidden_col() { mock_sheet2 sheet; @@ -274,6 +317,7 @@ int main() { test_cell_value(); test_cell_bool(); + test_cell_string(); test_array_formula(); test_hidden_col(); test_hidden_row(); diff --git a/src/test/mock_spreadsheet.cpp b/src/test/mock_spreadsheet.cpp index 38b3c731e7427cdd4b8ea33c40f1ac3da750c112..c5e07eeb7e9a88afcd90acac0cc053ca9b15fd2f 100644 --- a/src/test/mock_spreadsheet.cpp +++ b/src/test/mock_spreadsheet.cpp @@ -297,6 +297,11 @@ void import_sheet::set_string(row_t, col_t, string_id_t) assert(false); } +void import_sheet::set_string(row_t, col_t, std::string_view) +{ + assert(false); +} + void import_sheet::set_format(row_t, col_t, size_t) { 
assert(false);