mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
start porting benchmarks
This commit is contained in:
parent
74bfdb6f7d
commit
a8be9fff32
23
benchmarks/bufzip.cpp
Normal file
23
benchmarks/bufzip.cpp
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
#include <xlnt/xlnt.hpp>
|
||||||
|
#include <xlnt/serialization/xml_document.hpp>
|
||||||
|
#include <xlnt/serialization/xml_node.hpp>
|
||||||
|
#include <xlnt/serialization/xml_serializer.hpp>
|
||||||
|
|
||||||
|
void standard()
|
||||||
|
{
|
||||||
|
xlnt::xml_document doc;
|
||||||
|
|
||||||
|
for (int i = 0; i < 1000000; i++)
|
||||||
|
{
|
||||||
|
doc.add_child("test");
|
||||||
|
}
|
||||||
|
|
||||||
|
xlnt::zip_file archive;
|
||||||
|
archive.writestr("sheet.xml", doc.to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
standard();
|
||||||
|
return 0;
|
||||||
|
}
|
BIN
benchmarks/files/large.xlsx
Normal file
BIN
benchmarks/files/large.xlsx
Normal file
Binary file not shown.
BIN
benchmarks/files/very_large.xlsx
Normal file
BIN
benchmarks/files/very_large.xlsx
Normal file
Binary file not shown.
65
benchmarks/memory.cpp
Normal file
65
benchmarks/memory.cpp
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#include<mach/mach.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <xlnt/xlnt.hpp>
|
||||||
|
|
||||||
|
#include "../tests/helpers/path_helper.hpp"
|
||||||
|
|
||||||
|
// Reports task_basic_info::virtual_size for the current process.
//
// Only implemented for Apple platforms (via Mach task_info); every other
// platform, and any task_info failure, yields 0. The result is saturated at
// the largest int because virtual_size is an unsigned, pointer-width quantity:
// a plain conversion to int could wrap to a negative or zero value.
int calc_memory_usage()
{
#ifdef __APPLE__
    struct task_basic_info t_info;
    mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;

    const kern_return_t status = task_info(mach_task_self(), TASK_BASIC_INFO,
        reinterpret_cast<task_info_t>(&t_info), &t_info_count);

    if (status != KERN_SUCCESS)
    {
        return 0;
    }

    // Largest value representable in int, computed without extra headers.
    const unsigned int int_max = static_cast<unsigned int>(-1) >> 1;

    if (t_info.virtual_size > int_max)
    {
        return static_cast<int>(int_max);
    }

    return static_cast<int>(t_info.virtual_size);
#else
    return 0;
#endif
}
|
||||||
|
|
||||||
|
void test_memory_use()
|
||||||
|
{
|
||||||
|
// Naive test that assumes memory use will never be more than 120 % of
|
||||||
|
// that for first 50 rows
|
||||||
|
auto current_folder = PathHelper::GetExecutableDirectory();
|
||||||
|
auto src = current_folder + "rks/files/very_large.xlsx";
|
||||||
|
|
||||||
|
xlnt::workbook wb;
|
||||||
|
wb.load(src);
|
||||||
|
auto ws = wb.get_active_sheet();
|
||||||
|
|
||||||
|
int initial_use = 0;
|
||||||
|
int n = 0;
|
||||||
|
|
||||||
|
for (auto line : ws.rows())
|
||||||
|
{
|
||||||
|
if (n % 50 == 0)
|
||||||
|
{
|
||||||
|
auto use = calc_memory_usage();
|
||||||
|
|
||||||
|
if (initial_use == 0)
|
||||||
|
{
|
||||||
|
initial_use = use;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(use / initial_use < 1.2);
|
||||||
|
std::cout << n << " " << use << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
n++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
test_memory_use();
|
||||||
|
}
|
131
benchmarks/profiling.cpp
Normal file
131
benchmarks/profiling.cpp
Normal file
|
@ -0,0 +1,131 @@
|
||||||
|
from io import BytesIO
|
||||||
|
from lxml.etree import xmlfile
|
||||||
|
import os
|
||||||
|
from random import randint
|
||||||
|
|
||||||
|
from openpyxl import Workbook
|
||||||
|
from openpyxl.xml.functions import XMLGenerator
|
||||||
|
|
||||||
|
def make_worksheet():
    """Return the active sheet of a new Workbook after appending 1000 rows.

    Every appended row is a fresh list holding the integers 0..99.
    """
    workbook = Workbook()
    sheet = workbook.active
    appended = 0
    while appended < 1000:
        sheet.append([value for value in range(100)])
        appended += 1
    return sheet
|
||||||
|
|
||||||
|
|
||||||
|
def lxml_writer(ws=None):
    """Pass the rows of ``ws`` to ``write_rows`` using an lxml ``xmlfile``.

    When no worksheet is supplied, one is built with ``make_worksheet()``.
    The output goes to a local ``BytesIO`` buffer that is not returned.
    """
    from openpyxl.writer.lxml_worksheet import write_rows

    sheet = make_worksheet() if ws is None else ws

    buffer = BytesIO()
    with xmlfile(buffer) as xml_out:
        write_rows(xml_out, sheet)
    # Uncomment to inspect the generated output:
    #with open("lxml_writer.xml", "wb") as dump:
        #dump.write(buffer.getvalue())
    #sheet.parent.save("lxml_writer.xlsx")
|
||||||
|
|
||||||
|
|
||||||
|
def make_dump_worksheet():
    """Create a write-only Workbook and return a sheet newly created in it."""
    return Workbook(write_only=True).create_sheet()
|
||||||
|
|
||||||
|
def dump_writer(ws=None):
    """Append 1000 rows, each a fresh list of the integers 0..99, to ``ws``.

    A sheet from ``make_dump_worksheet()`` is used when ``ws`` is omitted.
    """
    sheet = ws
    if sheet is None:
        sheet = make_dump_worksheet()
    for _ in range(1000):
        sheet.append(list(range(100)))
|
||||||
|
|
||||||
|
|
||||||
|
# Grid dimensions used by the styled-sheet benchmark.
COLUMNS = 100
ROWS = 1000

# Bit flags tested against each entry of formatData by styled_sheet().
BOLD = 1 << 0
ITALIC = 1 << 1
UNDERLINE = 1 << 2
RED_BG = 1 << 3

# ROWS x COLUMNS table of per-cell flag values; every entry starts as None.
formatData = [[None for _ in range(COLUMNS)] for _ in range(ROWS)]
|
||||||
|
|
||||||
|
def generate_format_data():
    """Overwrite every entry of ``formatData`` with ``randint(1, 15)``.

    The table is filled in place, row by row and column by column.
    """
    for row in range(ROWS):
        cells = formatData[row]
        for col in range(COLUMNS):
            cells[col] = randint(1, 15)
|
||||||
|
|
||||||
|
|
||||||
|
def styled_sheet():
    """Fill a ROWS x COLUMNS sheet with the value 1 and style every cell.

    Each cell's font and fill are derived from the flag value stored for it
    in the module-level ``formatData`` table: BOLD, ITALIC and UNDERLINE
    select font attributes, RED_BG selects the solid red fill. The table
    initially holds None, so it must be filled (see
    ``generate_format_data()``) before this is called. The workbook is not
    saved.
    """
    from openpyxl import Workbook
    from openpyxl.styles import Font, Style, PatternFill, Color, colors

    wb = Workbook()
    ws = wb.active
    ws.title = 'Test 1'

    red_fill = PatternFill(fill_type='solid', fgColor=Color(colors.RED), bgColor=Color(colors.RED))

    for row in range(ROWS):
        row_flags = formatData[row]
        for col in range(COLUMNS):
            # Read the flag value once instead of re-indexing per test.
            flags = row_flags[col]
            cell = ws.cell(row=row+1, column=col+1)
            cell.value = 1
            font = {}
            if flags & BOLD:
                font['bold'] = True
            if flags & ITALIC:
                font['italic'] = True
            if flags & UNDERLINE:
                font['underline'] = 'single'
            # Only build a default fill when the shared red one is not used.
            fill = red_fill if flags & RED_BG else PatternFill()
            cell.style = Style(font=Font(**font), fill=fill)

    #wb.save(get_output_path('test_openpyxl_style_std_pregen.xlsx'))
|
||||||
|
|
||||||
|
|
||||||
|
def read_workbook():
    """Load ``files/very_large.xlsx`` from this script's directory and return it."""
    from openpyxl import load_workbook

    here = os.path.dirname(__file__)
    return load_workbook(os.path.join(here, "files", "very_large.xlsx"))
|
||||||
|
|
||||||
|
|
||||||
|
def rows(wb):
    """Visit every cell of the active sheet of ``wb`` and print the cell count.

    Cells are counted one by one, so an empty sheet prints ``0 cells`` and
    rows of differing length are counted correctly.
    """
    ws = wb.active
    visited = 0
    for row in ws.iter_rows():
        for _cell in row:
            visited += 1
    print(visited, "cells")
|
||||||
|
|
||||||
|
|
||||||
|
def col_index1():
    """Call ``get_column_letter`` for every index from 1 through 18278."""
    from openpyxl.cell import get_column_letter

    index = 1
    while index < 18279:
        get_column_letter(index)
        index += 1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Sample use
|
||||||
|
import cProfile
|
||||||
|
ws = make_worksheet()
|
||||||
|
cProfile.run("profiling.lxml_writer(ws)", sort="tottime")
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # One scenario is profiled per run: enable the line(s) for the scenario of
    # interest and leave the others commented out.
    import cProfile

    ws = make_worksheet()

    #wb = read_workbook()
    #cProfile.run("rows(wb)", sort="tottime")

    #cProfile.run("make_worksheet()", sort="tottime")

    #cProfile.run("lxml_writer(ws)", sort="tottime")

    #generate_format_data()
    #cProfile.run("styled_sheet()", sort="tottime")

    #ws = make_dump_worksheet()
    #cProfile.run("dump_writer(ws)", sort="tottime")

    cProfile.run("col_index1()", sort="tottime")
|
45
benchmarks/reader.cpp
Normal file
45
benchmarks/reader.cpp
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import timeit
|
||||||
|
|
||||||
|
import openpyxl
|
||||||
|
|
||||||
|
|
||||||
|
def reader(optimised):
    """
    Loop through all cells of ``files/very_large.xlsx`` (next to this script)
    and print how many cells were visited.

    ``optimised`` is forwarded to ``openpyxl.load_workbook`` as
    ``use_iterators``. Cells are counted one by one, so an empty sheet prints
    ``0 cells`` and rows of differing length are counted correctly.
    """
    folder = os.path.split(__file__)[0]
    src = os.path.join(folder, "files", "very_large.xlsx")
    wb = openpyxl.load_workbook(src, use_iterators=optimised)
    ws = wb.active
    visited = 0
    for row in ws.iter_rows():
        for _cell in row:
            visited += 1
    print(visited, "cells")
|
||||||
|
|
||||||
|
def timer(fn):
    """
    Time ``fn`` with ``timeit``, first with the argument False (standard
    workbook) and then with True (optimised workbook).

    Each mode is run three times with a single call per run and the fastest
    run is kept. ``fn`` is looked up by name in ``__main__``. Returns the pair
    ``(standard, optimised)`` of best times.
    """
    print("lxml", openpyxl.LXML)
    fn_name = fn.__name__
    setup_stmt = "from __main__ import {0}".format(fn_name)
    best_times = []
    for opt in (False, True):
        label = "optimised" if opt else "not optimised"
        print("Workbook is {0}".format(label))
        runs = timeit.repeat("{0}({1})".format(fn_name, opt),
                             setup=setup_stmt,
                             number=1,
                             repeat=3)
        fastest = min(runs)
        print("{0:.2f}s".format(fastest))
        best_times.append(fastest)
    std, opt = best_times
    print("Optimised takes {0:.2%} time\n".format(opt/std))
    return std, opt
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: time the reader in both workbook modes.
if __name__ == "__main__":
    timer(reader)
|
31
benchmarks/speed.cpp
Normal file
31
benchmarks/speed.cpp
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
"Benchmark some different implementations for cells"
|
||||||
|
|
||||||
|
from openpyxl.compat import range
|
||||||
|
|
||||||
|
from openpyxl.cell import Cell
|
||||||
|
from openpyxl.cell.read_only import ReadOnlyCell
|
||||||
|
from memory_profiler import memory_usage
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
def standard():
    """Create a ``Cell`` and return it so the caller can keep it alive."""
    return Cell(None, "A", "0", None)
|
||||||
|
|
||||||
|
def iterative():
    """Create a ``ReadOnlyCell`` and return it so the caller can keep it alive."""
    return ReadOnlyCell(None, None, None, 'n')
|
||||||
|
|
||||||
|
def dictionary():
    """Create a plain dict stand-in for a cell and return it to the caller."""
    return {'ws':'None', 'col':'A', 'row':0, 'value':1}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Baseline reading taken before any benchmark objects are created.
    initial_use = memory_usage(proc=-1, interval=1)[0]
    for fn in (standard, iterative, dictionary):
        t = time.time()
        container = []
        for i in range(1000000):
            container.append(fn())
        # __name__ is available on both Python 2 and 3; func_name is not.
        print("{0} {1} MB, {2:.2f}s".format(
            fn.__name__,
            memory_usage(proc=-1, interval=1)[0] - initial_use,
            time.time() - t))
|
118
benchmarks/styles.cpp
Normal file
118
benchmarks/styles.cpp
Normal file
|
@ -0,0 +1,118 @@
|
||||||
|
#include <chrono>
#include <iterator>
#include <random>

#include <xlnt/xlnt.hpp>
|
||||||
|
|
||||||
|
// Returns an iterator to a uniformly chosen element of [start, end).
//
// An empty range has nothing to choose from, so `end` is returned unchanged
// rather than building a distribution over the invalid interval [0, -1].
// The random engine is a function-local static, seeded once from
// std::random_device and reused by later calls of the same instantiation.
template<typename Iter>
Iter random_choice(Iter start, Iter end) {
    using difference_type = typename std::iterator_traits<Iter>::difference_type;

    static std::random_device rd;
    static std::mt19937 gen(rd());

    const difference_type count = std::distance(start, end);

    if (count <= 0)
    {
        return end;
    }

    // Draw the offset in the iterator's own difference type so that large
    // ranges are not narrowed to int.
    std::uniform_int_distribution<difference_type> dis(0, count - 1);
    std::advance(start, dis(gen));

    return start;
}
|
||||||
|
|
||||||
|
std::vector<xlnt::style> generate_all_styles()
|
||||||
|
{
|
||||||
|
std::vector<xlnt::style> styles;
|
||||||
|
|
||||||
|
std::vector<xlnt::vertical_alignment> vertical_alignments = {xlnt::vertical_alignment::center, xlnt::vertical_alignment::justify, xlnt::vertical_alignment::top, xlnt::vertical_alignment::bottom};
|
||||||
|
std::vector<xlnt::horizontal_alignment> horizontal_alignments = {xlnt::horizontal_alignment::center, xlnt::horizontal_alignment::center_continuous, xlnt::horizontal_alignment::general, xlnt::horizontal_alignment::justify, xlnt::horizontal_alignment::left, xlnt::horizontal_alignment::right};
|
||||||
|
std::vector<std::string> font_names = {"Calibri", "Tahoma", "Arial", "Times New Roman"};
|
||||||
|
std::vector<int> font_sizes = {11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35};
|
||||||
|
std::vector<bool> bold_options = {true, false};
|
||||||
|
std::vector<xlnt::font::underline_style> underline_options = {xlnt::font::underline_style::single, xlnt::font::underline_style::none};
|
||||||
|
std::vector<bool> italic_options = {true, false};
|
||||||
|
|
||||||
|
for(auto vertical_alignment : vertical_alignments)
|
||||||
|
{
|
||||||
|
for(auto horizontal_alignment : horizontal_alignments)
|
||||||
|
{
|
||||||
|
for(auto name : font_names)
|
||||||
|
{
|
||||||
|
for(auto size : font_sizes)
|
||||||
|
{
|
||||||
|
for(auto bold : bold_options)
|
||||||
|
{
|
||||||
|
for(auto underline : underline_options)
|
||||||
|
{
|
||||||
|
for(auto italic : italic_options)
|
||||||
|
{
|
||||||
|
xlnt::style s;
|
||||||
|
|
||||||
|
xlnt::font f;
|
||||||
|
f.set_name(name);
|
||||||
|
f.set_size(size);
|
||||||
|
f.set_italic(italic);
|
||||||
|
f.set_underline(underline);
|
||||||
|
f.set_bold(bold);
|
||||||
|
s.set_font(f);
|
||||||
|
|
||||||
|
xlnt::alignment a;
|
||||||
|
a.set_vertical(vertical_alignment);
|
||||||
|
a.set_horizontal(horizontal_alignment);
|
||||||
|
s.set_alignment(a);
|
||||||
|
|
||||||
|
styles.push_back(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return styles;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates a workbook with optimized write enabled, adds a sheet to it and
// appends n - 1 rows. Each row is built from {{0, style}}, where style is
// picked at random from `styles`.
xlnt::workbook optimized_workbook(const std::vector<xlnt::style> &styles, int n)
{
    xlnt::workbook wb;
    wb.set_optimized_write(true);
    auto worksheet = wb.create_sheet();

    int row = 1;

    while (row < n)
    {
        auto style = *random_choice(styles.begin(), styles.end());
        worksheet.append({{0, style}});
        ++row;
    }

    return wb;
}
|
||||||
|
|
||||||
|
// Creates a default workbook and, for idx = 1 .. n - 1, picks one of its
// sheets at random, sets the cell at column 1 / row idx + 1 to 0 and gives it
// a style picked at random from `styles`.
xlnt::workbook non_optimized_workbook(const std::vector<xlnt::style> &styles, int n)
{
    xlnt::workbook wb;

    for (int idx = 1; idx < n; ++idx)
    {
        auto sheet = *random_choice(wb.begin(), wb.end());
        auto target = sheet.get_cell({1, static_cast<xlnt::row_t>(idx) + 1});

        target.set_value(0);
        target.set_style(*random_choice(styles.begin(), styles.end()));
    }

    return wb;
}
|
||||||
|
|
||||||
|
// Saves wb to the path f and prints how long the save took, in seconds,
// together with n (the style count supplied by the caller).
//
// The duration is measured with a monotonic clock around the save call only,
// and the report ends with a newline so consecutive reports do not run
// together.
void to_profile(xlnt::workbook &wb, const std::string &f, int n)
{
    const auto started = std::chrono::steady_clock::now();
    wb.save(f);
    const std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - started;

    std::cout << "took " << elapsed.count() << "s for " << n << " styles" << std::endl;
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
auto styles = generate_all_styles();
|
||||||
|
int n = 10000;
|
||||||
|
|
||||||
|
for(auto func : {&optimized_workbook, &non_optimized_workbook})
|
||||||
|
{
|
||||||
|
std::cout << (func == &optimized_workbook ? "optimized_workbook" : "non_optimized_workbook") << std::endl;
|
||||||
|
auto wb = func(styles, n);
|
||||||
|
std::string f = "/tmp/xlnt.xlsx";
|
||||||
|
to_profile(wb, f, n);
|
||||||
|
}
|
||||||
|
}
|
83
benchmarks/writer.cpp
Normal file
83
benchmarks/writer.cpp
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
#include <chrono>
|
||||||
|
#include <xlnt/xlnt.hpp>
|
||||||
|
#include "path_helper.hpp"
|
||||||
|
|
||||||
|
// Returns a monotonic timestamp in whole milliseconds, counted from the first
// call to this function.
//
// Only the difference between two return values is meaningful. Counting from
// a process-local origin keeps the value within int's range; milliseconds
// since the Unix epoch (about 1.7e12) do not fit, and converting such a
// double to int is undefined behaviour.
int current_time()
{
    using clock = std::chrono::steady_clock;
    static const clock::time_point origin = clock::now();

    const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(clock::now() - origin);
    return static_cast<int>(elapsed.count());
}
|
||||||
|
|
||||||
|
// Create a worksheet with variable width rows. Because data must be
|
||||||
|
// serialised row by row it is often the width of the rows which is most
|
||||||
|
// important.
|
||||||
|
void writer(bool optimized, int cols, int rows)
|
||||||
|
{
|
||||||
|
xlnt::workbook wb;
|
||||||
|
// wb.set_optimized_write(optimized);
|
||||||
|
|
||||||
|
auto ws = wb.create_sheet();
|
||||||
|
|
||||||
|
std::vector<int> row;
|
||||||
|
|
||||||
|
for(int i = 0; i < cols; i++)
|
||||||
|
{
|
||||||
|
row.push_back(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int index = 0; index < rows; index++)
|
||||||
|
{
|
||||||
|
if ((index + 1) % (rows / 10) == 0)
|
||||||
|
{
|
||||||
|
std::string progress = std::string((index + 1) / (1 + rows / 10), '.');
|
||||||
|
std::cout << "\r" << progress;
|
||||||
|
std::cout.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
ws.append(row);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
auto filename = PathHelper::GetExecutableDirectory() + "s/files/large.xlsx";
|
||||||
|
wb.save(filename);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a timeit call to a function and pass in keyword arguments.
|
||||||
|
// The function is called twice, once using the standard workbook, then with the optimised one.
|
||||||
|
// Time from the best of three is taken.
|
||||||
|
std::pair<int, int> timer(std::function<void(bool, int, int)> fn, int cols, int rows)
|
||||||
|
{
|
||||||
|
const int repeat = 3;
|
||||||
|
int min_time_standard = std::numeric_limits<int>::max();
|
||||||
|
int min_time_optimized = std::numeric_limits<int>::max();
|
||||||
|
|
||||||
|
for(bool opt : {false, true})
|
||||||
|
{
|
||||||
|
std::cout << cols << " cols " << rows << " rows, Worksheet is " << (opt ? "optimised" : "not optimised") << std::endl;
|
||||||
|
auto &time = opt ? min_time_optimized : min_time_standard;
|
||||||
|
|
||||||
|
for(int i = 0; i < repeat; i++)
|
||||||
|
{
|
||||||
|
auto start = current_time();
|
||||||
|
fn(opt, cols, rows);
|
||||||
|
time = std::min(current_time() - start, time);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double ratio = min_time_optimized / static_cast<double>(min_time_standard) * 100;
|
||||||
|
std::cout << "Optimised takes " << ratio << "% time" << std::endl;
|
||||||
|
|
||||||
|
return {min_time_standard, min_time_optimized};
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
timer(&writer, 100, 100);
|
||||||
|
timer(&writer, 1000, 100);
|
||||||
|
timer(&writer, 4000, 100);
|
||||||
|
timer(&writer, 8192, 100);
|
||||||
|
timer(&writer, 10, 10000);
|
||||||
|
timer(&writer, 4000, 1000);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user