mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
start porting benchmarks
This commit is contained in:
parent
74bfdb6f7d
commit
a8be9fff32
23
benchmarks/bufzip.cpp
Normal file
23
benchmarks/bufzip.cpp
Normal file
|
@ -0,0 +1,23 @@
|
|||
#include <xlnt/xlnt.hpp>
|
||||
#include <xlnt/serialization/xml_document.hpp>
|
||||
#include <xlnt/serialization/xml_node.hpp>
|
||||
#include <xlnt/serialization/xml_serializer.hpp>
|
||||
|
||||
void standard()
|
||||
{
|
||||
xlnt::xml_document doc;
|
||||
|
||||
for (int i = 0; i < 1000000; i++)
|
||||
{
|
||||
doc.add_child("test");
|
||||
}
|
||||
|
||||
xlnt::zip_file archive;
|
||||
archive.writestr("sheet.xml", doc.to_string());
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
standard();
|
||||
return 0;
|
||||
}
|
BIN
benchmarks/files/large.xlsx
Normal file
BIN
benchmarks/files/large.xlsx
Normal file
Binary file not shown.
BIN
benchmarks/files/very_large.xlsx
Normal file
BIN
benchmarks/files/very_large.xlsx
Normal file
Binary file not shown.
65
benchmarks/memory.cpp
Normal file
65
benchmarks/memory.cpp
Normal file
|
@ -0,0 +1,65 @@
|
|||
#include <cassert>
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include<mach/mach.h>
|
||||
#endif
|
||||
|
||||
#include <xlnt/xlnt.hpp>
|
||||
|
||||
#include "../tests/helpers/path_helper.hpp"
|
||||
|
||||
// Reports the virtual size of the current task. Only Apple builds perform a
// real query (through task_info); every other build, and a failed query,
// yields 0.
int calc_memory_usage()
{
#ifdef __APPLE__
    struct task_basic_info info;
    mach_msg_type_number_t info_count = TASK_BASIC_INFO_COUNT;

    const auto status = task_info(mach_task_self(), TASK_BASIC_INFO,
        reinterpret_cast<task_info_t>(&info), &info_count);

    if (status == KERN_SUCCESS)
    {
        // virtual_size is converted to this function's int return type.
        return static_cast<int>(info.virtual_size);
    }

    return 0;
#else
    return 0;
#endif
}
|
||||
|
||||
void test_memory_use()
|
||||
{
|
||||
// Naive test that assumes memory use will never be more than 120 % of
|
||||
// that for first 50 rows
|
||||
auto current_folder = PathHelper::GetExecutableDirectory();
|
||||
auto src = current_folder + "rks/files/very_large.xlsx";
|
||||
|
||||
xlnt::workbook wb;
|
||||
wb.load(src);
|
||||
auto ws = wb.get_active_sheet();
|
||||
|
||||
int initial_use = 0;
|
||||
int n = 0;
|
||||
|
||||
for (auto line : ws.rows())
|
||||
{
|
||||
if (n % 50 == 0)
|
||||
{
|
||||
auto use = calc_memory_usage();
|
||||
|
||||
if (initial_use == 0)
|
||||
{
|
||||
initial_use = use;
|
||||
}
|
||||
|
||||
assert(use / initial_use < 1.2);
|
||||
std::cout << n << " " << use << std::endl;
|
||||
}
|
||||
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test_memory_use();
|
||||
}
|
131
benchmarks/profiling.cpp
Normal file
131
benchmarks/profiling.cpp
Normal file
|
@ -0,0 +1,131 @@
|
|||
from io import BytesIO
|
||||
from lxml.etree import xmlfile
|
||||
import os
|
||||
from random import randint
|
||||
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.xml.functions import XMLGenerator
|
||||
|
||||
def make_worksheet():
    """Return the active sheet of a new Workbook after appending 1000 rows.

    Every appended row is the list of integers 0..99.
    """
    workbook = Workbook()
    sheet = workbook.active
    remaining = 1000
    while remaining > 0:
        sheet.append(list(range(100)))
        remaining -= 1
    return sheet
|
||||
|
||||
|
||||
def lxml_writer(ws=None):
    """Pass ``ws`` to openpyxl's lxml ``write_rows`` with a BytesIO-backed xmlfile.

    When ``ws`` is None a worksheet from make_worksheet() is used instead.
    """
    from openpyxl.writer.lxml_worksheet import write_rows

    worksheet = make_worksheet() if ws is None else ws
    buffer = BytesIO()
    with xmlfile(buffer) as writer:
        write_rows(writer, worksheet)
    # Uncomment to keep the generated output for inspection:
    #with open("lxml_writer.xml", "wb") as dump:
        #dump.write(buffer.getvalue())
    #worksheet.parent.save("lxml_writer.xlsx")
|
||||
|
||||
|
||||
def make_dump_worksheet():
    """Return a sheet created on a new ``Workbook(write_only=True)``."""
    workbook = Workbook(write_only=True)
    return workbook.create_sheet()
|
||||
|
||||
def dump_writer(ws=None):
    """Append 1000 rows, each the list of integers 0..99, to ``ws``.

    When ``ws`` is None a sheet from make_dump_worksheet() is used instead.
    """
    target = ws if ws is not None else make_dump_worksheet()
    for _ in range(1000):
        target.append(list(range(100)))
|
||||
|
||||
|
||||
# Dimensions of the formatting grid.
COLUMNS = 100
ROWS = 1000

# Bit flags that are tested against each entry of formatData.
BOLD = 1 << 0
ITALIC = 1 << 1
UNDERLINE = 1 << 2
RED_BG = 1 << 3

# ROWS x COLUMNS grid; every entry starts out as None.
formatData = [[None for _ in range(COLUMNS)] for _ in range(ROWS)]
|
||||
|
||||
def generate_format_data():
    """Overwrite every entry of formatData with a random integer from 1 to 15."""
    for row_index in range(ROWS):
        current_row = formatData[row_index]
        for col_index in range(COLUMNS):
            current_row[col_index] = randint(1, 15)
|
||||
|
||||
|
||||
def styled_sheet():
    """Create a workbook and style every cell of a ROWS x COLUMNS grid.

    Each cell receives the value 1 and a Style whose font options (bold,
    italic, underline) and fill (red or default) are selected by the bit
    flags stored for that cell in formatData.
    """
    from openpyxl import Workbook
    from openpyxl.styles import Font, Style, PatternFill, Color, colors

    wb = Workbook()
    ws = wb.active
    ws.title = 'Test 1'

    red_fill = PatternFill(fill_type='solid', fgColor=Color(colors.RED), bgColor=Color(colors.RED))
    empty_fill = PatternFill()
    styles = []
    # pregenerate relevant styles
    for row_index in range(ROWS):
        _row = []
        for col_index in range(COLUMNS):
            cell = ws.cell(row=row_index + 1, column=col_index + 1)
            cell.value = 1
            flags = formatData[row_index][col_index]
            font_options = {}
            fill = PatternFill()
            if flags & BOLD:
                font_options['bold'] = True
            if flags & ITALIC:
                font_options['italic'] = True
            if flags & UNDERLINE:
                font_options['underline'] = 'single'
            if flags & RED_BG:
                fill = red_fill
            cell.style = Style(font=Font(**font_options), fill=fill)

    #wb.save(get_output_path('test_openpyxl_style_std_pregen.xlsx'))
|
||||
|
||||
|
||||
def read_workbook():
    """Load ``files/very_large.xlsx`` from this script's folder and return it."""
    from openpyxl import load_workbook

    script_dir = os.path.split(__file__)[0]
    workbook_path = os.path.join(script_dir, "files", "very_large.xlsx")
    return load_workbook(workbook_path)
|
||||
|
||||
|
||||
def rows(wb):
    """Visit every cell of the active sheet of ``wb`` and print the cell count.

    The count is accumulated cell by cell rather than derived from the last
    row and column index, so an empty sheet prints "0 cells" instead of
    failing on unbound loop variables, and rows of differing length are
    counted correctly.
    """
    ws = wb.active
    cell_count = 0
    for row in ws.iter_rows():
        for _cell in row:
            cell_count += 1
    print(cell_count, "cells")
|
||||
|
||||
|
||||
def col_index1():
    """Call get_column_letter for every index from 1 up to and including 18278."""
    from openpyxl.cell import get_column_letter

    index = 1
    while index < 18279:
        c = get_column_letter(index)
        index += 1
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Sample use
|
||||
import cProfile
|
||||
ws = make_worksheet()
|
||||
cProfile.run("profiling.lxml_writer(ws)", sort="tottime")
|
||||
"""
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import cProfile

    # Only the column-letter benchmark is active; the commented lines below
    # are the other scenarios that can be profiled instead.
    ws = make_worksheet()
    #wb = read_workbook()
    #cProfile.run("rows(wb)", sort="tottime")
    #cProfile.run("make_worksheet()", sort="tottime")
    #cProfile.run("lxml_writer(ws)", sort="tottime")
    #generate_format_data()
    #cProfile.run("styled_sheet()", sort="tottime")
    #ws = make_dump_worksheet()
    #cProfile.run("dump_writer(ws)", sort="tottime")
    cProfile.run("col_index1()", sort="tottime")
|
45
benchmarks/reader.cpp
Normal file
45
benchmarks/reader.cpp
Normal file
|
@ -0,0 +1,45 @@
|
|||
import os
|
||||
import sys
|
||||
import timeit
|
||||
|
||||
import openpyxl
|
||||
|
||||
|
||||
def reader(optimised):
    """
    Loop through all cells of a workbook and print how many were visited.

    ``optimised`` is forwarded to ``openpyxl.load_workbook`` as
    ``use_iterators``. Cells are counted one by one, so an empty sheet prints
    "0 cells" instead of failing on unbound loop variables, and rows of
    differing length are counted correctly.
    """
    folder = os.path.split(__file__)[0]
    src = os.path.join(folder, "files", "very_large.xlsx")
    wb = openpyxl.load_workbook(src, use_iterators=optimised)
    ws = wb.active
    cell_count = 0
    for row in ws.iter_rows():
        for _cell in row:
            cell_count += 1
    print(cell_count, "cells")
|
||||
|
||||
def timer(fn):
    """
    Time ``fn`` through timeit, first with ``False`` and then with ``True``.

    For each argument the call is repeated three times (one execution per
    repeat) and the fastest time is printed and kept. Finally the optimised
    time is printed as a percentage of the standard time.

    Returns the tuple ``(standard_time, optimised_time)``.
    """
    print("lxml", openpyxl.LXML)
    best_times = []
    setup_stmt = "from __main__ import {0}".format(fn.__name__)
    for optimised in (False, True):
        label = "optimised" if optimised else "not optimised"
        print("Workbook is {0}".format(label))
        timings = timeit.repeat(
            "{0}({1})".format(fn.__name__, optimised),
            setup=setup_stmt,
            number=1,
            repeat=3,
        )
        fastest = min(timings)
        print("{0:.2f}s".format(fastest))
        best_times.append(fastest)
    std, opt = best_times
    print("Optimised takes {0:.2%} time\n".format(opt / std))
    return std, opt
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Time the reader with both argument values when run as a script.
    timer(reader)
|
31
benchmarks/speed.cpp
Normal file
31
benchmarks/speed.cpp
Normal file
|
@ -0,0 +1,31 @@
|
|||
"Benchmark some different implementations for cells"
|
||||
|
||||
from openpyxl.compat import range
|
||||
|
||||
from openpyxl.cell import Cell
|
||||
from openpyxl.cell.read_only import ReadOnlyCell
|
||||
from memory_profiler import memory_usage
|
||||
import time
|
||||
|
||||
|
||||
def standard():
    """Create a Cell and return it.

    The object is returned (rather than dropped) so that a caller collecting
    the results keeps every instance alive while memory use is measured.
    """
    return Cell(None, "A", "0", None)
|
||||
|
||||
def iterative():
    """Create a ReadOnlyCell and return it.

    The object is returned (rather than dropped) so that a caller collecting
    the results keeps every instance alive while memory use is measured.
    """
    return ReadOnlyCell(None, None, None, 'n')
|
||||
|
||||
def dictionary():
    """Create a plain dict with 'ws', 'col', 'row' and 'value' keys and return it.

    The dict is returned (rather than dropped) so that a caller collecting
    the results keeps every instance alive while memory use is measured.
    """
    return {'ws': 'None', 'col': 'A', 'row': 0, 'value': 1}
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Baseline memory reading, subtracted from each later reading.
    initial_use = memory_usage(proc=-1, interval=1)[0]
    for fn in (standard, iterative, dictionary):
        t = time.time()
        container = []
        for i in range(1000000):
            container.append(fn())
        # __name__ is available on functions in both Python 2 and Python 3,
        # whereas func_name exists only in Python 2.
        print("{0} {1} MB, {2:.2f}s".format(
            fn.__name__,
            memory_usage(proc=-1, interval=1)[0] - initial_use,
            time.time() - t))
|
118
benchmarks/styles.cpp
Normal file
118
benchmarks/styles.cpp
Normal file
|
@ -0,0 +1,118 @@
|
|||
#include <iterator>
|
||||
#include <random>
|
||||
#include <xlnt/xlnt.hpp>
|
||||
|
||||
// Returns an iterator to a uniformly chosen element of [start, end).
// An empty range has nothing to choose from, so `end` is returned unchanged
// (a distribution over [0, -1] would be undefined behaviour).
// The engine is seeded once from std::random_device and reused by every call
// of the same instantiation.
template<typename Iter>
Iter random_choice(Iter start, Iter end) {
    using difference_type = typename std::iterator_traits<Iter>::difference_type;

    const difference_type count = std::distance(start, end);

    if (count <= 0)
    {
        return end;
    }

    static std::random_device rd;
    static std::mt19937 gen(rd());

    // Draw the offset in the iterator's own difference type so the index is
    // not forced through int.
    std::uniform_int_distribution<difference_type> dis(0, count - 1);
    std::advance(start, dis(gen));

    return start;
}
|
||||
|
||||
std::vector<xlnt::style> generate_all_styles()
|
||||
{
|
||||
std::vector<xlnt::style> styles;
|
||||
|
||||
std::vector<xlnt::vertical_alignment> vertical_alignments = {xlnt::vertical_alignment::center, xlnt::vertical_alignment::justify, xlnt::vertical_alignment::top, xlnt::vertical_alignment::bottom};
|
||||
std::vector<xlnt::horizontal_alignment> horizontal_alignments = {xlnt::horizontal_alignment::center, xlnt::horizontal_alignment::center_continuous, xlnt::horizontal_alignment::general, xlnt::horizontal_alignment::justify, xlnt::horizontal_alignment::left, xlnt::horizontal_alignment::right};
|
||||
std::vector<std::string> font_names = {"Calibri", "Tahoma", "Arial", "Times New Roman"};
|
||||
std::vector<int> font_sizes = {11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35};
|
||||
std::vector<bool> bold_options = {true, false};
|
||||
std::vector<xlnt::font::underline_style> underline_options = {xlnt::font::underline_style::single, xlnt::font::underline_style::none};
|
||||
std::vector<bool> italic_options = {true, false};
|
||||
|
||||
for(auto vertical_alignment : vertical_alignments)
|
||||
{
|
||||
for(auto horizontal_alignment : horizontal_alignments)
|
||||
{
|
||||
for(auto name : font_names)
|
||||
{
|
||||
for(auto size : font_sizes)
|
||||
{
|
||||
for(auto bold : bold_options)
|
||||
{
|
||||
for(auto underline : underline_options)
|
||||
{
|
||||
for(auto italic : italic_options)
|
||||
{
|
||||
xlnt::style s;
|
||||
|
||||
xlnt::font f;
|
||||
f.set_name(name);
|
||||
f.set_size(size);
|
||||
f.set_italic(italic);
|
||||
f.set_underline(underline);
|
||||
f.set_bold(bold);
|
||||
s.set_font(f);
|
||||
|
||||
xlnt::alignment a;
|
||||
a.set_vertical(vertical_alignment);
|
||||
a.set_horizontal(horizontal_alignment);
|
||||
s.set_alignment(a);
|
||||
|
||||
styles.push_back(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return styles;
|
||||
}
|
||||
|
||||
// Creates a workbook with optimized write enabled, adds a sheet and appends
// n - 1 rows to it. Each row is built from {{0, style}} with a style picked
// at random from `styles`.
xlnt::workbook optimized_workbook(const std::vector<xlnt::style> &styles, int n)
{
    xlnt::workbook wb;
    wb.set_optimized_write(true);
    auto sheet = wb.create_sheet();

    int row_number = 1;
    while (row_number < n)
    {
        auto style = *random_choice(styles.begin(), styles.end());
        sheet.append({{0, style}});
        ++row_number;
    }

    return wb;
}
|
||||
|
||||
// Creates a default workbook and, for idx = 1 .. n - 1, picks one of its
// sheets at random, sets the cell in column 1, row idx + 1 to the value 0 and
// gives it a style picked at random from `styles`.
xlnt::workbook non_optimized_workbook(const std::vector<xlnt::style> &styles, int n)
{
    xlnt::workbook wb;

    int idx = 1;
    while (idx < n)
    {
        auto sheet = *random_choice(wb.begin(), wb.end());
        auto target = sheet.get_cell({1, static_cast<xlnt::row_t>(idx) + 1});
        target.set_value(0);
        target.set_style(*random_choice(styles.begin(), styles.end()));
        ++idx;
    }

    return wb;
}
|
||||
|
||||
// Saves `wb` to the path `f` and prints "took <t>s for <n> styles".
// No time is measured: the printed duration is always the constant 0 below.
void to_profile(xlnt::workbook &wb, const std::string &f, int n)
{
    const int elapsed = 0; // placeholder, see the disabled "-time.time()"

    wb.save(f);

    std::cout << "took " << elapsed << "s for " << n << " styles";
}
|
||||
|
||||
int main()
|
||||
{
|
||||
auto styles = generate_all_styles();
|
||||
int n = 10000;
|
||||
|
||||
for(auto func : {&optimized_workbook, &non_optimized_workbook})
|
||||
{
|
||||
std::cout << (func == &optimized_workbook ? "optimized_workbook" : "non_optimized_workbook") << std::endl;
|
||||
auto wb = func(styles, n);
|
||||
std::string f = "/tmp/xlnt.xlsx";
|
||||
to_profile(wb, f, n);
|
||||
}
|
||||
}
|
83
benchmarks/writer.cpp
Normal file
83
benchmarks/writer.cpp
Normal file
|
@ -0,0 +1,83 @@
|
|||
#include <chrono>
|
||||
#include <xlnt/xlnt.hpp>
|
||||
#include "path_helper.hpp"
|
||||
|
||||
// Returns the number of whole milliseconds that have elapsed since the first
// call to this function, read from a monotonic clock.
//
// Milliseconds since the Unix epoch do not fit in an int, and converting an
// out-of-range double to int is undefined behaviour. Callers only subtract
// two readings from each other, so measuring from a process-local origin
// keeps their results meaningful while staying well inside the int range.
int current_time()
{
    using clock = std::chrono::steady_clock;

    // Initialised on the first call; every reading is relative to it.
    static const clock::time_point origin = clock::now();

    const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(clock::now() - origin);

    return static_cast<int>(elapsed.count());
}
|
||||
|
||||
// Create a worksheet with variable width rows. Because data must be
|
||||
// serialised row by row it is often the width of the rows which is most
|
||||
// important.
|
||||
void writer(bool optimized, int cols, int rows)
|
||||
{
|
||||
xlnt::workbook wb;
|
||||
// wb.set_optimized_write(optimized);
|
||||
|
||||
auto ws = wb.create_sheet();
|
||||
|
||||
std::vector<int> row;
|
||||
|
||||
for(int i = 0; i < cols; i++)
|
||||
{
|
||||
row.push_back(i);
|
||||
}
|
||||
|
||||
for(int index = 0; index < rows; index++)
|
||||
{
|
||||
if ((index + 1) % (rows / 10) == 0)
|
||||
{
|
||||
std::string progress = std::string((index + 1) / (1 + rows / 10), '.');
|
||||
std::cout << "\r" << progress;
|
||||
std::cout.flush();
|
||||
}
|
||||
|
||||
ws.append(row);
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
|
||||
auto filename = PathHelper::GetExecutableDirectory() + "s/files/large.xlsx";
|
||||
wb.save(filename);
|
||||
}
|
||||
|
||||
// Create a timeit call to a function and pass in keyword arguments.
|
||||
// The function is called twice, once using the standard workbook, then with the optimised one.
|
||||
// Time from the best of three is taken.
|
||||
std::pair<int, int> timer(std::function<void(bool, int, int)> fn, int cols, int rows)
|
||||
{
|
||||
const int repeat = 3;
|
||||
int min_time_standard = std::numeric_limits<int>::max();
|
||||
int min_time_optimized = std::numeric_limits<int>::max();
|
||||
|
||||
for(bool opt : {false, true})
|
||||
{
|
||||
std::cout << cols << " cols " << rows << " rows, Worksheet is " << (opt ? "optimised" : "not optimised") << std::endl;
|
||||
auto &time = opt ? min_time_optimized : min_time_standard;
|
||||
|
||||
for(int i = 0; i < repeat; i++)
|
||||
{
|
||||
auto start = current_time();
|
||||
fn(opt, cols, rows);
|
||||
time = std::min(current_time() - start, time);
|
||||
}
|
||||
}
|
||||
|
||||
double ratio = min_time_optimized / static_cast<double>(min_time_standard) * 100;
|
||||
std::cout << "Optimised takes " << ratio << "% time" << std::endl;
|
||||
|
||||
return {min_time_standard, min_time_optimized};
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
timer(&writer, 100, 100);
|
||||
timer(&writer, 1000, 100);
|
||||
timer(&writer, 4000, 100);
|
||||
timer(&writer, 8192, 100);
|
||||
timer(&writer, 10, 10000);
|
||||
timer(&writer, 4000, 1000);
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user