start porting benchmarks

This commit is contained in:
Thomas Fussell 2016-02-06 10:04:41 -05:00
parent 74bfdb6f7d
commit a8be9fff32
9 changed files with 496 additions and 0 deletions

23
benchmarks/bufzip.cpp Normal file
View File

@ -0,0 +1,23 @@
#include <xlnt/xlnt.hpp>
#include <xlnt/serialization/xml_document.hpp>
#include <xlnt/serialization/xml_node.hpp>
#include <xlnt/serialization/xml_serializer.hpp>
void standard()
{
xlnt::xml_document doc;
for (int i = 0; i < 1000000; i++)
{
doc.add_child("test");
}
xlnt::zip_file archive;
archive.writestr("sheet.xml", doc.to_string());
}
// Entry point: runs the single benchmark; falling off the end of main
// reports success (exit status 0).
int main()
{
    standard();
}

BIN
benchmarks/files/large.xlsx Normal file

Binary file not shown.

Binary file not shown.

65
benchmarks/memory.cpp Normal file
View File

@ -0,0 +1,65 @@
#include <cassert>
#ifdef __APPLE__
#include<mach/mach.h>
#endif
#include <xlnt/xlnt.hpp>
#include "../tests/helpers/path_helper.hpp"
// Reports the virtual size of the current task as returned by
// task_info(TASK_BASIC_INFO) on Apple platforms.
//
// Returns 0 when the task_info query fails, and always returns 0 on other
// platforms, where no measurement is implemented.
int calc_memory_usage()
{
#ifdef __APPLE__
    struct task_basic_info info;
    mach_msg_type_number_t info_count = TASK_BASIC_INFO_COUNT;

    const kern_return_t status = task_info(mach_task_self(), TASK_BASIC_INFO,
        reinterpret_cast<task_info_t>(&info), &info_count);

    if (status == KERN_SUCCESS)
    {
        return info.virtual_size;
    }

    return 0;
#else
    return 0;
#endif
}
// Naive memory benchmark: loads a workbook, iterates over the rows of its
// active sheet and samples calc_memory_usage() every 50 rows. Each sample is
// printed as "<row index> <usage>" and asserted to stay below 120 % of the
// first sample.
void test_memory_use()
{
    auto current_folder = PathHelper::GetExecutableDirectory();
    // NOTE(review): the "rks/" prefix presumably completes a directory name
    // returned by GetExecutableDirectory() - confirm against PathHelper.
    auto src = current_folder + "rks/files/very_large.xlsx";

    xlnt::workbook wb;
    wb.load(src);
    auto ws = wb.get_active_sheet();

    int initial_use = 0;
    int n = 0;

    for (auto line : ws.rows())
    {
        (void)line; // the row itself is not inspected, only iterated

        if (n % 50 == 0)
        {
            auto use = calc_memory_usage();

            if (initial_use == 0)
            {
                initial_use = use;
            }

            // calc_memory_usage() yields 0 when no measurement is available;
            // skip the check in that case instead of dividing by zero.
            if (initial_use != 0)
            {
                // Compare as floating point: an integer quotient would only
                // fail once usage had doubled, not at 120 %.
                assert(static_cast<double>(use) / initial_use < 1.2);
            }

            std::cout << n << " " << use << std::endl;
        }

        n++;
    }
}
// Entry point: runs the memory benchmark and reports success.
int main()
{
    test_memory_use();
    return 0;
}

131
benchmarks/profiling.cpp Normal file
View File

@ -0,0 +1,131 @@
from io import BytesIO
from lxml.etree import xmlfile
import os
from random import randint
from openpyxl import Workbook
from openpyxl.xml.functions import XMLGenerator
def make_worksheet():
    """
    Create a new Workbook and return its active worksheet after appending
    1000 rows, each holding the integers 0 through 99.
    """
    workbook = Workbook()
    sheet = workbook.active
    for _ in range(1000):
        values = list(range(100))
        sheet.append(values)
    return sheet
def lxml_writer(ws=None):
    """
    Write the rows of ``ws`` through openpyxl's ``write_rows`` into an lxml
    ``xmlfile`` backed by a BytesIO buffer.

    When ``ws`` is None a worksheet from make_worksheet() is used. Nothing is
    returned; the buffer is discarded.
    """
    from openpyxl.writer.lxml_worksheet import write_rows

    sheet = make_worksheet() if ws is None else ws
    buffer = BytesIO()
    with xmlfile(buffer) as xml_out:
        write_rows(xml_out, sheet)
    # Optional dumps of the result, disabled by default:
    # with open("lxml_writer.xml", "wb") as dump:
    #     dump.write(buffer.getvalue())
    # sheet.parent.save("lxml_writer.xlsx")
def make_dump_worksheet():
    """Return a sheet newly created on a ``Workbook(write_only=True)``."""
    workbook = Workbook(write_only=True)
    return workbook.create_sheet()
def dump_writer(ws=None):
    """
    Append 1000 rows, each a fresh list of the integers 0 through 99, to
    ``ws``. When ``ws`` is None a sheet from make_dump_worksheet() is used.
    """
    target = make_dump_worksheet() if ws is None else ws
    appended = 0
    while appended < 1000:
        target.append(list(range(100)))
        appended += 1
# Dimensions of the grid used by the styled-sheet benchmark.
COLUMNS, ROWS = 100, 1000

# Bit flags that are tested against the entries of formatData.
BOLD = 1 << 0
ITALIC = 1 << 1
UNDERLINE = 1 << 2
RED_BG = 1 << 3

# ROWS x COLUMNS grid (independent row lists); every entry starts as None.
formatData = [[None for _ in range(COLUMNS)] for _ in range(ROWS)]
def generate_format_data():
    """
    Overwrite every entry of the module-level ``formatData`` grid, row by row,
    with a random integer between 1 and 15 inclusive.
    """
    for row_index in range(ROWS):
        cells = formatData[row_index]
        for col_index in range(COLUMNS):
            cells[col_index] = randint(1, 15)
def styled_sheet():
    """
    Fill a ROWS x COLUMNS worksheet titled 'Test 1' with the value 1 and give
    every cell a style derived from the matching ``formatData`` entry.

    Each entry is tested against the BOLD, ITALIC and UNDERLINE flags to build
    the font, and against RED_BG to choose a solid red fill over a default
    ``PatternFill``. Entries must be integers: ``formatData`` starts out filled
    with None, so it has to be populated (generate_format_data() does this)
    before calling. The workbook is neither saved nor returned.
    """
    from openpyxl import Workbook
    from openpyxl.styles import Font, Style, PatternFill, Color, colors

    wb = Workbook()
    ws = wb.active
    ws.title = 'Test 1'

    # Shared by every cell that carries the RED_BG flag.
    red_fill = PatternFill(fill_type='solid', fgColor=Color(colors.RED), bgColor=Color(colors.RED))

    for row in range(ROWS):
        for col in range(COLUMNS):
            cell = ws.cell(row=row+1, column=col+1)
            cell.value = 1

            flags = formatData[row][col]
            font = {}
            if flags & BOLD:
                font['bold'] = True
            if flags & ITALIC:
                font['italic'] = True
            if flags & UNDERLINE:
                font['underline'] = 'single'

            # Only build a default fill when the shared red one is not used.
            fill = red_fill if flags & RED_BG else PatternFill()

            cell.style = Style(font=Font(**font), fill=fill)

    #wb.save(get_output_path('test_openpyxl_style_std_pregen.xlsx'))
def read_workbook():
    """
    Load ``files/very_large.xlsx`` from the directory containing this script
    and return the resulting workbook.
    """
    from openpyxl import load_workbook

    here = os.path.split(__file__)[0]
    return load_workbook(os.path.join(here, "files", "very_large.xlsx"))
def rows(wb):
    """
    Visit every cell of the active sheet of ``wb`` via ``iter_rows()`` and
    print the number of cells seen as "<count> cells".

    Cells are counted one by one, so an empty sheet prints 0 and rows of
    differing lengths are counted correctly.
    """
    ws = wb.active
    cell_count = 0
    for row in ws.iter_rows():
        for _cell in row:
            cell_count += 1
    print(cell_count, "cells")
def col_index1():
    """
    Call ``get_column_letter`` for every index from 1 to 18278 inclusive;
    the results are discarded.
    """
    from openpyxl.cell import get_column_letter

    first_index, stop_index = 1, 18279
    for column_index in range(first_index, stop_index):
        get_column_letter(column_index)
"""
Sample use
import cProfile
ws = make_worksheet()
cProfile.run("profiling.lxml_writer(ws)", sort="tottime")
"""
if __name__ == '__main__':
    import cProfile

    ws = make_worksheet()

    # Other profiling targets, kept for reference (uncomment to run):
    # wb = read_workbook()
    # cProfile.run("rows(wb)", sort="tottime")
    # cProfile.run("make_worksheet()", sort="tottime")
    # cProfile.run("lxml_writer(ws)", sort="tottime")
    # generate_format_data()
    # cProfile.run("styled_sheet()", sort="tottime")
    # ws = make_dump_worksheet()
    # cProfile.run("dump_writer(ws)", sort="tottime")

    # The statement that is currently profiled, sorted by total time.
    profiled_statement = "col_index1()"
    cProfile.run(profiled_statement, sort="tottime")

45
benchmarks/reader.cpp Normal file
View File

@ -0,0 +1,45 @@
import os
import sys
import timeit
import openpyxl
def reader(optimised):
    """
    Loop through all cells of a workbook

    Loads ``files/very_large.xlsx`` from this script's directory, passing
    ``optimised`` as ``use_iterators``, visits every cell of the active sheet
    and prints the number of cells seen as "<count> cells".

    Cells are counted one by one, so an empty sheet prints 0 and rows of
    differing lengths are counted correctly.
    """
    folder = os.path.split(__file__)[0]
    src = os.path.join(folder, "files", "very_large.xlsx")
    wb = openpyxl.load_workbook(src, use_iterators=optimised)
    ws = wb.active
    cell_count = 0
    for row in ws.iter_rows():
        for _cell in row:
            cell_count += 1
    print(cell_count, "cells")
def timer(fn):
    """
    Time ``fn`` with timeit, first with the argument False (standard
    workbook) and then with True (optimised workbook).

    ``fn`` is imported by name from ``__main__``. Each mode is run three
    times with a single call per run and the fastest run is kept. Returns the
    pair ``(standard_time, optimised_time)`` in seconds.
    """
    print("lxml", openpyxl.LXML)

    fn_name = fn.__name__
    setup = "from __main__ import {0}".format(fn_name)

    best_times = []
    for optimised in (False, True):
        label = "optimised" if optimised else "not optimised"
        print("Workbook is {0}".format(label))

        statement = "{0}({1})".format(fn_name, optimised)
        runs = timeit.repeat(statement, setup=setup, number=1, repeat=3)

        fastest = min(runs)
        print("{0:.2f}s".format(fastest))
        best_times.append(fastest)

    std, opt = best_times
    print("Optimised takes {0:.2%} time\n".format(opt/std))
    return std, opt
# Script entry point: time the reader benchmark in both workbook modes.
if __name__ == '__main__':
    benchmark = reader
    timer(benchmark)

31
benchmarks/speed.cpp Normal file
View File

@ -0,0 +1,31 @@
"Benchmark some different implementations for cells"
from openpyxl.compat import range
from openpyxl.cell import Cell
from openpyxl.cell.read_only import ReadOnlyCell
from memory_profiler import memory_usage
import time
def standard():
    """
    Create one regular ``Cell`` and return it, so that callers collecting the
    results keep the object alive.
    """
    return Cell(None, "A", "0", None)
def iterative():
    """
    Create one ``ReadOnlyCell`` and return it, so that callers collecting the
    results keep the object alive.
    """
    return ReadOnlyCell(None, None, None, 'n')
def dictionary():
    """
    Create a plain dict standing in for a cell and return it, so that callers
    collecting the results keep the object alive.
    """
    return {'ws':'None', 'col':'A', 'row':0, 'value':1}
if __name__ == '__main__':
    # Baseline memory reading taken before any benchmark runs.
    initial_use = memory_usage(proc=-1, interval=1)[0]

    for fn in (standard, iterative, dictionary):
        t = time.time()
        container = []
        for i in range(1000000):
            container.append(fn())

        # fn.__name__ is available on both Python 2 and 3 (func_name is
        # Python 2 only).
        print("{0} {1} MB, {2:.2f}s".format(
            fn.__name__,
            memory_usage(proc=-1, interval=1)[0] - initial_use,
            time.time() - t))

118
benchmarks/styles.cpp Normal file
View File

@ -0,0 +1,118 @@
#include <chrono>
#include <iostream>
#include <iterator>
#include <random>
#include <string>
#include <vector>
#include <xlnt/xlnt.hpp>
// Returns an iterator to a uniformly chosen element of [start, end).
// For an empty range there is nothing to choose, so `end` is returned.
//
// The generator is a function-local static seeded once from
// std::random_device.
template<typename Iter>
Iter random_choice(Iter start, Iter end) {
    static std::random_device rd;
    static std::mt19937 gen(rd());

    auto count = std::distance(start, end);

    // A distribution over [0, -1] would violate the a <= b precondition of
    // std::uniform_int_distribution.
    if (count <= 0)
    {
        return end;
    }

    // Draw in the iterator's own difference type so that large ranges are
    // not narrowed to int.
    std::uniform_int_distribution<decltype(count)> dis(0, count - 1);
    std::advance(start, dis(gen));
    return start;
}
std::vector<xlnt::style> generate_all_styles()
{
std::vector<xlnt::style> styles;
std::vector<xlnt::vertical_alignment> vertical_alignments = {xlnt::vertical_alignment::center, xlnt::vertical_alignment::justify, xlnt::vertical_alignment::top, xlnt::vertical_alignment::bottom};
std::vector<xlnt::horizontal_alignment> horizontal_alignments = {xlnt::horizontal_alignment::center, xlnt::horizontal_alignment::center_continuous, xlnt::horizontal_alignment::general, xlnt::horizontal_alignment::justify, xlnt::horizontal_alignment::left, xlnt::horizontal_alignment::right};
std::vector<std::string> font_names = {"Calibri", "Tahoma", "Arial", "Times New Roman"};
std::vector<int> font_sizes = {11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35};
std::vector<bool> bold_options = {true, false};
std::vector<xlnt::font::underline_style> underline_options = {xlnt::font::underline_style::single, xlnt::font::underline_style::none};
std::vector<bool> italic_options = {true, false};
for(auto vertical_alignment : vertical_alignments)
{
for(auto horizontal_alignment : horizontal_alignments)
{
for(auto name : font_names)
{
for(auto size : font_sizes)
{
for(auto bold : bold_options)
{
for(auto underline : underline_options)
{
for(auto italic : italic_options)
{
xlnt::style s;
xlnt::font f;
f.set_name(name);
f.set_size(size);
f.set_italic(italic);
f.set_underline(underline);
f.set_bold(bold);
s.set_font(f);
xlnt::alignment a;
a.set_vertical(vertical_alignment);
a.set_horizontal(horizontal_alignment);
s.set_alignment(a);
styles.push_back(s);
}
}
}
}
}
}
}
return styles;
}
// Creates a workbook with optimized write enabled, adds a sheet and appends
// n - 1 rows to it. Each row is built from {0, style}, where the style is
// picked at random from `styles`.
xlnt::workbook optimized_workbook(const std::vector<xlnt::style> &styles, int n)
{
    xlnt::workbook workbook;
    workbook.set_optimized_write(true);

    auto sheet = workbook.create_sheet();

    for(int row_number = 1; row_number < n; ++row_number)
    {
        auto picked = *random_choice(styles.begin(), styles.end());
        sheet.append({{0, picked}});
    }

    return workbook;
}
// Creates a default workbook and, for idx = 1 .. n - 1, picks one of its
// sheets at random, sets the cell in column 1, row idx + 1 to 0 and assigns
// it a style picked at random from `styles`.
xlnt::workbook non_optimized_workbook(const std::vector<xlnt::style> &styles, int n)
{
    xlnt::workbook workbook;

    for(int index = 1; index < n; ++index)
    {
        auto sheet = *random_choice(workbook.begin(), workbook.end());

        auto target = sheet.get_cell({1, static_cast<xlnt::row_t>(index) + 1});
        target.set_value(0);

        // Drawn after the sheet choice so the random sequence is consumed in
        // the same order on every iteration.
        target.set_style(*random_choice(styles.begin(), styles.end()));
    }

    return workbook;
}
// Saves `wb` to the path `f` and prints how long the save took, in seconds,
// followed by `n` as the number of styles.
void to_profile(xlnt::workbook &wb, const std::string &f, int n)
{
    // steady_clock is monotonic, so the difference is a valid elapsed time.
    const auto started = std::chrono::steady_clock::now();
    wb.save(f);
    const std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - started;

    std::cout << "took " << elapsed.count() << "s for " << n << " styles" << std::endl;
}
int main()
{
auto styles = generate_all_styles();
int n = 10000;
for(auto func : {&optimized_workbook, &non_optimized_workbook})
{
std::cout << (func == &optimized_workbook ? "optimized_workbook" : "non_optimized_workbook") << std::endl;
auto wb = func(styles, n);
std::string f = "/tmp/xlnt.xlsx";
to_profile(wb, f, n);
}
}

83
benchmarks/writer.cpp Normal file
View File

@ -0,0 +1,83 @@
#include <chrono>
#include <xlnt/xlnt.hpp>
#include "path_helper.hpp"
// Returns a monotonic timestamp in whole milliseconds, counted from the
// first call to this function in the process. Only the difference between
// two results is meaningful.
//
// The value is relative to a process-local origin because milliseconds
// since the Unix epoch do not fit in an int.
int current_time()
{
    using clock = std::chrono::steady_clock;

    static const clock::time_point origin = clock::now();

    const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(clock::now() - origin);
    return static_cast<int>(elapsed.count());
}
// Create a worksheet with variable width rows. Because data must be
// serialised row by row it is often the width of the rows which is most
// important.
//
// Appends `rows` rows of the integers 0 .. cols - 1 to a new sheet, printing
// a line of progress dots along the way, then saves the workbook.
// `optimized` is accepted but has no effect while the optimized-write call
// below stays commented out.
void writer(bool optimized, int cols, int rows)
{
    (void)optimized;

    xlnt::workbook wb;
    // wb.set_optimized_write(optimized);
    auto ws = wb.create_sheet();

    std::vector<int> row;

    for(int i = 0; i < cols; i++)
    {
        row.push_back(i);
    }

    // Progress is refreshed every tenth of the rows. With fewer than ten
    // rows rows / 10 is zero, so fall back to 1 to avoid a modulo by zero.
    const int progress_step = rows >= 10 ? rows / 10 : 1;

    for(int index = 0; index < rows; index++)
    {
        if ((index + 1) % progress_step == 0)
        {
            std::string progress = std::string((index + 1) / (1 + rows / 10), '.');
            // '\r' rewrites the same console line with the longer dot string.
            std::cout << "\r" << progress;
            std::cout.flush();
        }

        ws.append(row);
    }

    std::cout << std::endl;

    // NOTE(review): the "s/" prefix presumably completes a directory name
    // returned by GetExecutableDirectory() - confirm against PathHelper.
    auto filename = PathHelper::GetExecutableDirectory() + "s/files/large.xlsx";
    wb.save(filename);
}
// Times `fn(opt, cols, rows)` first with opt == false (standard) and then
// with opt == true (optimised). Each mode is run three times and the
// shortest run, measured with current_time(), is kept. Prints a header per
// mode and the optimised/standard ratio as a percentage, and returns the
// pair {standard minimum, optimised minimum}.
std::pair<int, int> timer(std::function<void(bool, int, int)> fn, int cols, int rows)
{
    const int repeat = 3;

    // Prints the header for one mode and returns its fastest run.
    const auto fastest_run = [&](bool opt)
    {
        std::cout << cols << " cols " << rows << " rows, Worksheet is " << (opt ? "optimised" : "not optimised") << std::endl;

        int best = std::numeric_limits<int>::max();

        for(int run = 0; run < repeat; ++run)
        {
            const auto begin = current_time();
            fn(opt, cols, rows);
            best = std::min(current_time() - begin, best);
        }

        return best;
    };

    const int min_time_standard = fastest_run(false);
    const int min_time_optimized = fastest_run(true);

    const double ratio = min_time_optimized / static_cast<double>(min_time_standard) * 100;
    std::cout << "Optimised takes " << ratio << "% time" << std::endl;

    return {min_time_standard, min_time_optimized};
}
// Entry point: times the writer benchmark for a fixed list of sheet
// dimensions, in the order given.
int main()
{
    struct dimensions
    {
        int cols;
        int rows;
    };

    const dimensions cases[] = {
        {100, 100},
        {1000, 100},
        {4000, 100},
        {8192, 100},
        {10, 10000},
        {4000, 1000}
    };

    for(const auto &current : cases)
    {
        timer(&writer, current.cols, current.rows);
    }

    return 0;
}