From 7cc9898dab08fdf7fd63846ef898ac90ca11e4d6 Mon Sep 17 00:00:00 2001 From: Thomas Fussell Date: Fri, 4 Aug 2017 10:12:25 -0700 Subject: [PATCH] move most of the pyarrow logic to python wrapper, use pyarrow's cmake setup.py code --- CMakeLists.txt | 2 +- python/CMakeLists.txt | 41 +++ {xlntpyarrow => python}/python_streambuf.hpp | 0 python/setup.py | 285 ++++++++++++++++++ .../xlntpyarrow.lib.cpp | 2 +- .../test.py => python/xlntpyarrow/__init__.py | 4 +- source/CMakeLists.txt | 2 +- xlntpyarrow/CMakeLists.txt | 27 -- 8 files changed, 330 insertions(+), 33 deletions(-) create mode 100644 python/CMakeLists.txt rename {xlntpyarrow => python}/python_streambuf.hpp (100%) create mode 100644 python/setup.py rename xlntpyarrow/xlntpyarrow.cpp => python/xlntpyarrow.lib.cpp (99%) rename xlntpyarrow/test.py => python/xlntpyarrow/__init__.py (97%) delete mode 100644 xlntpyarrow/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 748b74e4..482fea53 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ if(TESTS) endif() if(PYTHON) - add_subdirectory(xlntpyarrow) + add_subdirectory(python) endif() add_subdirectory(source) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt new file mode 100644 index 00000000..bca4324d --- /dev/null +++ b/python/CMakeLists.txt @@ -0,0 +1,41 @@ +cmake_minimum_required(VERSION 3.2) +project(xlntpyarrow) + +if(NOT COMBINED_PROJECT) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../source ${CMAKE_CURRENT_BINARY_DIR}/source) +endif() + +add_subdirectory(../third-party/pybind11 pybind11) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/../cmake") + +find_package(Arrow) + +if(NOT ARROW_FOUND) + message(FATAL_ERROR "Arrow not found.") +endif() + +pybind11_add_module(xlntpyarrowlib xlntpyarrow.lib.cpp) + +set_target_properties(xlntpyarrowlib PROPERTIES + OUTPUT_NAME "lib") + +if(MSVC) + target_compile_definitions(xlntpyarrowlib PRIVATE _CRT_SECURE_NO_WARNINGS=1) +endif() + +target_include_directories(xlntpyarrowlib + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../source + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../third-party/pybind11/include + PRIVATE ${ARROW_INCLUDE_DIR}) +target_link_libraries(xlntpyarrowlib + PRIVATE xlnt + PRIVATE ${ARROW_SHARED_IMP_LIB} + PRIVATE ${ARROW_PYTHON_SHARED_IMP_LIB}) + +if(NOT STATIC) + add_custom_command(TARGET xlntpyarrowlib POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + $ + $) +endif() diff --git a/xlntpyarrow/python_streambuf.hpp b/python/python_streambuf.hpp similarity index 100% rename from xlntpyarrow/python_streambuf.hpp rename to python/python_streambuf.hpp diff --git a/python/setup.py b/python/setup.py new file mode 100644 index 00000000..29c03ab1 --- /dev/null +++ b/python/setup.py @@ -0,0 +1,285 @@ +import glob +import os +import os.path as osp +import re +import shutil +import sys + +from distutils.command.clean import clean as _clean +from distutils import sysconfig + +from os.path import join as pjoin + +from setuptools import setup, Extension, Distribution +from setuptools.command.build_ext import build_ext as _build_ext + +# Check if we're running 64-bit Python +is_64_bit = sys.maxsize > 2**32 + +class clean(_clean): + def run(self): + _clean.run(self) + for x in []: + try: + os.remove(x) + except OSError: + pass + +class build_ext(_build_ext): + def run(self): + self._run_cmake() + + # adapted from cmake_build_ext in dynd-python + # github.com/libdynd/dynd-python + + description = "Build the C-extensions for arrow" + user_options = ([('extra-cmake-args=', None, 'extra arguments for CMake'), + ('build-type=', None, 'build type (debug or release)')] + + _build_ext.user_options) + + def initialize_options(self): + _build_ext.initialize_options(self) + self.extra_cmake_args = os.environ.get('XLNTPYARROW_CMAKE_OPTIONS', '') + self.build_type = os.environ.get('XLNTPYARROW_BUILD_TYPE', 'debug').lower() + + self.cmake_cxxflags = os.environ.get('XLNTPYARROW_CXXFLAGS', '') + + if sys.platform == 'win32': + # Cannot do debug builds in Windows unless Python itself is a debug + # build + if not hasattr(sys, 'gettotalrefcount'): + self.build_type = 'release' + + def _run_cmake(self): + # The directory containing this setup.py + source = osp.dirname(osp.abspath(__file__)) + + # The staging directory for the module being built + build_temp = pjoin(os.getcwd(), self.build_temp) + build_lib = os.path.join(os.getcwd(), self.build_lib) + + # Change to the build directory + saved_cwd = os.getcwd() + if not os.path.isdir(self.build_temp): + self.mkpath(self.build_temp) + os.chdir(self.build_temp) + + # Detect if we built elsewhere + if os.path.isfile('CMakeCache.txt'): + cachefile = open('CMakeCache.txt', 'r') + cachedir = re.search('CMAKE_CACHEFILE_DIR:INTERNAL=(.*)', + cachefile.read()).group(1) + cachefile.close() + if (cachedir != build_temp): + return + + static_lib_option = '' + + cmake_options = [ + '-DPYTHON_EXECUTABLE=%s' % sys.executable, + static_lib_option, + ] + + if len(self.cmake_cxxflags) > 0: + cmake_options.append('-DPYARROW_CXXFLAGS="{0}"' + .format(self.cmake_cxxflags)) + + # ARROW-1090: work around CMake rough edges + if 'ARROW_HOME' in os.environ and sys.platform != 'win32': + pkg_config = pjoin(os.environ['ARROW_HOME'], 'lib', + 'pkgconfig') + os.environ['PKG_CONFIG_PATH'] = pkg_config + del os.environ['ARROW_HOME'] + + cmake_options.append('-DCMAKE_BUILD_TYPE={0}' + .format(self.build_type.lower())) + + if sys.platform != 'win32': + cmake_command = (['cmake', self.extra_cmake_args] + + cmake_options + [source]) + + print("-- Runnning cmake for pyarrow") + self.spawn(cmake_command) + print("-- Finished cmake for pyarrow") + args = ['make'] + if os.environ.get('PYARROW_BUILD_VERBOSE', '0') == '1': + args.append('VERBOSE=1') + + if 'PYARROW_PARALLEL' in os.environ: + args.append('-j{0}'.format(os.environ['PYARROW_PARALLEL'])) + print("-- Running cmake --build for pyarrow") + self.spawn(args) + print("-- Finished cmake --build for pyarrow") + else: + import shlex + cmake_generator = 'Visual Studio 15 2017 Win64' + if not is_64_bit: + raise RuntimeError('Not supported on 32-bit Windows') + + # Generate the build files + extra_cmake_args = shlex.split(self.extra_cmake_args) + cmake_command = (['cmake'] + extra_cmake_args + + cmake_options + + [source, '-G', cmake_generator]) + if "-G" in self.extra_cmake_args: + cmake_command = cmake_command[:-2] + + print("-- Runnning cmake for pyarrow") + self.spawn(cmake_command) + print("-- Finished cmake for pyarrow") + # Do the build + print("-- Running cmake --build for pyarrow") + self.spawn(['cmake', '--build', '.', '--config', self.build_type]) + print("-- Finished cmake --build for pyarrow") + + if self.inplace: + # a bit hacky + build_lib = saved_cwd + + # Move the libraries to the place expected by the Python + # build + shared_library_prefix = 'lib' + if sys.platform == 'darwin': + shared_library_suffix = '.dylib' + elif sys.platform == 'win32': + shared_library_suffix = '.dll' + shared_library_prefix = '' + else: + shared_library_suffix = '.so' + + try: + os.makedirs(pjoin(build_lib, 'xlntpyarrow')) + except OSError: + pass + + build_prefix = self.build_type + + def move_lib(lib_name): + print('move_lib', lib_name) + lib_filename = (shared_library_prefix + lib_name + + shared_library_suffix) + print(build_prefix, sys.platform, shared_library_prefix, lib_name, shared_library_suffix, lib_filename) + # Also copy libraries with ABI/SO version suffix + if sys.platform == 'darwin': + lib_pattern = (shared_library_prefix + lib_name + + ".*" + shared_library_suffix[1:]) + libs = glob.glob(pjoin(build_prefix, lib_pattern)) + else: + libs = glob.glob(pjoin(build_prefix, lib_filename) + '*') + # Longest suffix library should be copied, all others symlinked + libs.sort(key=lambda s: -len(s)) + print(libs, libs[0]) + lib_filename = os.path.basename(libs[0]) + shutil.move(pjoin(build_prefix, lib_filename), + pjoin(build_lib, 'xlntpyarrow', lib_filename)) + for lib in libs[1:]: + filename = os.path.basename(lib) + link_name = pjoin(build_lib, 'xlntpyarrow', filename) + if not os.path.exists(link_name): + os.symlink(lib_filename, link_name) + + move_lib('xlnt') + + self._found_names = [] + built_path = self.get_ext_built('lib') + if not os.path.exists(built_path): + raise RuntimeError('pyarrow C-extension failed to build:', + os.path.abspath(built_path)) + + ext_path = pjoin(build_lib, self._get_cmake_ext_path('lib')) + if os.path.exists(ext_path): + os.remove(ext_path) + self.mkpath(os.path.dirname(ext_path)) + print('Moving built C-extension', built_path, + 'to build path', ext_path) + shutil.move(self.get_ext_built('lib'), ext_path) + self._found_names.append('lib') + + os.chdir(saved_cwd) + + def _failure_permitted(self, name): + return False + + def _get_inplace_dir(self): + pass + + def _get_cmake_ext_path(self, name): + # Get the package directory from build_py + build_py = self.get_finalized_command('build_py') + package_dir = build_py.get_package_dir('xlntpyarrow') + # This is the name of the arrow C-extension + suffix = sysconfig.get_config_var('EXT_SUFFIX') + if suffix is None: + suffix = sysconfig.get_config_var('SO') + filename = name + suffix + print(pjoin(package_dir, filename)) + return pjoin(package_dir, filename) + + def get_ext_built(self, name): + if sys.platform == 'win32': + head, tail = os.path.split(name) + suffix = sysconfig.get_config_var('SO') + return pjoin(head, self.build_type, tail + suffix) + else: + suffix = sysconfig.get_config_var('SO') + return pjoin(self.build_type, name + suffix) + + def get_names(self): + return self._found_names + + def get_outputs(self): + # Just the C extensions + # regular_exts = _build_ext.get_outputs(self) + return [self._get_cmake_ext_path(name) + for name in self.get_names()] + +long_description = """ +xlntpyarrow allows Apache Arrow tables to be written to and read from an XLSX +file efficiently using the C++ library xlnt. +""".strip() + +classifiers = [ + 'Development Status :: 5 - Production/Stable', + 'Environment :: Plugins', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Operating System :: Microsoft :: Windows', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: POSIX :: Linux', + 'Programming Language :: C', + 'Programming Language :: C++', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: Implementation :: CPython', + 'Topic :: Database', + 'Topic :: Office/Business :: Financial :: Spreadsheet', + 'Topic :: Scientific/Engineering :: Information Analysis', + 'Topic :: Software Development :: Libraries :: Python Modules' +] + +class BinaryDistribution(Distribution): + def has_ext_modules(foo): + return True + +setup( + name = 'xlntpyarrow', + version = '1.1.0', + description = 'Python library for converting Apache Arrow tables<->XLSX files', + long_description = long_description, + author = 'Thomas Fussell', + author_email = 'thomas.fussell@gmail.com', + maintainer = 'Thomas Fussell', + maintainer_email = 'thomas.fussell@gmail.com', + url = 'https://github.com/tfussell/xlnt', + download_url = 'https://github.com/tfussell/xlnt/releases', + packages = ['xlntpyarrow'], + ext_modules = [Extension('xlntpyarrow.lib', [])], + cmdclass = { + 'clean': clean, + 'build_ext': build_ext + }, + classifiers = classifiers, + distclass = BinaryDistribution, + zip_safe = False +) diff --git a/xlntpyarrow/xlntpyarrow.cpp b/python/xlntpyarrow.lib.cpp similarity index 99% rename from xlntpyarrow/xlntpyarrow.cpp rename to python/xlntpyarrow.lib.cpp index 6fd2bddb..e384d627 100644 --- a/xlntpyarrow/xlntpyarrow.cpp +++ b/python/xlntpyarrow.lib.cpp @@ -332,7 +332,7 @@ pybind11::handle read_batch(xlnt::streaming_workbook_reader &reader, return batch_handle; } -PYBIND11_MODULE(xlntpyarrow, m) +PYBIND11_MODULE(lib, m) { m.doc() = "streaming read/write interface for C++ XLSX library xlnt"; diff --git a/xlntpyarrow/test.py b/python/xlntpyarrow/__init__.py similarity index 97% rename from xlntpyarrow/test.py rename to python/xlntpyarrow/__init__.py index 508e1516..8cf24f85 100644 --- a/xlntpyarrow/test.py +++ b/python/xlntpyarrow/__init__.py @@ -1,7 +1,5 @@ import pyarrow as pa -print('pyarrow loaded') -import xlntpyarrow as xpa -print('xlntpyarrow loaded') +import xlntpyarrow.lib as xpa COLUMN_TYPE_FIELD = { xpa.Cell.Type.Number: pa.float64, diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 3ab1c3c6..05bd458c 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -26,7 +26,7 @@ if(COVERAGE) endif() if(MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /MP") elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unknown-pragmas") diff --git a/xlntpyarrow/CMakeLists.txt b/xlntpyarrow/CMakeLists.txt deleted file mode 100644 index 79bab418..00000000 --- a/xlntpyarrow/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -cmake_minimum_required(VERSION 3.2) -project(xlntpyarrow) - -add_subdirectory(../third-party/pybind11 pybind11) -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/../cmake") - -find_package(Arrow) - -if(NOT ARROW_FOUND) - message(FATAL_ERROR "Arrow not found.") -endif() - -pybind11_add_module(xlntpyarrow xlntpyarrow.cpp) - -if(MSVC) - target_compile_definitions(xlntpyarrow PRIVATE _CRT_SECURE_NO_WARNINGS=1) -endif() - -target_include_directories(xlntpyarrow - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../source - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../third-party/pybind11/include - PRIVATE ${ARROW_INCLUDE_DIR}) -target_link_libraries(xlntpyarrow - PRIVATE xlnt - PRIVATE ${ARROW_SHARED_IMP_LIB} - PRIVATE ${ARROW_PYTHON_SHARED_IMP_LIB})