diff --git a/contrib/README.md b/contrib/README.md index 28ebb7f..04614c7 100644 --- a/contrib/README.md +++ b/contrib/README.md @@ -5,10 +5,11 @@ libraries. ## Projects Sandboxed -Directory | Project | Home Page | Integration ----------- | ------------------------------------------------ | -------------------------------------------------------------- | ----------- -`jsonnet/` | Jsonnet - The Data Templating Language | [github.com/google/jsonnet](https://github.com/google/jsonnet) | CMake -`zstd/` | Zstandard - Fast real-time compression algorithm | [github.com/facebook/zstd](https://github.com/facebook/zstd) | CMake +Directory | Project | Home Page | Integration +----------- | ------------------------------------------------- | -------------------------------------------------------------------- | ----------- +`jsonnet/` | Jsonnet - The Data Templating Language | [github.com/google/jsonnet](https://github.com/google/jsonnet) | CMake +`hunspell/` | Hunspell - The most popular spellchecking library | [github.com/hunspell/hunspell](https://github.com/hunspell/hunspell) | CMake +`zstd/` | Zstandard - Fast real-time compression algorithm | [github.com/facebook/zstd](https://github.com/facebook/zstd) | CMake ## Projects Shipping with Sandboxed API Sandboxes diff --git a/contrib/hunspell/CMakeLists.txt b/contrib/hunspell/CMakeLists.txt new file mode 100644 index 0000000..8a38f85 --- /dev/null +++ b/contrib/hunspell/CMakeLists.txt @@ -0,0 +1,132 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cmake_minimum_required(VERSION 3.13..3.22)
+
+project(sapi_hunspell CXX)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED True)
+
+set(SAPI_ROOT "" CACHE PATH "Path to the Sandboxed API source tree")
+
+add_subdirectory(
+  "${SAPI_ROOT}"
+  "${CMAKE_BINARY_DIR}/sandboxed-api-build"
+  EXCLUDE_FROM_ALL
+)
+
+FetchContent_Declare(
+  libhunspell
+
+  GIT_REPOSITORY https://github.com/hunspell/hunspell.git
+  GIT_TAG        31e6d6323026a3bef12c5912ce032d88bfef2091
+)
+# Populate hunspell; configure unless config.status matches the cached SHA256.
+FetchContent_GetProperties(libhunspell)
+if(NOT libhunspell_POPULATED)
+  FetchContent_Populate(libhunspell)
+  set(libhunspell_STATUS_FILE "${libhunspell_SOURCE_DIR}/config.status")
+  if(EXISTS "${libhunspell_STATUS_FILE}")
+    file(SHA256 "${libhunspell_STATUS_FILE}" _sapi_CONFIG_STATUS)
+  endif()
+  if(NOT _sapi_CONFIG_STATUS STREQUAL "${libhunspell_CONFIG_STATUS}")
+    message("-- Configuring libhunspell...")
+    execute_process(
+      # One shell command: separate COMMANDs would run concurrently as a pipe.
+      COMMAND sh -c "autoreconf -vfi && ./configure --disable-dependency-tracking"
+      WORKING_DIRECTORY "${libhunspell_SOURCE_DIR}"
+      RESULT_VARIABLE libhunspell_config_result
+    )
+    if(NOT libhunspell_config_result EQUAL "0")
+      message(FATAL_ERROR "Configuration for libhunspell failed")
+    endif()
+    file(SHA256 "${libhunspell_SOURCE_DIR}/config.status" _sapi_CONFIG_STATUS)
+    set(libhunspell_CONFIG_STATUS "${_sapi_CONFIG_STATUS}" CACHE INTERNAL "")
+  endif()
+endif()
+
+add_library(hunspell STATIC
+  ${libhunspell_SOURCE_DIR}/src/hunspell/affentry.cxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/affentry.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/affixmgr.cxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/affixmgr.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/atypes.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/baseaffix.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/csutil.cxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/csutil.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/filemgr.cxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/filemgr.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/hashmgr.cxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/hashmgr.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/htypes.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/hunspell.cxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/hunspell.h
+  ${libhunspell_SOURCE_DIR}/src/hunspell/hunspell.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/hunzip.cxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/hunzip.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/langnum.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/phonet.cxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/phonet.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/replist.cxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/replist.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/suggestmgr.cxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/suggestmgr.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/utf_info.hxx
+  ${libhunspell_SOURCE_DIR}/src/hunspell/w_char.hxx
+)
+
+target_include_directories(hunspell PUBLIC
+  ${libhunspell_SOURCE_DIR}/src/hunspell
+)
+
+set(libhunspell_INCLUDE_DIR "${libhunspell_SOURCE_DIR}/src/hunspell")
+# Sandboxed API wrapper for the hunspell.h C functions listed under FUNCTIONS.
+add_sapi_library(
+  sapi_hunspell
+
+  FUNCTIONS
+    Hunspell_create
+    Hunspell_create_key
+    Hunspell_destroy
+
+    Hunspell_spell
+    Hunspell_get_dic_encoding
+
+    Hunspell_suggest
+    Hunspell_analyze
+
+    Hunspell_add
+    Hunspell_remove
+
+    Hunspell_free_list
+  INPUTS
+    ${libhunspell_INCLUDE_DIR}/hunspell.h
+
+  LIBRARY hunspell
+  LIBRARY_NAME Hunspell
+  NAMESPACE ""
+)
+
+target_include_directories(sapi_hunspell INTERFACE
+  "${PROJECT_BINARY_DIR}"
+)
+
+if(SAPI_ENABLE_EXAMPLES)
+  add_subdirectory(example)
+endif()
+
+if(SAPI_ENABLE_TESTS)
+  add_subdirectory(test)
+endif()
diff --git a/contrib/hunspell/example/CMakeLists.txt b/contrib/hunspell/example/CMakeLists.txt
new file mode 100644
index 0000000..d10fc56
--- /dev/null
+++ b/contrib/hunspell/example/CMakeLists.txt
@@ -0,0 +1,26 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_executable(
+  sapi_minihunspell
+  # Command-line example that spell-checks a word list through the sandbox.
+  main.cc
+)
+
+target_link_libraries(
+  sapi_minihunspell PRIVATE
+
+  sapi_hunspell
+  sapi::sapi
+)
diff --git a/contrib/hunspell/example/main.cc b/contrib/hunspell/example/main.cc
new file mode 100644
index 0000000..8d55891
--- /dev/null
+++ b/contrib/hunspell/example/main.cc
@@ -0,0 +1,112 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+// NOTE(review): "<...>" include targets/template args look stripped; restore.
+#include
+#include
+#include
+
+#include "contrib/hunspell/sandboxed.h"
+// Prints the suggestions Hunspell offers for `word` to stdout, one per line.
+absl::Status PrintSuggest(HunspellApi& api, sapi::v::RemotePtr& hunspellrp,
+                          sapi::v::ConstCStr& word) {
+  sapi::v::GenericPtr outptr;
+
+  SAPI_ASSIGN_OR_RETURN(
+      int nlist,
+      api.Hunspell_suggest(&hunspellrp, outptr.PtrAfter(), word.PtrBefore()));
+
+  if (nlist == 0) {
+    std::cout << "No suggestions.\n";
+    return absl::OkStatus();
+  }
+  // Point ptr_list at the remote list reported through outptr and copy it over.
+  sapi::v::Array ptr_list(nlist);
+  ptr_list.SetRemote(reinterpret_cast(outptr.GetValue()));
+
+  SAPI_RETURN_IF_ERROR(api.GetSandbox()->TransferFromSandboxee(&ptr_list));
+
+  std::cout << "Suggestions:\n";
+  for (int i = 0; i < nlist; ++i) {
+    sapi::v::RemotePtr sugrp(ptr_list[i]);
+    SAPI_ASSIGN_OR_RETURN(std::string suggestion,
+                          api.GetSandbox()->GetCString(sugrp));
+    std::cout << suggestion << "\n";  // Whole string, not its i-th character.
+  }
+
+  api.Hunspell_free_list(&hunspellrp, ptr_list.PtrNone(), nlist).IgnoreError();
+
+  return absl::OkStatus();
+}
+
+int main(int argc, char* argv[]) {
+  google::InitGoogleLogging(argv[0]);
+  // Expects three paths: affix file, dictionary file, list of words to check.
+  if (argc != 4) {
+    std::cerr << "Usage:\n " << argv[0];
+    std::cerr << " AFFIX_FILE DICTIONARY_FILE WORDS_TO_CHECK_FILE\n";
+    return EXIT_FAILURE;
+  }
+
+  sapi::v::ConstCStr affix_file_name(argv[1]);
+  sapi::v::ConstCStr dictionary_file_name(argv[2]);
+
+  HunspellSapiSandbox sandbox(affix_file_name.GetData(),
+                              dictionary_file_name.GetData());
+  if (!sandbox.Init().ok()) {
+    std::cerr << "Unable to start sandbox\n";
+    return EXIT_FAILURE;
+  }
+
+  HunspellApi api(&sandbox);
+  absl::StatusOr hunspell = api.Hunspell_create(
+      affix_file_name.PtrBefore(), dictionary_file_name.PtrBefore());
+  if (!hunspell.ok()) {
+    std::cerr << "Could not initialize hunspell\n";
+    return EXIT_FAILURE;
+  }
+  sapi::v::RemotePtr hunspellrp(*hunspell);
+
+  std::ifstream word_to_check_list(argv[3], std::ios_base::in);
+  if (!word_to_check_list.is_open()) {
+    std::cerr << "Could not open file of words to check\n";
+    return EXIT_FAILURE;
+  }
+  // Check one word per input line; print suggestions for each rejected word.
+  std::string buf;
+  while (std::getline(word_to_check_list, buf)) {
+    sapi::v::ConstCStr cbuf(buf.c_str());
+    absl::StatusOr result =
+        api.Hunspell_spell(&hunspellrp, cbuf.PtrBefore());
+    if (!result.ok()) {
+      std::cerr << "Could not check word\n" << result.status() << std::endl;
+      return EXIT_FAILURE;
+    }
+
+    if (*result) {
+      std::cout << "Word " << buf << " is ok\n";
+    } else {
+      std::cout << "Word " << buf << " is incorrect\n";
+      absl::Status status = PrintSuggest(api, hunspellrp, cbuf);
+      if (!status.ok()) {
+        std::cerr << "Unable to get all suggestion\n" << status << std::endl;
+      }
+    }
+  }
+
+  api.Hunspell_destroy(&hunspellrp).IgnoreError();
+
+  return EXIT_SUCCESS;
+}
diff --git a/contrib/hunspell/files/utf8.aff b/contrib/hunspell/files/utf8.aff
new file mode 100644
index 0000000..e8934d7
--- /dev/null
+++ b/contrib/hunspell/files/utf8.aff
@@ -0,0 +1,10 @@
+SET UTF-8
+
+SFX A Y 7
+SFX A 0 őő .
+SFX A 0 ő o
+SFX A 0 ő ó
+SFX A ó ő ó
+SFX A ó őoo ó
+SFX A o őo o
+SFX A 0 ó [abcdó]
diff --git a/contrib/hunspell/files/utf8.dic b/contrib/hunspell/files/utf8.dic
new file mode 100644
index 0000000..e7cb34d
--- /dev/null
+++ b/contrib/hunspell/files/utf8.dic
@@ -0,0 +1,3 @@
+2
+foo/A
+foó/A
diff --git a/contrib/hunspell/files/utf8.good b/contrib/hunspell/files/utf8.good
new file mode 100644
index 0000000..08aa4da
--- /dev/null
+++ b/contrib/hunspell/files/utf8.good
@@ -0,0 +1,9 @@
+foo
+foó
+fooőő
+fooő
+foóő
+foő
+foőo
+foőoo
+foóó
diff --git a/contrib/hunspell/files/utf8.wrong b/contrib/hunspell/files/utf8.wrong
new file mode 100644
index 0000000..d18dfa4
--- /dev/null
+++ b/contrib/hunspell/files/utf8.wrong
@@ -0,0 +1,2 @@
+𐏑𐏒𐏒
+𐏑𐏒𐏒
diff --git a/contrib/hunspell/sandboxed.h b/contrib/hunspell/sandboxed.h
new file mode 100644
index 0000000..ecd1503
--- /dev/null
+++ b/contrib/hunspell/sandboxed.h
@@ -0,0 +1,55 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CONTRIB_HUNSPELL_SANDBOXED_H_
+#define CONTRIB_HUNSPELL_SANDBOXED_H_
+// NOTE(review): "<...>" include targets/template args look stripped; restore.
+#include
+#include
+
+#include "sapi_hunspell.sapi.h"  // NOLINT(build/include)
+// Hunspell sandbox whose policy maps the affix and dictionary files read-only.
+class HunspellSapiSandbox : public HunspellSandbox {
+ public:
+  explicit HunspellSapiSandbox(std::string affix_file_name,
+                               std::string dictionary_file_name)
+      : affix_file_name_(std::move(affix_file_name)),
+        dictionary_file_name_(std::move(dictionary_file_name)) {}
+
+ private:
+  std::unique_ptr ModifyPolicy(
+      sandbox2::PolicyBuilder*) override {  // The incoming builder is unused.
+    return sandbox2::PolicyBuilder()
+        .AllowStaticStartup()
+        .AllowOpen()
+        .AllowRead()
+        .AllowWrite()
+        .AllowSystemMalloc()
+        .AllowExit()
+        .AllowSyscalls({
+            __NR_clock_gettime,
+            __NR_close,
+        })
+        .AddFile(affix_file_name_, /*is_ro=*/true)
+        .AddFile(dictionary_file_name_, /*is_ro=*/true)
+        .AllowRestartableSequencesWithProcFiles(
+            sandbox2::PolicyBuilder::kAllowSlowFences)  // hangs without it
+        .BuildOrDie();
+  }
+
+  std::string affix_file_name_;
+  std::string dictionary_file_name_;
+};
+
+#endif  // CONTRIB_HUNSPELL_SANDBOXED_H_
diff --git a/contrib/hunspell/test/CMakeLists.txt b/contrib/hunspell/test/CMakeLists.txt
new file mode 100644
index 0000000..cc58a54
--- /dev/null
+++ b/contrib/hunspell/test/CMakeLists.txt
@@ -0,0 +1,32 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+include(GoogleTest)
+
+add_executable(
+  sapi_hunspell_test
+  # Single source file holding the HunspellTest fixture and its test cases.
+  hunspell_test.cc
+)
+
+# Link the sandboxed Hunspell library and the SAPI test support targets.
+target_link_libraries(
+  sapi_hunspell_test
+  PRIVATE
+  sapi_hunspell
+  sapi::test_main
+  sapi::temp_file
+)
+# hunspell_test.cc reads TEST_FILES_DIR to locate the utf8.* fixture files.
+gtest_discover_tests(sapi_hunspell_test PROPERTIES ENVIRONMENT "TEST_FILES_DIR=${PROJECT_SOURCE_DIR}/files")
diff --git a/contrib/hunspell/test/hunspell_test.cc b/contrib/hunspell/test/hunspell_test.cc
new file mode 100644
index 0000000..5f9dcd5
--- /dev/null
+++ b/contrib/hunspell/test/hunspell_test.cc
@@ -0,0 +1,153 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+// NOTE(review): "<...>" include targets/template args look stripped; restore.
+#include "../sandboxed.h"
+#include "sandboxed_api/util/path.h"
+#include "sandboxed_api/util/status_matchers.h"
+#include "sandboxed_api/util/temp_file.h"
+
+namespace {
+
+using ::sapi::IsOk;
+
+// Runs Hunspell in a sandbox on utf8.aff/utf8.dic found under TEST_FILES_DIR.
+class HunspellTest : public ::testing::Test {
+ protected:
+  static constexpr absl::string_view kEncoding = "UTF-8";
+  static constexpr absl::string_view kAffixFileName = "utf8.aff";
+  static constexpr absl::string_view kDictionaryFileName = "utf8.dic";
+
+  static constexpr absl::string_view kGoodFileName = "utf8.good";
+  static constexpr absl::string_view kWrongFileName = "utf8.wrong";
+
+  static constexpr absl::string_view kSuggestion = "fo";
+  static constexpr absl::string_view kRandomWord = "random_word123";
+
+  void SetUp() override {
+    test_files_dir_ = getenv("TEST_FILES_DIR");
+    ASSERT_NE(test_files_dir_, nullptr);
+    // The same two paths feed the sandbox policy and Hunspell_create().
+    std::string s_afn = GetTestFilePath(kAffixFileName);
+    std::string s_dfn = GetTestFilePath(kDictionaryFileName);
+    sapi::v::ConstCStr c_afn(s_afn.c_str());
+    sapi::v::ConstCStr c_dfn(s_dfn.c_str());
+
+    sandbox_ = std::make_unique(s_afn, s_dfn);
+    ASSERT_THAT(sandbox_->Init(), IsOk());
+
+    api_ = std::make_unique(sandbox_.get());
+
+    SAPI_ASSERT_OK_AND_ASSIGN(
+        Hunhandle * hunspell,
+        api_->Hunspell_create(c_afn.PtrBefore(), c_dfn.PtrBefore()));
+    hunspellrp_ = std::make_unique(hunspell);
+  }
+
+  void TearDown() override {
+    if (hunspellrp_ == nullptr) return;  // SetUp() failed before creating it.
+    ASSERT_THAT(api_->Hunspell_destroy(hunspellrp_.get()), IsOk());
+  }
+
+  std::string GetTestFilePath(absl::string_view filename) {
+    return sapi::file::JoinPath(test_files_dir_, filename);
+  }
+
+  std::unique_ptr sandbox_;
+  std::unique_ptr api_;
+  std::unique_ptr hunspellrp_;
+
+ private:
+  const char* test_files_dir_ = nullptr;
+};
+
+TEST_F(HunspellTest, CheckEncoding) {
+  SAPI_ASSERT_OK_AND_ASSIGN(char* ret,
+                            api_->Hunspell_get_dic_encoding(&(*hunspellrp_)));
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      std::string encoding,
+      api_->GetSandbox()->GetCString(sapi::v::RemotePtr(ret)));
+  EXPECT_EQ(encoding, kEncoding);
+}
+
+TEST_F(HunspellTest, CheckGoodSpell) {
+  // Every word listed in utf8.good must be accepted.
+  std::ifstream wtclst(GetTestFilePath(kGoodFileName), std::ios_base::in);
+  ASSERT_TRUE(wtclst.is_open());
+
+  std::string buf;
+  while (std::getline(wtclst, buf)) {
+    sapi::v::ConstCStr cbuf(buf.c_str());
+    SAPI_ASSERT_OK_AND_ASSIGN(
+        int result, api_->Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+    ASSERT_EQ(result, 1);
+  }
+}
+
+TEST_F(HunspellTest, CheckWrongSpell) {
+  // Every word listed in utf8.wrong must be rejected.
+  std::ifstream wtclst(GetTestFilePath(kWrongFileName), std::ios_base::in);
+  ASSERT_TRUE(wtclst.is_open());
+
+  std::string buf;
+  while (std::getline(wtclst, buf)) {
+    sapi::v::ConstCStr cbuf(buf.c_str());
+    SAPI_ASSERT_OK_AND_ASSIGN(
+        int result, api_->Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+    ASSERT_EQ(result, 0);
+  }
+}
+
+TEST_F(HunspellTest, CheckAddToDict) {
+  sapi::v::ConstCStr cbuf(kRandomWord.data());
+  // Unknown word -> add -> accepted -> remove -> rejected again.
+  int result;
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      result, api_->Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+  ASSERT_EQ(result, 0);
+
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      result, api_->Hunspell_add(&(*hunspellrp_), cbuf.PtrBefore()));
+  ASSERT_EQ(result, 0);
+
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      result, api_->Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+  ASSERT_EQ(result, 1);
+
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      result, api_->Hunspell_remove(&(*hunspellrp_), cbuf.PtrBefore()));
+  ASSERT_EQ(result, 0);
+
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      result, api_->Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+  ASSERT_EQ(result, 0);
+}
+
+TEST_F(HunspellTest, CheckSuggestion) {
+  sapi::v::ConstCStr cbuf(kSuggestion.data());
+  // "fo" must be rejected yet still produce at least one suggestion.
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      int result, api_->Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+  ASSERT_EQ(result, 0);
+
+  sapi::v::GenericPtr outptr;
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      int nlist,
+      api_->Hunspell_suggest(&(*hunspellrp_), outptr.PtrAfter(),
+                             cbuf.PtrBefore()));
+  ASSERT_GT(nlist, 0);
+}
+
+}  // namespace