Merge pull request #103 from oshogbo:hunspell

PiperOrigin-RevId: 426372638
Change-Id: Ia15b7bb6772cdae0f3f44097d3ff66ef2527f927
This commit is contained in:
Copybara-Service 2022-02-04 04:45:04 -08:00
commit 0f78eb4c59
11 changed files with 539 additions and 4 deletions

View File

@ -5,10 +5,11 @@ libraries.
## Projects Sandboxed
Directory | Project | Home Page | Integration
---------- | ------------------------------------------------ | -------------------------------------------------------------- | -----------
`jsonnet/` | Jsonnet - The Data Templating Language | [github.com/google/jsonnet](https://github.com/google/jsonnet) | CMake
`zstd/` | Zstandard - Fast real-time compression algorithm | [github.com/facebook/zstd](https://github.com/facebook/zstd) | CMake
Directory | Project | Home Page | Integration
----------- | ------------------------------------------------- | -------------------------------------------------------------------- | -----------
`jsonnet/` | Jsonnet - The Data Templating Language | [github.com/google/jsonnet](https://github.com/google/jsonnet) | CMake
`hunspell/` | Hunspell - The most popular spellchecking library | [github.com/hunspell/hunspell](https://github.com/hunspell/hunspell) | CMake
`zstd/` | Zstandard - Fast real-time compression algorithm | [github.com/facebook/zstd](https://github.com/facebook/zstd) | CMake
## Projects Shipping with Sandboxed API Sandboxes

View File

@ -0,0 +1,132 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Require CMake 3.13 and opt in to policies up to 3.22. The range separator is
# three dots ("<min>...<max>"); with only two dots the maximum is not applied.
cmake_minimum_required(VERSION 3.13...3.22)

project(sapi_hunspell CXX)

# Project-wide C++ standard for every target defined below.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# Location of the Sandboxed API checkout; must be supplied by the user
# (e.g. -DSAPI_ROOT=/path/to/sandboxed-api).
set(SAPI_ROOT "" CACHE PATH "Path to the Sandboxed API source tree")
if("${SAPI_ROOT}" STREQUAL "")
  # Fail early with a readable message instead of letting add_subdirectory()
  # be called with an empty source directory.
  message(FATAL_ERROR
    "SAPI_ROOT is not set. Pass -DSAPI_ROOT=<path to the Sandboxed API source tree>."
  )
endif()

# Build Sandboxed API in its own binary directory. EXCLUDE_FROM_ALL keeps its
# targets out of the default build unless something here depends on them.
add_subdirectory(
  "${SAPI_ROOT}"
  "${CMAKE_BINARY_DIR}/sandboxed-api-build"
  EXCLUDE_FROM_ALL
)
# The FetchContent_* commands are provided by this module; include it here so
# this file does not depend on another directory having loaded it already.
include(FetchContent)

# Pinned upstream Hunspell sources.
FetchContent_Declare(
  libhunspell
  GIT_REPOSITORY https://github.com/hunspell/hunspell.git
  GIT_TAG        31e6d6323026a3bef12c5912ce032d88bfef2091
)

FetchContent_GetProperties(libhunspell)
if(NOT libhunspell_POPULATED)
  FetchContent_Populate(libhunspell)

  # config.status is hashed and the hash is remembered in the cache
  # (libhunspell_CONFIG_STATUS) so that the autotools step below only runs
  # when the file is missing or differs from the last recorded run.
  set(libhunspell_STATUS_FILE "${libhunspell_SOURCE_DIR}/config.status")
  set(_sapi_CONFIG_STATUS "")
  if(EXISTS "${libhunspell_STATUS_FILE}")
    file(SHA256 "${libhunspell_STATUS_FILE}" _sapi_CONFIG_STATUS)
  endif()

  if(NOT EXISTS "${libhunspell_STATUS_FILE}"
     OR NOT "${_sapi_CONFIG_STATUS}" STREQUAL "${libhunspell_CONFIG_STATUS}")
    message(STATUS "Configuring libhunspell...")

    # NOTE: several COMMANDs inside a single execute_process() run
    # concurrently as a pipeline, and only the last exit code is reported.
    # autoreconf must finish (and succeed) before ./configure exists, so the
    # two steps are run as separate, sequential processes.
    execute_process(
      COMMAND autoreconf -vfi
      WORKING_DIRECTORY "${libhunspell_SOURCE_DIR}"
      RESULT_VARIABLE libhunspell_autoreconf_result
    )
    if(NOT libhunspell_autoreconf_result EQUAL "0")
      message(FATAL_ERROR
        "Configuration for libhunspell failed (autoreconf: ${libhunspell_autoreconf_result})"
      )
    endif()

    execute_process(
      COMMAND ./configure --disable-dependency-tracking
      WORKING_DIRECTORY "${libhunspell_SOURCE_DIR}"
      RESULT_VARIABLE libhunspell_config_result
    )
    if(NOT libhunspell_config_result EQUAL "0")
      message(FATAL_ERROR "Configuration for libhunspell failed")
    endif()

    # Record the hash of the freshly generated config.status.
    file(SHA256 "${libhunspell_SOURCE_DIR}/config.status" _sapi_CONFIG_STATUS)
    set(libhunspell_CONFIG_STATUS "${_sapi_CONFIG_STATUS}" CACHE INTERNAL "")
  endif()
endif()
# Directory holding both the Hunspell library sources and its public headers.
set(libhunspell_INCLUDE_DIR "${libhunspell_SOURCE_DIR}/src/hunspell")

# Static library built directly from the fetched Hunspell sources. Headers are
# listed next to their sources so they show up in IDE project views.
add_library(hunspell STATIC
  "${libhunspell_INCLUDE_DIR}/affentry.cxx"
  "${libhunspell_INCLUDE_DIR}/affentry.hxx"
  "${libhunspell_INCLUDE_DIR}/affixmgr.cxx"
  "${libhunspell_INCLUDE_DIR}/affixmgr.hxx"
  "${libhunspell_INCLUDE_DIR}/atypes.hxx"
  "${libhunspell_INCLUDE_DIR}/baseaffix.hxx"
  "${libhunspell_INCLUDE_DIR}/csutil.cxx"
  "${libhunspell_INCLUDE_DIR}/csutil.hxx"
  "${libhunspell_INCLUDE_DIR}/filemgr.cxx"
  "${libhunspell_INCLUDE_DIR}/filemgr.hxx"
  "${libhunspell_INCLUDE_DIR}/hashmgr.cxx"
  "${libhunspell_INCLUDE_DIR}/hashmgr.hxx"
  "${libhunspell_INCLUDE_DIR}/htypes.hxx"
  "${libhunspell_INCLUDE_DIR}/hunspell.cxx"
  "${libhunspell_INCLUDE_DIR}/hunspell.h"
  "${libhunspell_INCLUDE_DIR}/hunspell.hxx"
  "${libhunspell_INCLUDE_DIR}/hunzip.cxx"
  "${libhunspell_INCLUDE_DIR}/hunzip.hxx"
  "${libhunspell_INCLUDE_DIR}/langnum.hxx"
  "${libhunspell_INCLUDE_DIR}/phonet.cxx"
  "${libhunspell_INCLUDE_DIR}/phonet.hxx"
  "${libhunspell_INCLUDE_DIR}/replist.cxx"
  "${libhunspell_INCLUDE_DIR}/replist.hxx"
  "${libhunspell_INCLUDE_DIR}/suggestmgr.cxx"
  "${libhunspell_INCLUDE_DIR}/suggestmgr.hxx"
  "${libhunspell_INCLUDE_DIR}/utf_info.hxx"
  "${libhunspell_INCLUDE_DIR}/w_char.hxx"
)

# PUBLIC: the library itself and everything linking it see the headers.
target_include_directories(hunspell
  PUBLIC
    "${libhunspell_INCLUDE_DIR}"
)
# Sandboxed API library "sapi_hunspell" wrapping the `hunspell` target.
#   FUNCTIONS    - C API entry points from hunspell.h that are exposed.
#   INPUTS       - header the listed functions are taken from.
#   LIBRARY_NAME - "Hunspell"; the C++ sources in this project use the
#                  resulting HunspellApi / HunspellSandbox classes.
#   NAMESPACE    - empty, i.e. no enclosing namespace is requested.
add_sapi_library(sapi_hunspell
  FUNCTIONS
    Hunspell_create
    Hunspell_create_key
    Hunspell_destroy
    Hunspell_spell
    Hunspell_get_dic_encoding
    Hunspell_suggest
    Hunspell_analyze
    Hunspell_add
    Hunspell_remove
    Hunspell_free_list
  INPUTS
    "${libhunspell_INCLUDE_DIR}/hunspell.h"
  LIBRARY      hunspell
  LIBRARY_NAME Hunspell
  NAMESPACE    ""
)

# Consumers get the project's binary directory on their include path
# (presumably where the generated sapi_hunspell.sapi.h is written - confirm
# against add_sapi_library).
target_include_directories(sapi_hunspell
  INTERFACE
    "${PROJECT_BINARY_DIR}"
)
# Optional parts of the project, switched by the SAPI_ENABLE_* variables.

# Example command-line program (example/).
if(SAPI_ENABLE_EXAMPLES)
  add_subdirectory(example)
endif()

# Unit tests (test/).
if(SAPI_ENABLE_TESTS)
  add_subdirectory(test)
endif()

View File

@ -0,0 +1,26 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Example spell-checking tool built from main.cc.
add_executable(sapi_minihunspell
  main.cc
)

# The example needs the sandboxed Hunspell library and Sandboxed API itself;
# neither is propagated further (PRIVATE).
target_link_libraries(sapi_minihunspell
  PRIVATE
    sapi_hunspell
    sapi::sapi
)

View File

@ -0,0 +1,112 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <string>
#include "contrib/hunspell/sandboxed.h"
// Asks the sandboxed Hunspell instance for suggestions for `word` and prints
// them to stdout, one per line ("No suggestions." when there are none).
//
// Args:
//   api:        Sandboxed Hunspell API used for all calls.
//   hunspellrp: Remote pointer to the Hunspell handle inside the sandboxee.
//   word:       Word to look up suggestions for.
//
// Returns:
//   OK on success, otherwise the status of the first sandbox call that
//   failed.
absl::Status PrintSuggest(HunspellApi& api, sapi::v::RemotePtr& hunspellrp,
                          sapi::v::ConstCStr& word) {
  // Receives the address of the suggestion list allocated in the sandboxee.
  sapi::v::GenericPtr outptr;
  SAPI_ASSIGN_OR_RETURN(
      int nlist,
      api.Hunspell_suggest(&hunspellrp, outptr.PtrAfter(), word.PtrBefore()));
  // Treat a non-positive count as "nothing to show"; a negative value must
  // never be used as an array size below.
  if (nlist <= 0) {
    std::cout << "No suggestions.\n";
    return absl::OkStatus();
  }

  // Copy the array of remote string pointers into this process.
  sapi::v::Array<char*> ptr_list(nlist);
  ptr_list.SetRemote(reinterpret_cast<void*>(outptr.GetValue()));
  SAPI_RETURN_IF_ERROR(api.GetSandbox()->TransferFromSandboxee(&ptr_list));

  std::cout << "Suggestions:\n";
  absl::Status status = absl::OkStatus();
  for (int i = 0; i < nlist; i++) {
    sapi::v::RemotePtr sugrp(ptr_list[i]);
    absl::StatusOr<std::string> suggestion =
        api.GetSandbox()->GetCString(sugrp);
    if (!suggestion.ok()) {
      // Stop printing, but still fall through to release the list below.
      status = suggestion.status();
      break;
    }
    // Print the whole suggestion (not a single character of it).
    std::cout << *suggestion << "\n";
  }

  // Best-effort release of the list in the sandboxee.
  api.Hunspell_free_list(&hunspellrp, ptr_list.PtrNone(), nlist).IgnoreError();
  return status;
}
// Example driver: spell-checks every line of WORDS_TO_CHECK_FILE with a
// sandboxed Hunspell instance and prints suggestions for rejected words.
//
// Usage: <program> AFFIX_FILE DICTIONARY_FILE WORDS_TO_CHECK_FILE
//
// Returns EXIT_SUCCESS when every word could be checked and EXIT_FAILURE on
// bad arguments, sandbox/Hunspell start-up failure, an unreadable word list
// or a failed spell-check call.
int main(int argc, char* argv[]) {
  google::InitGoogleLogging(argv[0]);

  if (argc != 4) {
    std::cerr << "Usage:\n  " << argv[0];
    std::cerr << " AFFIX_FILE DICTIONARY_FILE WORDS_TO_CHECK_FILE\n";
    return EXIT_FAILURE;
  }

  sapi::v::ConstCStr affix_file_name(argv[1]);
  sapi::v::ConstCStr dictionary_file_name(argv[2]);

  // The sandbox needs both paths so that its policy can expose these files.
  HunspellSapiSandbox sandbox(affix_file_name.GetData(),
                              dictionary_file_name.GetData());
  if (!sandbox.Init().ok()) {
    std::cerr << "Unable to start sandbox\n";
    return EXIT_FAILURE;
  }

  HunspellApi api(&sandbox);
  absl::StatusOr<Hunhandle*> hunspell = api.Hunspell_create(
      affix_file_name.PtrBefore(), dictionary_file_name.PtrBefore());
  if (!hunspell.ok()) {
    std::cerr << "Could not initialize hunspell\n";
    return EXIT_FAILURE;
  }
  sapi::v::RemotePtr hunspellrp(*hunspell);

  std::ifstream word_to_check_list(argv[3], std::ios_base::in);
  if (!word_to_check_list.is_open()) {
    std::cerr << "Could not open file of words to check\n";
    return EXIT_FAILURE;
  }

  // One word per line.
  std::string buf;
  while (std::getline(word_to_check_list, buf)) {
    sapi::v::ConstCStr cbuf(buf.c_str());
    absl::StatusOr<int> result =
        api.Hunspell_spell(&hunspellrp, cbuf.PtrBefore());
    if (!result.ok()) {
      std::cerr << "Could not check word\n" << result.status() << std::endl;
      return EXIT_FAILURE;
    }

    if (*result) {
      std::cout << "Word " << buf << " is ok\n";
    } else {
      std::cout << "Word " << buf << " is incorrect\n";
      // A failure to fetch suggestions is reported but does not stop the run.
      absl::Status status = PrintSuggest(api, hunspellrp, cbuf);
      if (!status.ok()) {
        std::cerr << "Unable to get all suggestions\n" << status << std::endl;
      }
    }
  }

  api.Hunspell_destroy(&hunspellrp).IgnoreError();
  return EXIT_SUCCESS;
}

View File

@ -0,0 +1,10 @@
SET UTF-8
SFX A Y 7
SFX A 0 őő .
SFX A 0 ő o
SFX A 0 ő ó
SFX A ó ő ó
SFX A ó őoo ó
SFX A o őo o
SFX A 0 ó [abcdó]

View File

@ -0,0 +1,3 @@
2
foo/A
foó/A

View File

@ -0,0 +1,9 @@
foo
foó
fooőő
fooő
foóő
foő
foőo
foőoo
foóó

View File

@ -0,0 +1,2 @@
𐏑𐏒𐏒
𐏑𐏒𐏒

View File

@ -0,0 +1,55 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef CONTRIB_HUNSPELL_SANDBOXED_H_
#define CONTRIB_HUNSPELL_SANDBOXED_H_
#include <libgen.h>
#include <syscall.h>
#include "sapi_hunspell.sapi.h" // NOLINT(build/include)
class HunspellSapiSandbox : public HunspellSandbox {
public:
explicit HunspellSapiSandbox(std::string affix_file_name,
std::string dictionary_file_name)
: affix_file_name_(std::move(affix_file_name)),
dictionary_file_name_(std::move(dictionary_file_name)) {}
private:
std::unique_ptr<sandbox2::Policy> ModifyPolicy(
sandbox2::PolicyBuilder*) override {
return sandbox2::PolicyBuilder()
.AllowStaticStartup()
.AllowOpen()
.AllowRead()
.AllowWrite()
.AllowSystemMalloc()
.AllowExit()
.AllowSyscalls({
__NR_clock_gettime,
__NR_close,
})
.AddFile(affix_file_name_, /*is_ro=*/true)
.AddFile(dictionary_file_name_, /*is_ro=*/true)
.AllowRestartableSequencesWithProcFiles(
sandbox2::PolicyBuilder::kAllowSlowFences) // hangs without it
.BuildOrDie();
}
std::string affix_file_name_;
std::string dictionary_file_name_;
};
#endif // CONTRIB_HUNSPELL_SANDBOXED_H_

View File

@ -0,0 +1,32 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Provides gtest_discover_tests().
include(GoogleTest)

# Unit tests for the sandboxed Hunspell library.
add_executable(sapi_hunspell_test
  hunspell_test.cc
)

target_link_libraries(sapi_hunspell_test
  PRIVATE
    sapi_hunspell
    sapi::test_main
    sapi::temp_file
)

# Register every test case with CTest. The tests read TEST_FILES_DIR from the
# environment to locate their input files.
gtest_discover_tests(sapi_hunspell_test
  PROPERTIES
    ENVIRONMENT "TEST_FILES_DIR=${PROJECT_SOURCE_DIR}/files"
)

View File

@ -0,0 +1,153 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include "../sandboxed.h"
#include "sandboxed_api/util/path.h"
#include "sandboxed_api/util/status_matchers.h"
#include "sandboxed_api/util/temp_file.h"
namespace {
using ::sapi::IsOk;
// Test fixture that starts a Hunspell sandbox and creates a Hunspell handle
// from the utf8.aff / utf8.dic files found in the directory named by the
// TEST_FILES_DIR environment variable.
class HunspellTest : public ::testing::Test {
 protected:
  static constexpr absl::string_view kEncoding = "UTF-8";
  static constexpr absl::string_view kAffixFileName = "utf8.aff";
  static constexpr absl::string_view kDictionaryFileName = "utf8.dic";
  static constexpr absl::string_view kGoodFileName = "utf8.good";
  static constexpr absl::string_view kWrongFileName = "utf8.wrong";
  static constexpr absl::string_view kSuggestion = "fo";
  static constexpr absl::string_view kRandomWord = "random_word123";

  // Initializes the sandbox, the API object and the remote Hunspell handle.
  // Any failure aborts SetUp() and leaves the remaining members null.
  void SetUp() override {
    test_files_dir_ = getenv("TEST_FILES_DIR");
    ASSERT_NE(test_files_dir_, nullptr);

    std::string s_afn = GetTestFilePath(kAffixFileName);
    std::string s_dfn = GetTestFilePath(kDictionaryFileName);
    sapi::v::ConstCStr c_afn(s_afn.c_str());
    sapi::v::ConstCStr c_dfn(s_dfn.c_str());

    sandbox_ = std::make_unique<HunspellSapiSandbox>(s_afn, s_dfn);
    ASSERT_THAT(sandbox_->Init(), IsOk());

    api_ = std::make_unique<HunspellApi>(sandbox_.get());

    SAPI_ASSERT_OK_AND_ASSIGN(
        Hunhandle * hunspell,
        api_->Hunspell_create(c_afn.PtrBefore(), c_dfn.PtrBefore()));
    hunspellrp_ = std::make_unique<sapi::v::RemotePtr>(hunspell);
  }

  // Destroys the remote Hunspell handle.
  void TearDown() override {
    // GoogleTest calls TearDown() even when SetUp() stopped on a fatal
    // failure, in which case these members were never created; there is
    // nothing to destroy then and dereferencing them would crash.
    if (api_ == nullptr || hunspellrp_ == nullptr) {
      return;
    }
    absl::Status status = api_->Hunspell_destroy(hunspellrp_.get());
    ASSERT_THAT(status, IsOk());
  }

  // Returns `filename` joined onto the test files directory.
  std::string GetTestFilePath(const absl::string_view& filename) {
    return sapi::file::JoinPath(test_files_dir_, filename);
  }

  std::unique_ptr<HunspellSapiSandbox> sandbox_;
  std::unique_ptr<HunspellApi> api_;
  std::unique_ptr<sapi::v::RemotePtr> hunspellrp_;

 private:
  // Value of TEST_FILES_DIR; set in SetUp().
  const char* test_files_dir_ = nullptr;
};
// The encoding reported by the loaded dictionary must equal kEncoding.
TEST_F(HunspellTest, CheckEncoding) {
  // The call returns an address inside the sandboxee, not a local string.
  SAPI_ASSERT_OK_AND_ASSIGN(
      char* remote_encoding,
      api_->Hunspell_get_dic_encoding(hunspellrp_.get()));

  // Copy the C string out of the sandboxee before comparing it.
  sapi::v::RemotePtr encoding_ptr(remote_encoding);
  SAPI_ASSERT_OK_AND_ASSIGN(std::string encoding,
                            api_->GetSandbox()->GetCString(encoding_ptr));

  EXPECT_EQ(encoding, kEncoding);
}
// Every line of the "good" word list must be accepted by Hunspell_spell
// (return value 1).
TEST_F(HunspellTest, CheckGoodSpell) {
  std::ifstream wtclst(GetTestFilePath(kGoodFileName), std::ios_base::in);
  ASSERT_TRUE(wtclst.is_open());

  // One word per line.
  std::string buf;
  while (std::getline(wtclst, buf)) {
    sapi::v::ConstCStr cbuf(buf.c_str());
    SAPI_ASSERT_OK_AND_ASSIGN(
        int result, api_->Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
    // Name the offending word so a failure is actionable.
    ASSERT_EQ(result, 1) << "word: " << buf;
  }
}
// Every line of the "wrong" word list must be rejected by Hunspell_spell
// (return value 0).
TEST_F(HunspellTest, CheckWrongSpell) {
  std::ifstream wtclst(GetTestFilePath(kWrongFileName), std::ios_base::in);
  ASSERT_TRUE(wtclst.is_open());

  // One word per line.
  std::string buf;
  while (std::getline(wtclst, buf)) {
    sapi::v::ConstCStr cbuf(buf.c_str());
    SAPI_ASSERT_OK_AND_ASSIGN(
        int result, api_->Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
    // Name the offending word so a failure is actionable.
    ASSERT_EQ(result, 0) << "word: " << buf;
  }
}
// A word unknown to the dictionary is rejected, accepted after Hunspell_add
// and rejected again after Hunspell_remove. Both Hunspell_add and
// Hunspell_remove are expected to return 0.
TEST_F(HunspellTest, CheckAddToDict) {
  sapi::v::ConstCStr word(kRandomWord.data());
  sapi::v::RemotePtr* handle = hunspellrp_.get();

  // Initially unknown.
  SAPI_ASSERT_OK_AND_ASSIGN(int spell_before,
                            api_->Hunspell_spell(handle, word.PtrBefore()));
  ASSERT_EQ(spell_before, 0);

  // Add it to the run-time dictionary.
  SAPI_ASSERT_OK_AND_ASSIGN(int add_result,
                            api_->Hunspell_add(handle, word.PtrBefore()));
  ASSERT_EQ(add_result, 0);

  // Now it is accepted.
  SAPI_ASSERT_OK_AND_ASSIGN(int spell_added,
                            api_->Hunspell_spell(handle, word.PtrBefore()));
  ASSERT_EQ(spell_added, 1);

  // Remove it again.
  SAPI_ASSERT_OK_AND_ASSIGN(int remove_result,
                            api_->Hunspell_remove(handle, word.PtrBefore()));
  ASSERT_EQ(remove_result, 0);

  // And it is rejected once more.
  SAPI_ASSERT_OK_AND_ASSIGN(int spell_removed,
                            api_->Hunspell_spell(handle, word.PtrBefore()));
  ASSERT_EQ(spell_removed, 0);
}
// kSuggestion is rejected by the spell check, and Hunspell_suggest reports at
// least one suggestion for it.
TEST_F(HunspellTest, CheckSuggestion) {
  sapi::v::ConstCStr word(kSuggestion.data());
  sapi::v::RemotePtr* handle = hunspellrp_.get();

  SAPI_ASSERT_OK_AND_ASSIGN(int spell_result,
                            api_->Hunspell_spell(handle, word.PtrBefore()));
  ASSERT_EQ(spell_result, 0);

  // Receives the address of the suggestion list; only the count is checked.
  sapi::v::GenericPtr suggestions_out;
  SAPI_ASSERT_OK_AND_ASSIGN(
      int suggestion_count,
      api_->Hunspell_suggest(handle, suggestions_out.PtrAfter(),
                             word.PtrBefore()));
  ASSERT_GT(suggestion_count, 0);
}
} // namespace