clang_generator: Handle inter-type dependencies better

This change changes the emitter to work on `clang::TypeDecl`s instead of
`clang::QualType`s, as the latter only describe complete definitions. This
allows us to better deal with situations where we would otherwise have a kind
of circular type dependencies: For example, a `typedef` that depends on a
`struct` which in turn has a member that references the typedef.
For this to work, we now also record the actual source order of declarations,
similar to what the libclang based header generator does.

Also add some more tests for the newly covered cases.

PiperOrigin-RevId: 488370914
Change-Id: I2d1baa863fb12d1801bf11a20f3f88df7fde3129
This commit is contained in:
Christian Blichmann 2022-11-14 08:18:42 -08:00 committed by Copybara-Service
parent b626bf40da
commit 67bc67bbef
7 changed files with 142 additions and 36 deletions

View File

@ -25,6 +25,7 @@
#include "absl/strings/str_split.h" #include "absl/strings/str_split.h"
#include "absl/strings/string_view.h" #include "absl/strings/string_view.h"
#include "absl/strings/strip.h" #include "absl/strings/strip.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h" #include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclTemplate.h"
#include "clang/AST/Type.h" #include "clang/AST/Type.h"
@ -433,16 +434,8 @@ absl::StatusOr<std::string> EmitHeader(
return out; return out;
} }
void Emitter::EmitType(clang::QualType qual) { void Emitter::EmitType(clang::TypeDecl* type_decl) {
clang::TypeDecl* decl = nullptr; if (!type_decl) {
if (const auto* typedef_type = qual->getAs<clang::TypedefType>()) {
decl = typedef_type->getDecl();
} else if (const auto* enum_type = qual->getAs<clang::EnumType>()) {
decl = enum_type->getDecl();
} else if (const auto* record_type = qual->getAs<clang::RecordType>()) {
decl = record_type->getDecl();
}
if (!decl) {
return; return;
} }
@ -450,18 +443,17 @@ void Emitter::EmitType(clang::QualType qual) {
// TODO(cblichmann): Instead of this and the hard-coded entities below, we // TODO(cblichmann): Instead of this and the hard-coded entities below, we
// should map add the correct (system) headers to the // should map add the correct (system) headers to the
// generated output. // generated output.
if (decl->getASTContext().getSourceManager().isInSystemHeader( if (type_decl->getASTContext().getSourceManager().isInSystemHeader(
decl->getBeginLoc())) { type_decl->getBeginLoc())) {
return; return;
} }
const std::vector<std::string> ns_path = GetNamespacePath(decl); const std::vector<std::string> ns_path = GetNamespacePath(type_decl);
std::string ns_name; std::string ns_name;
if (!ns_path.empty()) { if (!ns_path.empty()) {
const auto& ns_root = ns_path.front(); const auto& ns_root = ns_path.front();
// Filter out any and all declarations from the C++ standard library, // Filter out declarations from the C++ standard library, from SAPI itself
// from SAPI itself and from other well-known namespaces. This avoids // and from other well-known namespaces.
// re-declaring things like standard integer types, for example.
if (ns_root == "std" || ns_root == "__gnu_cxx" || ns_root == "sapi") { if (ns_root == "std" || ns_root == "__gnu_cxx" || ns_root == "sapi") {
return; return;
} }
@ -472,7 +464,7 @@ void Emitter::EmitType(clang::QualType qual) {
} }
// Skip types from Abseil that will already be included in the generated // Skip types from Abseil that will already be included in the generated
// header. // header.
if (auto name = ToStringView(decl->getName()); if (auto name = ToStringView(type_decl->getName());
name == "StatusCode" || name == "StatusToStringMode" || name == "StatusCode" || name == "StatusToStringMode" ||
name == "CordMemoryAccounting" || name == "string_view" || name == "CordMemoryAccounting" || name == "string_view" ||
name == "LogSeverity" || name == "LogEntry" || name == "Span" || name == "LogSeverity" || name == "LogEntry" || name == "Span" ||
@ -483,16 +475,17 @@ void Emitter::EmitType(clang::QualType qual) {
ns_name = absl::StrJoin(ns_path, "::"); ns_name = absl::StrJoin(ns_path, "::");
} }
std::string spelling = GetSpelling(decl); std::string spelling = GetSpelling(type_decl);
if (const auto& [it, inserted] = rendered_types_.emplace(ns_name, spelling); if (const auto& [it, inserted] = rendered_types_.emplace(ns_name, spelling);
inserted) { inserted) {
rendered_types_ordered_.push_back(&*it); rendered_types_ordered_.push_back(&*it);
} }
} }
void Emitter::AddTypesFiltered(const QualTypeSet& types) { void Emitter::AddTypeDeclarations(
for (clang::QualType qual : types) { const std::vector<clang::TypeDecl*>& type_decls) {
EmitType(qual); for (clang::TypeDecl* type_decl : type_decls) {
EmitType(type_decl);
} }
} }

View File

@ -60,13 +60,13 @@ class RenderedType {
// Sandboxed API header. // Sandboxed API header.
class Emitter { class Emitter {
public: public:
// Adds the set of previously collected types to the emitter, recording the // Adds the declarations of previously collected types to the emitter,
// spelling of each one. Types that are not supported by the current // recording the spelling of each one. Types/declarations that are not
// generator settings or that are unwanted/unnecessary are skipped. Filtered // supported by the current generator settings or that are unwanted or
// types include C++ constructs or well-known standard library elements. The // unnecessary are skipped. Other filtered types include C++ constructs or
// latter can be replaced by including the correct headers in the emitted // well-known standard library elements. The latter can be replaced by
// header. // including the correct headers in the emitted header.
void AddTypesFiltered(const QualTypeSet& types); void AddTypeDeclarations(const std::vector<clang::TypeDecl*>& type_decls);
absl::Status AddFunction(clang::FunctionDecl* decl); absl::Status AddFunction(clang::FunctionDecl* decl);
@ -74,7 +74,7 @@ class Emitter {
absl::StatusOr<std::string> EmitHeader(const GeneratorOptions& options); absl::StatusOr<std::string> EmitHeader(const GeneratorOptions& options);
private: private:
void EmitType(clang::QualType qual); void EmitType(clang::TypeDecl* type_decl);
protected: protected:
// Stores namespaces and a list of spellings for types. Keeps track of types // Stores namespaces and a list of spellings for types. Keeps track of types

View File

@ -65,7 +65,6 @@ TEST_F(EmitterTest, BasicFunctionality) {
RunFrontendAction(R"(extern "C" void ExposedFunction() {})", RunFrontendAction(R"(extern "C" void ExposedFunction() {})",
std::make_unique<GeneratorAction>(emitter, options)), std::make_unique<GeneratorAction>(emitter, options)),
IsOk()); IsOk());
EXPECT_THAT(emitter.GetRenderedFunctions(), SizeIs(1)); EXPECT_THAT(emitter.GetRenderedFunctions(), SizeIs(1));
absl::StatusOr<std::string> header = emitter.EmitHeader(options); absl::StatusOr<std::string> header = emitter.EmitHeader(options);
@ -92,6 +91,7 @@ TEST_F(EmitterTest, RelatedTypes) {
extern "C" void Structize(MyStruct* s);)", extern "C" void Structize(MyStruct* s);)",
std::make_unique<GeneratorAction>(emitter, GeneratorOptions())), std::make_unique<GeneratorAction>(emitter, GeneratorOptions())),
IsOk()); IsOk());
EXPECT_THAT(emitter.GetRenderedFunctions(), SizeIs(2));
// Types from "std" should be skipped // Types from "std" should be skipped
EXPECT_THAT(emitter.SpellingsForNS("std"), IsEmpty()); EXPECT_THAT(emitter.SpellingsForNS("std"), IsEmpty());
@ -105,6 +105,25 @@ TEST_F(EmitterTest, RelatedTypes) {
"typedef struct { int member; } MyStruct")); "typedef struct { int member; } MyStruct"));
} }
TEST_F(EmitterTest, TypedefNames) {
EmitterForTesting emitter;
ASSERT_THAT(
RunFrontendAction(
R"(typedef enum { kNone, kSome } E;
struct A { E member; };
typedef struct { int member; } B;
typedef struct tagC { int member; } C;
extern "C" void Colorize(A*, B*, C*);)",
std::make_unique<GeneratorAction>(emitter, GeneratorOptions())),
IsOk());
EXPECT_THAT(
UglifyAll(emitter.SpellingsForNS("")),
ElementsAre("typedef enum { kNone, kSome } E", "struct A { E member; }",
"typedef struct { int member; } B",
"struct tagC { int member; }", "typedef struct tagC C"));
}
TEST_F(EmitterTest, NestedStruct) { TEST_F(EmitterTest, NestedStruct) {
EmitterForTesting emitter; EmitterForTesting emitter;
ASSERT_THAT( ASSERT_THAT(
@ -209,6 +228,34 @@ TEST_F(EmitterTest, TypedefStructByValueSkipsFunction) {
EXPECT_THAT(emitter.GetRenderedFunctions(), IsEmpty()); EXPECT_THAT(emitter.GetRenderedFunctions(), IsEmpty());
} }
TEST_F(EmitterTest, TypedefTypeDependencies) {
EmitterForTesting emitter;
EXPECT_THAT(
RunFrontendAction(
R"(typedef bool some_other_unused;
using size_t = long long int;
typedef struct _Image Image;
typedef size_t (*StreamHandler)(const Image*, const void*,
const size_t);
enum unrelated_unused { NONE, SOME };
struct _Image {
StreamHandler stream;
int size;
};
extern "C" void Process(StreamHandler handler);)",
std::make_unique<GeneratorAction>(emitter, GeneratorOptions())),
IsOk());
EXPECT_THAT(emitter.GetRenderedFunctions(), SizeIs(1));
EXPECT_THAT(UglifyAll(emitter.SpellingsForNS("")),
ElementsAre("using size_t = long long", "struct _Image",
"typedef size_t (*StreamHandler)(const Image *, "
"const void *, const size_t)",
"struct _Image {"
" StreamHandler stream;"
" int size; }"));
}
TEST(IncludeGuard, CreatesRandomizedGuardForEmptyFilename) { TEST(IncludeGuard, CreatesRandomizedGuardForEmptyFilename) {
// Copybara will transform the string. This is intentional. // Copybara will transform the string. This is intentional.
constexpr absl::string_view kGeneratedHeaderPrefix = constexpr absl::string_view kGeneratedHeaderPrefix =

View File

@ -18,7 +18,9 @@
#include <iostream> #include <iostream>
#include "absl/status/status.h" #include "absl/status/status.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Type.h" #include "clang/AST/Type.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/PreprocessorOptions.h" #include "clang/Lex/PreprocessorOptions.h"
#include "sandboxed_api/tools/clang_generator/diagnostics.h" #include "sandboxed_api/tools/clang_generator/diagnostics.h"
#include "sandboxed_api/tools/clang_generator/emitter.h" #include "sandboxed_api/tools/clang_generator/emitter.h"
@ -43,6 +45,11 @@ std::string GetOutputFilename(absl::string_view source_file) {
return ReplaceFileExtension(source_file, ".sapi.h"); return ReplaceFileExtension(source_file, ".sapi.h");
} }
bool GeneratorASTVisitor::VisitTypeDecl(clang::TypeDecl* decl) {
collector_.RecordOrderedDecl(decl);
return true;
}
bool GeneratorASTVisitor::VisitFunctionDecl(clang::FunctionDecl* decl) { bool GeneratorASTVisitor::VisitFunctionDecl(clang::FunctionDecl* decl) {
if (decl->isCXXClassMember() || // Skip classes if (decl->isCXXClassMember() || // Skip classes
!decl->isExternC() || // Skip non external functions !decl->isExternC() || // Skip non external functions
@ -55,14 +62,16 @@ bool GeneratorASTVisitor::VisitFunctionDecl(clang::FunctionDecl* decl) {
if (bool all_functions = options_.function_names.empty(); if (bool all_functions = options_.function_names.empty();
all_functions || all_functions ||
options_.function_names.count(ToStringView(decl->getName())) > 0) { options_.function_names.count(ToStringView(decl->getName())) > 0) {
clang::SourceManager& source_manager =
decl->getASTContext().getSourceManager();
// Skip functions from system headers when all functions are requested. // Skip functions from system headers when all functions are requested.
// This allows to still specify standard library functions explicitly. // This allows to still specify standard library functions explicitly.
if (all_functions && if (all_functions && source_manager.isInSystemHeader(decl->getBeginLoc())) {
decl->getASTContext().getSourceManager().isInSystemHeader(
decl->getBeginLoc())) {
return true; return true;
} }
// TODO(cblichmann): Skip functions to implement limit_scan_depth feature.
functions_.push_back(decl); functions_.push_back(decl);
collector_.CollectRelatedTypes(decl->getDeclaredReturnType()); collector_.CollectRelatedTypes(decl->getDeclaredReturnType());
@ -81,7 +90,7 @@ void GeneratorASTConsumer::HandleTranslationUnit(clang::ASTContext& context) {
return; return;
} }
emitter_.AddTypesFiltered(visitor_.collector_.collected()); emitter_.AddTypeDeclarations(visitor_.collector_.GetTypeDeclarations());
for (clang::FunctionDecl* func : visitor_.functions_) { for (clang::FunctionDecl* func : visitor_.functions_) {
absl::Status status = emitter_.AddFunction(func); absl::Status status = emitter_.AddFunction(func);

View File

@ -21,6 +21,7 @@
#include "absl/container/flat_hash_set.h" #include "absl/container/flat_hash_set.h"
#include "absl/strings/string_view.h" #include "absl/strings/string_view.h"
#include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTConsumer.h"
#include "clang/AST/Decl.h"
#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/CompilerInvocation.h"
@ -60,6 +61,7 @@ class GeneratorASTVisitor
explicit GeneratorASTVisitor(const GeneratorOptions& options) explicit GeneratorASTVisitor(const GeneratorOptions& options)
: options_(options) {} : options_(options) {}
bool VisitTypeDecl(clang::TypeDecl* decl);
bool VisitFunctionDecl(clang::FunctionDecl* decl); bool VisitFunctionDecl(clang::FunctionDecl* decl);
private: private:

View File

@ -14,8 +14,10 @@
#include "sandboxed_api/tools/clang_generator/types.h" #include "sandboxed_api/tools/clang_generator/types.h"
#include "absl/container/flat_hash_set.h"
#include "absl/strings/str_cat.h" #include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h" #include "absl/strings/str_format.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h" #include "clang/AST/Decl.h"
#include "clang/AST/QualTypeNames.h" #include "clang/AST/QualTypeNames.h"
#include "clang/AST/Type.h" #include "clang/AST/Type.h"
@ -34,6 +36,11 @@ bool IsFunctionReferenceType(clang::QualType qual) {
} // namespace } // namespace
void TypeCollector::RecordOrderedDecl(clang::TypeDecl* type_decl) {
// This implicitly assigns a number (its source order) to each declaration.
ordered_decls_.push_back(type_decl);
}
void TypeCollector::CollectRelatedTypes(clang::QualType qual) { void TypeCollector::CollectRelatedTypes(clang::QualType qual) {
if (!seen_.insert(qual)) { if (!seen_.insert(qual)) {
return; return;
@ -106,6 +113,44 @@ void TypeCollector::CollectRelatedTypes(clang::QualType qual) {
} }
} }
std::vector<clang::TypeDecl*> TypeCollector::GetTypeDeclarations() {
if (ordered_decls_.empty()) {
return {};
}
// The AST context is the same for all declarations in this translation unit,
// so use a reference here.
clang::ASTContext& context = ordered_decls_.front()->getASTContext();
absl::flat_hash_set<std::string> collected_names;
for (clang::QualType qual : collected_) {
collected_names.insert(clang::TypeName::getFullyQualifiedName(
qual, context, context.getPrintingPolicy()));
}
std::vector<clang::TypeDecl*> result;
for (clang::TypeDecl* type_decl : ordered_decls_) {
// Ideally, collected_.contains() on the underlying QualType of the TypeDecl
// would work here. However, QualTypes obtained from a TypeDecl contain
// different Type pointers, even when referring to one of the same types
// from the set and thus will not be found. Instead, work around the issue
// by always using the fully qualified name of the type.
if (!collected_names.contains(type_decl->getQualifiedNameAsString())) {
continue;
}
if (auto* tag_decl = llvm::dyn_cast<clang::TagDecl>(type_decl);
tag_decl && tag_decl->getTypedefNameForAnonDecl()) {
// Skip anonymous declarations that are typedef-ed. For example skip
// things like "typedef enum { A } SomeName". In this case, the enum is
// unnamed and the emitter will instead work with the complete typedef, so
// nothing is lost.
continue;
}
result.push_back(type_decl);
}
return result;
}
namespace { namespace {
// Removes "const" from a qualified type if it denotes a pointer or reference // Removes "const" from a qualified type if it denotes a pointer or reference

View File

@ -18,7 +18,9 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "absl/container/flat_hash_set.h"
#include "clang/AST/ASTContext.h" #include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Type.h" #include "clang/AST/Type.h"
#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallPtrSet.h"
@ -42,6 +44,12 @@ inline bool IsPointerOrReference(clang::QualType qual) {
class TypeCollector { class TypeCollector {
public: public:
// Records the source order of the given type in the current translation unit.
// This is different from collecting related types, as the emitter also needs
// to know in which order to emit typedefs vs forward decls, etc. and
// QualTypes only refer to complete definitions.
void RecordOrderedDecl(clang::TypeDecl* type_decl);
// Computes the transitive closure of all types that a type depends on. Those // Computes the transitive closure of all types that a type depends on. Those
// are types that need to be declared before a declaration of the type denoted // are types that need to be declared before a declaration of the type denoted
// by the qual parameter is valid. For example, given // by the qual parameter is valid. For example, given
@ -53,13 +61,15 @@ class TypeCollector {
// //
// calling this function on the type "AggregateStruct" yields these types: // calling this function on the type "AggregateStruct" yields these types:
// int // int
// SubStruct
// bool // bool
// SubStruct
void CollectRelatedTypes(clang::QualType qual); void CollectRelatedTypes(clang::QualType qual);
QualTypeSet& collected() { return collected_; } // Returns the declarations for the collected types in source order.
std::vector<clang::TypeDecl*> GetTypeDeclarations();
private: private:
std::vector<clang::TypeDecl*> ordered_decls_;
QualTypeSet collected_; QualTypeSet collected_;
QualTypeSet seen_; QualTypeSet seen_;
}; };