Implement (and test) ProcessReader::Modules().

This depended on MachOImageReader, which recently landed, so it can now
be added.

TEST=util_test ProcessReader.*Modules
R=rsesek@chromium.org

Review URL: https://codereview.chromium.org/546573002
This commit is contained in:
Mark Mentovai 2014-09-05 13:43:51 -04:00
parent 5a0b7827ff
commit 9dd0ac943a
3 changed files with 335 additions and 13 deletions

View File

@ -24,6 +24,8 @@
#include "base/mac/mach_logging.h"
#include "base/mac/scoped_mach_port.h"
#include "base/mac/scoped_mach_vm.h"
#include "util/mac/mach_o_image_reader.h"
#include "util/mac/process_types.h"
#include "util/misc/scoped_forbid_return.h"
namespace {
@ -352,11 +354,119 @@ void ProcessReader::InitializeModules() {
initialized_modules_ = true;
// TODO(mark): Complete this implementation. The implementation depends on
// process_types, which cannot land yet because it depends on this file,
// process_reader. This temporary “cut” was made to avoid a review that's too
// large. Yes, this circular dependency is unfortunate. Suggestions are
// welcome.
// This API only works on Mac OS X 10.6 and higher. On Mac OS X 10.5, find the
// “_dyld_all_image_infos” symbol in the loaded LC_LOAD_DYLINKER (dyld).
task_dyld_info_data_t dyld_info;
mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
kern_return_t kr = task_info(
task_, TASK_DYLD_INFO, reinterpret_cast<task_info_t>(&dyld_info), &count);
if (kr != KERN_SUCCESS) {
MACH_LOG(WARNING, kr) << "task_info";
return;
}
// TODO(mark): Deal with statically linked executables which don't use dyld.
// This may look for the module that matches the executable path in the same
// data set that vmmap uses.
#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7
// The task_dyld_info_data_t struct grew in 10.7, adding the format field.
// Don't check this field if it's not present, which can happen when either
// the SDK used at compile time or the kernel at run time are too old and
// don't know about it.
if (count >= TASK_DYLD_INFO_COUNT) {
const integer_t kExpectedFormat =
!Is64Bit() ? TASK_DYLD_ALL_IMAGE_INFO_32 : TASK_DYLD_ALL_IMAGE_INFO_64;
if (dyld_info.all_image_info_format != kExpectedFormat) {
LOG(WARNING) << "unexpected task_dyld_info_data_t::all_image_info_format "
<< dyld_info.all_image_info_format;
DCHECK_EQ(dyld_info.all_image_info_format, kExpectedFormat);
return;
}
}
#endif
process_types::dyld_all_image_infos all_image_infos;
if (!all_image_infos.Read(this, dyld_info.all_image_info_addr)) {
LOG(WARNING) << "could not read dyld_all_image_infos";
return;
}
// Note that all_image_infos.infoArrayCount may be 0 if a crash occurred while
// dyld was loading the executable. This can happen if a required dynamic
// library was not found.
DCHECK_GE(all_image_infos.version, 1u);
DCHECK_NE(all_image_infos.infoArray, static_cast<mach_vm_address_t>(NULL));
std::vector<process_types::dyld_image_info> image_info_vector(
all_image_infos.infoArrayCount);
if (!process_types::dyld_image_info::ReadArrayInto(this,
all_image_infos.infoArray,
image_info_vector.size(),
&image_info_vector[0])) {
LOG(WARNING) << "could not read dyld_image_info array";
return;
}
bool found_dyld = false;
for (const process_types::dyld_image_info& image_info : image_info_vector) {
ProcessReaderModule module;
module.address = image_info.imageLoadAddress;
module.timestamp = image_info.imageFileModDate;
if (!task_memory_->ReadCString(image_info.imageFilePath, &module.name)) {
LOG(WARNING) << "could not read dyld_image_info::imageFilePath";
// Proceed anyway with an empty module name.
}
modules_.push_back(module);
if (all_image_infos.version >= 2 && all_image_infos.dyldImageLoadAddress &&
image_info.imageLoadAddress == all_image_infos.dyldImageLoadAddress) {
found_dyld = true;
}
}
// all_image_infos.infoArray doesn't include an entry for dyld, but dyld is
// loaded into the process' address space as a module. Its load address is
// easily known given a sufficiently recent all_image_infos.version, but the
// timestamp and pathname are not given as they are for other modules.
//
// The timestamp is a lost cause, because the kernel doesn't record the
// timestamp of the dynamic linker at the time it's loaded in the same way
// that dyld records the timestamps of other modules when they're loaded. (The
// timestamp for the main executable is also not reported and appears as 0
// even when accessed via dyld APIs, because it's loaded by the kernel, not by
// dyld.)
//
// The name can be determined, but it's not as simple as hardcoding the
// default "/usr/lib/dyld" because an executable could have specified anything
// in its LC_LOAD_DYLINKER command.
if (!found_dyld && all_image_infos.version >= 2 &&
all_image_infos.dyldImageLoadAddress) {
ProcessReaderModule module;
module.address = all_image_infos.dyldImageLoadAddress;
module.timestamp = 0;
// Examine the executable's LC_LOAD_DYLINKER load command to find the path
// used to load dyld.
MachOImageReader executable;
if (all_image_infos.infoArrayCount >= 1 &&
executable.Initialize(this, modules_[0].address, modules_[0].name) &&
executable.FileType() == MH_EXECUTE &&
!executable.DylinkerName().empty()) {
module.name = executable.DylinkerName();
} else {
// Look inside dyld directly to find its preferred path.
MachOImageReader dyld;
if (dyld.Initialize(this, module.address, "(dyld)") &&
dyld.FileType() == MH_DYLINKER && !dyld.DylinkerName().empty()) {
module.name = dyld.DylinkerName();
}
}
// dyld is loaded in the process even if its path can't be determined.
modules_.push_back(module);
}
}
mach_vm_address_t ProcessReader::CalculateStackRegion(

View File

@ -69,8 +69,18 @@ struct ProcessReaderModule {
ProcessReaderModule();
~ProcessReaderModule();
//! \brief The pathname used to load the module from disk.
std::string name;
//! \brief The address where the base of the module is loaded in the remote
//! process.
mach_vm_address_t address;
//! \brief The module's timestamp.
//!
//! This field will be `0` if its value cannot be determined. It can only be
//! determined for images that are loaded by dyld, so it will be `0` for the
//! main executable and for dyld itself.
time_t timestamp;
};
@ -117,10 +127,13 @@ class ProcessReader {
//! \return Accesses the memory of the target task.
TaskMemory* Memory() { return task_memory_.get(); }
//! \return The threads that are in the task (process).
//! \return The threads that are in the task (process). The first element (at
//! index `0`) corresponds to the main thread.
const std::vector<ProcessReaderThread>& Threads();
//! \return The modules loaded in the process.
//! \return The modules loaded in the process. The first element (at index
//! `0`) corresponds to the main executable, and the final element
//! corresponds to the dynamic loader, dyld.
const std::vector<ProcessReaderModule>& Modules();
private:

View File

@ -15,22 +15,28 @@
#include "util/mac/process_reader.h"
#include <dispatch/dispatch.h>
#include <mach-o/dyld.h>
#include <mach-o/dyld_images.h>
#include <mach/mach.h>
#include <string.h>
#include <sys/stat.h>
#include <map>
#include <string>
#include <vector>
#include "base/logging.h"
#include "base/mac/scoped_mach_port.h"
#include "base/posix/eintr_wrapper.h"
#include "base/strings/stringprintf.h"
#include "build/build_config.h"
#include "gtest/gtest.h"
#include "util/file/fd_io.h"
#include "util/stdlib/pointer_container.h"
#include "util/test/errors.h"
#include "util/test/mac/dyld.h"
#include "util/test/mac/mach_errors.h"
#include "util/test/mac/mach_multiprocess.h"
#include "util/test/errors.h"
namespace {
@ -84,7 +90,7 @@ class ProcessReaderChild final : public MachMultiprocess {
int read_fd = ReadPipeFD();
mach_vm_address_t address;
int rv = ReadFD(read_fd, &address, sizeof(address));
ssize_t rv = ReadFD(read_fd, &address, sizeof(address));
ASSERT_EQ(static_cast<ssize_t>(sizeof(address)), rv)
<< ErrnoMessage("read");
@ -105,7 +111,7 @@ class ProcessReaderChild final : public MachMultiprocess {
mach_vm_address_t address =
reinterpret_cast<mach_vm_address_t>(kTestMemory);
int rv = WriteFD(write_fd, &address, sizeof(address));
ssize_t rv = WriteFD(write_fd, &address, sizeof(address));
ASSERT_EQ(static_cast<ssize_t>(sizeof(address)), rv)
<< ErrnoMessage("write");
@ -448,7 +454,7 @@ class ProcessReaderThreadedChild final : public MachMultiprocess {
thread_index < thread_count_ + 1;
++thread_index) {
uint64_t thread_id;
int rv = ReadFD(read_fd, &thread_id, sizeof(thread_id));
ssize_t rv = ReadFD(read_fd, &thread_id, sizeof(thread_id));
ASSERT_EQ(static_cast<ssize_t>(sizeof(thread_id)), rv)
<< ErrnoMessage("read");
@ -481,7 +487,7 @@ class ProcessReaderThreadedChild final : public MachMultiprocess {
// until the parent finished working with it.
int write_fd = WritePipeFD();
char c = '\0';
int rv = WriteFD(write_fd, &c, 1);
ssize_t rv = WriteFD(write_fd, &c, 1);
ASSERT_EQ(1, rv) << ErrnoMessage("write");
}
@ -498,7 +504,7 @@ class ProcessReaderThreadedChild final : public MachMultiprocess {
// to inspect it. Write an entry for it.
uint64_t thread_id = PthreadToThreadID(pthread_self());
int rv = WriteFD(write_fd, &thread_id, sizeof(thread_id));
ssize_t rv = WriteFD(write_fd, &thread_id, sizeof(thread_id));
ASSERT_EQ(static_cast<ssize_t>(sizeof(thread_id)), rv)
<< ErrnoMessage("write");
@ -567,4 +573,197 @@ TEST(ProcessReader, ChildSeveralThreads) {
process_reader_threaded_child.Run();
}
// Points a ProcessReader at the current task and checks that the module list
// it produces agrees with what dyld's own in-process APIs report.
TEST(ProcessReader, SelfModules) {
  ProcessReader self_reader;
  ASSERT_TRUE(self_reader.Initialize(mach_task_self()));

  const uint32_t image_count = _dyld_image_count();
  const std::vector<ProcessReaderModule>& module_list = self_reader.Modules();

  // At minimum there must be the main executable, one dylib, and dyld.
  ASSERT_GE(module_list.size(), 3u);

  // dyld does not count itself among its images, whereas the reader's module
  // list carries one extra entry for it.
  ASSERT_EQ(image_count + 1, module_list.size());

  for (uint32_t index = 0; index != image_count; ++index) {
    const ProcessReaderModule& entry = module_list[index];
    SCOPED_TRACE(base::StringPrintf(
        "index %u, name %s", index, entry.name.c_str()));

    const char* image_name = _dyld_get_image_name(index);
    EXPECT_EQ(image_name, entry.name);

    const mach_vm_address_t image_address =
        reinterpret_cast<mach_vm_address_t>(_dyld_get_image_header(index));
    EXPECT_EQ(image_address, entry.address);

    if (index != 0) {
      // This only holds as long as the module hasn't been modified on disk
      // since it was loaded.
      struct stat image_stat;
      const int stat_rv = stat(image_name, &image_stat);
      EXPECT_EQ(0, stat_rv) << ErrnoMessage("stat");
      if (stat_rv == 0) {
        EXPECT_EQ(image_stat.st_mtime, entry.timestamp);
      }
    } else {
      // The main executable isn't loaded by dyld, so dyld has no timestamp
      // for it and 0 is reported.
      EXPECT_EQ(0, entry.timestamp);
    }
  }

  // The final entry is the one for dyld itself.
  const ProcessReaderModule& dyld_entry = module_list.back();
  EXPECT_EQ("/usr/lib/dyld", dyld_entry.name);

  // dyld doesn't load itself either, so its timestamp is reported as 0 too.
  EXPECT_EQ(0, dyld_entry.timestamp);

  const struct dyld_all_image_infos* all_infos = _dyld_get_all_image_infos();
  if (all_infos->version >= 2) {
    EXPECT_EQ(
        reinterpret_cast<mach_vm_address_t>(all_infos->dyldImageLoadAddress),
        dyld_entry.address);
  }
}
class ProcessReaderModulesChild final : public MachMultiprocess {
public:
ProcessReaderModulesChild() : MachMultiprocess() {}
~ProcessReaderModulesChild() {}
private:
void MachMultiprocessParent() override {
ProcessReader process_reader;
ASSERT_TRUE(process_reader.Initialize(ChildTask()));
const std::vector<ProcessReaderModule>& modules = process_reader.Modules();
// There needs to be at least an entry for the main executable, for a dylib,
// and for dyld.
ASSERT_GE(modules.size(), 3u);
int read_fd = ReadPipeFD();
uint32_t expect_modules;
ssize_t rv = ReadFD(read_fd, &expect_modules, sizeof(expect_modules));
ASSERT_EQ(static_cast<ssize_t>(sizeof(expect_modules)), rv)
<< ErrnoMessage("read");
ASSERT_EQ(expect_modules, modules.size());
for (size_t index = 0; index < modules.size(); ++index) {
SCOPED_TRACE(base::StringPrintf(
"index %zu, name %s", index, modules[index].name.c_str()));
uint32_t expect_name_length;
rv = ReadFD(
read_fd, &expect_name_length, sizeof(expect_name_length));
ASSERT_EQ(static_cast<ssize_t>(sizeof(expect_name_length)), rv)
<< ErrnoMessage("read");
// The NUL terminator is not read.
std::string expect_name(expect_name_length, '\0');
rv = ReadFD(read_fd, &expect_name[0], expect_name_length);
ASSERT_EQ(static_cast<ssize_t>(expect_name_length), rv)
<< ErrnoMessage("read");
EXPECT_EQ(expect_name, modules[index].name);
mach_vm_address_t expect_address;
rv = ReadFD(read_fd, &expect_address, sizeof(expect_address));
ASSERT_EQ(static_cast<ssize_t>(sizeof(expect_address)), rv)
<< ErrnoMessage("read");
EXPECT_EQ(expect_address, modules[index].address);
if (index == 0 || index == modules.size() - 1) {
// dyld didnt load the main executable or itself, so it couldnt record
// these timestamps, and they are reported as 0.
EXPECT_EQ(0, modules[index].timestamp);
} else {
// Hope that the module didnt change on disk.
struct stat stat_buf;
int rv = stat(expect_name.c_str(), &stat_buf);
EXPECT_EQ(0, rv) << ErrnoMessage("stat");
if (rv == 0) {
EXPECT_EQ(stat_buf.st_mtime, modules[index].timestamp);
}
}
}
// Tell the child that its OK to exit. The child needed to be kept alive
// until the parent finished working with it.
int write_fd = WritePipeFD();
char c = '\0';
rv = WriteFD(write_fd, &c, 1);
ASSERT_EQ(1, rv) << ErrnoMessage("write");
}
void MachMultiprocessChild() override {
int write_fd = WritePipeFD();
uint32_t dyld_image_count = _dyld_image_count();
const struct dyld_all_image_infos* dyld_image_infos =
_dyld_get_all_image_infos();
uint32_t write_image_count = dyld_image_count;
if (dyld_image_infos->version >= 2) {
// dyld_image_count doesnt include an entry for dyld itself, but one will
// be written.
++write_image_count;
}
ssize_t rv = WriteFD(
write_fd, &write_image_count, sizeof(write_image_count));
ASSERT_EQ(static_cast<ssize_t>(sizeof(write_image_count)), rv)
<< ErrnoMessage("write");
for (size_t index = 0; index < write_image_count; ++index) {
const char* dyld_image_name;
mach_vm_address_t dyld_image_address;
if (index < dyld_image_count) {
dyld_image_name = _dyld_get_image_name(index);
dyld_image_address =
reinterpret_cast<mach_vm_address_t>(_dyld_get_image_header(index));
} else {
dyld_image_name = "/usr/lib/dyld";
dyld_image_address = reinterpret_cast<mach_vm_address_t>(
dyld_image_infos->dyldImageLoadAddress);
}
uint32_t dyld_image_name_length = strlen(dyld_image_name);
rv = WriteFD(
write_fd, &dyld_image_name_length, sizeof(dyld_image_name_length));
ASSERT_EQ(static_cast<ssize_t>(sizeof(dyld_image_name_length)), rv)
<< ErrnoMessage("write");
// The NUL terminator is not written.
rv = WriteFD(write_fd, dyld_image_name, dyld_image_name_length);
ASSERT_EQ(static_cast<ssize_t>(dyld_image_name_length), rv)
<< ErrnoMessage("write");
rv = WriteFD(write_fd, &dyld_image_address, sizeof(dyld_image_address));
ASSERT_EQ(static_cast<ssize_t>(sizeof(dyld_image_address)), rv)
<< ErrnoMessage("write");
}
// Wait for the parent to say that its OK to exit.
int read_fd = ReadPipeFD();
char c;
rv = ReadFD(read_fd, &c, 1);
ASSERT_EQ(1, rv) << ErrnoMessage("read");
}
DISALLOW_COPY_AND_ASSIGN(ProcessReaderModulesChild);
};
// Runs the parent/child module comparison implemented by
// ProcessReaderModulesChild.
TEST(ProcessReader, ChildModules) {
  ProcessReaderModulesChild modules_child;
  modules_child.Run();
}
} // namespace