diff --git a/util/mac/process_reader.cc b/util/mac/process_reader.cc
index 7013f7ec..fbe5ff07 100644
--- a/util/mac/process_reader.cc
+++ b/util/mac/process_reader.cc
@@ -24,6 +24,8 @@
 #include "base/mac/mach_logging.h"
 #include "base/mac/scoped_mach_port.h"
 #include "base/mac/scoped_mach_vm.h"
+#include "util/mac/mach_o_image_reader.h"
+#include "util/mac/process_types.h"
 #include "util/misc/scoped_forbid_return.h"
 
 namespace {
@@ -352,11 +354,124 @@ void ProcessReader::InitializeModules() {
 
   initialized_modules_ = true;
 
-  // TODO(mark): Complete this implementation. The implementation depends on
-  // process_types, which cannot land yet because it depends on this file,
-  // process_reader. This temporary “cut” was made to avoid a review that’s too
-  // large. Yes, this circular dependency is unfortunate. Suggestions are
-  // welcome.
+  // This API only works on Mac OS X 10.6 and higher. On Mac OS X 10.5, find the
+  // “_dyld_all_image_infos” symbol in the loaded LC_LOAD_DYLINKER (dyld).
+  task_dyld_info_data_t dyld_info;
+  mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
+  kern_return_t kr = task_info(
+      task_, TASK_DYLD_INFO, reinterpret_cast(&dyld_info), &count);
+  if (kr != KERN_SUCCESS) {
+    MACH_LOG(WARNING, kr) << "task_info";
+    return;
+  }
+
+  // TODO(mark): Deal with statically linked executables which don’t use dyld.
+  // This may look for the module that matches the executable path in the same
+  // data set that vmmap uses.
+
+#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7
+  // The task_dyld_info_data_t struct grew in 10.7, adding the format field.
+  // Don’t check this field if it’s not present, which can happen when either
+  // the SDK used at compile time or the kernel at run time are too old and
+  // don’t know about it.
+  if (count >= TASK_DYLD_INFO_COUNT) {
+    const integer_t kExpectedFormat =
+        !Is64Bit() ? TASK_DYLD_ALL_IMAGE_INFO_32 : TASK_DYLD_ALL_IMAGE_INFO_64;
+    if (dyld_info.all_image_info_format != kExpectedFormat) {
+      LOG(WARNING) << "unexpected task_dyld_info_data_t::all_image_info_format "
+                   << dyld_info.all_image_info_format;
+      DCHECK_EQ(dyld_info.all_image_info_format, kExpectedFormat);
+      return;
+    }
+  }
+#endif
+
+  process_types::dyld_all_image_infos all_image_infos;
+  if (!all_image_infos.Read(this, dyld_info.all_image_info_addr)) {
+    LOG(WARNING) << "could not read dyld_all_image_infos";
+    return;
+  }
+
+  // Note that all_image_infos.infoArrayCount may be 0 if a crash occurred while
+  // dyld was loading the executable. This can happen if a required dynamic
+  // library was not found.
+  DCHECK_GE(all_image_infos.version, 1u);
+  DCHECK_NE(all_image_infos.infoArray, static_cast(NULL));
+
+  std::vector image_info_vector(
+      all_image_infos.infoArrayCount);
+
+  // infoArrayCount may legitimately be 0 (see above). Taking the address of
+  // element 0 of an empty vector is undefined behavior, so only read the array
+  // when there is at least one entry to read.
+  if (!image_info_vector.empty() &&
+      !process_types::dyld_image_info::ReadArrayInto(this,
+                                                     all_image_infos.infoArray,
+                                                     image_info_vector.size(),
+                                                     &image_info_vector[0])) {
+    LOG(WARNING) << "could not read dyld_image_info array";
+    return;
+  }
+
+  bool found_dyld = false;
+  for (const process_types::dyld_image_info& image_info : image_info_vector) {
+    ProcessReaderModule module;
+    module.address = image_info.imageLoadAddress;
+    module.timestamp = image_info.imageFileModDate;
+    if (!task_memory_->ReadCString(image_info.imageFilePath, &module.name)) {
+      LOG(WARNING) << "could not read dyld_image_info::imageFilePath";
+      // Proceed anyway with an empty module name.
+    }
+
+    modules_.push_back(module);
+
+    if (all_image_infos.version >= 2 && all_image_infos.dyldImageLoadAddress &&
+        image_info.imageLoadAddress == all_image_infos.dyldImageLoadAddress) {
+      found_dyld = true;
+    }
+  }
+
+  // all_image_infos.infoArray doesn’t include an entry for dyld, but dyld is
+  // loaded into the process’ address space as a module. Its load address is
+  // easily known given a sufficiently recent all_image_infos.version, but the
+  // timestamp and pathname are not given as they are for other modules.
+  //
+  // The timestamp is a lost cause, because the kernel doesn’t record the
+  // timestamp of the dynamic linker at the time it’s loaded in the same way
+  // that dyld records the timestamps of other modules when they’re loaded. (The
+  // timestamp for the main executable is also not reported and appears as 0
+  // even when accessed via dyld APIs, because it’s loaded by the kernel, not by
+  // dyld.)
+  //
+  // The name can be determined, but it’s not as simple as hardcoding the
+  // default "/usr/lib/dyld" because an executable could have specified anything
+  // in its LC_LOAD_DYLINKER command.
+  if (!found_dyld && all_image_infos.version >= 2 &&
+      all_image_infos.dyldImageLoadAddress) {
+    ProcessReaderModule module;
+    module.address = all_image_infos.dyldImageLoadAddress;
+    module.timestamp = 0;
+
+    // Examine the executable’s LC_LOAD_DYLINKER load command to find the path
+    // used to load dyld.
+    MachOImageReader executable;
+    if (all_image_infos.infoArrayCount >= 1 &&
+        executable.Initialize(this, modules_[0].address, modules_[0].name) &&
+        executable.FileType() == MH_EXECUTE &&
+        !executable.DylinkerName().empty()) {
+      module.name = executable.DylinkerName();
+    } else {
+      // Look inside dyld directly to find its preferred path.
+      MachOImageReader dyld;
+      if (dyld.Initialize(this, module.address, "(dyld)") &&
+          dyld.FileType() == MH_DYLINKER && !dyld.DylinkerName().empty()) {
+        module.name = dyld.DylinkerName();
+      }
+    }
+
+    // dyld is loaded in the process even if its path can’t be determined.
+    modules_.push_back(module);
+  }
 }
 
 mach_vm_address_t ProcessReader::CalculateStackRegion(
diff --git a/util/mac/process_reader.h b/util/mac/process_reader.h
index 85f60adf..56d99954 100644
--- a/util/mac/process_reader.h
+++ b/util/mac/process_reader.h
@@ -69,8 +69,18 @@ struct ProcessReaderModule {
   ProcessReaderModule();
   ~ProcessReaderModule();
 
+  //! \brief The pathname used to load the module from disk.
   std::string name;
+
+  //! \brief The address where the base of the module is loaded in the remote
+  //!     process.
   mach_vm_address_t address;
+
+  //! \brief The module’s timestamp.
+  //!
+  //! This field will be `0` if its value cannot be determined. It can only be
+  //! determined for images that are loaded by dyld, so it will be `0` for the
+  //! main executable and for dyld itself.
   time_t timestamp;
 };
 
@@ -117,10 +127,13 @@ class ProcessReader {
   //! \return Accesses the memory of the target task.
   TaskMemory* Memory() { return task_memory_.get(); }
 
-  //! \return The threads that are in the task (process).
+  //! \return The threads that are in the task (process). The first element (at
+  //!     index `0`) corresponds to the main thread.
   const std::vector& Threads();
 
-  //! \return The modules loaded in the process.
+  //! \return The modules loaded in the process. The first element (at index
+  //!     `0`) corresponds to the main executable, and the final element
+  //!     corresponds to the dynamic loader, dyld.
   const std::vector& Modules();
 
  private:
diff --git a/util/mac/process_reader_test.cc b/util/mac/process_reader_test.cc
index 10a6352a..d09bc0f1 100644
--- a/util/mac/process_reader_test.cc
+++ b/util/mac/process_reader_test.cc
@@ -15,22 +15,28 @@
 #include "util/mac/process_reader.h"
 
 #include
+#include
+#include
 #include
 #include
+#include
 
 #include
 #include
+#include
 
 #include "base/logging.h"
 #include "base/mac/scoped_mach_port.h"
 #include "base/posix/eintr_wrapper.h"
+#include "base/strings/stringprintf.h"
 #include "build/build_config.h"
 #include "gtest/gtest.h"
 #include "util/file/fd_io.h"
 #include "util/stdlib/pointer_container.h"
+#include "util/test/errors.h"
+#include "util/test/mac/dyld.h"
 #include "util/test/mac/mach_errors.h"
 #include "util/test/mac/mach_multiprocess.h"
-#include "util/test/errors.h"
 
 namespace {
 
@@ -84,7 +90,7 @@ class ProcessReaderChild final : public MachMultiprocess {
     int read_fd = ReadPipeFD();
 
     mach_vm_address_t address;
-    int rv = ReadFD(read_fd, &address, sizeof(address));
+    ssize_t rv = ReadFD(read_fd, &address, sizeof(address));
     ASSERT_EQ(static_cast(sizeof(address)), rv)
         << ErrnoMessage("read");
 
@@ -105,7 +111,7 @@ class ProcessReaderChild final : public MachMultiprocess {
 
     mach_vm_address_t address =
         reinterpret_cast(kTestMemory);
-    int rv = WriteFD(write_fd, &address, sizeof(address));
+    ssize_t rv = WriteFD(write_fd, &address, sizeof(address));
     ASSERT_EQ(static_cast(sizeof(address)), rv)
         << ErrnoMessage("write");
 
@@ -448,7 +454,7 @@ class ProcessReaderThreadedChild final : public MachMultiprocess {
          thread_index < thread_count_ + 1;
          ++thread_index) {
       uint64_t thread_id;
-      int rv = ReadFD(read_fd, &thread_id, sizeof(thread_id));
+      ssize_t rv = ReadFD(read_fd, &thread_id, sizeof(thread_id));
       ASSERT_EQ(static_cast(sizeof(thread_id)), rv)
           << ErrnoMessage("read");
 
@@ -481,7 +487,7 @@ class ProcessReaderThreadedChild final : public MachMultiprocess {
     // until the parent finished working with it.
     int write_fd = WritePipeFD();
     char c = '\0';
-    int rv = WriteFD(write_fd, &c, 1);
+    ssize_t rv = WriteFD(write_fd, &c, 1);
     ASSERT_EQ(1, rv) << ErrnoMessage("write");
   }
 
@@ -498,7 +504,7 @@ class ProcessReaderThreadedChild final : public MachMultiprocess {
     // to inspect it. Write an entry for it.
     uint64_t thread_id = PthreadToThreadID(pthread_self());
 
-    int rv = WriteFD(write_fd, &thread_id, sizeof(thread_id));
+    ssize_t rv = WriteFD(write_fd, &thread_id, sizeof(thread_id));
     ASSERT_EQ(static_cast(sizeof(thread_id)), rv)
         << ErrnoMessage("write");
 
@@ -567,4 +573,197 @@ TEST(ProcessReader, ChildSeveralThreads) {
   process_reader_threaded_child.Run();
 }
 
+TEST(ProcessReader, SelfModules) {
+  ProcessReader process_reader;
+  ASSERT_TRUE(process_reader.Initialize(mach_task_self()));
+
+  uint32_t dyld_image_count = _dyld_image_count();
+  const std::vector& modules = process_reader.Modules();
+
+  // There needs to be at least an entry for the main executable, for a dylib,
+  // and for dyld.
+  ASSERT_GE(modules.size(), 3u);
+
+  // dyld_image_count doesn’t include an entry for dyld itself, but |modules|
+  // does.
+  ASSERT_EQ(dyld_image_count + 1, modules.size());
+
+  for (uint32_t index = 0; index < dyld_image_count; ++index) {
+    SCOPED_TRACE(base::StringPrintf(
+        "index %u, name %s", index, modules[index].name.c_str()));
+
+    const char* dyld_image_name = _dyld_get_image_name(index);
+    EXPECT_EQ(dyld_image_name, modules[index].name);
+    EXPECT_EQ(
+        reinterpret_cast(_dyld_get_image_header(index)),
+        modules[index].address);
+
+    if (index == 0) {
+      // dyld didn’t load the main executable, so it couldn’t record its
+      // timestamp, and it is reported as 0.
+      EXPECT_EQ(0, modules[index].timestamp);
+    } else {
+      // Hope that the module didn’t change on disk.
+      struct stat stat_buf;
+      int rv = stat(dyld_image_name, &stat_buf);
+      EXPECT_EQ(0, rv) << ErrnoMessage("stat");
+      if (rv == 0) {
+        EXPECT_EQ(stat_buf.st_mtime, modules[index].timestamp);
+      }
+    }
+  }
+
+  size_t index = modules.size() - 1;
+  EXPECT_EQ("/usr/lib/dyld", modules[index].name);
+
+  // dyld didn’t load itself either, so it couldn’t record its timestamp, and it
+  // is also reported as 0.
+  EXPECT_EQ(0, modules[index].timestamp);
+
+  const struct dyld_all_image_infos* dyld_image_infos =
+      _dyld_get_all_image_infos();
+  if (dyld_image_infos->version >= 2) {
+    EXPECT_EQ(reinterpret_cast(
+        dyld_image_infos->dyldImageLoadAddress), modules[index].address);
+  }
+}
+
+class ProcessReaderModulesChild final : public MachMultiprocess {
+ public:
+  ProcessReaderModulesChild() : MachMultiprocess() {}
+
+  ~ProcessReaderModulesChild() {}
+
+ private:
+  void MachMultiprocessParent() override {
+    ProcessReader process_reader;
+    ASSERT_TRUE(process_reader.Initialize(ChildTask()));
+
+    const std::vector& modules = process_reader.Modules();
+
+    // There needs to be at least an entry for the main executable, for a dylib,
+    // and for dyld.
+    ASSERT_GE(modules.size(), 3u);
+
+    int read_fd = ReadPipeFD();
+
+    uint32_t expect_modules;
+    ssize_t rv = ReadFD(read_fd, &expect_modules, sizeof(expect_modules));
+    ASSERT_EQ(static_cast(sizeof(expect_modules)), rv)
+        << ErrnoMessage("read");
+
+    ASSERT_EQ(expect_modules, modules.size());
+
+    for (size_t index = 0; index < modules.size(); ++index) {
+      SCOPED_TRACE(base::StringPrintf(
+          "index %zu, name %s", index, modules[index].name.c_str()));
+
+      uint32_t expect_name_length;
+      rv = ReadFD(
+          read_fd, &expect_name_length, sizeof(expect_name_length));
+      ASSERT_EQ(static_cast(sizeof(expect_name_length)), rv)
+          << ErrnoMessage("read");
+
+      // The NUL terminator is not read.
+      std::string expect_name(expect_name_length, '\0');
+      rv = ReadFD(read_fd, &expect_name[0], expect_name_length);
+      ASSERT_EQ(static_cast(expect_name_length), rv)
+          << ErrnoMessage("read");
+
+      EXPECT_EQ(expect_name, modules[index].name);
+
+      mach_vm_address_t expect_address;
+      rv = ReadFD(read_fd, &expect_address, sizeof(expect_address));
+      ASSERT_EQ(static_cast(sizeof(expect_address)), rv)
+          << ErrnoMessage("read");
+
+      EXPECT_EQ(expect_address, modules[index].address);
+
+      if (index == 0 || index == modules.size() - 1) {
+        // dyld didn’t load the main executable or itself, so it couldn’t record
+        // these timestamps, and they are reported as 0.
+        EXPECT_EQ(0, modules[index].timestamp);
+      } else {
+        // Hope that the module didn’t change on disk.
+        struct stat stat_buf;
+        int stat_rv = stat(expect_name.c_str(), &stat_buf);
+        EXPECT_EQ(0, stat_rv) << ErrnoMessage("stat");
+        if (stat_rv == 0) {
+          EXPECT_EQ(stat_buf.st_mtime, modules[index].timestamp);
+        }
+      }
+    }
+
+    // Tell the child that it’s OK to exit. The child needed to be kept alive
+    // until the parent finished working with it.
+    int write_fd = WritePipeFD();
+    char c = '\0';
+    rv = WriteFD(write_fd, &c, 1);
+    ASSERT_EQ(1, rv) << ErrnoMessage("write");
+  }
+
+  void MachMultiprocessChild() override {
+    int write_fd = WritePipeFD();
+
+    uint32_t dyld_image_count = _dyld_image_count();
+    const struct dyld_all_image_infos* dyld_image_infos =
+        _dyld_get_all_image_infos();
+
+    uint32_t write_image_count = dyld_image_count;
+    if (dyld_image_infos->version >= 2) {
+      // dyld_image_count doesn’t include an entry for dyld itself, but one will
+      // be written.
+      ++write_image_count;
+    }
+
+    ssize_t rv = WriteFD(
+        write_fd, &write_image_count, sizeof(write_image_count));
+    ASSERT_EQ(static_cast(sizeof(write_image_count)), rv)
+        << ErrnoMessage("write");
+
+    for (size_t index = 0; index < write_image_count; ++index) {
+      const char* dyld_image_name;
+      mach_vm_address_t dyld_image_address;
+
+      if (index < dyld_image_count) {
+        dyld_image_name = _dyld_get_image_name(index);
+        dyld_image_address =
+            reinterpret_cast(_dyld_get_image_header(index));
+      } else {
+        dyld_image_name = "/usr/lib/dyld";
+        dyld_image_address = reinterpret_cast(
+            dyld_image_infos->dyldImageLoadAddress);
+      }
+
+      uint32_t dyld_image_name_length = strlen(dyld_image_name);
+      rv = WriteFD(
+          write_fd, &dyld_image_name_length, sizeof(dyld_image_name_length));
+      ASSERT_EQ(static_cast(sizeof(dyld_image_name_length)), rv)
+          << ErrnoMessage("write");
+
+      // The NUL terminator is not written.
+      rv = WriteFD(write_fd, dyld_image_name, dyld_image_name_length);
+      ASSERT_EQ(static_cast(dyld_image_name_length), rv)
+          << ErrnoMessage("write");
+
+      rv = WriteFD(write_fd, &dyld_image_address, sizeof(dyld_image_address));
+      ASSERT_EQ(static_cast(sizeof(dyld_image_address)), rv)
+          << ErrnoMessage("write");
+    }
+
+    // Wait for the parent to say that it’s OK to exit.
+    int read_fd = ReadPipeFD();
+    char c;
+    rv = ReadFD(read_fd, &c, 1);
+    ASSERT_EQ(1, rv) << ErrnoMessage("read");
+  }
+
+  DISALLOW_COPY_AND_ASSIGN(ProcessReaderModulesChild);
+};
+
+TEST(ProcessReader, ChildModules) {
+  ProcessReaderModulesChild process_reader_modules_child;
+  process_reader_modules_child.Run();
+}
+
 }  // namespace