preload symbol names for each loaded, non-library module

This commit is contained in:
2025-03-21 00:32:22 +02:00
parent 5bcff694f7
commit 7ecb0b7936
4 changed files with 72 additions and 61 deletions

View File

@@ -38,6 +38,9 @@ set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fuse-ld=lld")
# set(CMAKE_CXX_FLAGS_DEBUG "-g -O0") # set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
# Enable address sanitizer (optional) # Enable address sanitizer (optional)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-omit-frame-pointer")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address") # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
# set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address")

View File

@@ -1,7 +1,6 @@
#include "hiload/hiload.h" #include "hiload/hiload.h"
#include "logger.h" #include "logger.h"
#include "logger/sc_log.h"
#include "memory.h" #include "memory.h"
#include "symbols.h" #include "symbols.h"
#include "types.h" #include "types.h"
@@ -17,7 +16,7 @@ typedef struct {
struct sc_array_str names; // Array of library names struct sc_array_str names; // Array of library names
struct sc_array_ptr handles; // Array of library handles struct sc_array_ptr handles; // Array of library handles
struct sc_array_syms symbols; // Symbol info for modules struct sc_array_syms symbols; // Symbol info for modules
size_t count; // Number of libraries size_t count; // Number of modules
} ModuleInfos; } ModuleInfos;
typedef struct { typedef struct {
@@ -27,6 +26,22 @@ typedef struct {
static HiloadContext context = {0}; static HiloadContext context = {0};
static ModuleInfos *module_infos = 0; static ModuleInfos *module_infos = 0;
// Substrings of module path names that are excluded from early symbol
// gathering: such modules are unlikely to be changed, and preloading their
// symbols would only bloat our memory. The list is NULL-terminated.
const char *path_filter_list[] = {
    "libstdc++.",
    "libc++.",
    "libc.",
    "libm.",
    "libgcc",
    "ld-linux-",
    NULL,
};
/**
 * Decides whether symbols should be preloaded for the given module.
 *
 * Returns 0 if the module's path name contains any entry of
 * path_filter_list, and 1 otherwise.
 */
static inline int if_load_symbols_for_module(struct dl_phdr_info *info) {
  const char *module_path = info->dlpi_name;
  const char **filter = path_filter_list;

  // Walk the NULL-terminated filter list; the first hit rejects the module.
  while (*filter != NULL) {
    if (strstr(module_path, *filter) != NULL) {
      return 0;
    }
    filter++;
  }

  return 1;
}
// Callback function for dl_iterate_phdr // Callback function for dl_iterate_phdr
static int gather_module_infos_callback(struct dl_phdr_info *info, size_t size, static int gather_module_infos_callback(struct dl_phdr_info *info, size_t size,
void *data) { void *data) {
@@ -40,23 +55,29 @@ static int gather_module_infos_callback(struct dl_phdr_info *info, size_t size,
// Try to get the handle // Try to get the handle
void *handle = dlopen(info->dlpi_name, RTLD_LAZY | RTLD_NOLOAD); void *handle = dlopen(info->dlpi_name, RTLD_LAZY | RTLD_NOLOAD);
sc_array_add(&infos->handles, handle); assert(handle);
infos->count++;
sc_array_add(&infos->handles, handle);
sc_log_debug(" size: %u\n", size); sc_log_debug(" size: %u\n", size);
sc_log_debug(" handle: %p\n", sc_array_last(&infos->handles)); sc_log_debug(" handle: %p\n", sc_array_last(&infos->handles));
sc_log_debug(" dlpi_addr: %p\n", info->dlpi_addr); sc_log_debug(" dlpi_addr: %p\n", info->dlpi_addr);
sc_log_debug(" dlpi_tls_modid: %zu\n", info->dlpi_tls_modid); sc_log_debug(" dlpi_tls_modid: %zu\n", info->dlpi_tls_modid);
sc_log_debug(" dlpi_tls_data: %p\n", info->dlpi_tls_data); sc_log_debug(" dlpi_tls_data: %p\n", info->dlpi_tls_data);
SymbolInfos symbol_info = {0}; SymbolInfos symbol_info = {0};
sc_array_add(&infos->symbols, symbol_info); sc_array_add(&infos->symbols, symbol_info);
if (!if_load_symbols_for_module(info)) {
sc_log_info("Skipping symbol preload for: %s\n", info->dlpi_name);
return 0;
}
if (hi_create_symbol_info(&(sc_array_last(&infos->symbols)), if (hi_create_symbol_info(&(sc_array_last(&infos->symbols)),
&context.memory_regions, info) != HILOAD_OK) { &context.memory_regions, info) != HILOAD_OK) {
sc_log_error("Failed to create symbol info for %s\n", sc_log_error("Failed to create symbol info for %s\n", info->dlpi_name);
info->dlpi_name);
} }
infos->count++;
return 0; // Continue iteration return 0; // Continue iteration
} }
@@ -67,9 +88,7 @@ static void free_module_infos(ModuleInfos *modules) {
for (size_t i = 0; i < modules->count; i++) { for (size_t i = 0; i < modules->count; i++) {
// Free char* before clearing the array // Free char* before clearing the array
const char *n = 0; const char *n = 0;
sc_array_foreach(&modules->names, n) { sc_array_foreach(&modules->names, n) { free((void *)n); }
free((void*)n);
}
sc_array_term(&modules->names); sc_array_term(&modules->names);
// Use a destructor for the symbolinfos // Use a destructor for the symbolinfos
@@ -167,34 +186,12 @@ static ReloadResult reload_module(ModuleInfos *modules, const char *filename,
return HI_RELOAD_SUCCESS; return HI_RELOAD_SUCCESS;
} }
/**
 * Prints a one-line, human-readable summary of a module reload to stdout.
 *
 * result   - outcome code of the reload attempt
 * filename - module name inserted into the printed message
 */
static void print_reload_result(ReloadResult result, const char *filename) {
  // Any result code without a dedicated message is reported as unknown.
  const char *format = "Unknown error reloading module: %s\n";

  if (result == HI_RELOAD_SUCCESS) {
    format = "Successfully reloaded module: %s\n";
  } else if (result == HI_RELOAD_NOT_FOUND) {
    format = "Module not found: %s\n";
  } else if (result == HI_RELOAD_CLOSE_ERROR) {
    format = "Error closing module: %s\n";
  } else if (result == HI_RELOAD_OPEN_ERROR) {
    format = "Error reopening module: %s\n";
  }

  printf(format, filename);
}
ReloadResult hi_reload_module(const char *module_name) { ReloadResult hi_reload_module(const char *module_name) {
assert(module_infos); assert(module_infos);
void *new_handle = NULL; void *new_handle = NULL;
ReloadResult result = reload_module(module_infos, module_name, &new_handle); ReloadResult result = reload_module(module_infos, module_name, &new_handle);
print_reload_result(result, module_name);
return result; return result;
} }
@@ -215,8 +212,8 @@ void hi_print_module_infos() {
Dl_info info = {0}; Dl_info info = {0};
int has_info = 0; int has_info = 0;
sc_log_debug("%s: %p\n", sc_array_at(&modules->names, i), sc_array_at(&modules->handles, i)); sc_log_debug("'%s': %p\n", sc_array_at(&modules->names, i),
sc_array_at(&modules->handles, i));
const SymbolInfos *symbols = &sc_array_at(&modules->symbols, i); const SymbolInfos *symbols = &sc_array_at(&modules->symbols, i);
for (int j = 0; j < sc_array_size(&symbols->names); j++) { for (int j = 0; j < sc_array_size(&symbols->names); j++) {
@@ -225,7 +222,6 @@ void hi_print_module_infos() {
sc_log_debug(" %p: %s\n", addr, name); sc_log_debug(" %p: %s\n", addr, name);
} }
sc_log_debug("\n"); sc_log_debug("\n");
} }
} }

View File

@@ -14,6 +14,9 @@ HiloadResult memory_find_pointer(uptr ptr,
size_t *index) { size_t *index) {
for (size_t i = 0; i < sc_array_size(regions); i++) { for (size_t i = 0; i < sc_array_size(regions); i++) {
uptr start = regions->elems[i].region_start; uptr start = regions->elems[i].region_start;
// we assume a sorted region by start address, so we can do a quick discard here.
// very useful for relative vs absolute address checks
if (ptr < start) return HILOAD_FAIL;
uptr end = regions->elems[i].region_end; uptr end = regions->elems[i].region_end;
if (ptr_in_range(ptr, start, end)) { if (ptr_in_range(ptr, start, end)) {
if (index) if (index)

View File

@@ -22,9 +22,6 @@ add_ptr_offset_if_invalid(uptr p, uptr offset,
/** /**
* Gathers and populates symbols, given a dynamic module info * Gathers and populates symbols, given a dynamic module info
*
* Will clear and free the given SymbolInfo struct. Allocates enough memory to
* hold found symbols.
*/ */
HiloadResult hi_create_symbol_info(SymbolInfos *symbols, HiloadResult hi_create_symbol_info(SymbolInfos *symbols,
struct sc_array_memreg *const regions, struct sc_array_memreg *const regions,
@@ -35,6 +32,10 @@ HiloadResult hi_create_symbol_info(SymbolInfos *symbols,
hi_free_symbol_info(symbols); hi_free_symbol_info(symbols);
for (int i = 0; i < info->dlpi_phnum; i++) {
const ElfW(Phdr) *phdr = &info->dlpi_phdr[i];
}
for (int i = 0; i < info->dlpi_phnum; i++) { for (int i = 0; i < info->dlpi_phnum; i++) {
const ElfW(Phdr) *phdr = &info->dlpi_phdr[i]; const ElfW(Phdr) *phdr = &info->dlpi_phdr[i];
@@ -55,13 +56,12 @@ HiloadResult hi_create_symbol_info(SymbolInfos *symbols,
const ElfW(Dyn) *dyn = (const ElfW(Dyn) *)(info->dlpi_addr + phdr->p_vaddr); const ElfW(Dyn) *dyn = (const ElfW(Dyn) *)(info->dlpi_addr + phdr->p_vaddr);
const char *strtab = NULL; const char *strtab = NULL;
const ElfW(Sym) *symtab = NULL; const ElfW(Sym) *symtab = NULL;
size_t symtab_size = 0;
size_t strtab_size = 0; size_t strtab_size = 0;
uptr off = info->dlpi_addr; uptr off = info->dlpi_addr;
// Parse the dynamic table. Add offset if address is not in executable memory. // Parse the dynamic table. Add offset if address is not in executable
// NOTE: Haven't found a better way to differentiate with items that have // memory. NOTE: Haven't found a better way to differentiate with items that
// relative address, and items that don't. // have relative address, and items that don't.
for (; dyn->d_tag != DT_NULL; dyn++) { for (; dyn->d_tag != DT_NULL; dyn++) {
if (dyn->d_tag == DT_STRTAB) { if (dyn->d_tag == DT_STRTAB) {
@@ -76,34 +76,43 @@ HiloadResult hi_create_symbol_info(SymbolInfos *symbols,
uptr p = dyn->d_un.d_ptr; uptr p = dyn->d_un.d_ptr;
p = add_ptr_offset_if_invalid(p, off, regions); p = add_ptr_offset_if_invalid(p, off, regions);
symtab = (const ElfW(Sym) *)(p); symtab = (const ElfW(Sym) *)(p);
} else if (dyn->d_tag == DT_SYMENT) {
symtab_size = dyn->d_un.d_val;
} }
} }
// Ensure we found the symbol and string tables // Ensure we found the symbol and string tables
if (!strtab || !symtab || strtab_size == 0 || symtab_size == 0) { if (!strtab || !symtab || strtab_size == 0) {
sc_log_error("Failed to find symbol or string table in %s\n", sc_log_error("Failed to find symbol or string table in %s\n",
info->dlpi_name); info->dlpi_name);
return HILOAD_FAIL; return HILOAD_FAIL;
} }
// Iterate over the symbol table // Iterate over the symbol table
for (const ElfW(Sym) *sym = symtab; // We assume the dynsym and dynstr are linked
(const char *)sym < (const char *)symtab + symtab_size; sym++) { const char *strptr = strtab;
for (const ElfW(Sym) *sym = symtab;; sym++) {
// escape if we'd hit the end of string table
strptr = strtab + sym->st_name;
if (strptr + strlen(strptr) + 1 >= strtab + strtab_size)
break;
if (ELF64_ST_TYPE(sym->st_info) == STT_FUNC || if (ELF64_ST_TYPE(sym->st_info) == STT_FUNC ||
ELF64_ST_TYPE(sym->st_info) == STT_OBJECT) { ELF64_ST_TYPE(sym->st_info) == STT_OBJECT) {
const char *name = strdup(&strtab[sym->st_name]); const char *name = strdup(strtab + sym->st_name);
void *address = (void *)(info->dlpi_addr + sym->st_value); void *address = (void *)(info->dlpi_addr + sym->st_value);
// Store the symbol information in the arrays // Store the symbol information in the arrays
sc_array_add(&symbols->names, name); sc_array_add(&symbols->names, name);
sc_array_add(&symbols->addresses, address); sc_array_add(&symbols->addresses, address);
sc_log_debug("%s: %p\n", name, address);
} }
} }
} }
sc_log_debug("Symbols for %s gathered...\n", info->dlpi_name);
return HILOAD_OK; return HILOAD_OK;
} }