Commit ec17b702 authored by Hisham Muhammad

Convert affinity control from the deprecated PLPA to HWLOC

parent d07b043e
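Below is a minimal sketch (not part of this commit; the helper name and error handling are assumptions) of what affinity control looks like through the hwloc 1.x API that the imported files below implement, i.e. the kind of call a consumer such as htop now makes instead of the deprecated PLPA calls:

#include <sys/types.h>
#include <hwloc.h>

/* Hypothetical helper: bind process `pid` to the single PU `cpu`.
 * hwloc_topology_init/load discover the machine, hwloc_set_proc_cpubind
 * applies the cpuset; all calls are public hwloc 1.x API. */
static int bind_pid_to_cpu(pid_t pid, unsigned cpu)
{
  hwloc_topology_t topology;
  hwloc_bitmap_t set;
  int err;

  if (hwloc_topology_init(&topology) < 0)
    return -1;
  if (hwloc_topology_load(topology) < 0) {
    hwloc_topology_destroy(topology);
    return -1;
  }

  set = hwloc_bitmap_alloc();
  hwloc_bitmap_only(set, cpu);                 /* cpuset containing only `cpu` */
  err = hwloc_set_proc_cpubind(topology, pid, set, 0);

  hwloc_bitmap_free(set);
  hwloc_topology_destroy(topology);
  return err;
}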
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2010 INRIA. All rights reserved.
* Copyright © 2009-2010 Université Bordeaux 1
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
/* Detect topology change: register for power-management change notifications
* and check whether e.g. hw.activecpu changed */
/* Apparently, Darwin people do not _want_ to provide binding functions. */
#include <private/autogen/config.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdlib.h>
#include <inttypes.h>
#include <hwloc.h>
#include <private/private.h>
#include <private/debug.h>
void
hwloc_look_darwin(struct hwloc_topology *topology)
{
int64_t _nprocs;
unsigned nprocs;
int64_t _npackages;
unsigned i, j, cpu;
struct hwloc_obj *obj;
size_t size;
int64_t l1cachesize;
int64_t l2cachesize;
int64_t cachelinesize;
int64_t memsize;
if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0)
return;
nprocs = _nprocs;
topology->support.discovery->pu = 1;
hwloc_debug("%u procs\n", nprocs);
if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) {
unsigned npackages = _npackages;
int64_t _cores_per_package;
int64_t _logical_per_package;
unsigned logical_per_package;
hwloc_debug("%u packages\n", npackages);
if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package) && _logical_per_package > 0)
logical_per_package = _logical_per_package;
else
/* Assume the trivial mapping. */
logical_per_package = nprocs / npackages;
hwloc_debug("%u threads per package\n", logical_per_package);
if (nprocs == npackages * logical_per_package)
for (i = 0; i < npackages; i++) {
obj = hwloc_alloc_setup_object(HWLOC_OBJ_SOCKET, i);
obj->cpuset = hwloc_bitmap_alloc();
for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++)
hwloc_bitmap_set(obj->cpuset, cpu);
hwloc_debug_1arg_bitmap("package %u has cpuset %s\n",
i, obj->cpuset);
hwloc_insert_object_by_cpuset(topology, obj);
}
if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package) && _cores_per_package > 0) {
unsigned cores_per_package = _cores_per_package;
hwloc_debug("%u cores per package\n", cores_per_package);
if (!(logical_per_package % cores_per_package))
for (i = 0; i < npackages * cores_per_package; i++) {
obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i);
obj->cpuset = hwloc_bitmap_alloc();
for (cpu = i*(logical_per_package/cores_per_package);
cpu < (i+1)*(logical_per_package/cores_per_package);
cpu++)
hwloc_bitmap_set(obj->cpuset, cpu);
hwloc_debug_1arg_bitmap("core %u has cpuset %s\n",
i, obj->cpuset);
hwloc_insert_object_by_cpuset(topology, obj);
}
}
}
if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1cachesize))
l1cachesize = 0;
if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize))
l2cachesize = 0;
if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize))
cachelinesize = 0;
if (hwloc_get_sysctlbyname("hw.memsize", &memsize))
memsize = 0;
if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) {
unsigned n = size / sizeof(uint32_t);
uint64_t *cacheconfig = NULL;
uint64_t *cachesize = NULL;
uint32_t *cacheconfig32 = NULL;
cacheconfig = malloc(sizeof(uint64_t) * n);
if (NULL == cacheconfig) {
goto out;
}
cachesize = malloc(sizeof(uint64_t) * n);
if (NULL == cachesize) {
goto out;
}
cacheconfig32 = malloc(sizeof(uint32_t) * n);
if (NULL == cacheconfig32) {
goto out;
}
if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) {
/* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for
* cacheconfig, with apparently no way for detection. Assume the machine
* won't have more than 4 billion cpus */
if (cacheconfig[0] > 0xFFFFFFFFUL) {
memcpy(cacheconfig32, cacheconfig, size);
for (i = 0 ; i < size / sizeof(uint32_t); i++)
cacheconfig[i] = cacheconfig32[i];
}
memset(cachesize, 0, sizeof(uint64_t) * n);
size = sizeof(uint64_t) * n;
if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) {
if (n > 0)
cachesize[0] = memsize;
if (n > 1)
cachesize[1] = l1cachesize;
if (n > 2)
cachesize[2] = l2cachesize;
}
hwloc_debug("%s", "caches");
for (i = 0; i < n && cacheconfig[i]; i++)
hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024);
/* Now we know how many caches there are */
n = i;
hwloc_debug("\n%u cache levels\n", n - 1);
/* For each cache level (0 is memory) */
for (i = 0; i < n; i++) {
/* cacheconfig tells us how many cpus share it, let's iterate on each cache */
for (j = 0; j < (nprocs / cacheconfig[i]); j++) {
obj = hwloc_alloc_setup_object(i?HWLOC_OBJ_CACHE:HWLOC_OBJ_NODE, j);
if (!i) {
obj->nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_set(obj->nodeset, j);
}
obj->cpuset = hwloc_bitmap_alloc();
for (cpu = j*cacheconfig[i];
cpu < ((j+1)*cacheconfig[i]);
cpu++)
hwloc_bitmap_set(obj->cpuset, cpu);
if (i) {
hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n",
i, j, obj->cpuset);
obj->attr->cache.depth = i;
obj->attr->cache.size = cachesize[i];
obj->attr->cache.linesize = cachelinesize;
} else {
hwloc_debug_1arg_bitmap("node %u has cpuset %s\n",
j, obj->cpuset);
obj->memory.local_memory = cachesize[i];
obj->memory.page_types_len = 2;
obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
obj->memory.page_types[0].size = getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
}
hwloc_insert_object_by_cpuset(topology, obj);
}
}
}
out:
if (NULL != cacheconfig) {
free(cacheconfig);
}
if (NULL != cachesize) {
free(cachesize);
}
if (NULL != cacheconfig32) {
free(cacheconfig32);
}
}
/* add PU objects */
hwloc_setup_pu_level(topology, nprocs);
hwloc_add_object_info(topology->levels[0][0], "Backend", "Darwin");
}
void
hwloc_set_darwin_hooks(struct hwloc_topology *topology __hwloc_attribute_unused)
{
}
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2010 INRIA. All rights reserved.
* Copyright © 2009-2010 Université Bordeaux 1
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
#include <private/autogen/config.h>
#include <sys/types.h>
#include <stdlib.h>
#include <inttypes.h>
#include <sys/param.h>
#include <pthread.h>
#ifdef HAVE_PTHREAD_NP_H
#include <pthread_np.h>
#endif
#ifdef HAVE_SYS_CPUSET_H
#include <sys/cpuset.h>
#endif
#include <hwloc.h>
#include <private/private.h>
#include <private/debug.h>
#ifdef HAVE_SYS_CPUSET_H
static void
hwloc_freebsd_bsd2hwloc(hwloc_bitmap_t hwloc_cpuset, const cpuset_t *cpuset)
{
unsigned cpu;
hwloc_bitmap_zero(hwloc_cpuset);
for (cpu = 0; cpu < CPU_SETSIZE; cpu++)
if (CPU_ISSET(cpu, cpuset))
hwloc_bitmap_set(hwloc_cpuset, cpu);
}
static void
hwloc_freebsd_hwloc2bsd(hwloc_const_bitmap_t hwloc_cpuset, cpuset_t *cpuset)
{
unsigned cpu;
CPU_ZERO(cpuset);
for (cpu = 0; cpu < CPU_SETSIZE; cpu++)
if (hwloc_bitmap_isset(hwloc_cpuset, cpu))
CPU_SET(cpu, cpuset);
}
static int
hwloc_freebsd_set_sth_affinity(hwloc_topology_t topology __hwloc_attribute_unused, cpulevel_t level, cpuwhich_t which, id_t id, hwloc_const_bitmap_t hwloc_cpuset, int flags __hwloc_attribute_unused)
{
cpuset_t cpuset;
hwloc_freebsd_hwloc2bsd(hwloc_cpuset, &cpuset);
if (cpuset_setaffinity(level, which, id, sizeof(cpuset), &cpuset))
return -1;
return 0;
}
static int
hwloc_freebsd_get_sth_affinity(hwloc_topology_t topology __hwloc_attribute_unused, cpulevel_t level, cpuwhich_t which, id_t id, hwloc_bitmap_t hwloc_cpuset, int flags __hwloc_attribute_unused)
{
cpuset_t cpuset;
if (cpuset_getaffinity(level, which, id, sizeof(cpuset), &cpuset))
return -1;
hwloc_freebsd_bsd2hwloc(hwloc_cpuset, &cpuset);
return 0;
}
static int
hwloc_freebsd_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_cpuset, int flags)
{
return hwloc_freebsd_set_sth_affinity(topology, CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, hwloc_cpuset, flags);
}
static int
hwloc_freebsd_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_cpuset, int flags)
{
return hwloc_freebsd_get_sth_affinity(topology, CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, hwloc_cpuset, flags);
}
static int
hwloc_freebsd_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_cpuset, int flags)
{
return hwloc_freebsd_set_sth_affinity(topology, CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, hwloc_cpuset, flags);
}
static int
hwloc_freebsd_get_thisthread_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_cpuset, int flags)
{
return hwloc_freebsd_get_sth_affinity(topology, CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, hwloc_cpuset, flags);
}
static int
hwloc_freebsd_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t hwloc_cpuset, int flags)
{
return hwloc_freebsd_set_sth_affinity(topology, CPU_LEVEL_WHICH, CPU_WHICH_PID, pid, hwloc_cpuset, flags);
}
static int
hwloc_freebsd_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t hwloc_cpuset, int flags)
{
return hwloc_freebsd_get_sth_affinity(topology, CPU_LEVEL_WHICH, CPU_WHICH_PID, pid, hwloc_cpuset, flags);
}
#ifdef hwloc_thread_t
#if HAVE_DECL_PTHREAD_SETAFFINITY_NP
#pragma weak pthread_setaffinity_np
static int
hwloc_freebsd_set_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t tid, hwloc_const_bitmap_t hwloc_cpuset, int flags __hwloc_attribute_unused)
{
int err;
cpuset_t cpuset;
if (!pthread_setaffinity_np) {
errno = ENOSYS;
return -1;
}
hwloc_freebsd_hwloc2bsd(hwloc_cpuset, &cpuset);
err = pthread_setaffinity_np(tid, sizeof(cpuset), &cpuset);
if (err) {
errno = err;
return -1;
}
return 0;
}
#endif
#if HAVE_DECL_PTHREAD_GETAFFINITY_NP
#pragma weak pthread_getaffinity_np
static int
hwloc_freebsd_get_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t tid, hwloc_bitmap_t hwloc_cpuset, int flags __hwloc_attribute_unused)
{
int err;
cpuset_t cpuset;
if (!pthread_getaffinity_np) {
errno = ENOSYS;
return -1;
}
err = pthread_getaffinity_np(tid, sizeof(cpuset), &cpuset);
if (err) {
errno = err;
return -1;
}
hwloc_freebsd_bsd2hwloc(hwloc_cpuset, &cpuset);
return 0;
}
#endif
#endif
#endif
void
hwloc_look_freebsd(struct hwloc_topology *topology)
{
unsigned nbprocs = hwloc_fallback_nbprocessors(topology);
#ifdef HAVE__SC_LARGE_PAGESIZE
topology->levels[0][0]->attr->machine.huge_page_size_kB = sysconf(_SC_LARGE_PAGESIZE);
#endif
hwloc_set_freebsd_hooks(topology);
hwloc_look_x86(topology, nbprocs);
hwloc_setup_pu_level(topology, nbprocs);
hwloc_add_object_info(topology->levels[0][0], "Backend", "FreeBSD");
}
void
hwloc_set_freebsd_hooks(struct hwloc_topology *topology)
{
#ifdef HAVE_SYS_CPUSET_H
topology->set_thisproc_cpubind = hwloc_freebsd_set_thisproc_cpubind;
topology->get_thisproc_cpubind = hwloc_freebsd_get_thisproc_cpubind;
topology->set_thisthread_cpubind = hwloc_freebsd_set_thisthread_cpubind;
topology->get_thisthread_cpubind = hwloc_freebsd_get_thisthread_cpubind;
topology->set_proc_cpubind = hwloc_freebsd_set_proc_cpubind;
topology->get_proc_cpubind = hwloc_freebsd_get_proc_cpubind;
#ifdef hwloc_thread_t
#if HAVE_DECL_PTHREAD_SETAFFINITY_NP
topology->set_thread_cpubind = hwloc_freebsd_set_thread_cpubind;
#endif
#if HAVE_DECL_PTHREAD_GETAFFINITY_NP
topology->get_thread_cpubind = hwloc_freebsd_get_thread_cpubind;
#endif
#endif
#endif
/* TODO: get_last_cpu_location: find out ki_lastcpu */
}
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2010 INRIA. All rights reserved.
* Copyright © 2009-2010 Université Bordeaux 1
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
/* TODO: psets? (Only for root)
* since 11i 1.6:
_SC_PSET_SUPPORT
pset_create/destroy/assign/setattr
pset_ctl/getattr
pset_bind()
pthread_pset_bind_np()
*/
#include <private/autogen/config.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <hwloc.h>
#include <private/private.h>
#include <private/debug.h>
#include <sys/mpctl.h>
#include <sys/mman.h>
#include <pthread.h>
static ldom_t
hwloc_hpux_find_ldom(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set)
{
int has_numa = sysconf(_SC_CCNUMA_SUPPORT) == 1;
hwloc_obj_t obj;
if (!has_numa)
return -1;
obj = hwloc_get_first_largest_obj_inside_cpuset(topology, hwloc_set);
if (!hwloc_bitmap_isequal(obj->cpuset, hwloc_set) || obj->type != HWLOC_OBJ_NODE) {
/* Does not correspond to exactly one node */
return -1;
}
return obj->os_index;
}
static spu_t
hwloc_hpux_find_spu(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t hwloc_set)
{
spu_t cpu;
cpu = hwloc_bitmap_first(hwloc_set);
if (cpu != -1 && hwloc_bitmap_weight(hwloc_set) == 1)
return cpu;
return -1;
}
/* Note: get_cpubind not available on HP-UX */
static int
hwloc_hpux_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags)
{
ldom_t ldom;
spu_t cpu;
/* Drop previous binding */
mpctl(MPC_SETLDOM, MPC_LDOMFLOAT, pid);
mpctl(MPC_SETPROCESS, MPC_SPUFLOAT, pid);
if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology)))
return 0;
ldom = hwloc_hpux_find_ldom(topology, hwloc_set);
if (ldom != -1)
return mpctl(MPC_SETLDOM, ldom, pid);
cpu = hwloc_hpux_find_spu(topology, hwloc_set);
if (cpu != -1)
return mpctl(flags & HWLOC_CPUBIND_STRICT ? MPC_SETPROCESS_FORCE : MPC_SETPROCESS, cpu, pid);
errno = EXDEV;
return -1;
}
static int
hwloc_hpux_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
{
return hwloc_hpux_set_proc_cpubind(topology, MPC_SELFPID, hwloc_set, flags);
}
#ifdef hwloc_thread_t
static int
hwloc_hpux_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t pthread, hwloc_const_bitmap_t hwloc_set, int flags)
{
ldom_t ldom, ldom2;
spu_t cpu, cpu2;
/* Drop previous binding */
pthread_ldom_bind_np(&ldom2, PTHREAD_LDOMFLOAT_NP, pthread);
pthread_processor_bind_np(PTHREAD_BIND_ADVISORY_NP, &cpu2, PTHREAD_SPUFLOAT_NP, pthread);
if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology)))
return 0;
ldom = hwloc_hpux_find_ldom(topology, hwloc_set);
if (ldom != -1)
return pthread_ldom_bind_np(&ldom2, ldom, pthread);
cpu = hwloc_hpux_find_spu(topology, hwloc_set);
if (cpu != -1)
return pthread_processor_bind_np(flags & HWLOC_CPUBIND_STRICT ? PTHREAD_BIND_FORCED_NP : PTHREAD_BIND_ADVISORY_NP, &cpu2, cpu, pthread);
errno = EXDEV;
return -1;
}
static int
hwloc_hpux_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
{
return hwloc_hpux_set_thread_cpubind(topology, PTHREAD_SELFTID_NP, hwloc_set, flags);
}
#endif
/* According to HP docs, HP-UX up to 11iv2 doesn't support migration */
#ifdef MAP_MEM_FIRST_TOUCH
static void*
hwloc_hpux_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
int mmap_flags;
/* Can not give a set of nodes. */
if (!hwloc_bitmap_isequal(nodeset, hwloc_topology_get_complete_nodeset(topology))) {
errno = EXDEV;
return hwloc_alloc_or_fail(topology, len, flags);
}
switch (policy) {
case HWLOC_MEMBIND_DEFAULT:
case HWLOC_MEMBIND_BIND:
mmap_flags = 0;
break;
case HWLOC_MEMBIND_FIRSTTOUCH:
mmap_flags = MAP_MEM_FIRST_TOUCH;
break;
case HWLOC_MEMBIND_INTERLEAVE:
mmap_flags = MAP_MEM_INTERLEAVED;
break;
default:
errno = ENOSYS;
return NULL;
}
return mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | mmap_flags, -1, 0);
}
#endif /* MAP_MEM_FIRST_TOUCH */
void
hwloc_look_hpux(struct hwloc_topology *topology)
{
int has_numa = sysconf(_SC_CCNUMA_SUPPORT) == 1;
hwloc_obj_t *nodes = NULL, obj;
spu_t currentcpu;
ldom_t currentnode;
int i, nbnodes = 0;
#ifdef HAVE__SC_LARGE_PAGESIZE
topology->levels[0][0]->attr->machine.huge_page_size_kB = sysconf(_SC_LARGE_PAGESIZE);
#endif
if (has_numa) {
nbnodes = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
MPC_GETNUMLDOMS_SYS : MPC_GETNUMLDOMS, 0, 0);
hwloc_debug("%d nodes\n", nbnodes);
nodes = malloc(nbnodes * sizeof(*nodes));
i = 0;
currentnode = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
MPC_GETFIRSTLDOM_SYS : MPC_GETFIRSTLDOM, 0, 0);
while (currentnode != -1 && i < nbnodes) {
hwloc_debug("node %d is %d\n", i, currentnode);
nodes[i] = obj = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, currentnode);
obj->cpuset = hwloc_bitmap_alloc();
obj->nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_set(obj->nodeset, currentnode);
/* TODO: obj->attr->node.memory_kB */
/* TODO: obj->attr->node.huge_page_free */
currentnode = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
MPC_GETNEXTLDOM_SYS : MPC_GETNEXTLDOM, currentnode, 0);
i++;
}
}
i = 0;
currentcpu = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
MPC_GETFIRSTSPU_SYS : MPC_GETFIRSTSPU, 0,0);
while (currentcpu != -1) {
obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, currentcpu);
obj->cpuset = hwloc_bitmap_alloc();
hwloc_bitmap_set(obj->cpuset, currentcpu);
hwloc_debug("cpu %d\n", currentcpu);
if (nodes) {
/* Add this cpu to its node */
currentnode = mpctl(MPC_SPUTOLDOM, currentcpu, 0);
if ((ldom_t) nodes[i]->os_index != currentnode)
for (i = 0; i < nbnodes; i++)
if ((ldom_t) nodes[i]->os_index == currentnode)
break;
if (i < nbnodes) {
hwloc_bitmap_set(nodes[i]->cpuset, currentcpu);
hwloc_debug("is in node %d\n", i);
} else {
hwloc_debug("%s", "is in no node?!\n");
}
}
/* Add cpu */
hwloc_insert_object_by_cpuset(topology, obj);
currentcpu = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
MPC_GETNEXTSPU_SYS : MPC_GETNEXTSPU, currentcpu, 0);
}
if (nodes) {
/* Add nodes */
for (i = 0 ; i < nbnodes ; i++)
hwloc_insert_object_by_cpuset(topology, nodes[i]);
free(nodes);
}
topology->support.discovery->pu = 1;
hwloc_add_object_info(topology->levels[0][0], "Backend", "HP-UX");
}
void
hwloc_set_hpux_hooks(struct hwloc_topology *topology)
{
topology->set_proc_cpubind = hwloc_hpux_set_proc_cpubind;
topology->set_thisproc_cpubind = hwloc_hpux_set_thisproc_cpubind;
#ifdef hwloc_thread_t
topology->set_thread_cpubind = hwloc_hpux_set_thread_cpubind;
topology->set_thisthread_cpubind = hwloc_hpux_set_thisthread_cpubind;
#endif
#ifdef MAP_MEM_FIRST_TOUCH
topology->alloc_membind = hwloc_hpux_alloc_membind;
topology->alloc = hwloc_alloc_mmap;
topology->free_membind = hwloc_free_mmap;
topology->support.membind->firsttouch_membind = 1;
topology->support.membind->bind_membind = 1;
topology->support.membind->interleave_membind = 1;
#endif /* MAP_MEM_FIRST_TOUCH */
}
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2011 INRIA. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux 1
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* Copyright © 2010 IBM
* See COPYING in top-level directory.
*/
#include <private/autogen/config.h>
#include <hwloc.h>
#include <hwloc/linux.h>
#include <private/misc.h>
#include <private/private.h>
#include <private/misc.h>
#include <private/debug.h>
#include <limits.h>
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <assert.h>
#include <dirent.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sched.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#if defined HWLOC_HAVE_SET_MEMPOLICY || defined HWLOC_HAVE_MBIND
#define migratepages migrate_pages /* workaround broken migratepages prototype in numaif.h before libnuma 2.0.2 */
#include <numaif.h>
#endif
#if !defined(HWLOC_HAVE_CPU_SET) && !(defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY)) && defined(HWLOC_HAVE__SYSCALL3)
/* libc doesn't have support for sched_setaffinity, so build the system call
* ourselves: */
# include <linux/unistd.h>
# ifndef __NR_sched_setaffinity
# ifdef __i386__
# define __NR_sched_setaffinity 241
# elif defined(__x86_64__)
# define __NR_sched_setaffinity 203
# elif defined(__ia64__)
# define __NR_sched_setaffinity 1231
# elif defined(__hppa__)
# define __NR_sched_setaffinity 211
# elif defined(__alpha__)
# define __NR_sched_setaffinity 395
# elif defined(__s390__)
# define __NR_sched_setaffinity 239
# elif defined(__sparc__)
# define __NR_sched_setaffinity 261
# elif defined(__m68k__)
# define __NR_sched_setaffinity 311
# elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__powerpc64__) || defined(__ppc64__)
# define __NR_sched_setaffinity 222
# elif defined(__arm__)
# define __NR_sched_setaffinity 241
# elif defined(__cris__)
# define __NR_sched_setaffinity 241
/*# elif defined(__mips__)
# define __NR_sched_setaffinity TODO (32/64/nabi) */
# else
# warning "don't know the syscall number for sched_setaffinity on this architecture, will not support binding"
# define sched_setaffinity(pid, lg, mask) (errno = ENOSYS, -1)
# endif
# endif
# ifndef sched_setaffinity
_syscall3(int, sched_setaffinity, pid_t, pid, unsigned int, lg, const void *, mask)
# endif
# ifndef __NR_sched_getaffinity
# ifdef __i386__
# define __NR_sched_getaffinity 242
# elif defined(__x86_64__)
# define __NR_sched_getaffinity 204
# elif defined(__ia64__)
# define __NR_sched_getaffinity 1232
# elif defined(__hppa__)
# define __NR_sched_getaffinity 212
# elif defined(__alpha__)
# define __NR_sched_getaffinity 396
# elif defined(__s390__)
# define __NR_sched_getaffinity 240
# elif defined(__sparc__)
# define __NR_sched_getaffinity 260
# elif defined(__m68k__)
# define __NR_sched_getaffinity 312
# elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__powerpc64__) || defined(__ppc64__)
# define __NR_sched_getaffinity 223
# elif defined(__arm__)
# define __NR_sched_getaffinity 242
# elif defined(__cris__)
# define __NR_sched_getaffinity 242
/*# elif defined(__mips__)
# define __NR_sched_getaffinity TODO (32/64/nabi) */
# else
# warning "don't know the syscall number for sched_getaffinity on this architecture, will not support getting binding"
# define sched_getaffinity(pid, lg, mask) (errno = ENOSYS, -1)
# endif
# endif
# ifndef sched_getaffinity
_syscall3(int, sched_getaffinity, pid_t, pid, unsigned int, lg, void *, mask)
# endif
#endif
/* Added for ntohl() */
#include <arpa/inet.h>
#ifdef HAVE_OPENAT
/* Use our own filesystem functions if we have openat */
static const char *
hwloc_checkat(const char *path, int fsroot_fd)
{
const char *relative_path;
if (fsroot_fd < 0) {
errno = EBADF;
return NULL;
}
/* Skip leading slashes. */
for (relative_path = path; *relative_path == '/'; relative_path++);
return relative_path;
}
static int
hwloc_openat(const char *path, int fsroot_fd)
{
const char *relative_path;
relative_path = hwloc_checkat(path, fsroot_fd);
if (!relative_path)
return -1;
return openat (fsroot_fd, relative_path, O_RDONLY);
}
static FILE *
hwloc_fopenat(const char *path, const char *mode, int fsroot_fd)
{
int fd;
if (strcmp(mode, "r")) {
errno = ENOTSUP;
return NULL;
}
fd = hwloc_openat (path, fsroot_fd);
if (fd == -1)
return NULL;
return fdopen(fd, mode);
}
static int
hwloc_accessat(const char *path, int mode, int fsroot_fd)
{
const char *relative_path;
relative_path = hwloc_checkat(path, fsroot_fd);
if (!relative_path)
return -1;
return faccessat(fsroot_fd, relative_path, mode, 0);
}
static int
hwloc_fstatat(const char *path, struct stat *st, int flags, int fsroot_fd)
{
const char *relative_path;
relative_path = hwloc_checkat(path, fsroot_fd);
if (!relative_path)
return -1;
return fstatat(fsroot_fd, relative_path, st, flags);
}
static DIR*
hwloc_opendirat(const char *path, int fsroot_fd)
{
int dir_fd;
const char *relative_path;
relative_path = hwloc_checkat(path, fsroot_fd);
if (!relative_path)
return NULL;
dir_fd = openat(fsroot_fd, relative_path, O_RDONLY | O_DIRECTORY);
if (dir_fd < 0)
return NULL;
return fdopendir(dir_fd);
}
#endif /* HAVE_OPENAT */
/* Static inline version of fopen so that we can use openat if we have
it, but still preserve compiler parameter checking */
static __hwloc_inline int
hwloc_open(const char *p, int d __hwloc_attribute_unused)
{
#ifdef HAVE_OPENAT
return hwloc_openat(p, d);
#else
return open(p, O_RDONLY);
#endif
}
static __hwloc_inline FILE *
hwloc_fopen(const char *p, const char *m, int d __hwloc_attribute_unused)
{
#ifdef HAVE_OPENAT
return hwloc_fopenat(p, m, d);
#else
return fopen(p, m);
#endif
}
/* Static inline version of access so that we can use openat if we have
it, but still preserve compiler parameter checking */
static __hwloc_inline int
hwloc_access(const char *p, int m, int d __hwloc_attribute_unused)
{
#ifdef HAVE_OPENAT
return hwloc_accessat(p, m, d);
#else
return access(p, m);
#endif
}
static __hwloc_inline int
hwloc_stat(const char *p, struct stat *st, int d __hwloc_attribute_unused)
{
#ifdef HAVE_OPENAT
return hwloc_fstatat(p, st, 0, d);
#else
return stat(p, st);
#endif
}
/* Static inline version of opendir so that we can use openat if we have
it, but still preserve compiler parameter checking */
static __hwloc_inline DIR *
hwloc_opendir(const char *p, int d __hwloc_attribute_unused)
{
#ifdef HAVE_OPENAT
return hwloc_opendirat(p, d);
#else
return opendir(p);
#endif
}
int
hwloc_linux_set_tid_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, pid_t tid __hwloc_attribute_unused, hwloc_const_bitmap_t hwloc_set __hwloc_attribute_unused)
{
/* TODO Kerrighed: Use
* int migrate (pid_t pid, int destination_node);
* int migrate_self (int destination_node);
* int thread_migrate (int thread_id, int destination_node);
*/
/* The resulting binding is always strict */
#if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY)
cpu_set_t *plinux_set;
unsigned cpu;
int last;
size_t setsize;
int err;
last = hwloc_bitmap_last(hwloc_set);
if (last == -1) {
errno = EINVAL;
return -1;
}
setsize = CPU_ALLOC_SIZE(last+1);
plinux_set = CPU_ALLOC(last+1);
CPU_ZERO_S(setsize, plinux_set);
hwloc_bitmap_foreach_begin(cpu, hwloc_set)
CPU_SET_S(cpu, setsize, plinux_set);
hwloc_bitmap_foreach_end();
err = sched_setaffinity(tid, setsize, plinux_set);
CPU_FREE(plinux_set);
return err;
#elif defined(HWLOC_HAVE_CPU_SET)
cpu_set_t linux_set;
unsigned cpu;
CPU_ZERO(&linux_set);
hwloc_bitmap_foreach_begin(cpu, hwloc_set)
CPU_SET(cpu, &linux_set);
hwloc_bitmap_foreach_end();
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
return sched_setaffinity(tid, &linux_set);
#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
return sched_setaffinity(tid, sizeof(linux_set), &linux_set);
#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
#elif defined(HWLOC_HAVE__SYSCALL3)
unsigned long mask = hwloc_bitmap_to_ulong(hwloc_set);
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
return sched_setaffinity(tid, (void*) &mask);
#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
return sched_setaffinity(tid, sizeof(mask), (void*) &mask);
#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
#else /* !_SYSCALL3 */
errno = ENOSYS;
return -1;
#endif /* !_SYSCALL3 */
}
#if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY)
/*
* On some kernels, sched_getaffinity requires the output size to be larger
* than the kernel cpu_set size (defined by CONFIG_NR_CPUS).
* Try sched_getaffinity on ourselves until we find a nr_cpus value that makes
* the kernel happy.
*/
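/* Illustrative (assumed, not from this commit): a kernel built with
 * CONFIG_NR_CPUS=4096 uses a 4096-bit (512-byte) cpu mask, so on a small
 * machine the doubling loop below keeps growing nr_cpus until
 * sched_getaffinity accepts the buffer size. */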
static int
hwloc_linux_find_kernel_nr_cpus(hwloc_topology_t topology)
{
static int nr_cpus = -1;
if (nr_cpus != -1)
/* already computed */
return nr_cpus;
/* start with a nr_cpus that may contain the whole topology */
nr_cpus = hwloc_bitmap_last(topology->levels[0][0]->complete_cpuset) + 1;
while (1) {
cpu_set_t *set = CPU_ALLOC(nr_cpus);
size_t setsize = CPU_ALLOC_SIZE(nr_cpus);
int err = sched_getaffinity(0, setsize, set); /* always works, unless setsize is too small */
CPU_FREE(set);
if (!err)
/* found it */
return nr_cpus;
nr_cpus *= 2;
}
}
#endif
int
hwloc_linux_get_tid_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, pid_t tid __hwloc_attribute_unused, hwloc_bitmap_t hwloc_set __hwloc_attribute_unused)
{
int err __hwloc_attribute_unused;
/* TODO Kerrighed */
#if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY)
cpu_set_t *plinux_set;
unsigned cpu;
int last;
size_t setsize;
int kernel_nr_cpus;
/* find the kernel nr_cpus so as to use a large enough cpu_set size */
kernel_nr_cpus = hwloc_linux_find_kernel_nr_cpus(topology);
setsize = CPU_ALLOC_SIZE(kernel_nr_cpus);
plinux_set = CPU_ALLOC(kernel_nr_cpus);
err = sched_getaffinity(tid, setsize, plinux_set);
if (err < 0) {
CPU_FREE(plinux_set);
return -1;
}
last = hwloc_bitmap_last(topology->levels[0][0]->complete_cpuset);
assert(last != -1);
hwloc_bitmap_zero(hwloc_set);
for(cpu=0; cpu<=(unsigned) last; cpu++)
if (CPU_ISSET_S(cpu, setsize, plinux_set))
hwloc_bitmap_set(hwloc_set, cpu);
CPU_FREE(plinux_set);
#elif defined(HWLOC_HAVE_CPU_SET)
cpu_set_t linux_set;
unsigned cpu;
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
err = sched_getaffinity(tid, &linux_set);
#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
err = sched_getaffinity(tid, sizeof(linux_set), &linux_set);
#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
if (err < 0)
return -1;
hwloc_bitmap_zero(hwloc_set);
for(cpu=0; cpu<CPU_SETSIZE; cpu++)
if (CPU_ISSET(cpu, &linux_set))
hwloc_bitmap_set(hwloc_set, cpu);
#elif defined(HWLOC_HAVE__SYSCALL3)
unsigned long mask;
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
err = sched_getaffinity(tid, (void*) &mask);
#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
err = sched_getaffinity(tid, sizeof(mask), (void*) &mask);
#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
if (err < 0)
return -1;
hwloc_bitmap_from_ulong(hwloc_set, mask);
#else /* !_SYSCALL3 */
errno = ENOSYS;
return -1;
#endif /* !_SYSCALL3 */
return 0;
}
/* Get the array of tids of a process from the task directory in /proc */
static int
hwloc_linux_get_proc_tids(DIR *taskdir, unsigned *nr_tidsp, pid_t ** tidsp)
{
struct dirent *dirent;
unsigned nr_tids = 0;
unsigned max_tids = 32;
pid_t *tids;
struct stat sb;
/* take the number of links as a good estimate for the number of tids */
if (fstat(dirfd(taskdir), &sb) == 0)
max_tids = sb.st_nlink;
tids = malloc(max_tids*sizeof(pid_t));
if (!tids) {
errno = ENOMEM;
return -1;
}
rewinddir(taskdir);
while ((dirent = readdir(taskdir)) != NULL) {
if (nr_tids == max_tids) {
pid_t *newtids;
max_tids += 8;
newtids = realloc(tids, max_tids*sizeof(pid_t));
if (!newtids) {
free(tids);
errno = ENOMEM;
return -1;
}
tids = newtids;
}
if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))
continue;
tids[nr_tids++] = atoi(dirent->d_name);
}
*nr_tidsp = nr_tids;
*tidsp = tids;
return 0;
}
/* Callbacks for binding each process sub-tid */
typedef int (*hwloc_linux_foreach_proc_tid_cb_t)(hwloc_topology_t topology, pid_t tid, void *data, int idx, int flags);
static int
hwloc_linux_foreach_proc_tid_set_cpubind_cb(hwloc_topology_t topology, pid_t tid, void *data, int idx __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
hwloc_bitmap_t cpuset = data;
return hwloc_linux_set_tid_cpubind(topology, tid, cpuset);
}
static int
hwloc_linux_foreach_proc_tid_get_cpubind_cb(hwloc_topology_t topology, pid_t tid, void *data, int idx, int flags)
{
hwloc_bitmap_t *cpusets = data;
hwloc_bitmap_t cpuset = cpusets[0];
hwloc_bitmap_t tidset = cpusets[1];
if (hwloc_linux_get_tid_cpubind(topology, tid, tidset))
return -1;
/* reset the cpuset on first iteration */
if (!idx)
hwloc_bitmap_zero(cpuset);
if (flags & HWLOC_CPUBIND_STRICT) {
/* if STRICT, we want all threads to have the same binding */
if (!idx) {
/* this is the first thread, copy its binding */
hwloc_bitmap_copy(cpuset, tidset);
} else if (!hwloc_bitmap_isequal(cpuset, tidset)) {
/* this is not the first thread, and its binding is different */
errno = EXDEV;
return -1;
}
} else {
/* if not STRICT, just OR all thread bindings */
hwloc_bitmap_or(cpuset, cpuset, tidset);
}
return 0;
}
/* Call the callback for each process tid. */
static int
hwloc_linux_foreach_proc_tid(hwloc_topology_t topology,
pid_t pid, hwloc_linux_foreach_proc_tid_cb_t cb,
void *data, int flags)
{
char taskdir_path[128];
DIR *taskdir;
pid_t *tids, *newtids;
unsigned i, nr, newnr;
int err;
if (pid)
snprintf(taskdir_path, sizeof(taskdir_path), "/proc/%u/task", (unsigned) pid);
else
snprintf(taskdir_path, sizeof(taskdir_path), "/proc/self/task");
taskdir = opendir(taskdir_path);
if (!taskdir) {
errno = ENOSYS;
err = -1;
goto out;
}
/* read the current list of threads */
err = hwloc_linux_get_proc_tids(taskdir, &nr, &tids);
if (err < 0)
goto out_with_dir;
retry:
/* apply the callback to all threads */
for(i=0; i<nr; i++) {
err = cb(topology, tids[i], data, i, flags);
if (err < 0)
goto out_with_tids;
}
/* re-read the list of threads and retry if it changed in the meantime */
err = hwloc_linux_get_proc_tids(taskdir, &newnr, &newtids);
if (err < 0)
goto out_with_tids;
if (newnr != nr || memcmp(newtids, tids, nr*sizeof(pid_t))) {
free(tids);
tids = newtids;
nr = newnr;
goto retry;
}
err = 0;
free(newtids);
out_with_tids:
free(tids);
out_with_dir:
closedir(taskdir);
out:
return err;
}
static int
hwloc_linux_set_pid_cpubind(hwloc_topology_t topology, pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags)
{
return hwloc_linux_foreach_proc_tid(topology, pid,
hwloc_linux_foreach_proc_tid_set_cpubind_cb,
(void*) hwloc_set, flags);
}
static int
hwloc_linux_get_pid_cpubind(hwloc_topology_t topology, pid_t pid, hwloc_bitmap_t hwloc_set, int flags)
{
hwloc_bitmap_t tidset = hwloc_bitmap_alloc();
hwloc_bitmap_t cpusets[2];
int ret;
cpusets[0] = hwloc_set;
cpusets[1] = tidset;
ret = hwloc_linux_foreach_proc_tid(topology, pid,
hwloc_linux_foreach_proc_tid_get_cpubind_cb,
(void*) cpusets, flags);
hwloc_bitmap_free(tidset);
return ret;
}
static int
hwloc_linux_set_proc_cpubind(hwloc_topology_t topology, pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags)
{
if (pid == 0)
pid = topology->pid;
if (flags & HWLOC_CPUBIND_THREAD)
return hwloc_linux_set_tid_cpubind(topology, pid, hwloc_set);
else
return hwloc_linux_set_pid_cpubind(topology, pid, hwloc_set, flags);
}
static int
hwloc_linux_get_proc_cpubind(hwloc_topology_t topology, pid_t pid, hwloc_bitmap_t hwloc_set, int flags)
{
if (pid == 0)
pid = topology->pid;
if (flags & HWLOC_CPUBIND_THREAD)
return hwloc_linux_get_tid_cpubind(topology, pid, hwloc_set);
else
return hwloc_linux_get_pid_cpubind(topology, pid, hwloc_set, flags);
}
static int
hwloc_linux_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
{
return hwloc_linux_set_pid_cpubind(topology, topology->pid, hwloc_set, flags);
}
static int
hwloc_linux_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags)
{
return hwloc_linux_get_pid_cpubind(topology, topology->pid, hwloc_set, flags);
}
static int
hwloc_linux_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
{
if (topology->pid) {
errno = ENOSYS;
return -1;
}
return hwloc_linux_set_tid_cpubind(topology, 0, hwloc_set);
}
static int
hwloc_linux_get_thisthread_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
{
if (topology->pid) {
errno = ENOSYS;
return -1;
}
return hwloc_linux_get_tid_cpubind(topology, 0, hwloc_set);
}
#if HAVE_DECL_PTHREAD_SETAFFINITY_NP
#pragma weak pthread_setaffinity_np
#pragma weak pthread_self
static int
hwloc_linux_set_thread_cpubind(hwloc_topology_t topology, pthread_t tid, hwloc_const_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
{
int err;
if (topology->pid) {
errno = ENOSYS;
return -1;
}
if (!pthread_self) {
/* ?! Application uses set_thread_cpubind, but doesn't link against libpthread ?! */
errno = ENOSYS;
return -1;
}
if (tid == pthread_self())
return hwloc_linux_set_tid_cpubind(topology, 0, hwloc_set);
if (!pthread_setaffinity_np) {
errno = ENOSYS;
return -1;
}
/* TODO Kerrighed: Use
* int migrate (pid_t pid, int destination_node);
* int migrate_self (int destination_node);
* int thread_migrate (int thread_id, int destination_node);
*/
#if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY)
/* Use a separate block so that we can define specific variable
types here */
{
cpu_set_t *plinux_set;
unsigned cpu;
int last;
size_t setsize;
last = hwloc_bitmap_last(hwloc_set);
if (last == -1) {
errno = EINVAL;
return -1;
}
setsize = CPU_ALLOC_SIZE(last+1);
plinux_set = CPU_ALLOC(last+1);
CPU_ZERO_S(setsize, plinux_set);
hwloc_bitmap_foreach_begin(cpu, hwloc_set)
CPU_SET_S(cpu, setsize, plinux_set);
hwloc_bitmap_foreach_end();
err = pthread_setaffinity_np(tid, setsize, plinux_set);
CPU_FREE(plinux_set);
}
#elif defined(HWLOC_HAVE_CPU_SET)
/* Use a separate block so that we can define specific variable
types here */
{
cpu_set_t linux_set;
unsigned cpu;
CPU_ZERO(&linux_set);
hwloc_bitmap_foreach_begin(cpu, hwloc_set)
CPU_SET(cpu, &linux_set);
hwloc_bitmap_foreach_end();
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
err = pthread_setaffinity_np(tid, &linux_set);
#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
err = pthread_setaffinity_np(tid, sizeof(linux_set), &linux_set);
#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
}
#else /* CPU_SET */
/* Use a separate block so that we can define specific variable
types here */
{
unsigned long mask = hwloc_bitmap_to_ulong(hwloc_set);
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
err = pthread_setaffinity_np(tid, (void*) &mask);
#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
err = pthread_setaffinity_np(tid, sizeof(mask), (void*) &mask);
#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
}
#endif /* CPU_SET */
if (err) {
errno = err;
return -1;
}
return 0;
}
#endif /* HAVE_DECL_PTHREAD_SETAFFINITY_NP */
#if HAVE_DECL_PTHREAD_GETAFFINITY_NP
#pragma weak pthread_getaffinity_np
#pragma weak pthread_self
static int
hwloc_linux_get_thread_cpubind(hwloc_topology_t topology, pthread_t tid, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
{
int err;
if (topology->pid) {
errno = ENOSYS;
return -1;
}
if (!pthread_self) {
/* ?! Application uses get_thread_cpubind, but doesn't link against libpthread ?! */
errno = ENOSYS;
return -1;
}
if (tid == pthread_self())
return hwloc_linux_get_tid_cpubind(topology, 0, hwloc_set);
if (!pthread_getaffinity_np) {
errno = ENOSYS;
return -1;
}
/* TODO Kerrighed */
#if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY)
/* Use a separate block so that we can define specific variable
types here */
{
cpu_set_t *plinux_set;
unsigned cpu;
int last;
size_t setsize;
last = hwloc_bitmap_last(topology->levels[0][0]->complete_cpuset);
assert (last != -1);
setsize = CPU_ALLOC_SIZE(last+1);
plinux_set = CPU_ALLOC(last+1);
err = pthread_getaffinity_np(tid, setsize, plinux_set);
if (err) {
CPU_FREE(plinux_set);
errno = err;
return -1;
}
hwloc_bitmap_zero(hwloc_set);
for(cpu=0; cpu<(unsigned) last; cpu++)
if (CPU_ISSET_S(cpu, setsize, plinux_set))
hwloc_bitmap_set(hwloc_set, cpu);
CPU_FREE(plinux_set);
}
#elif defined(HWLOC_HAVE_CPU_SET)
/* Use a separate block so that we can define specific variable
types here */
{
cpu_set_t linux_set;
unsigned cpu;
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
err = pthread_getaffinity_np(tid, &linux_set);
#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
err = pthread_getaffinity_np(tid, sizeof(linux_set), &linux_set);
#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
if (err) {
errno = err;
return -1;
}
hwloc_bitmap_zero(hwloc_set);
for(cpu=0; cpu<CPU_SETSIZE; cpu++)
if (CPU_ISSET(cpu, &linux_set))
hwloc_bitmap_set(hwloc_set, cpu);
}
#else /* CPU_SET */
/* Use a separate block so that we can define specific variable
types here */
{
unsigned long mask;
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
err = pthread_getaffinity_np(tid, (void*) &mask);
#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
err = pthread_getaffinity_np(tid, sizeof(mask), (void*) &mask);
#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
if (err) {
errno = err;
return -1;
}
hwloc_bitmap_from_ulong(hwloc_set, mask);
}
#endif /* CPU_SET */
return 0;
}
#endif /* HAVE_DECL_PTHREAD_GETAFFINITY_NP */
static int
hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topology __hwloc_attribute_unused, pid_t tid, hwloc_bitmap_t set)
{
/* read /proc/pid/stat.
* its second field contains the command name between parentheses,
* and the command itself may contain parentheses,
* so read the whole line and find the last closing parenthesis to find the third field.
*/
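/* Illustrative line (assumed): "12345 (some (odd) name) S 1 ... 3 ..."
 * -- searching for the last ')' skips the whole command name safely
 * before the remaining space-separated fields are walked. */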
char buf[1024] = "";
char name[64];
char *tmp;
FILE *file;
int i;
if (!tid) {
#ifdef SYS_gettid
tid = syscall(SYS_gettid);
#else
errno = ENOSYS;
return -1;
#endif
}
snprintf(name, sizeof(name), "/proc/%lu/stat", (unsigned long) tid);
file = fopen(name, "r");
if (!file) {
errno = ENOSYS;
return -1;
}
tmp = fgets(buf, sizeof(buf), file);
fclose(file);
if (!tmp) {
errno = ENOSYS;
return -1;
}
tmp = strrchr(buf, ')');
if (!tmp) {
errno = ENOSYS;
return -1;
}
/* skip ') ' to find the actual third argument */
tmp += 2;
/* skip 35 fields */
for(i=0; i<36; i++) {
tmp = strchr(tmp, ' ');
if (!tmp) {
errno = ENOSYS;
return -1;
}
/* skip the ' ' itself */
tmp++;
}
/* read the last cpu in the 38th field now */
if (sscanf(tmp, "%d ", &i) != 1) {
errno = ENOSYS;
return -1;
}
hwloc_bitmap_only(set, i);
return 0;
}
static int
hwloc_linux_foreach_proc_tid_get_last_cpu_location_cb(hwloc_topology_t topology, pid_t tid, void *data, int idx, int flags __hwloc_attribute_unused)
{
hwloc_bitmap_t *cpusets = data;
hwloc_bitmap_t cpuset = cpusets[0];
hwloc_bitmap_t tidset = cpusets[1];
if (hwloc_linux_get_tid_last_cpu_location(topology, tid, tidset))
return -1;
/* reset the cpuset on first iteration */
if (!idx)
hwloc_bitmap_zero(cpuset);
hwloc_bitmap_or(cpuset, cpuset, tidset);
return 0;
}
static int
hwloc_linux_get_pid_last_cpu_location(hwloc_topology_t topology, pid_t pid, hwloc_bitmap_t hwloc_set, int flags)
{
hwloc_bitmap_t tidset = hwloc_bitmap_alloc();
hwloc_bitmap_t cpusets[2];
int ret;
cpusets[0] = hwloc_set;
cpusets[1] = tidset;
ret = hwloc_linux_foreach_proc_tid(topology, pid,
hwloc_linux_foreach_proc_tid_get_last_cpu_location_cb,
(void*) cpusets, flags);
hwloc_bitmap_free(tidset);
return ret;
}
static int
hwloc_linux_get_proc_last_cpu_location(hwloc_topology_t topology, pid_t pid, hwloc_bitmap_t hwloc_set, int flags)
{
if (pid == 0)
pid = topology->pid;
if (flags & HWLOC_CPUBIND_THREAD)
return hwloc_linux_get_tid_last_cpu_location(topology, pid, hwloc_set);
else
return hwloc_linux_get_pid_last_cpu_location(topology, pid, hwloc_set, flags);
}
static int
hwloc_linux_get_thisproc_last_cpu_location(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags)
{
return hwloc_linux_get_pid_last_cpu_location(topology, topology->pid, hwloc_set, flags);
}
static int
hwloc_linux_get_thisthread_last_cpu_location(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
{
if (topology->pid) {
errno = ENOSYS;
return -1;
}
return hwloc_linux_get_tid_last_cpu_location(topology, 0, hwloc_set);
}
#if defined HWLOC_HAVE_SET_MEMPOLICY || defined HWLOC_HAVE_MBIND
static int
hwloc_linux_membind_policy_from_hwloc(int *linuxpolicy, hwloc_membind_policy_t policy, int flags)
{
switch (policy) {
case HWLOC_MEMBIND_DEFAULT:
case HWLOC_MEMBIND_FIRSTTOUCH:
*linuxpolicy = MPOL_DEFAULT;
break;
case HWLOC_MEMBIND_BIND:
if (flags & HWLOC_MEMBIND_STRICT)
*linuxpolicy = MPOL_BIND;
else
*linuxpolicy = MPOL_PREFERRED;
break;
case HWLOC_MEMBIND_INTERLEAVE:
*linuxpolicy = MPOL_INTERLEAVE;
break;
/* TODO: next-touch when (if?) patch applied upstream */
default:
errno = ENOSYS;
return -1;
}
return 0;
}
static int
hwloc_linux_membind_mask_from_nodeset(hwloc_topology_t topology __hwloc_attribute_unused,
hwloc_const_nodeset_t nodeset,
unsigned *max_os_index_p, unsigned long **linuxmaskp)
{
unsigned max_os_index = 0; /* highest os_index + 1 */
unsigned long *linuxmask;
unsigned i;
hwloc_nodeset_t linux_nodeset = NULL;
if (hwloc_bitmap_isfull(nodeset)) {
linux_nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_only(linux_nodeset, 0);
nodeset = linux_nodeset;
}
max_os_index = hwloc_bitmap_last(nodeset);
if (max_os_index == (unsigned) -1)
max_os_index = 0;
/* add 1 to convert the last os_index into a max_os_index,
* and round up to the nearest multiple of BITS_PER_LONG */
max_os_index = (max_os_index + 1 + HWLOC_BITS_PER_LONG - 1) & ~(HWLOC_BITS_PER_LONG - 1);
linuxmask = calloc(max_os_index/HWLOC_BITS_PER_LONG, sizeof(long));
if (!linuxmask) {
errno = ENOMEM;
return -1;
}
for(i=0; i<max_os_index/HWLOC_BITS_PER_LONG; i++)
linuxmask[i] = hwloc_bitmap_to_ith_ulong(nodeset, i);
if (linux_nodeset)
hwloc_bitmap_free(linux_nodeset);
*max_os_index_p = max_os_index;
*linuxmaskp = linuxmask;
return 0;
}
static void
hwloc_linux_membind_mask_to_nodeset(hwloc_topology_t topology __hwloc_attribute_unused,
hwloc_nodeset_t nodeset,
unsigned max_os_index, const unsigned long *linuxmask)
{
unsigned i;
#ifdef HWLOC_DEBUG
/* max_os_index comes from hwloc_linux_find_kernel_max_numnodes() so it's a multiple of HWLOC_BITS_PER_LONG */
assert(!(max_os_index%HWLOC_BITS_PER_LONG));
#endif
hwloc_bitmap_zero(nodeset);
for(i=0; i<max_os_index/HWLOC_BITS_PER_LONG; i++)
hwloc_bitmap_set_ith_ulong(nodeset, i, linuxmask[i]);
}
#endif /* HWLOC_HAVE_SET_MEMPOLICY || HWLOC_HAVE_MBIND */
#ifdef HWLOC_HAVE_MBIND
static int
hwloc_linux_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
unsigned max_os_index; /* highest os_index + 1 */
unsigned long *linuxmask;
size_t remainder;
int linuxpolicy;
unsigned linuxflags = 0;
int err;
remainder = (uintptr_t) addr & (sysconf(_SC_PAGESIZE)-1);
addr = (char*) addr - remainder;
len += remainder;
err = hwloc_linux_membind_policy_from_hwloc(&linuxpolicy, policy, flags);
if (err < 0)
return err;
if (linuxpolicy == MPOL_DEFAULT)
/* Some Linux kernels don't like being passed a set */
return mbind((void *) addr, len, linuxpolicy, NULL, 0, 0);
err = hwloc_linux_membind_mask_from_nodeset(topology, nodeset, &max_os_index, &linuxmask);
if (err < 0)
goto out;
if (flags & HWLOC_MEMBIND_MIGRATE) {
#ifdef MPOL_MF_MOVE
linuxflags = MPOL_MF_MOVE;
if (flags & HWLOC_MEMBIND_STRICT)
linuxflags |= MPOL_MF_STRICT;
#else
if (flags & HWLOC_MEMBIND_STRICT) {
errno = ENOSYS;
goto out_with_mask;
}
#endif
}
err = mbind((void *) addr, len, linuxpolicy, linuxmask, max_os_index+1, linuxflags);
if (err < 0)
goto out_with_mask;
free(linuxmask);
return 0;
out_with_mask:
free(linuxmask);
out:
return -1;
}
static void *
hwloc_linux_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
void *buffer;
int err;
buffer = hwloc_alloc_mmap(topology, len);
if (buffer == MAP_FAILED)
return NULL;
err = hwloc_linux_set_area_membind(topology, buffer, len, nodeset, policy, flags);
if (err < 0 && policy & HWLOC_MEMBIND_STRICT) {
munmap(buffer, len);
return NULL;
}
return buffer;
}
#endif /* HWLOC_HAVE_MBIND */
#ifdef HWLOC_HAVE_SET_MEMPOLICY
static int
hwloc_linux_set_thisthread_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
unsigned max_os_index; /* highest os_index + 1 */
unsigned long *linuxmask;
int linuxpolicy;
int err;
err = hwloc_linux_membind_policy_from_hwloc(&linuxpolicy, policy, flags);
if (err < 0)
return err;
if (linuxpolicy == MPOL_DEFAULT)
/* Some Linux kernels don't like being passed a set */
return set_mempolicy(linuxpolicy, NULL, 0);
err = hwloc_linux_membind_mask_from_nodeset(topology, nodeset, &max_os_index, &linuxmask);
if (err < 0)
goto out;
if (flags & HWLOC_MEMBIND_MIGRATE) {
#ifdef HWLOC_HAVE_MIGRATE_PAGES
unsigned long *fullmask = malloc(max_os_index/HWLOC_BITS_PER_LONG * sizeof(long));
if (fullmask) {
memset(fullmask, 0xf, max_os_index/HWLOC_BITS_PER_LONG * sizeof(long));
err = migrate_pages(0, max_os_index+1, fullmask, linuxmask);
free(fullmask);
} else
err = -1;
if (err < 0 && (flags & HWLOC_MEMBIND_STRICT))
goto out_with_mask;
#else
errno = ENOSYS;
goto out_with_mask;
#endif
}
err = set_mempolicy(linuxpolicy, linuxmask, max_os_index+1);
if (err < 0)
goto out_with_mask;
free(linuxmask);
return 0;
out_with_mask:
free(linuxmask);
out:
return -1;
}
/*
* On some kernels, get_mempolicy requires the output size to be larger
* than the kernel MAX_NUMNODES (defined by CONFIG_NODES_SHIFT).
* Try get_mempolicy on ourselves until we find a max_os_index value that
* makes the kernel happy.
*/
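/* Illustrative (assumed, not from this commit): with CONFIG_NODES_SHIFT=10,
 * MAX_NUMNODES is 1024, so the doubling loop below grows max_numnodes from
 * HWLOC_BITS_PER_LONG up to 1024 before get_mempolicy stops returning EINVAL
 * on such a kernel. */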
static int
hwloc_linux_find_kernel_max_numnodes(hwloc_topology_t topology __hwloc_attribute_unused)
{
static int max_numnodes = -1;
int linuxpolicy;
if (max_numnodes != -1)
/* already computed */
return max_numnodes;
/* start with a single ulong, it's the minimal and it's enough for most machines */
max_numnodes = HWLOC_BITS_PER_LONG;
while (1) {
unsigned long *mask = malloc(max_numnodes / HWLOC_BITS_PER_LONG * sizeof(long));
int err = get_mempolicy(&linuxpolicy, mask, max_numnodes, 0, 0);
free(mask);
if (!err || errno != EINVAL)
/* found it */
return max_numnodes;
max_numnodes *= 2;
}
}
static int
hwloc_linux_get_thisthread_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags __hwloc_attribute_unused)
{
unsigned max_os_index;
unsigned long *linuxmask;
int linuxpolicy;
int err;
max_os_index = hwloc_linux_find_kernel_max_numnodes(topology);
linuxmask = malloc(max_os_index/HWLOC_BITS_PER_LONG * sizeof(long));
if (!linuxmask) {
errno = ENOMEM;
goto out;
}
err = get_mempolicy(&linuxpolicy, linuxmask, max_os_index, 0, 0);
if (err < 0)
goto out_with_mask;
if (linuxpolicy == MPOL_DEFAULT) {
hwloc_bitmap_copy(nodeset, hwloc_topology_get_topology_nodeset(topology));
} else {
hwloc_linux_membind_mask_to_nodeset(topology, nodeset, max_os_index, linuxmask);
}
switch (linuxpolicy) {
case MPOL_DEFAULT:
*policy = HWLOC_MEMBIND_FIRSTTOUCH;
break;
case MPOL_PREFERRED:
case MPOL_BIND:
*policy = HWLOC_MEMBIND_BIND;
break;
case MPOL_INTERLEAVE:
*policy = HWLOC_MEMBIND_INTERLEAVE;
break;
default:
errno = EINVAL;
goto out_with_mask;
}
free(linuxmask);
return 0;
out_with_mask:
free(linuxmask);
out:
return -1;
}
#endif /* HWLOC_HAVE_SET_MEMPOLICY */
int
hwloc_backend_sysfs_init(struct hwloc_topology *topology, const char *fsroot_path __hwloc_attribute_unused)
{
#ifdef HAVE_OPENAT
int root;
assert(topology->backend_type == HWLOC_BACKEND_NONE);
if (!fsroot_path)
fsroot_path = "/";
root = open(fsroot_path, O_RDONLY | O_DIRECTORY);
if (root < 0)
return -1;
if (strcmp(fsroot_path, "/"))
topology->is_thissystem = 0;
topology->backend_params.sysfs.root_path = strdup(fsroot_path);
topology->backend_params.sysfs.root_fd = root;
#else
topology->backend_params.sysfs.root_path = NULL;
topology->backend_params.sysfs.root_fd = -1;
#endif
topology->backend_type = HWLOC_BACKEND_SYSFS;
return 0;
}
void
hwloc_backend_sysfs_exit(struct hwloc_topology *topology)
{
assert(topology->backend_type == HWLOC_BACKEND_SYSFS);
#ifdef HAVE_OPENAT
close(topology->backend_params.sysfs.root_fd);
free(topology->backend_params.sysfs.root_path);
topology->backend_params.sysfs.root_path = NULL;
#endif
topology->backend_type = HWLOC_BACKEND_NONE;
}
static int
hwloc_parse_sysfs_unsigned(const char *mappath, unsigned *value, int fsroot_fd)
{
char string[11];
FILE * fd;
fd = hwloc_fopen(mappath, "r", fsroot_fd);
if (!fd) {
*value = -1;
return -1;
}
if (!fgets(string, 11, fd)) {
*value = -1;
fclose(fd);
return -1;
}
*value = strtoul(string, NULL, 10);
fclose(fd);
return 0;
}
/* kernel cpumaps are composed of an array of 32bits cpumasks */
#define KERNEL_CPU_MASK_BITS 32
#define KERNEL_CPU_MAP_LEN (KERNEL_CPU_MASK_BITS/4+2)
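/* Illustrative (assumed, not from this commit): a sysfs cpumap for a 36-CPU
 * machine reads "f,ffffffff" -- comma-separated 32-bit hex masks, most
 * significant first -- which is what the parser below unwinds into a bitmap. */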
int
hwloc_linux_parse_cpumap_file(FILE *file, hwloc_bitmap_t set)
{
unsigned long *maps;
unsigned long map;
int nr_maps = 0;
static int nr_maps_allocated = 8; /* only compute the power-of-two above the kernel cpumask size once */
int i;
maps = malloc(nr_maps_allocated * sizeof(*maps));
/* reset to zero first */
hwloc_bitmap_zero(set);
/* parse the whole mask */
while (fscanf(file, "%lx,", &map) == 1) /* read one kernel cpu mask and the ending comma */
{
if (nr_maps == nr_maps_allocated) {
nr_maps_allocated *= 2;
maps = realloc(maps, nr_maps_allocated * sizeof(*maps));
}
if (!map && !nr_maps)
/* ignore the first map if it's empty */
continue;
memmove(&maps[1], &maps[0], nr_maps*sizeof(*maps));
maps[0] = map;
nr_maps++;
}
/* convert into a set */
#if KERNEL_CPU_MASK_BITS == HWLOC_BITS_PER_LONG
for(i=0; i<nr_maps; i++)
hwloc_bitmap_set_ith_ulong(set, i, maps[i]);
#else
for(i=0; i<(nr_maps+1)/2; i++) {
unsigned long mask;
mask = maps[2*i];
if (2*i+1<nr_maps)
mask |= maps[2*i+1] << KERNEL_CPU_MASK_BITS;
hwloc_bitmap_set_ith_ulong(set, i, mask);
}
#endif
free(maps);
return 0;
}
static hwloc_bitmap_t
hwloc_parse_cpumap(const char *mappath, int fsroot_fd)
{
hwloc_bitmap_t set;
FILE * file;
file = hwloc_fopen(mappath, "r", fsroot_fd);
if (!file)
return NULL;
set = hwloc_bitmap_alloc();
hwloc_linux_parse_cpumap_file(file, set);
fclose(file);
return set;
}
static char *
hwloc_strdup_mntpath(const char *escapedpath, size_t length)
{
char *path = malloc(length+1);
const char *src = escapedpath, *tmp = src;
char *dst = path;
while ((tmp = strchr(src, '\\')) != NULL) {
strncpy(dst, src, tmp-src);
dst += tmp-src;
if (!strncmp(tmp+1, "040", 3))
*dst = ' ';
else if (!strncmp(tmp+1, "011", 3))
*dst = '\t';
else if (!strncmp(tmp+1, "012", 3))
*dst = '\n';
else
*dst = '\\';
dst++;
src = tmp+4;
}
strcpy(dst, src);
return path;
}
static void
hwloc_find_linux_cpuset_mntpnt(char **cgroup_mntpnt, char **cpuset_mntpnt, int fsroot_fd)
{
#define PROC_MOUNT_LINE_LEN 512
char line[PROC_MOUNT_LINE_LEN];
FILE *fd;
*cgroup_mntpnt = NULL;
*cpuset_mntpnt = NULL;
/* ideally we should use setmntent, getmntent, hasmntopt and endmntent,
* but they do not support fsroot_fd.
*/
fd = hwloc_fopen("/proc/mounts", "r", fsroot_fd);
if (!fd)
return;
while (fgets(line, sizeof(line), fd)) {
char *path;
char *type;
char *tmp;
/* remove the ending " 0 0\n" that the kernel always adds */
tmp = line + strlen(line) - 5;
if (tmp < line || strcmp(tmp, " 0 0\n"))
fprintf(stderr, "Unexpected end of /proc/mounts line `%s'\n", line);
else
*tmp = '\0';
/* path is after first field and a space */
tmp = strchr(line, ' ');
if (!tmp)
continue;
path = tmp+1;
/* type is after path, which may not contain spaces since the kernel escaped them to \040
* (see the manpage of getmntent) */
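/* e.g. (illustrative) a mount point "/mnt/my cpusets" appears here as
* "/mnt/my\040cpusets"; hwloc_strdup_mntpath() above undoes the escaping */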
tmp = strchr(path, ' ');
if (!tmp)
continue;
type = tmp+1;
/* mark the end of path to ease upcoming strdup */
*tmp = '\0';
if (!strncmp(type, "cpuset ", 7)) {
/* found a cpuset mntpnt */
hwloc_debug("Found cpuset mount point on %s\n", path);
*cpuset_mntpnt = hwloc_strdup_mntpath(path, type-path);
break;
} else if (!strncmp(type, "cgroup ", 7)) {
/* found a cgroup mntpnt */
char *opt, *opts;
int cpuset_opt = 0;
int noprefix_opt = 0;
/* find options */
tmp = strchr(type, ' ');
if (!tmp)
continue;
opts = tmp+1;
/* look at options */
while ((opt = strsep(&opts, ",")) != NULL) {
if (!strcmp(opt, "cpuset"))
cpuset_opt = 1;
else if (!strcmp(opt, "noprefix"))
noprefix_opt = 1;
}
if (!cpuset_opt)
continue;
if (noprefix_opt) {
hwloc_debug("Found cgroup emulating a cpuset mount point on %s\n", path);
*cpuset_mntpnt = hwloc_strdup_mntpath(path, type-path);
} else {
hwloc_debug("Found cgroup/cpuset mount point on %s\n", path);
*cgroup_mntpnt = hwloc_strdup_mntpath(path, type-path);
}
break;
}
}
fclose(fd);
}
/*
* Linux cpusets may be managed directly or through cgroup.
* If cgroup is used, tasks get a /proc/pid/cgroup which may contain a
* single line %d:cpuset:<name>. If cpusets are used, they get /proc/pid/cpuset
* containing <name>.
*/
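/* Illustrative contents (assumed): /proc/self/cgroup may contain a line such
 * as "6:cpuset:/my_set", while /proc/self/cpuset would contain just "/my_set". */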
static char *
hwloc_read_linux_cpuset_name(int fsroot_fd, hwloc_pid_t pid)
{
#define CPUSET_NAME_LEN 128
char cpuset_name[CPUSET_NAME_LEN];
FILE *fd;
char *tmp;
/* check whether a cgroup-cpuset is enabled */
if (!pid)
fd = hwloc_fopen("/proc/self/cgroup", "r", fsroot_fd);
else {
char path[] = "/proc/XXXXXXXXXX/cgroup";
snprintf(path, sizeof(path), "/proc/%d/cgroup", pid);
fd = hwloc_fopen(path, "r", fsroot_fd);
}
if (fd) {
/* find a cpuset line */
#define CGROUP_LINE_LEN 256
char line[CGROUP_LINE_LEN];
while (fgets(line, sizeof(line), fd)) {
char *end, *colon = strchr(line, ':');
if (!colon)
continue;
if (strncmp(colon, ":cpuset:", 8))
continue;
/* found a cgroup-cpuset line, return the name */
fclose(fd);
end = strchr(colon, '\n');
if (end)
*end = '\0';
hwloc_debug("Found cgroup-cpuset %s\n", colon+8);
return strdup(colon+8);
}
fclose(fd);
}
/* check whether a cpuset is enabled */
if (!pid)
fd = hwloc_fopen("/proc/self/cpuset", "r", fsroot_fd);
else {
char path[] = "/proc/XXXXXXXXXX/cpuset";
snprintf(path, sizeof(path), "/proc/%d/cpuset", pid);
fd = hwloc_fopen(path, "r", fsroot_fd);
}
if (!fd) {
/* found nothing */
hwloc_debug("%s", "No cgroup or cpuset found\n");
return NULL;
}
/* found a cpuset, return the name */
tmp = fgets(cpuset_name, sizeof(cpuset_name), fd);
fclose(fd);
if (!tmp)
return NULL;
tmp = strchr(cpuset_name, '\n');
if (tmp)
*tmp = '\0';
hwloc_debug("Found cpuset %s\n", cpuset_name);
return strdup(cpuset_name);
}
/*
* Then, the cpuset description is available from either the cgroup or
* the cpuset filesystem (usually mounted in / or /dev) where there
* are cgroup<name>/cpuset.{cpus,mems} or cpuset<name>/{cpus,mems} files.
*/
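/* For instance, with a cgroup mounted on /dev/cgroup and cpuset name "/foo",
* the masks below are read from /dev/cgroup/foo/cpuset.cpus and cpuset.mems;
* with a plain cpuset mounted on /dev/cpuset they come from /dev/cpuset/foo/{cpus,mems}
* (mount points obviously vary between systems).
*/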
static char *
hwloc_read_linux_cpuset_mask(const char *cgroup_mntpnt, const char *cpuset_mntpnt, const char *cpuset_name, const char *attr_name, int fsroot_fd)
{
#define CPUSET_FILENAME_LEN 256
char cpuset_filename[CPUSET_FILENAME_LEN];
FILE *fd;
char *info = NULL, *tmp;
ssize_t ssize;
size_t size = 0; /* initialized for portability; getline() requires a sane *n on some libcs */
if (cgroup_mntpnt) {
/* try to read the cpuset from cgroup */
snprintf(cpuset_filename, CPUSET_FILENAME_LEN, "%s%s/cpuset.%s", cgroup_mntpnt, cpuset_name, attr_name);
hwloc_debug("Trying to read cgroup file <%s>\n", cpuset_filename);
fd = hwloc_fopen(cpuset_filename, "r", fsroot_fd);
if (fd)
goto gotfile;
} else if (cpuset_mntpnt) {
/* try to read the cpuset directly */
snprintf(cpuset_filename, CPUSET_FILENAME_LEN, "%s%s/%s", cpuset_mntpnt, cpuset_name, attr_name);
hwloc_debug("Trying to read cpuset file <%s>\n", cpuset_filename);
fd = hwloc_fopen(cpuset_filename, "r", fsroot_fd);
if (fd)
goto gotfile;
}
/* found no cpuset description, ignore it */
hwloc_debug("Couldn't find cpuset <%s> description, ignoring\n", cpuset_name);
goto out;
gotfile:
ssize = getline(&info, &size, fd);
fclose(fd);
if (ssize < 0)
goto out;
if (!info)
goto out;
tmp = strchr(info, '\n');
if (tmp)
*tmp = '\0';
out:
return info;
}
static void
hwloc_admin_disable_set_from_cpuset(struct hwloc_topology *topology,
const char *cgroup_mntpnt, const char *cpuset_mntpnt, const char *cpuset_name,
const char *attr_name,
hwloc_bitmap_t admin_enabled_cpus_set)
{
char *cpuset_mask;
char *current, *comma, *tmp;
int prevlast, nextfirst, nextlast; /* beginning/end of enabled-segments */
hwloc_bitmap_t tmpset;
cpuset_mask = hwloc_read_linux_cpuset_mask(cgroup_mntpnt, cpuset_mntpnt, cpuset_name,
attr_name, topology->backend_params.sysfs.root_fd);
if (!cpuset_mask)
return;
hwloc_debug("found cpuset %s: %s\n", attr_name, cpuset_mask);
current = cpuset_mask;
prevlast = -1;
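/* the mask is a comma-separated list of ranges, e.g. "0-3,8-11";
* clear everything that lies between two consecutive enabled segments */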
while (1) {
/* save a pointer to the next comma and erase it to simplify things */
comma = strchr(current, ',');
if (comma)
*comma = '\0';
/* find current enabled-segment bounds */
nextfirst = strtoul(current, &tmp, 0);
if (*tmp == '-')
nextlast = strtoul(tmp+1, NULL, 0);
else
nextlast = nextfirst;
if (prevlast+1 <= nextfirst-1) {
hwloc_debug("%s [%d:%d] excluded by cpuset\n", attr_name, prevlast+1, nextfirst-1);
hwloc_bitmap_clr_range(admin_enabled_cpus_set, prevlast+1, nextfirst-1);
}
/* switch to next enabled-segment */
prevlast = nextlast;
if (!comma)
break;
current = comma+1;
}
hwloc_debug("%s [%d:%d] excluded by cpuset\n", attr_name, prevlast+1, nextfirst-1);
/* no easy way to clear until the infinity */
tmpset = hwloc_bitmap_alloc();
hwloc_bitmap_set_range(tmpset, 0, prevlast);
hwloc_bitmap_and(admin_enabled_cpus_set, admin_enabled_cpus_set, tmpset);
hwloc_bitmap_free(tmpset);
free(cpuset_mask);
}
static void
hwloc_parse_meminfo_info(struct hwloc_topology *topology,
const char *path,
int prefixlength,
uint64_t *local_memory,
uint64_t *meminfo_hugepages_count,
uint64_t *meminfo_hugepages_size,
int onlytotal)
{
char string[64];
FILE *fd;
fd = hwloc_fopen(path, "r", topology->backend_params.sysfs.root_fd);
if (!fd)
return;
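/* meminfo lines look like "MemTotal:  16318536 kB"; node-specific files
* prefix each line with "Node %d ", which callers skip via prefixlength */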
while (fgets(string, sizeof(string), fd) && *string != '\0')
{
unsigned long long number;
if (strlen(string) < (size_t) prefixlength)
continue;
if (sscanf(string+prefixlength, "MemTotal: %llu kB", (unsigned long long *) &number) == 1) {
*local_memory = number << 10;
if (onlytotal)
break;
}
else if (!onlytotal) {
if (sscanf(string+prefixlength, "Hugepagesize: %llu", (unsigned long long *) &number) == 1)
*meminfo_hugepages_size = number << 10;
else if (sscanf(string+prefixlength, "HugePages_Free: %llu", (unsigned long long *) &number) == 1)
/* these are free huge pages, not the total number of huge pages */
*meminfo_hugepages_count = number;
}
}
fclose(fd);
}
#define SYSFS_NUMA_NODE_PATH_LEN 128
static void
hwloc_parse_hugepages_info(struct hwloc_topology *topology,
const char *dirpath,
struct hwloc_obj_memory_s *memory,
uint64_t *remaining_local_memory)
{
DIR *dir;
struct dirent *dirent;
unsigned long index_ = 1;
FILE *hpfd;
char line[64];
char path[SYSFS_NUMA_NODE_PATH_LEN];
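/* sysfs exposes one directory per huge page size, e.g. hugepages-2048kB/nr_hugepages;
* slot 0 of page_types holds the normal page size, so huge page entries start at index 1 */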
dir = hwloc_opendir(dirpath, topology->backend_params.sysfs.root_fd);
if (dir) {
while ((dirent = readdir(dir)) != NULL) {
if (strncmp(dirent->d_name, "hugepages-", 10))
continue;
memory->page_types[index_].size = strtoul(dirent->d_name+10, NULL, 0) * 1024ULL;
sprintf(path, "%s/%s/nr_hugepages", dirpath, dirent->d_name);
hpfd = hwloc_fopen(path, "r", topology->backend_params.sysfs.root_fd);
if (hpfd) {
if (fgets(line, sizeof(line), hpfd)) {
fclose(hpfd);
/* this is the total number of huge pages of this size */
memory->page_types[index_].count = strtoull(line, NULL, 0);
*remaining_local_memory -= memory->page_types[index_].count * memory->page_types[index_].size;
index_++;
} else {
/* close the file even if reading fails, to avoid leaking the handle */
fclose(hpfd);
}
}
}
closedir(dir);
memory->page_types_len = index_;
}
}
static void
hwloc_get_kerrighed_node_meminfo_info(struct hwloc_topology *topology, unsigned long node, struct hwloc_obj_memory_s *memory)
{
char path[128];
uint64_t meminfo_hugepages_count, meminfo_hugepages_size = 0;
if (topology->is_thissystem) {
memory->page_types_len = 2;
memory->page_types = malloc(2*sizeof(*memory->page_types));
memset(memory->page_types, 0, 2*sizeof(*memory->page_types));
/* Try to get the hugepage size from sysconf in case we fail to get it from /proc/meminfo later */
#ifdef HAVE__SC_LARGE_PAGESIZE
memory->page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
memory->page_types[0].size = getpagesize();
}
snprintf(path, sizeof(path), "/proc/nodes/node%lu/meminfo", node);
hwloc_parse_meminfo_info(topology, path, 0 /* no prefix */,
&memory->local_memory,
&meminfo_hugepages_count, &meminfo_hugepages_size,
memory->page_types == NULL);
if (memory->page_types) {
uint64_t remaining_local_memory = memory->local_memory;
if (meminfo_hugepages_size) {
memory->page_types[1].size = meminfo_hugepages_size;
memory->page_types[1].count = meminfo_hugepages_count;
remaining_local_memory -= meminfo_hugepages_count * meminfo_hugepages_size;
} else {
memory->page_types_len = 1;
}
memory->page_types[0].count = remaining_local_memory / memory->page_types[0].size;
}
}
static void
hwloc_get_procfs_meminfo_info(struct hwloc_topology *topology, struct hwloc_obj_memory_s *memory)
{
uint64_t meminfo_hugepages_count, meminfo_hugepages_size = 0;
struct stat st;
int has_sysfs_hugepages = 0;
int types = 2;
int err;
err = hwloc_stat("/sys/kernel/mm/hugepages", &st, topology->backend_params.sysfs.root_fd);
if (!err) {
types = 1 + st.st_nlink-2;
has_sysfs_hugepages = 1;
}
if (topology->is_thissystem) {
memory->page_types_len = types;
memory->page_types = malloc(types*sizeof(*memory->page_types));
memset(memory->page_types, 0, types*sizeof(*memory->page_types));
/* Try to get the hugepage size from sysconf in case we fail to get it from /proc/meminfo later */
#ifdef HAVE__SC_LARGE_PAGESIZE
memory->page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
memory->page_types[0].size = getpagesize();
}
hwloc_parse_meminfo_info(topology, "/proc/meminfo", 0 /* no prefix */,
&memory->local_memory,
&meminfo_hugepages_count, &meminfo_hugepages_size,
memory->page_types == NULL);
if (memory->page_types) {
uint64_t remaining_local_memory = memory->local_memory;
if (has_sysfs_hugepages) {
/* read from /sys/kernel/mm/hugepages/hugepages-%skB/nr_hugepages */
hwloc_parse_hugepages_info(topology, "/sys/kernel/mm/hugepages", memory, &remaining_local_memory);
} else {
/* use what we found in meminfo */
if (meminfo_hugepages_size) {
memory->page_types[1].size = meminfo_hugepages_size;
memory->page_types[1].count = meminfo_hugepages_count;
remaining_local_memory -= meminfo_hugepages_count * meminfo_hugepages_size;
} else {
memory->page_types_len = 1;
}
}
memory->page_types[0].count = remaining_local_memory / memory->page_types[0].size;
}
}
static void
hwloc_sysfs_node_meminfo_info(struct hwloc_topology *topology,
const char *syspath, int node,
struct hwloc_obj_memory_s *memory)
{
char path[SYSFS_NUMA_NODE_PATH_LEN];
char meminfopath[SYSFS_NUMA_NODE_PATH_LEN];
uint64_t meminfo_hugepages_count = 0;
uint64_t meminfo_hugepages_size = 0;
struct stat st;
int has_sysfs_hugepages = 0;
int types = 2;
int err;
sprintf(path, "%s/node%d/hugepages", syspath, node);
err = hwloc_stat(path, &st, topology->backend_params.sysfs.root_fd);
if (!err) {
types = 1 + st.st_nlink-2;
has_sysfs_hugepages = 1;
}
if (topology->is_thissystem) {
memory->page_types_len = types;
memory->page_types = malloc(types*sizeof(*memory->page_types));
memset(memory->page_types, 0, types*sizeof(*memory->page_types));
}
sprintf(meminfopath, "%s/node%d/meminfo", syspath, node);
hwloc_parse_meminfo_info(topology, meminfopath,
hwloc_snprintf(NULL, 0, "Node %d ", node),
&memory->local_memory,
&meminfo_hugepages_count, NULL /* no hugepage size in node-specific meminfo */,
memory->page_types == NULL);
if (memory->page_types) {
uint64_t remaining_local_memory = memory->local_memory;
if (has_sysfs_hugepages) {
/* read from node%d/hugepages/hugepages-%skB/nr_hugepages */
hwloc_parse_hugepages_info(topology, path, memory, &remaining_local_memory);
} else {
/* get the hugepage size from the machine-wide meminfo since there is no size in node-specific meminfo;
* hwloc_get_procfs_meminfo_info must have been called earlier */
meminfo_hugepages_size = topology->levels[0][0]->memory.page_types[1].size;
/* use what we found in meminfo */
if (meminfo_hugepages_size) {
memory->page_types[1].count = meminfo_hugepages_count;
memory->page_types[1].size = meminfo_hugepages_size;
remaining_local_memory -= meminfo_hugepages_count * meminfo_hugepages_size;
} else {
memory->page_types_len = 1;
}
}
/* update what's remaining as normal pages */
memory->page_types[0].size = getpagesize();
memory->page_types[0].count = remaining_local_memory / memory->page_types[0].size;
}
}
static void
hwloc_parse_node_distance(const char *distancepath, unsigned nbnodes, float *distances, int fsroot_fd)
{
char string[4096]; /* enough for hundreds of nodes */
char *tmp, *next;
FILE * fd;
fd = hwloc_fopen(distancepath, "r", fsroot_fd);
if (!fd)
return;
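/* the distance file contains one space-separated value per node,
* e.g. "10 21 21 21" on a 4-node machine (10 meaning local) */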
if (!fgets(string, sizeof(string), fd)) {
fclose(fd);
return;
}
tmp = string;
while (tmp) {
unsigned distance = strtoul(tmp, &next, 0);
if (next == tmp)
break;
*distances = (float) distance;
distances++;
nbnodes--;
if (!nbnodes)
break;
tmp = next+1;
}
fclose(fd);
}
static void
look_sysfsnode(struct hwloc_topology *topology, const char *path, unsigned *found)
{
unsigned osnode;
unsigned nbnodes = 0;
DIR *dir;
struct dirent *dirent;
hwloc_obj_t node;
hwloc_bitmap_t nodeset = hwloc_bitmap_alloc();
*found = 0;
/* Get the list of nodes first */
dir = hwloc_opendir(path, topology->backend_params.sysfs.root_fd);
if (dir)
{
while ((dirent = readdir(dir)) != NULL)
{
if (strncmp(dirent->d_name, "node", 4))
continue;
osnode = strtoul(dirent->d_name+4, NULL, 0);
hwloc_bitmap_set(nodeset, osnode);
nbnodes++;
}
closedir(dir);
}
if (nbnodes <= 1)
{
hwloc_bitmap_free(nodeset);
return;
}
/* For convenience, put these declarations inside a block. */
{
hwloc_obj_t * nodes = calloc(nbnodes, sizeof(hwloc_obj_t));
float * distances = calloc(nbnodes*nbnodes, sizeof(float));
unsigned *indexes = calloc(nbnodes, sizeof(unsigned));
unsigned index_;
if (NULL == indexes || NULL == distances || NULL == nodes) {
free(nodes);
free(indexes);
free(distances);
hwloc_bitmap_free(nodeset);
goto out;
}
/* Get node indexes now. We need them in order since Linux groups
* sparse distances but keeps them in order in the sysfs distance files.
*/
index_ = 0;
hwloc_bitmap_foreach_begin (osnode, nodeset) {
indexes[index_] = osnode;
index_++;
} hwloc_bitmap_foreach_end();
hwloc_bitmap_free(nodeset);
#ifdef HWLOC_DEBUG
hwloc_debug("%s", "numa distance indexes: ");
for (index_ = 0; index_ < nbnodes; index_++) {
hwloc_debug(" %u", indexes[index_]);
}
hwloc_debug("%s", "\n");
#endif
/* Get actual distances now */
for (index_ = 0; index_ < nbnodes; index_++) {
char nodepath[SYSFS_NUMA_NODE_PATH_LEN];
hwloc_bitmap_t cpuset;
osnode = indexes[index_];
sprintf(nodepath, "%s/node%u/cpumap", path, osnode);
cpuset = hwloc_parse_cpumap(nodepath, topology->backend_params.sysfs.root_fd);
if (!cpuset)
continue;
node = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, osnode);
node->cpuset = cpuset;
node->nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_set(node->nodeset, osnode);
hwloc_sysfs_node_meminfo_info(topology, path, osnode, &node->memory);
hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n",
osnode, node->cpuset);
hwloc_insert_object_by_cpuset(topology, node);
nodes[index_] = node;
/* the Linux nodeX/distance file contains distances from X to the other localities (from the ACPI SLIT table or similar);
* store them in slots X*N...X*N+N-1 */
sprintf(nodepath, "%s/node%u/distance", path, osnode);
hwloc_parse_node_distance(nodepath, nbnodes, distances+index_*nbnodes, topology->backend_params.sysfs.root_fd);
}
hwloc_topology__set_distance_matrix(topology, HWLOC_OBJ_NODE, nbnodes, indexes, nodes, distances);
}
out:
*found = nbnodes;
}
/* Reads the entire file and stores the number of bytes read in *bytes_read if bytes_read != NULL.
* The returned pointer can be freed with free(). */
static void *
hwloc_read_raw(const char *p, const char *p1, size_t *bytes_read, int root_fd)
{
char *fname = NULL;
char *ret = NULL;
struct stat fs;
int file = -1;
unsigned len;
len = strlen(p) + 1 + strlen(p1) + 1;
fname = malloc(len);
if (NULL == fname) {
return NULL;
}
snprintf(fname, len, "%s/%s", p, p1);
file = hwloc_open(fname, root_fd);
if (-1 == file) {
goto out;
}
if (fstat(file, &fs)) {
goto out;
}
ret = (char *) malloc(fs.st_size);
if (NULL != ret) {
ssize_t cb = read(file, ret, fs.st_size);
if (cb == -1) {
free(ret);
ret = NULL;
} else {
if (NULL != bytes_read)
*bytes_read = cb;
}
}
out:
if (-1 != file)
close(file);
free(fname);
return ret;
}
/* Reads the entire file and returns it as a 0-terminated string.
* The returned pointer can be freed with free(). */
static char *
hwloc_read_str(const char *p, const char *p1, int root_fd)
{
size_t cb = 0;
char *ret = hwloc_read_raw(p, p1, &cb, root_fd);
if ((NULL != ret) && (0 < cb) && (0 != ret[cb-1])) {
char *nret = realloc(ret, cb + 1);
if (NULL == nret) { free(ret); return NULL; }
nret[cb] = 0;
ret = nret;
}
return ret;
}
/* Reads the first 32-bit big-endian value and returns it in host byte order */
static ssize_t
hwloc_read_unit32be(const char *p, const char *p1, uint32_t *buf, int root_fd)
{
size_t cb = 0;
uint32_t *tmp = hwloc_read_raw(p, p1, &cb, root_fd);
if (sizeof(*buf) != cb) {
free(tmp); /* tmp may be NULL if the read failed; free(NULL) is fine */
errno = EINVAL;
return -1;
}
*buf = htonl(*tmp);
free(tmp);
return sizeof(*buf);
}
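/* Growable array of device-tree "cpu" and "cache" nodes; the phandle and
* l2_cache (next-level-cache) values are used below to link caches to the
* CPUs sitting underneath them. */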
typedef struct {
unsigned int n, allocated;
struct {
hwloc_bitmap_t cpuset;
uint32_t phandle;
uint32_t l2_cache;
char *name;
} *p;
} device_tree_cpus_t;
static void
add_device_tree_cpus_node(device_tree_cpus_t *cpus, hwloc_bitmap_t cpuset,
uint32_t l2_cache, uint32_t phandle, const char *name)
{
if (cpus->n == cpus->allocated) {
if (!cpus->allocated)
cpus->allocated = 64;
else
cpus->allocated *= 2;
cpus->p = realloc(cpus->p, cpus->allocated * sizeof(cpus->p[0]));
}
cpus->p[cpus->n].phandle = phandle;
cpus->p[cpus->n].cpuset = (NULL == cpuset)?NULL:hwloc_bitmap_dup(cpuset);
cpus->p[cpus->n].l2_cache = l2_cache;
cpus->p[cpus->n].name = strdup(name);
++cpus->n;
}
/* Walks over the cache list in order to detect nested caches and the CPU mask of each */
static int
look_powerpc_device_tree_discover_cache(device_tree_cpus_t *cpus,
uint32_t phandle, unsigned int *level, hwloc_bitmap_t cpuset)
{
unsigned int i;
int ret = -1;
if ((NULL == level) || (NULL == cpuset) || phandle == (uint32_t) -1)
return ret;
for (i = 0; i < cpus->n; ++i) {
if (phandle != cpus->p[i].l2_cache)
continue;
if (NULL != cpus->p[i].cpuset) {
hwloc_bitmap_or(cpuset, cpuset, cpus->p[i].cpuset);
ret = 0;
} else {
++(*level);
if (0 == look_powerpc_device_tree_discover_cache(cpus,
cpus->p[i].phandle, level, cpuset))
ret = 0;
}
}
return ret;
}
static void
try_add_cache_from_device_tree_cpu(struct hwloc_topology *topology,
const char *cpu, unsigned int level, hwloc_bitmap_t cpuset)
{
/* Ignore Instruction caches */
/* d-cache-block-size - ignore */
/* d-cache-line-size - to read, in bytes */
/* d-cache-sets - ignore */
/* d-cache-size - to read, in bytes */
/* d-tlb-sets - ignore */
/* d-tlb-size - ignore, always 0 on power6 */
/* i-cache-* and i-tlb-* represent instruction cache, ignore */
uint32_t d_cache_line_size = 0, d_cache_size = 0;
struct hwloc_obj *c = NULL;
hwloc_read_unit32be(cpu, "d-cache-line-size", &d_cache_line_size,
topology->backend_params.sysfs.root_fd);
hwloc_read_unit32be(cpu, "d-cache-size", &d_cache_size,
topology->backend_params.sysfs.root_fd);
if ( (0 == d_cache_line_size) && (0 == d_cache_size) )
return;
c = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1);
c->attr->cache.depth = level;
c->attr->cache.linesize = d_cache_line_size;
c->attr->cache.size = d_cache_size;
c->cpuset = hwloc_bitmap_dup(cpuset);
hwloc_debug_1arg_bitmap("cache depth %d has cpuset %s\n", level, c->cpuset);
hwloc_insert_object_by_cpuset(topology, c);
}
/*
* Discovers L1/L2/L3 cache information on IBM PowerPC systems for old kernels (RHEL5.*)
* which provide NUMA node information without any further details
*/
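/* The device-tree typically contains entries such as
*   /proc/device-tree/cpus/PowerPC,POWER5@0/  (device_type "cpu")
*   /proc/device-tree/cpus/l2-cache@2000/     (device_type "cache")
* linked together through their "l2-cache"/"next-level-cache" and "phandle"
* properties (exact node names vary between machines). */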
static void
look_powerpc_device_tree(struct hwloc_topology *topology)
{
device_tree_cpus_t cpus;
const char ofroot[] = "/proc/device-tree/cpus";
unsigned int i;
int root_fd = topology->backend_params.sysfs.root_fd;
DIR *dt = hwloc_opendir(ofroot, root_fd);
struct dirent *dirent;
cpus.n = 0;
cpus.p = NULL;
cpus.allocated = 0;
if (NULL == dt)
return;
while (NULL != (dirent = readdir(dt))) {
struct stat statbuf;
int err;
char *cpu;
char *device_type;
uint32_t reg = -1, l2_cache = -1, phandle = -1;
unsigned len;
if ('.' == dirent->d_name[0])
continue;
len = sizeof(ofroot) + 1 + strlen(dirent->d_name) + 1;
cpu = malloc(len);
if (NULL == cpu) {
continue;
}
snprintf(cpu, len, "%s/%s", ofroot, dirent->d_name);
err = hwloc_stat(cpu, &statbuf, root_fd);
if (err < 0 || !S_ISDIR(statbuf.st_mode))
goto cont;
device_type = hwloc_read_str(cpu, "device_type", root_fd);
if (NULL == device_type)
goto cont;
hwloc_read_unit32be(cpu, "reg", &reg, root_fd);
if (hwloc_read_unit32be(cpu, "next-level-cache", &l2_cache, root_fd) == -1)
hwloc_read_unit32be(cpu, "l2-cache", &l2_cache, root_fd);
if (hwloc_read_unit32be(cpu, "phandle", &phandle, root_fd) == -1)
if (hwloc_read_unit32be(cpu, "ibm,phandle", &phandle, root_fd) == -1)
hwloc_read_unit32be(cpu, "linux,phandle", &phandle, root_fd);
if (0 == strcmp(device_type, "cache")) {
add_device_tree_cpus_node(&cpus, NULL, l2_cache, phandle, dirent->d_name);
}
else if (0 == strcmp(device_type, "cpu")) {
/* Found CPU */
hwloc_bitmap_t cpuset = NULL;
size_t cb = 0;
uint32_t *threads = hwloc_read_raw(cpu, "ibm,ppc-interrupt-server#s", &cb, root_fd);
uint32_t nthreads = cb / sizeof(threads[0]);
if (NULL != threads) {
cpuset = hwloc_bitmap_alloc();
for (i = 0; i < nthreads; ++i) {
if (hwloc_bitmap_isset(topology->levels[0][0]->complete_cpuset, ntohl(threads[i])))
hwloc_bitmap_set(cpuset, ntohl(threads[i]));
}
free(threads);
} else if ((unsigned int)-1 != reg) {
cpuset = hwloc_bitmap_alloc();
hwloc_bitmap_set(cpuset, reg);
}
if (NULL == cpuset) {
hwloc_debug("%s has no \"reg\" property, skipping\n", cpu);
} else {
struct hwloc_obj *core = NULL;
add_device_tree_cpus_node(&cpus, cpuset, l2_cache, phandle, dirent->d_name);
/* Add core */
core = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, reg);
core->cpuset = hwloc_bitmap_dup(cpuset);
hwloc_insert_object_by_cpuset(topology, core);
/* Add L1 cache */
try_add_cache_from_device_tree_cpu(topology, cpu, 1, cpuset);
hwloc_bitmap_free(cpuset);
}
free(device_type);
}
cont:
free(cpu);
}
closedir(dt);
/* No cores or L2 caches were found, exiting */
if (0 == cpus.n) {
hwloc_debug("No cores or L2 caches were found in %s, exiting\n", ofroot);
return;
}
#ifdef HWLOC_DEBUG
for (i = 0; i < cpus.n; ++i) {
hwloc_debug("%i: %s ibm,phandle=%08X l2_cache=%08X ",
i, cpus.p[i].name, cpus.p[i].phandle, cpus.p[i].l2_cache);
if (NULL == cpus.p[i].cpuset) {
hwloc_debug("%s\n", "no cpuset");
} else {
hwloc_debug_bitmap("cpuset %s\n", cpus.p[i].cpuset);
}
}
#endif
/* Scan L2/L3/... caches */
for (i = 0; i < cpus.n; ++i) {
unsigned int level = 2;
hwloc_bitmap_t cpuset;
/* Skip real CPUs */
if (NULL != cpus.p[i].cpuset)
continue;
/* Calculate cache level and CPU mask */
cpuset = hwloc_bitmap_alloc();
if (0 == look_powerpc_device_tree_discover_cache(&cpus,
cpus.p[i].phandle, &level, cpuset)) {
char *cpu;
unsigned len;
len = sizeof(ofroot) + 1 + strlen(cpus.p[i].name) + 1;
cpu = malloc(len);
if (NULL == cpu) {
return;
}
snprintf(cpu, len, "%s/%s", ofroot, cpus.p[i].name);
try_add_cache_from_device_tree_cpu(topology, cpu, level, cpuset);
free(cpu);
}
hwloc_bitmap_free(cpuset);
}
/* Do cleanup */
for (i = 0; i < cpus.n; ++i) {
hwloc_bitmap_free(cpus.p[i].cpuset);
free(cpus.p[i].name);
}
free(cpus.p);
}
/* Look at Linux' /sys/devices/system/cpu/cpu%d/topology/ */
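/* For each online cpu%d this reads topology/physical_package_id,
* topology/core_siblings, topology/thread_siblings and the cache/index%d/
* attributes (level, type, size, coherency_line_size, shared_cpu_map). */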
static void
look_sysfscpu(struct hwloc_topology *topology, const char *path)
{
hwloc_bitmap_t cpuset; /* Set of cpus for which we have topology information */
#define CPU_TOPOLOGY_STR_LEN 128
char str[CPU_TOPOLOGY_STR_LEN];
DIR *dir;
int i,j;
FILE *fd;
unsigned caches_added;
cpuset = hwloc_bitmap_alloc();
/* fill the cpuset of interesting cpus */
dir = hwloc_opendir(path, topology->backend_params.sysfs.root_fd);
if (dir) {
struct dirent *dirent;
while ((dirent = readdir(dir)) != NULL) {
unsigned long cpu;
char online[2];
if (strncmp(dirent->d_name, "cpu", 3))
continue;
cpu = strtoul(dirent->d_name+3, NULL, 0);
/* Maybe we don't have topology information but at least it exists */
hwloc_bitmap_set(topology->levels[0][0]->complete_cpuset, cpu);
/* check whether this processor is online */
sprintf(str, "%s/cpu%lu/online", path, cpu);
fd = hwloc_fopen(str, "r", topology->backend_params.sysfs.root_fd);
if (fd) {
if (fgets(online, sizeof(online), fd)) {
fclose(fd);
if (atoi(online)) {
hwloc_debug("os proc %lu is online\n", cpu);
} else {
hwloc_debug("os proc %lu is offline\n", cpu);
hwloc_bitmap_clr(topology->levels[0][0]->online_cpuset, cpu);
}
} else {
fclose(fd);
}
}
/* check whether the kernel exports topology information for this cpu */
sprintf(str, "%s/cpu%lu/topology", path, cpu);
if (hwloc_access(str, X_OK, topology->backend_params.sysfs.root_fd) < 0 && errno == ENOENT) {
hwloc_debug("os proc %lu has no accessible %s/cpu%lu/topology\n",
cpu, path, cpu);
continue;
}
hwloc_bitmap_set(cpuset, cpu);
}
closedir(dir);
}
topology->support.discovery->pu = 1;
hwloc_debug_1arg_bitmap("found %d cpu topologies, cpuset %s\n",
hwloc_bitmap_weight(cpuset), cpuset);
caches_added = 0;
hwloc_bitmap_foreach_begin(i, cpuset)
{
struct hwloc_obj *sock, *core, *thread;
hwloc_bitmap_t socketset, coreset, threadset, savedcoreset;
unsigned mysocketid, mycoreid;
int threadwithcoreid = 0;
/* look at the socket */
mysocketid = 0; /* shut-up the compiler */
sprintf(str, "%s/cpu%d/topology/physical_package_id", path, i);
hwloc_parse_sysfs_unsigned(str, &mysocketid, topology->backend_params.sysfs.root_fd);
sprintf(str, "%s/cpu%d/topology/core_siblings", path, i);
socketset = hwloc_parse_cpumap(str, topology->backend_params.sysfs.root_fd);
if (socketset && hwloc_bitmap_first(socketset) == i) {
/* first cpu in this socket, add the socket */
sock = hwloc_alloc_setup_object(HWLOC_OBJ_SOCKET, mysocketid);
sock->cpuset = socketset;
hwloc_debug_1arg_bitmap("os socket %u has cpuset %s\n",
mysocketid, socketset);
hwloc_insert_object_by_cpuset(topology, sock);
socketset = NULL; /* don't free it */
}
hwloc_bitmap_free(socketset);
/* look at the core */
mycoreid = 0; /* shut-up the compiler */
sprintf(str, "%s/cpu%d/topology/core_id", path, i);
hwloc_parse_sysfs_unsigned(str, &mycoreid, topology->backend_params.sysfs.root_fd);
sprintf(str, "%s/cpu%d/topology/thread_siblings", path, i);
coreset = hwloc_parse_cpumap(str, topology->backend_params.sysfs.root_fd);
savedcoreset = coreset; /* store it for later work-arounds */
if (coreset && hwloc_bitmap_weight(coreset) > 1) {
/* check if this is hyperthreading or different coreids */
unsigned siblingid, siblingcoreid;
hwloc_bitmap_t set = hwloc_bitmap_dup(coreset);
hwloc_bitmap_clr(set, i);
siblingid = hwloc_bitmap_first(set);
siblingcoreid = mycoreid;
sprintf(str, "%s/cpu%d/topology/core_id", path, siblingid);
hwloc_parse_sysfs_unsigned(str, &siblingcoreid, topology->backend_params.sysfs.root_fd);
threadwithcoreid = (siblingcoreid != mycoreid);
hwloc_bitmap_free(set);
}
if (coreset && (hwloc_bitmap_first(coreset) == i || threadwithcoreid)) {
/* regular core */
core = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, mycoreid);
if (threadwithcoreid) {
/* AMD multicore compute-unit: create one core per thread */
core->cpuset = hwloc_bitmap_alloc();
hwloc_bitmap_set(core->cpuset, i);
} else {
core->cpuset = coreset;
}
hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n",
mycoreid, coreset);
hwloc_insert_object_by_cpuset(topology, core);
coreset = NULL; /* don't free it */
}
/* look at the thread */
threadset = hwloc_bitmap_alloc();
hwloc_bitmap_only(threadset, i);
/* add the thread */
thread = hwloc_alloc_setup_object(HWLOC_OBJ_PU, i);
thread->cpuset = threadset;
hwloc_debug_1arg_bitmap("thread %d has cpuset %s\n",
i, threadset);
hwloc_insert_object_by_cpuset(topology, thread);
/* look at the caches */
for(j=0; j<10; j++) {
#define SHARED_CPU_MAP_STRLEN 128
char mappath[SHARED_CPU_MAP_STRLEN];
char str2[20]; /* enough for a level number (one digit) or a type (Data/Instruction/Unified) */
struct hwloc_obj *cache;
hwloc_bitmap_t cacheset;
unsigned long kB = 0;
unsigned linesize = 0;
int depth; /* 0 for L1, .... */
/* get the cache level depth */
sprintf(mappath, "%s/cpu%d/cache/index%d/level", path, i, j);
fd = hwloc_fopen(mappath, "r", topology->backend_params.sysfs.root_fd);
if (fd) {
if (fgets(str2,sizeof(str2), fd)) {
depth = strtoul(str2, NULL, 10)-1;
fclose(fd);
} else {
/* close before skipping this index to avoid leaking the file handle */
fclose(fd);
continue;
}
} else
continue;
/* ignore Instruction caches */
sprintf(mappath, "%s/cpu%d/cache/index%d/type", path, i, j);
fd = hwloc_fopen(mappath, "r", topology->backend_params.sysfs.root_fd);
if (fd) {
if (fgets(str2, sizeof(str2), fd)) {
fclose(fd);
if (!strncmp(str2, "Instruction", 11))
continue;
} else {
fclose(fd);
continue;
}
} else
continue;
/* get the cache size */
sprintf(mappath, "%s/cpu%d/cache/index%d/size", path, i, j);
fd = hwloc_fopen(mappath, "r", topology->backend_params.sysfs.root_fd);
if (fd) {
if (fgets(str2,sizeof(str2), fd))
kB = atol(str2); /* in kB */
fclose(fd);
}
/* get the line size */
sprintf(mappath, "%s/cpu%d/cache/index%d/coherency_line_size", path, i, j);
fd = hwloc_fopen(mappath, "r", topology->backend_params.sysfs.root_fd);
if (fd) {
if (fgets(str2,sizeof(str2), fd))
linesize = atol(str2); /* in bytes */
fclose(fd);
}
sprintf(mappath, "%s/cpu%d/cache/index%d/shared_cpu_map", path, i, j);
cacheset = hwloc_parse_cpumap(mappath, topology->backend_params.sysfs.root_fd);
if (cacheset) {
if (hwloc_bitmap_weight(cacheset) < 1) {
/* mask is wrong (happens on many Itanium machines) */
if (savedcoreset)
/* assume it's a core-specific cache */
hwloc_bitmap_copy(cacheset, savedcoreset);
else
/* assume it's not shared */
hwloc_bitmap_only(cacheset, i);
}
if (hwloc_bitmap_first(cacheset) == i) {
/* first cpu in this cache, add the cache */
cache = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1);
cache->attr->cache.size = kB << 10;
cache->attr->cache.depth = depth+1;
cache->attr->cache.linesize = linesize;
cache->cpuset = cacheset;
hwloc_debug_1arg_bitmap("cache depth %d has cpuset %s\n",
depth, cacheset);
hwloc_insert_object_by_cpuset(topology, cache);
cacheset = NULL; /* don't free it */
++caches_added;
}
}
hwloc_bitmap_free(cacheset);
}
hwloc_bitmap_free(coreset);
}
hwloc_bitmap_foreach_end();
if (0 == caches_added)
look_powerpc_device_tree(topology);
hwloc_bitmap_free(cpuset);
}
/* Look at Linux' /proc/cpuinfo */
# define PROCESSOR "processor"
# define PHYSID "physical id"
# define COREID "core id"
#define HWLOC_NBMAXCPUS 1024 /* FIXME: drop */
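/* The parser below only looks at lines such as
*   processor   : 12
*   physical id : 1
*   core id     : 2
* and counts the distinct (physical id, core id) pairs. */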
static int
look_cpuinfo(struct hwloc_topology *topology, const char *path,
hwloc_bitmap_t online_cpuset)
{
FILE *fd;
char *str = NULL;
char *endptr;
unsigned len;
unsigned proc_physids[HWLOC_NBMAXCPUS];
unsigned osphysids[HWLOC_NBMAXCPUS];
unsigned proc_coreids[HWLOC_NBMAXCPUS];
unsigned oscoreids[HWLOC_NBMAXCPUS];
unsigned proc_osphysids[HWLOC_NBMAXCPUS];
unsigned core_osphysids[HWLOC_NBMAXCPUS];
unsigned procid_max=0;
unsigned numprocs=0;
unsigned numsockets=0;
unsigned numcores=0;
unsigned long physid;
unsigned long coreid;
unsigned missingsocket;
unsigned missingcore;
unsigned long processor = (unsigned long) -1;
unsigned i;
hwloc_bitmap_t cpuset;
hwloc_obj_t obj;
for (i = 0; i < HWLOC_NBMAXCPUS; i++) {
proc_physids[i] = -1;
osphysids[i] = -1;
proc_coreids[i] = -1;
oscoreids[i] = -1;
proc_osphysids[i] = -1;
core_osphysids[i] = -1;
}
if (!(fd=hwloc_fopen(path,"r", topology->backend_params.sysfs.root_fd)))
{
hwloc_debug("%s", "could not open /proc/cpuinfo\n");
return -1;
}
cpuset = hwloc_bitmap_alloc();
/* Just record information and count number of sockets and cores */
len = strlen(PHYSID) + 1 + 9 + 1 + 1;
str = malloc(len);
hwloc_debug("%s", "\n\n * Topology extraction from /proc/cpuinfo *\n\n");
while (fgets(str,len,fd)!=NULL)
{
# define getprocnb_begin(field, var) \
if ( !strncmp(field,str,strlen(field))) \
{ \
char *c = strchr(str, ':')+1; \
var = strtoul(c,&endptr,0); \
if (endptr==c) \
{ \
hwloc_debug("%s", "no number in "field" field of /proc/cpuinfo\n"); \
hwloc_bitmap_free(cpuset); \
free(str); \
return -1; \
} \
else if (var==ULONG_MAX) \
{ \
hwloc_debug("%s", "too big "field" number in /proc/cpuinfo\n"); \
hwloc_bitmap_free(cpuset); \
free(str); \
return -1; \
} \
hwloc_debug(field " %lu\n", var)
# define getprocnb_end() \
}
getprocnb_begin(PROCESSOR,processor);
hwloc_bitmap_set(cpuset, processor);
obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, processor);
obj->cpuset = hwloc_bitmap_alloc();
hwloc_bitmap_only(obj->cpuset, processor);
hwloc_debug_2args_bitmap("cpu %u (os %lu) has cpuset %s\n",
numprocs, processor, obj->cpuset);
numprocs++;
hwloc_insert_object_by_cpuset(topology, obj);
getprocnb_end() else
getprocnb_begin(PHYSID,physid);
proc_osphysids[processor]=physid;
for (i=0; i<numsockets; i++)
if (physid == osphysids[i])
break;
proc_physids[processor]=i;
hwloc_debug("%lu on socket %u (%lx)\n", processor, i, physid);
if (i==numsockets)
osphysids[(numsockets)++] = physid;
getprocnb_end() else
getprocnb_begin(COREID,coreid);
for (i=0; i<numcores; i++)
if (coreid == oscoreids[i] && proc_osphysids[processor] == core_osphysids[i])
break;
proc_coreids[processor]=i;
if (i==numcores)
{
core_osphysids[numcores] = proc_osphysids[processor];
oscoreids[numcores] = coreid;
(numcores)++;
}
getprocnb_end()
if (str[strlen(str)-1]!='\n')
{
/* ignore end of line */
if (fscanf(fd,"%*[^\n]") == EOF)
break;
getc(fd);
}
}
fclose(fd);
free(str);
if (processor == (unsigned long) -1) {
hwloc_bitmap_free(cpuset);
return -1;
}
topology->support.discovery->pu = 1;
/* setup the final number of procs */
procid_max = processor + 1;
hwloc_bitmap_copy(online_cpuset, cpuset);
hwloc_bitmap_free(cpuset);
hwloc_debug("%u online processors found, with id max %u\n", numprocs, procid_max);
hwloc_debug_bitmap("online processor cpuset: %s\n", online_cpuset);
hwloc_debug("%s", "\n * Topology summary *\n");
hwloc_debug("%u processors (%u max id)\n", numprocs, procid_max);
/* Some buggy Linuxes don't provide numbers for processor 0, which makes us
* provide bogus information. We should rather drop it. */
missingsocket=0;
missingcore=0;
hwloc_bitmap_foreach_begin(processor, online_cpuset)
if (proc_physids[processor] == (unsigned) -1)
missingsocket=1;
if (proc_coreids[processor] == (unsigned) -1)
missingcore=1;
if (missingcore && missingsocket)
/* No usable information, no need to continue */
break;
hwloc_bitmap_foreach_end();
hwloc_debug("%u sockets%s\n", numsockets, missingsocket ? ", but some missing socket" : "");
if (!missingsocket && numsockets>0)
hwloc_setup_level(procid_max, numsockets, osphysids, proc_physids, topology, HWLOC_OBJ_SOCKET);
look_powerpc_device_tree(topology);
hwloc_debug("%u cores%s\n", numcores, missingcore ? ", but some missing core" : "");
if (!missingcore && numcores>0)
hwloc_setup_level(procid_max, numcores, oscoreids, proc_coreids, topology, HWLOC_OBJ_CORE);
return 0;
}
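/* DMI strings are exported by the kernel as one-line files under
* /sys/class/dmi/id/, e.g. /sys/class/dmi/id/product_name. */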
static void
hwloc__get_dmi_one_info(struct hwloc_topology *topology, hwloc_obj_t obj, const char *sysfs_name, const char *hwloc_name)
{
char sysfs_path[128];
char dmi_line[64];
char *tmp;
FILE *fd;
snprintf(sysfs_path, sizeof(sysfs_path), "/sys/class/dmi/id/%s", sysfs_name);
dmi_line[0] = '\0';
fd = hwloc_fopen(sysfs_path, "r", topology->backend_params.sysfs.root_fd);
if (fd) {
tmp = fgets(dmi_line, sizeof(dmi_line), fd);
fclose (fd);
if (tmp && dmi_line[0] != '\0') {
tmp = strchr(dmi_line, '\n');
if (tmp)
*tmp = '\0';
hwloc_debug("found %s '%s'\n", hwloc_name, dmi_line);
hwloc_add_object_info(obj, hwloc_name, dmi_line);
}
}
}
static void
hwloc__get_dmi_info(struct hwloc_topology *topology, hwloc_obj_t obj)
{
hwloc__get_dmi_one_info(topology, obj, "product_name", "DMIProductName");
hwloc__get_dmi_one_info(topology, obj, "product_version", "DMIProductVersion");
hwloc__get_dmi_one_info(topology, obj, "product_serial", "DMIProductSerial");
hwloc__get_dmi_one_info(topology, obj, "product_uuid", "DMIProductUUID");
hwloc__get_dmi_one_info(topology, obj, "board_vendor", "DMIBoardVendor");
hwloc__get_dmi_one_info(topology, obj, "board_name", "DMIBoardName");
hwloc__get_dmi_one_info(topology, obj, "board_version", "DMIBoardVersion");
hwloc__get_dmi_one_info(topology, obj, "board_serial", "DMIBoardSerial");
hwloc__get_dmi_one_info(topology, obj, "board_asset_tag", "DMIBoardAssetTag");
hwloc__get_dmi_one_info(topology, obj, "chassis_vendor", "DMIChassisVendor");
hwloc__get_dmi_one_info(topology, obj, "chassis_type", "DMIChassisType");
hwloc__get_dmi_one_info(topology, obj, "chassis_version", "DMIChassisVersion");
hwloc__get_dmi_one_info(topology, obj, "chassis_serial", "DMIChassisSerial");
hwloc__get_dmi_one_info(topology, obj, "chassis_asset_tag", "DMIChassisAssetTag");
hwloc__get_dmi_one_info(topology, obj, "bios_vendor", "DMIBIOSVendor");
hwloc__get_dmi_one_info(topology, obj, "bios_version", "DMIBIOSVersion");
hwloc__get_dmi_one_info(topology, obj, "bios_date", "DMIBIOSDate");
hwloc__get_dmi_one_info(topology, obj, "sys_vendor", "DMISysVendor");
}
void
hwloc_look_linux(struct hwloc_topology *topology)
{
DIR *nodes_dir;
unsigned nbnodes;
char *cpuset_mntpnt, *cgroup_mntpnt, *cpuset_name = NULL;
int err;
/* Gather the list of admin-disabled cpus and mems */
hwloc_find_linux_cpuset_mntpnt(&cgroup_mntpnt, &cpuset_mntpnt, topology->backend_params.sysfs.root_fd);
if (cgroup_mntpnt || cpuset_mntpnt) {
cpuset_name = hwloc_read_linux_cpuset_name(topology->backend_params.sysfs.root_fd, topology->pid);
if (cpuset_name) {
hwloc_admin_disable_set_from_cpuset(topology, cgroup_mntpnt, cpuset_mntpnt, cpuset_name, "cpus", topology->levels[0][0]->allowed_cpuset);
hwloc_admin_disable_set_from_cpuset(topology, cgroup_mntpnt, cpuset_mntpnt, cpuset_name, "mems", topology->levels[0][0]->allowed_nodeset);
}
free(cgroup_mntpnt);
free(cpuset_mntpnt);
}
nodes_dir = hwloc_opendir("/proc/nodes", topology->backend_params.sysfs.root_fd);
if (nodes_dir) {
/* Kerrighed */
struct dirent *dirent;
char path[128];
hwloc_obj_t machine;
hwloc_bitmap_t machine_online_set;
/* replace top-level object type with SYSTEM and add some MACHINE underneath */
topology->levels[0][0]->type = HWLOC_OBJ_SYSTEM;
topology->levels[0][0]->name = strdup("Kerrighed");
/* No cpuset support for now. */
/* No sys support for now. */
while ((dirent = readdir(nodes_dir)) != NULL) {
unsigned long node;
if (strncmp(dirent->d_name, "node", 4))
continue;
machine_online_set = hwloc_bitmap_alloc();
node = strtoul(dirent->d_name+4, NULL, 0);
snprintf(path, sizeof(path), "/proc/nodes/node%lu/cpuinfo", node);
err = look_cpuinfo(topology, path, machine_online_set);
if (err < 0) {
hwloc_bitmap_free(machine_online_set);
continue;
}
hwloc_bitmap_or(topology->levels[0][0]->online_cpuset, topology->levels[0][0]->online_cpuset, machine_online_set);
machine = hwloc_alloc_setup_object(HWLOC_OBJ_MACHINE, node);
machine->cpuset = machine_online_set;
hwloc_debug_1arg_bitmap("machine number %lu has cpuset %s\n",
node, machine_online_set);
hwloc_insert_object_by_cpuset(topology, machine);
/* Get the machine memory attributes */
hwloc_get_kerrighed_node_meminfo_info(topology, node, &machine->memory);
/* Gather DMI info */
/* FIXME: get the right DMI info of each machine */
hwloc__get_dmi_info(topology, machine);
}
closedir(nodes_dir);
} else {
/* Get the machine memory attributes */
hwloc_get_procfs_meminfo_info(topology, &topology->levels[0][0]->memory);
/* Gather NUMA information. Must be after hwloc_get_procfs_meminfo_info so that the hugepage size is known */
look_sysfsnode(topology, "/sys/devices/system/node", &nbnodes);
/* if we found some numa nodes, the machine object has no local memory */
if (nbnodes) {
unsigned i;
topology->levels[0][0]->memory.local_memory = 0;
if (topology->levels[0][0]->memory.page_types)
for(i=0; i<topology->levels[0][0]->memory.page_types_len; i++)
topology->levels[0][0]->memory.page_types[i].count = 0;
}
/* Gather the list of cpus now */
if (getenv("HWLOC_LINUX_USE_CPUINFO")
|| (hwloc_access("/sys/devices/system/cpu/cpu0/topology/core_siblings", R_OK, topology->backend_params.sysfs.root_fd) < 0
&& hwloc_access("/sys/devices/system/cpu/cpu0/topology/thread_siblings", R_OK, topology->backend_params.sysfs.root_fd) < 0)) {
/* fall back to reading cpuinfo if /sys/.../topology is unavailable (kernels before 2.6.16)
* or does not contain anything interesting */
err = look_cpuinfo(topology, "/proc/cpuinfo", topology->levels[0][0]->online_cpuset);
if (err < 0) {
if (topology->is_thissystem)
hwloc_setup_pu_level(topology, hwloc_fallback_nbprocessors(topology));
else
/* fsys-root but not this system, no way, assume there's just 1
* processor :/ */
hwloc_setup_pu_level(topology, 1);
}
} else {
look_sysfscpu(topology, "/sys/devices/system/cpu");
}
/* Gather DMI info */
hwloc__get_dmi_info(topology, topology->levels[0][0]);
}
hwloc_add_object_info(topology->levels[0][0], "Backend", "Linux");
if (cpuset_name) {
hwloc_add_object_info(topology->levels[0][0], "LinuxCgroup", cpuset_name);
free(cpuset_name);
}
/* gather uname info if fsroot wasn't changed */
if (topology->is_thissystem)
hwloc_add_uname_info(topology);
}
void
hwloc_set_linux_hooks(struct hwloc_topology *topology)
{
topology->set_thisthread_cpubind = hwloc_linux_set_thisthread_cpubind;
topology->get_thisthread_cpubind = hwloc_linux_get_thisthread_cpubind;
topology->set_thisproc_cpubind = hwloc_linux_set_thisproc_cpubind;
topology->get_thisproc_cpubind = hwloc_linux_get_thisproc_cpubind;
topology->set_proc_cpubind = hwloc_linux_set_proc_cpubind;
topology->get_proc_cpubind = hwloc_linux_get_proc_cpubind;
#if HAVE_DECL_PTHREAD_SETAFFINITY_NP
topology->set_thread_cpubind = hwloc_linux_set_thread_cpubind;
#endif /* HAVE_DECL_PTHREAD_SETAFFINITY_NP */
#if HAVE_DECL_PTHREAD_GETAFFINITY_NP
topology->get_thread_cpubind = hwloc_linux_get_thread_cpubind;
#endif /* HAVE_DECL_PTHREAD_GETAFFINITY_NP */
topology->get_thisthread_last_cpu_location = hwloc_linux_get_thisthread_last_cpu_location;
topology->get_thisproc_last_cpu_location = hwloc_linux_get_thisproc_last_cpu_location;
topology->get_proc_last_cpu_location = hwloc_linux_get_proc_last_cpu_location;
#ifdef HWLOC_HAVE_SET_MEMPOLICY
topology->set_thisthread_membind = hwloc_linux_set_thisthread_membind;
topology->get_thisthread_membind = hwloc_linux_get_thisthread_membind;
#endif /* HWLOC_HAVE_SET_MEMPOLICY */
#ifdef HWLOC_HAVE_MBIND
topology->set_area_membind = hwloc_linux_set_area_membind;
topology->alloc_membind = hwloc_linux_alloc_membind;
topology->alloc = hwloc_alloc_mmap;
topology->free_membind = hwloc_free_mmap;
topology->support.membind->firsttouch_membind = 1;
topology->support.membind->bind_membind = 1;
topology->support.membind->interleave_membind = 1;
#endif /* HWLOC_HAVE_MBIND */
#if (defined HWLOC_HAVE_MIGRATE_PAGES) || ((defined HWLOC_HAVE_MBIND) && (defined MPOL_MF_MOVE))
topology->support.membind->migrate_membind = 1;
#endif
}
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2011 INRIA. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux 1
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
#include <private/autogen/config.h>
#include <sys/types.h>
#include <dirent.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <pthread.h>
#include <hwloc.h>
#include <private/private.h>
#include <private/debug.h>
#include <numa.h>
#include <radset.h>
#include <cpuset.h>
#include <sys/mman.h>
/*
* TODO
*
* nsg_init(), nsg_attach_pid(), RAD_MIGRATE/RAD_WAIT
* assign_pid_to_pset()
*
* pthread_use_only_cpu too?
*/
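/* Build a radset containing exactly the RAD (NUMA node) whose CPU set matches
* hwloc_set; returns 1 on success, otherwise returns 0 with errno set to EXDEV. */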
static int
prepare_radset(hwloc_topology_t topology, radset_t *radset, hwloc_const_bitmap_t hwloc_set)
{
unsigned cpu;
cpuset_t target_cpuset;
cpuset_t cpuset, xor_cpuset;
radid_t radid;
int ret = 0;
int ret_errno = 0;
cpusetcreate(&target_cpuset);
cpuemptyset(target_cpuset);
hwloc_bitmap_foreach_begin(cpu, hwloc_set)
cpuaddset(target_cpuset, cpu);
hwloc_bitmap_foreach_end();
cpusetcreate(&cpuset);
cpusetcreate(&xor_cpuset);
for (radid = 0; radid < topology->backend_params.osf.nbnodes; radid++) {
cpuemptyset(cpuset);
if (rad_get_cpus(radid, cpuset)==-1) {
fprintf(stderr,"rad_get_cpus(%d) failed: %s\n",radid,strerror(errno));
continue;
}
cpuxorset(target_cpuset, cpuset, xor_cpuset);
if (cpucountset(xor_cpuset) == 0) {
/* Found it */
radsetcreate(radset);
rademptyset(*radset);
radaddset(*radset, radid);
ret = 1;
goto out;
}
}
/* radset containing exactly this set of CPUs not found */
ret_errno = EXDEV;
out:
cpusetdestroy(&target_cpuset);
cpusetdestroy(&cpuset);
cpusetdestroy(&xor_cpuset);
errno = ret_errno;
return ret;
}
/* Note: get_cpubind not available on OSF */
static int
hwloc_osf_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_const_bitmap_t hwloc_set, int flags)
{
radset_t radset;
if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology))) {
if ((errno = pthread_rad_detach(thread)))
return -1;
return 0;
}
/* Apparently OSF migrates pages */
if (flags & HWLOC_CPUBIND_NOMEMBIND) {
errno = ENOSYS;
return -1;
}
if (!prepare_radset(topology, &radset, hwloc_set))
return -1;
if (flags & HWLOC_CPUBIND_STRICT) {
if ((errno = pthread_rad_bind(thread, radset, RAD_INSIST | RAD_WAIT)))
return -1;
} else {
if ((errno = pthread_rad_attach(thread, radset, RAD_WAIT)))
return -1;
}
radsetdestroy(&radset);
return 0;
}
static int
hwloc_osf_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags)
{
radset_t radset;
if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology))) {
if (rad_detach_pid(pid))
return -1;
return 0;
}
/* Apparently OSF migrates pages */
if (flags & HWLOC_CPUBIND_NOMEMBIND) {
errno = ENOSYS;
return -1;
}
if (!prepare_radset(topology, &radset, hwloc_set))
return -1;
if (flags & HWLOC_CPUBIND_STRICT) {
if (rad_bind_pid(pid, radset, RAD_INSIST | RAD_WAIT))
return -1;
} else {
if (rad_attach_pid(pid, radset, RAD_WAIT))
return -1;
}
radsetdestroy(&radset);
return 0;
}
static int
hwloc_osf_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
{
return hwloc_osf_set_thread_cpubind(topology, pthread_self(), hwloc_set, flags);
}
static int
hwloc_osf_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
{
return hwloc_osf_set_proc_cpubind(topology, getpid(), hwloc_set, flags);
}
static int
hwloc_osf_prepare_mattr(hwloc_topology_t topology __hwloc_attribute_unused, memalloc_attr_t *mattr, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags __hwloc_attribute_unused)
{
unsigned long osf_policy;
int node;
switch (policy) {
case HWLOC_MEMBIND_FIRSTTOUCH:
osf_policy = MPOL_THREAD;
break;
case HWLOC_MEMBIND_DEFAULT:
case HWLOC_MEMBIND_BIND:
osf_policy = MPOL_DIRECTED;
break;
case HWLOC_MEMBIND_INTERLEAVE:
osf_policy = MPOL_STRIPPED;
break;
case HWLOC_MEMBIND_REPLICATE:
osf_policy = MPOL_REPLICATED;
break;
default:
errno = ENOSYS;
return -1;
}
memset(mattr, 0, sizeof(*mattr));
mattr->mattr_policy = osf_policy;
mattr->mattr_rad = RAD_NONE;
radsetcreate(&mattr->mattr_radset);
rademptyset(mattr->mattr_radset);
hwloc_bitmap_foreach_begin(node, nodeset)
radaddset(mattr->mattr_radset, node);
hwloc_bitmap_foreach_end();
return 0;
}
static int
hwloc_osf_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
memalloc_attr_t mattr;
int behavior = 0;
int ret;
if (flags & HWLOC_MEMBIND_MIGRATE)
behavior |= MADV_CURRENT;
if (flags & HWLOC_MEMBIND_STRICT)
behavior |= MADV_INSIST;
if (hwloc_osf_prepare_mattr(topology, &mattr, nodeset, policy, flags))
return -1;
ret = nmadvise(addr, len, MADV_CURRENT, &mattr);
radsetdestroy(&mattr.mattr_radset);
return ret;
}
static void *
hwloc_osf_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
memalloc_attr_t mattr;
void *ptr;
if (hwloc_osf_prepare_mattr(topology, &mattr, nodeset, policy, flags))
return hwloc_alloc_or_fail(topology, len, flags);
/* TODO: rather use acreate/amalloc ? */
ptr = nmmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1,
0, &mattr);
radsetdestroy(&mattr.mattr_radset);
return ptr;
}
void
hwloc_look_osf(struct hwloc_topology *topology)
{
cpu_cursor_t cursor;
unsigned nbnodes;
radid_t radid, radid2;
radset_t radset, radset2;
cpuid_t cpuid;
cpuset_t cpuset;
struct hwloc_obj *obj;
unsigned distance;
topology->backend_params.osf.nbnodes = nbnodes = rad_get_num();
cpusetcreate(&cpuset);
radsetcreate(&radset);
radsetcreate(&radset2);
{
hwloc_obj_t *nodes = calloc(nbnodes, sizeof(hwloc_obj_t));
unsigned *indexes = calloc(nbnodes, sizeof(unsigned));
float *distances = calloc(nbnodes*nbnodes, sizeof(float));
unsigned nfound;
numa_attr_t attr;
attr.nattr_type = R_RAD;
attr.nattr_descr.rd_radset = radset;
attr.nattr_flags = 0;
for (radid = 0; radid < (radid_t) nbnodes; radid++) {
rademptyset(radset);
radaddset(radset, radid);
cpuemptyset(cpuset);
if (rad_get_cpus(radid, cpuset)==-1) {
fprintf(stderr,"rad_get_cpus(%d) failed: %s\n",radid,strerror(errno));
continue;
}
indexes[radid] = radid;
nodes[radid] = obj = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, radid);
obj->cpuset = hwloc_bitmap_alloc();
obj->memory.local_memory = rad_get_physmem(radid) * getpagesize();
obj->memory.page_types_len = 2;
obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
obj->memory.page_types[0].size = getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
cursor = SET_CURSOR_INIT;
while((cpuid = cpu_foreach(cpuset, 0, &cursor)) != CPU_NONE)
hwloc_bitmap_set(obj->cpuset, cpuid);
hwloc_debug_1arg_bitmap("node %d has cpuset %s\n",
radid, obj->cpuset);
hwloc_insert_object_by_cpuset(topology, obj);
nfound = 0;
for (radid2 = 0; radid2 < (radid_t) nbnodes; radid2++)
distances[radid*nbnodes+radid2] = RAD_DIST_REMOTE;
for (distance = RAD_DIST_LOCAL; distance < RAD_DIST_REMOTE; distance++) {
attr.nattr_distance = distance;
/* get set of NUMA nodes at distance <= DISTANCE */
if (nloc(&attr, radset2)) {
fprintf(stderr,"nloc failed: %s\n", strerror(errno));
continue;
}
cursor = SET_CURSOR_INIT;
while ((radid2 = rad_foreach(radset2, 0, &cursor)) != RAD_NONE) {
if (distances[radid*nbnodes+radid2] == RAD_DIST_REMOTE) {
distances[radid*nbnodes+radid2] = (float) distance;
nfound++;
}
}
if (nfound == nbnodes)
/* Finished finding distances, no need to go up to RAD_DIST_REMOTE */
break;
}
}
hwloc_topology__set_distance_matrix(topology, HWLOC_OBJ_NODE, nbnodes, indexes, nodes, distances);
}
radsetdestroy(&radset2);
radsetdestroy(&radset);
cpusetdestroy(&cpuset);
/* add PU objects */
hwloc_setup_pu_level(topology, hwloc_fallback_nbprocessors(topology));
hwloc_add_object_info(topology->levels[0][0], "Backend", "OSF");
}
void
hwloc_set_osf_hooks(struct hwloc_topology *topology)
{
topology->set_thread_cpubind = hwloc_osf_set_thread_cpubind;
topology->set_thisthread_cpubind = hwloc_osf_set_thisthread_cpubind;
topology->set_proc_cpubind = hwloc_osf_set_proc_cpubind;
topology->set_thisproc_cpubind = hwloc_osf_set_thisproc_cpubind;
topology->set_area_membind = hwloc_osf_set_area_membind;
topology->alloc_membind = hwloc_osf_alloc_membind;
topology->alloc = hwloc_alloc_mmap;
topology->free_membind = hwloc_free_mmap;
topology->support.membind->firsttouch_membind = 1;
topology->support.membind->bind_membind = 1;
topology->support.membind->interleave_membind = 1;
topology->support.membind->replicate_membind = 1;
}
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2011 INRIA. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux 1
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
#include <private/autogen/config.h>
#include <hwloc.h>
#include <private/private.h>
#include <private/debug.h>
#include <stdio.h>
#include <errno.h>
#include <dirent.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/processor.h>
#include <sys/procset.h>
#include <sys/types.h>
#include <sys/mman.h>
#ifdef HAVE_LIBLGRP
# include <sys/lgrp_user.h>
#endif
/* TODO: use psets? (only for root)
* TODO: get cache info from prtdiag? (it is setgid sys to be able to read from
* crw-r----- 1 root sys 88, 0 nov 3 14:35 /devices/pseudo/devinfo@0:devinfo
* and run (apparently undocumented) ioctls on it).
*/
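/* Bind a process or LWP: the full CPU set means unbinding; otherwise try to
* express the set as a union of NUMA nodes via lgroup affinities, and finally
* fall back to processor_bind(), which only accepts a single CPU. */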
static int
hwloc_solaris_set_sth_cpubind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_const_bitmap_t hwloc_set, int flags)
{
unsigned target_cpu;
/* The resulting binding is always strict */
if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology))) {
if (processor_bind(idtype, id, PBIND_NONE, NULL) != 0)
return -1;
#ifdef HAVE_LIBLGRP
if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) {
int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
if (depth >= 0) {
int n = hwloc_get_nbobjs_by_depth(topology, depth);
int i;
for (i = 0; i < n; i++) {
hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
}
}
}
#endif /* HAVE_LIBLGRP */
return 0;
}
#ifdef HAVE_LIBLGRP
if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) {
int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
if (depth >= 0) {
int n = hwloc_get_nbobjs_by_depth(topology, depth);
int i;
int ok;
hwloc_bitmap_t target = hwloc_bitmap_alloc();
for (i = 0; i < n; i++) {
hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set))
hwloc_bitmap_or(target, target, obj->cpuset);
}
ok = hwloc_bitmap_isequal(target, hwloc_set);
hwloc_bitmap_free(target);
if (ok) {
/* Ok, managed to achieve hwloc_set by just combining NUMA nodes */
for (i = 0; i < n; i++) {
hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set)) {
lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_STRONG);
} else {
if (flags & HWLOC_CPUBIND_STRICT)
lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
else
lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_WEAK);
}
}
return 0;
}
}
}
#endif /* HAVE_LIBLGRP */
if (hwloc_bitmap_weight(hwloc_set) != 1) {
errno = EXDEV;
return -1;
}
target_cpu = hwloc_bitmap_first(hwloc_set);
if (processor_bind(idtype, id,
(processorid_t) (target_cpu), NULL) != 0)
return -1;
return 0;
}
static int
hwloc_solaris_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags)
{
return hwloc_solaris_set_sth_cpubind(topology, P_PID, pid, hwloc_set, flags);
}
static int
hwloc_solaris_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
{
return hwloc_solaris_set_sth_cpubind(topology, P_PID, P_MYID, hwloc_set, flags);
}
static int
hwloc_solaris_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
{
return hwloc_solaris_set_sth_cpubind(topology, P_LWPID, P_MYID, hwloc_set, flags);
}
#ifdef HAVE_LIBLGRP
static int
hwloc_solaris_get_sth_cpubind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
{
int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
int n;
int i;
if (depth < 0) {
errno = ENOSYS;
return -1;
}
hwloc_bitmap_zero(hwloc_set);
n = hwloc_get_nbobjs_by_depth(topology, depth);
for (i = 0; i < n; i++) {
hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
lgrp_affinity_t aff = lgrp_affinity_get(idtype, id, obj->os_index);
if (aff == LGRP_AFF_STRONG)
hwloc_bitmap_or(hwloc_set, hwloc_set, obj->cpuset);
}
if (hwloc_bitmap_iszero(hwloc_set))
hwloc_bitmap_copy(hwloc_set, hwloc_topology_get_complete_cpuset(topology));
return 0;
}
static int
hwloc_solaris_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t hwloc_set, int flags)
{
return hwloc_solaris_get_sth_cpubind(topology, P_PID, pid, hwloc_set, flags);
}
static int
hwloc_solaris_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags)
{
return hwloc_solaris_get_sth_cpubind(topology, P_PID, P_MYID, hwloc_set, flags);
}
static int
hwloc_solaris_get_thisthread_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags)
{
return hwloc_solaris_get_sth_cpubind(topology, P_LWPID, P_MYID, hwloc_set, flags);
}
#endif /* HAVE_LIBLGRP */
/* TODO: given thread, probably not easy because of the historical n:m implementation */
#ifdef HAVE_LIBLGRP
static int
hwloc_solaris_set_sth_membind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
int depth;
int n, i;
switch (policy) {
case HWLOC_MEMBIND_DEFAULT:
case HWLOC_MEMBIND_BIND:
break;
default:
errno = ENOSYS;
return -1;
}
if (flags & HWLOC_MEMBIND_NOCPUBIND) {
errno = ENOSYS;
return -1;
}
depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
if (depth < 0) {
errno = EXDEV;
return -1;
}
n = hwloc_get_nbobjs_by_depth(topology, depth);
for (i = 0; i < n; i++) {
hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
if (hwloc_bitmap_isset(nodeset, obj->os_index)) {
lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_STRONG);
} else {
if (flags & HWLOC_MEMBIND_STRICT)
lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
else
lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_WEAK);
}
}
return 0;
}
static int
hwloc_solaris_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
return hwloc_solaris_set_sth_membind(topology, P_PID, pid, nodeset, policy, flags);
}
static int
hwloc_solaris_set_thisproc_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
return hwloc_solaris_set_sth_membind(topology, P_PID, P_MYID, nodeset, policy, flags);
}
static int
hwloc_solaris_set_thisthread_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
return hwloc_solaris_set_sth_membind(topology, P_LWPID, P_MYID, nodeset, policy, flags);
}
static int
hwloc_solaris_get_sth_membind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags __hwloc_attribute_unused)
{
int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
int n;
int i;
if (depth < 0) {
errno = ENOSYS;
return -1;
}
hwloc_bitmap_zero(nodeset);
n = hwloc_get_nbobjs_by_depth(topology, depth);
for (i = 0; i < n; i++) {
hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
lgrp_affinity_t aff = lgrp_affinity_get(idtype, id, obj->os_index);
if (aff == LGRP_AFF_STRONG)
hwloc_bitmap_set(nodeset, obj->os_index);
}
if (hwloc_bitmap_iszero(nodeset))
hwloc_bitmap_copy(nodeset, hwloc_topology_get_complete_nodeset(topology));
*policy = HWLOC_MEMBIND_DEFAULT;
return 0;
}
static int
hwloc_solaris_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags)
{
return hwloc_solaris_get_sth_membind(topology, P_PID, pid, nodeset, policy, flags);
}
static int
hwloc_solaris_get_thisproc_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags)
{
return hwloc_solaris_get_sth_membind(topology, P_PID, P_MYID, nodeset, policy, flags);
}
static int
hwloc_solaris_get_thisthread_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags)
{
return hwloc_solaris_get_sth_membind(topology, P_LWPID, P_MYID, nodeset, policy, flags);
}
#endif /* HAVE_LIBLGRP */
#ifdef MADV_ACCESS_LWP
static int
hwloc_solaris_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags __hwloc_attribute_unused)
{
int advice;
size_t remainder;
/* Cannot bind a given memory area to a subset of nodes: only the complete nodeset is accepted. */
if (!hwloc_bitmap_isequal(nodeset, hwloc_topology_get_complete_nodeset(topology))) {
errno = EXDEV;
return -1;
}
switch (policy) {
case HWLOC_MEMBIND_DEFAULT:
case HWLOC_MEMBIND_BIND:
advice = MADV_ACCESS_DEFAULT;
break;
case HWLOC_MEMBIND_FIRSTTOUCH:
case HWLOC_MEMBIND_NEXTTOUCH:
advice = MADV_ACCESS_LWP;
break;
case HWLOC_MEMBIND_INTERLEAVE:
advice = MADV_ACCESS_MANY;
break;
default:
errno = ENOSYS;
return -1;
}
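/* madvise() wants a page-aligned address, so round the area down to a page
* boundary. For example, with 4096-byte pages, addr 0x12345 gives remainder
* 0x345, so the advice is applied from 0x12000 over len+0x345 bytes. */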
remainder = (uintptr_t) addr & (sysconf(_SC_PAGESIZE)-1);
addr = (char*) addr - remainder;
len += remainder;
return madvise((void*) addr, len, advice);
}
#endif
#ifdef HAVE_LIBLGRP
static void
browse(struct hwloc_topology *topology, lgrp_cookie_t cookie, lgrp_id_t lgrp, hwloc_obj_t *glob_lgrps, unsigned *curlgrp)
{
int n;
hwloc_obj_t obj;
lgrp_mem_size_t mem_size;
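/* Walk the lgroup hierarchy depth-first from the given lgrp: an lgrp with
* memory installed directly becomes a NUMA node object whose cpuset is filled
* from lgrp_cpus(), then all of its children are visited recursively. */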
n = lgrp_cpus(cookie, lgrp, NULL, 0, LGRP_CONTENT_HIERARCHY);
if (n == -1)
return;
/* Is this lgrp a NUMA node? */
if ((mem_size = lgrp_mem_size(cookie, lgrp, LGRP_MEM_SZ_INSTALLED, LGRP_CONTENT_DIRECT)) > 0)
{
int i;
processorid_t *cpuids;
cpuids = malloc(sizeof(processorid_t) * n);
assert(cpuids != NULL);
obj = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, lgrp);
obj->nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_set(obj->nodeset, lgrp);
obj->cpuset = hwloc_bitmap_alloc();
glob_lgrps[(*curlgrp)++] = obj;
lgrp_cpus(cookie, lgrp, cpuids, n, LGRP_CONTENT_HIERARCHY);
for (i = 0; i < n ; i++) {
hwloc_debug("node %ld's cpu %d is %d\n", lgrp, i, cpuids[i]);
hwloc_bitmap_set(obj->cpuset, cpuids[i]);
}
hwloc_debug_1arg_bitmap("node %ld has cpuset %s\n",
lgrp, obj->cpuset);
/* or LGRP_MEM_SZ_FREE */
hwloc_debug("node %ld has %lldkB\n", lgrp, mem_size/1024);
obj->memory.local_memory = mem_size;
obj->memory.page_types_len = 2;
obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
obj->memory.page_types[0].size = getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
hwloc_insert_object_by_cpuset(topology, obj);
free(cpuids);
}
n = lgrp_children(cookie, lgrp, NULL, 0);
{
lgrp_id_t *lgrps;
int i;
lgrps = malloc(sizeof(lgrp_id_t) * n);
assert(lgrps != NULL);
lgrp_children(cookie, lgrp, lgrps, n);
hwloc_debug("lgrp %ld has %d children\n", lgrp, n);
for (i = 0; i < n ; i++)
{
browse(topology, cookie, lgrps[i], glob_lgrps, curlgrp);
}
hwloc_debug("lgrp %ld's children done\n", lgrp);
free(lgrps);
}
}
static void
hwloc_look_lgrp(struct hwloc_topology *topology)
{
lgrp_cookie_t cookie;
unsigned curlgrp = 0;
int nlgrps;
lgrp_id_t root;
if ((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM))
cookie = lgrp_init(LGRP_VIEW_OS);
else
cookie = lgrp_init(LGRP_VIEW_CALLER);
if (cookie == LGRP_COOKIE_NONE)
{
hwloc_debug("lgrp_init failed: %s\n", strerror(errno));
return;
}
nlgrps = lgrp_nlgrps(cookie);
root = lgrp_root(cookie);
{
hwloc_obj_t *glob_lgrps = calloc(nlgrps, sizeof(hwloc_obj_t));
browse(topology, cookie, root, glob_lgrps, &curlgrp);
#ifdef HAVE_LGRP_LATENCY_COOKIE
{
float *distances = calloc(curlgrp*curlgrp, sizeof(float));
unsigned *indexes = calloc(curlgrp,sizeof(unsigned));
unsigned i, j;
for (i = 0; i < curlgrp; i++) {
indexes[i] = glob_lgrps[i]->os_index;
for (j = 0; j < curlgrp; j++)
distances[i*curlgrp+j] = (float) lgrp_latency_cookie(cookie, glob_lgrps[i]->os_index, glob_lgrps[j]->os_index, LGRP_LAT_CPU_TO_MEM);
}
hwloc_topology__set_distance_matrix(topology, HWLOC_OBJ_NODE, curlgrp, indexes, glob_lgrps, distances);
}
#endif /* HAVE_LGRP_LATENCY_COOKIE */
}
lgrp_fini(cookie);
}
#endif /* HAVE_LIBLGRP */
#ifdef HAVE_LIBKSTAT
#include <kstat.h>
#define HWLOC_NBMAXCPUS 1024 /* FIXME: drop */
static int
hwloc_look_kstat(struct hwloc_topology *topology)
{
kstat_ctl_t *kc = kstat_open();
kstat_t *ksp;
kstat_named_t *stat;
unsigned look_cores = 1, look_chips = 1;
unsigned numsockets = 0;
unsigned proc_physids[HWLOC_NBMAXCPUS];
unsigned proc_osphysids[HWLOC_NBMAXCPUS];
unsigned osphysids[HWLOC_NBMAXCPUS];
unsigned numcores = 0;
unsigned proc_coreids[HWLOC_NBMAXCPUS];
unsigned oscoreids[HWLOC_NBMAXCPUS];
unsigned core_osphysids[HWLOC_NBMAXCPUS];
unsigned numprocs = 0;
unsigned proc_procids[HWLOC_NBMAXCPUS];
unsigned osprocids[HWLOC_NBMAXCPUS];
unsigned physid, coreid, cpuid;
unsigned procid_max = 0;
unsigned i;
for (cpuid = 0; cpuid < HWLOC_NBMAXCPUS; cpuid++)
{
proc_procids[cpuid] = -1;
proc_physids[cpuid] = -1;
proc_osphysids[cpuid] = -1;
proc_coreids[cpuid] = -1;
}
if (!kc)
{
hwloc_debug("kstat_open failed: %s\n", strerror(errno));
return 0;
}
for (ksp = kc->kc_chain; ksp; ksp = ksp->ks_next)
{
if (strncmp("cpu_info", ksp->ks_module, 8))
continue;
cpuid = ksp->ks_instance;
if (cpuid >= HWLOC_NBMAXCPUS)
{
fprintf(stderr,"CPU id too big: %u\n", cpuid);
continue;
}
if (kstat_read(kc, ksp, NULL) == -1)
{
fprintf(stderr, "kstat_read failed for CPU%u: %s\n", cpuid, strerror(errno));
continue;
}
hwloc_debug("cpu%u\n", cpuid);
proc_procids[cpuid] = numprocs;
osprocids[numprocs] = cpuid;
numprocs++;
if (cpuid >= procid_max)
procid_max = cpuid + 1;
stat = (kstat_named_t *) kstat_data_lookup(ksp, "state");
if (!stat)
hwloc_debug("could not read state for CPU%u: %s\n", cpuid, strerror(errno));
else if (stat->data_type != KSTAT_DATA_CHAR)
hwloc_debug("unknown kstat type %d for cpu state\n", stat->data_type);
else
{
hwloc_debug("cpu%u's state is %s\n", cpuid, stat->value.c);
if (strcmp(stat->value.c, "on-line"))
/* not online */
hwloc_bitmap_clr(topology->levels[0][0]->online_cpuset, cpuid);
}
if (look_chips) do {
/* Get Chip ID */
stat = (kstat_named_t *) kstat_data_lookup(ksp, "chip_id");
if (!stat)
{
if (numsockets)
fprintf(stderr, "could not read socket id for CPU%u: %s\n", cpuid, strerror(errno));
else
hwloc_debug("could not read socket id for CPU%u: %s\n", cpuid, strerror(errno));
look_chips = 0;
continue;
}
switch (stat->data_type) {
case KSTAT_DATA_INT32:
physid = stat->value.i32;
break;
case KSTAT_DATA_UINT32:
physid = stat->value.ui32;
break;
#ifdef _INT64_TYPE
case KSTAT_DATA_UINT64:
physid = stat->value.ui64;
break;
case KSTAT_DATA_INT64:
physid = stat->value.i64;
break;
#endif
default:
fprintf(stderr, "chip_id type %d unknown\n", stat->data_type);
look_chips = 0;
continue;
}
proc_osphysids[cpuid] = physid;
for (i = 0; i < numsockets; i++)
if (physid == osphysids[i])
break;
proc_physids[cpuid] = i;
hwloc_debug("%u on socket %u (%u)\n", cpuid, i, physid);
if (i == numsockets)
osphysids[numsockets++] = physid;
} while(0);
if (look_cores) do {
/* Get Core ID */
stat = (kstat_named_t *) kstat_data_lookup(ksp, "core_id");
if (!stat)
{
if (numcores)
fprintf(stderr, "could not read core id for CPU%u: %s\n", cpuid, strerror(errno));
else
hwloc_debug("could not read core id for CPU%u: %s\n", cpuid, strerror(errno));
look_cores = 0;
continue;
}
switch (stat->data_type) {
case KSTAT_DATA_INT32:
coreid = stat->value.i32;
break;
case KSTAT_DATA_UINT32:
coreid = stat->value.ui32;
break;
#ifdef _INT64_TYPE
case KSTAT_DATA_UINT64:
coreid = stat->value.ui64;
break;
case KSTAT_DATA_INT64:
coreid = stat->value.i64;
break;
#endif
default:
fprintf(stderr, "core_id type %d unknown\n", stat->data_type);
look_cores = 0;
continue;
}
for (i = 0; i < numcores; i++)
if (coreid == oscoreids[i] && proc_osphysids[cpuid] == core_osphysids[i])
break;
proc_coreids[cpuid] = i;
hwloc_debug("%u on core %u (%u)\n", cpuid, i, coreid);
if (i == numcores)
{
core_osphysids[numcores] = proc_osphysids[cpuid];
oscoreids[numcores++] = coreid;
}
} while(0);
/* Note: there is also clog_id for the Thread ID (not unique) and
* pkg_core_id for the core ID (not unique). They are not useful to us
* however. */
}
if (look_chips)
hwloc_setup_level(procid_max, numsockets, osphysids, proc_physids, topology, HWLOC_OBJ_SOCKET);
if (look_cores)
hwloc_setup_level(procid_max, numcores, oscoreids, proc_coreids, topology, HWLOC_OBJ_CORE);
if (numprocs)
hwloc_setup_level(procid_max, numprocs, osprocids, proc_procids, topology, HWLOC_OBJ_PU);
kstat_close(kc);
return numprocs > 0;
}
#endif /* HAVE_LIBKSTAT */
void
hwloc_look_solaris(struct hwloc_topology *topology)
{
unsigned nbprocs = hwloc_fallback_nbprocessors (topology);
#ifdef HAVE_LIBLGRP
hwloc_look_lgrp(topology);
#endif /* HAVE_LIBLGRP */
#ifdef HAVE_LIBKSTAT
nbprocs = 0;
if (hwloc_look_kstat(topology))
return;
#endif /* HAVE_LIBKSTAT */
hwloc_setup_pu_level(topology, nbprocs);
hwloc_add_object_info(topology->levels[0][0], "Backend", "Solaris");
}
void
hwloc_set_solaris_hooks(struct hwloc_topology *topology)
{
topology->set_proc_cpubind = hwloc_solaris_set_proc_cpubind;
topology->set_thisproc_cpubind = hwloc_solaris_set_thisproc_cpubind;
topology->set_thisthread_cpubind = hwloc_solaris_set_thisthread_cpubind;
#ifdef HAVE_LIBLGRP
topology->get_proc_cpubind = hwloc_solaris_get_proc_cpubind;
topology->get_thisproc_cpubind = hwloc_solaris_get_thisproc_cpubind;
topology->get_thisthread_cpubind = hwloc_solaris_get_thisthread_cpubind;
topology->set_proc_membind = hwloc_solaris_set_proc_membind;
topology->set_thisproc_membind = hwloc_solaris_set_thisproc_membind;
topology->set_thisthread_membind = hwloc_solaris_set_thisthread_membind;
topology->get_proc_membind = hwloc_solaris_get_proc_membind;
topology->get_thisproc_membind = hwloc_solaris_get_thisproc_membind;
topology->get_thisthread_membind = hwloc_solaris_get_thisthread_membind;
#endif /* HAVE_LIBLGRP */
#ifdef MADV_ACCESS_LWP
topology->set_area_membind = hwloc_solaris_set_area_membind;
topology->support.membind->firsttouch_membind = 1;
topology->support.membind->bind_membind = 1;
topology->support.membind->interleave_membind = 1;
topology->support.membind->nexttouch_membind = 1;
#endif
}
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2011 INRIA. All rights reserved.
* Copyright © 2009-2010 Université Bordeaux 1
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
#include <private/autogen/config.h>
#include <hwloc.h>
#include <private/private.h>
#include <private/misc.h>
#include <private/debug.h>
#include <limits.h>
#include <assert.h>
#include <strings.h>
/* Read from DESCRIPTION a series of integers describing a symmetrical
topology and update `topology->backend_params.synthetic' accordingly. On
success, return zero. */
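/* For example, the description "node:2 socket:2 core:2 pu:2" describes 2 NUMA
* nodes, each containing 2 sockets of 2 dual-threaded cores, i.e. 16 PUs in
* total. When types are omitted (e.g. "2 2 2"), they are guessed bottom-up:
* the innermost level becomes PUs, then cores, caches, sockets, and so on. */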
int
hwloc_backend_synthetic_init(struct hwloc_topology *topology, const char *description)
{
const char *pos, *next_pos;
unsigned long item, count;
unsigned i;
int cache_depth = 0, group_depth = 0;
int nb_machine_levels = 0, nb_node_levels = 0;
int nb_pu_levels = 0;
assert(topology->backend_type == HWLOC_BACKEND_NONE);
for (pos = description, count = 1; *pos; pos = next_pos) {
#define HWLOC_OBJ_TYPE_UNKNOWN ((hwloc_obj_type_t) -1)
hwloc_obj_type_t type = HWLOC_OBJ_TYPE_UNKNOWN;
while (*pos == ' ')
pos++;
if (!*pos)
break;
if (*pos < '0' || *pos > '9') {
if (!hwloc_namecoloncmp(pos, "machines", 2)) {
type = HWLOC_OBJ_MACHINE;
} else if (!hwloc_namecoloncmp(pos, "nodes", 1))
type = HWLOC_OBJ_NODE;
else if (!hwloc_namecoloncmp(pos, "sockets", 1))
type = HWLOC_OBJ_SOCKET;
else if (!hwloc_namecoloncmp(pos, "cores", 2))
type = HWLOC_OBJ_CORE;
else if (!hwloc_namecoloncmp(pos, "caches", 2))
type = HWLOC_OBJ_CACHE;
else if (!hwloc_namecoloncmp(pos, "pus", 1) || !hwloc_namecoloncmp(pos, "procs", 1) /* backward compatiblity with 0.9 */)
type = HWLOC_OBJ_PU;
else if (!hwloc_namecoloncmp(pos, "misc", 2))
type = HWLOC_OBJ_MISC;
else if (!hwloc_namecoloncmp(pos, "group", 2))
type = HWLOC_OBJ_GROUP;
else
fprintf(stderr, "Unknown object type `%s'\n", pos);
next_pos = strchr(pos, ':');
if (!next_pos) {
fprintf(stderr,"synthetic string doesn't have a `:' after object type at '%s'\n", pos);
errno = EINVAL;
return -1;
}
pos = next_pos + 1;
}
item = strtoul(pos, (char **)&next_pos, 0);
if (next_pos == pos) {
fprintf(stderr,"synthetic string doesn't have a number of objects at '%s'\n", pos);
errno = EINVAL;
return -1;
}
if (count + 1 >= HWLOC_SYNTHETIC_MAX_DEPTH) {
fprintf(stderr,"Too many synthetic levels, max %d\n", HWLOC_SYNTHETIC_MAX_DEPTH);
errno = EINVAL;
return -1;
}
if (item > UINT_MAX) {
fprintf(stderr,"Too big arity, max %u\n", UINT_MAX);
errno = EINVAL;
return -1;
}
topology->backend_params.synthetic.arity[count-1] = (unsigned)item;
topology->backend_params.synthetic.type[count] = type;
count++;
}
if (count <= 0) {
fprintf(stderr,"synthetic string doesn't contain any object\n");
errno = EINVAL;
return -1;
}
for(i=count-1; i>0; i--) {
hwloc_obj_type_t type;
type = topology->backend_params.synthetic.type[i];
if (type == HWLOC_OBJ_TYPE_UNKNOWN) {
if (i == count-1)
type = HWLOC_OBJ_PU;
else {
switch (topology->backend_params.synthetic.type[i+1]) {
case HWLOC_OBJ_PU: type = HWLOC_OBJ_CORE; break;
case HWLOC_OBJ_CORE: type = HWLOC_OBJ_CACHE; break;
case HWLOC_OBJ_CACHE: type = HWLOC_OBJ_SOCKET; break;
case HWLOC_OBJ_SOCKET: type = HWLOC_OBJ_NODE; break;
case HWLOC_OBJ_NODE:
case HWLOC_OBJ_GROUP: type = HWLOC_OBJ_GROUP; break;
case HWLOC_OBJ_MACHINE:
case HWLOC_OBJ_MISC: type = HWLOC_OBJ_MISC; break;
default:
assert(0);
}
}
topology->backend_params.synthetic.type[i] = type;
}
switch (type) {
case HWLOC_OBJ_PU:
if (nb_pu_levels) {
fprintf(stderr,"synthetic string can not have several PU levels\n");
errno = EINVAL;
return -1;
}
nb_pu_levels++;
break;
case HWLOC_OBJ_CACHE:
cache_depth++;
break;
case HWLOC_OBJ_GROUP:
group_depth++;
break;
case HWLOC_OBJ_NODE:
nb_node_levels++;
break;
case HWLOC_OBJ_MACHINE:
nb_machine_levels++;
break;
default:
break;
}
}
if (nb_pu_levels > 1) {
fprintf(stderr,"synthetic string can not have several PU levels\n");
errno = EINVAL;
return -1;
}
if (nb_node_levels > 1) {
fprintf(stderr,"synthetic string can not have several NUMA node levels\n");
errno = EINVAL;
return -1;
}
if (nb_machine_levels > 1) {
fprintf(stderr,"synthetic string can not have several machine levels\n");
errno = EINVAL;
return -1;
}
if (nb_machine_levels)
topology->backend_params.synthetic.type[0] = HWLOC_OBJ_SYSTEM;
else {
topology->backend_params.synthetic.type[0] = HWLOC_OBJ_MACHINE;
nb_machine_levels++;
}
if (cache_depth == 1)
/* if there is a single cache level, make it L2 */
cache_depth = 2;
for (i=0; i<count; i++) {
hwloc_obj_type_t type = topology->backend_params.synthetic.type[i];
if (type == HWLOC_OBJ_GROUP)
topology->backend_params.synthetic.depth[i] = group_depth--;
else if (type == HWLOC_OBJ_CACHE)
topology->backend_params.synthetic.depth[i] = cache_depth--;
}
topology->backend_type = HWLOC_BACKEND_SYNTHETIC;
topology->backend_params.synthetic.arity[count-1] = 0;
topology->is_thissystem = 0;
return 0;
}
void
hwloc_backend_synthetic_exit(struct hwloc_topology *topology)
{
assert(topology->backend_type == HWLOC_BACKEND_SYNTHETIC);
topology->backend_type = HWLOC_BACKEND_NONE;
}
/*
* Recursively build objects whose cpus start at first_cpu
* - level gives where to look in the type, arity and id arrays
* - the id array is used as a counter to get unique IDs for a given level.
* - generated cpus should be added to parent_cpuset.
* - the next cpu number to be used should be returned.
*/
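/* For instance, with the description "socket:2 core:2 pu:2", level 1 is
* entered twice (one socket each), each socket enters level 2 twice (one core
* each), and each core creates 2 PU leaves, so first_cpu advances from 0 to 8
* while each object's cpuset is ORed into its parent's cpuset. */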
static unsigned
hwloc__look_synthetic(struct hwloc_topology *topology,
int level, unsigned first_cpu,
hwloc_bitmap_t parent_cpuset)
{
hwloc_obj_t obj;
unsigned i;
hwloc_obj_type_t type = topology->backend_params.synthetic.type[level];
/* pre-hooks */
switch (type) {
case HWLOC_OBJ_MISC:
break;
case HWLOC_OBJ_GROUP:
break;
case HWLOC_OBJ_SYSTEM:
/* Shouldn't happen. */
abort();
break;
case HWLOC_OBJ_MACHINE:
break;
case HWLOC_OBJ_NODE:
break;
case HWLOC_OBJ_SOCKET:
break;
case HWLOC_OBJ_CACHE:
break;
case HWLOC_OBJ_CORE:
break;
case HWLOC_OBJ_PU:
break;
case HWLOC_OBJ_TYPE_MAX:
/* Should never happen */
assert(0);
break;
}
obj = hwloc_alloc_setup_object(type, topology->backend_params.synthetic.id[level]++);
obj->cpuset = hwloc_bitmap_alloc();
if (!topology->backend_params.synthetic.arity[level]) {
hwloc_bitmap_set(obj->cpuset, first_cpu++);
} else {
for (i = 0; i < topology->backend_params.synthetic.arity[level]; i++)
first_cpu = hwloc__look_synthetic(topology, level + 1, first_cpu, obj->cpuset);
}
if (type == HWLOC_OBJ_NODE) {
obj->nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_set(obj->nodeset, obj->os_index);
}
hwloc_bitmap_or(parent_cpuset, parent_cpuset, obj->cpuset);
/* post-hooks */
switch (type) {
case HWLOC_OBJ_MISC:
break;
case HWLOC_OBJ_GROUP:
obj->attr->group.depth = topology->backend_params.synthetic.depth[level];
break;
case HWLOC_OBJ_SYSTEM:
abort();
break;
case HWLOC_OBJ_MACHINE:
break;
case HWLOC_OBJ_NODE:
/* 1GB in memory nodes, 256k 4k-pages. */
obj->memory.local_memory = 1024*1024*1024;
obj->memory.page_types_len = 1;
obj->memory.page_types = malloc(sizeof(*obj->memory.page_types));
memset(obj->memory.page_types, 0, sizeof(*obj->memory.page_types));
obj->memory.page_types[0].size = 4096;
obj->memory.page_types[0].count = 256*1024;
break;
case HWLOC_OBJ_SOCKET:
break;
case HWLOC_OBJ_CACHE:
obj->attr->cache.depth = topology->backend_params.synthetic.depth[level];
obj->attr->cache.linesize = 64;
if (obj->attr->cache.depth == 1)
/* 32KiB in L1 */
obj->attr->cache.size = 32*1024;
else
/* 256KiB * 4^depth at deeper levels: 4MiB for L2, 16MiB for L3, etc. */
obj->attr->cache.size = 256*1024 << (2*obj->attr->cache.depth);
break;
case HWLOC_OBJ_CORE:
break;
case HWLOC_OBJ_PU:
break;
case HWLOC_OBJ_TYPE_MAX:
/* Should never happen */
assert(0);
break;
}
hwloc_insert_object_by_cpuset(topology, obj);
return first_cpu;
}
void
hwloc_look_synthetic(struct hwloc_topology *topology)
{
hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
unsigned first_cpu = 0, i;
topology->support.discovery->pu = 1;
/* start with id=0 for each level */
for (i = 0; topology->backend_params.synthetic.arity[i] > 0; i++)
topology->backend_params.synthetic.id[i] = 0;
/* ... including the last one */
topology->backend_params.synthetic.id[i] = 0;
/* update first level type according to the synthetic type array */
topology->levels[0][0]->type = topology->backend_params.synthetic.type[0];
for (i = 0; i < topology->backend_params.synthetic.arity[0]; i++)
first_cpu = hwloc__look_synthetic(topology, 1, first_cpu, cpuset);
hwloc_bitmap_free(cpuset);
hwloc_add_object_info(topology->levels[0][0], "Backend", "Synthetic");
}
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2010 INRIA. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux 1
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
/* Try to get windows.h to provide the declarations that are otherwise duplicated below, by requesting Windows 7 (0x0601) features. */
#define _WIN32_WINNT 0x0601
#include <private/autogen/config.h>
#include <hwloc.h>
#include <private/private.h>
#include <private/debug.h>
#include <windows.h>
#ifndef HAVE_KAFFINITY
typedef ULONG_PTR KAFFINITY, *PKAFFINITY;
#endif
#ifndef HAVE_PROCESSOR_CACHE_TYPE
typedef enum _PROCESSOR_CACHE_TYPE {
CacheUnified,
CacheInstruction,
CacheData,
CacheTrace
} PROCESSOR_CACHE_TYPE;
#endif
#ifndef CACHE_FULLY_ASSOCIATIVE
#define CACHE_FULLY_ASSOCIATIVE 0xFF
#endif
#ifndef HAVE_CACHE_DESCRIPTOR
typedef struct _CACHE_DESCRIPTOR {
BYTE Level;
BYTE Associativity;
WORD LineSize;
DWORD Size; /* in bytes */
PROCESSOR_CACHE_TYPE Type;
} CACHE_DESCRIPTOR, *PCACHE_DESCRIPTOR;
#endif
#ifndef HAVE_LOGICAL_PROCESSOR_RELATIONSHIP
typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP {
RelationProcessorCore,
RelationNumaNode,
RelationCache,
RelationProcessorPackage,
RelationGroup,
RelationAll = 0xffff
} LOGICAL_PROCESSOR_RELATIONSHIP;
#else /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
# ifndef HAVE_RELATIONPROCESSORPACKAGE
# define RelationProcessorPackage 3
# define RelationGroup 4
# define RelationAll 0xffff
# endif /* HAVE_RELATIONPROCESSORPACKAGE */
#endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION
typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION {
ULONG_PTR ProcessorMask;
LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
_ANONYMOUS_UNION
union {
struct {
BYTE flags;
} ProcessorCore;
struct {
DWORD NodeNumber;
} NumaNode;
CACHE_DESCRIPTOR Cache;
ULONGLONG Reserved[2];
} DUMMYUNIONNAME;
} SYSTEM_LOGICAL_PROCESSOR_INFORMATION, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION;
#endif
/* Extended interface, for group support */
#ifndef HAVE_GROUP_AFFINITY
typedef struct _GROUP_AFFINITY {
KAFFINITY Mask;
WORD Group;
WORD Reserved[3];
} GROUP_AFFINITY, *PGROUP_AFFINITY;
#endif
#ifndef HAVE_PROCESSOR_RELATIONSHIP
typedef struct _PROCESSOR_RELATIONSHIP {
BYTE Flags;
BYTE Reserved[21];
WORD GroupCount;
GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY];
} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP;
#endif
#ifndef HAVE_NUMA_NODE_RELATIONSHIP
typedef struct _NUMA_NODE_RELATIONSHIP {
DWORD NodeNumber;
BYTE Reserved[20];
GROUP_AFFINITY GroupMask;
} NUMA_NODE_RELATIONSHIP, *PNUMA_NODE_RELATIONSHIP;
#endif
#ifndef HAVE_CACHE_RELATIONSHIP
typedef struct _CACHE_RELATIONSHIP {
BYTE Level;
BYTE Associativity;
WORD LineSize;
DWORD CacheSize;
PROCESSOR_CACHE_TYPE Type;
BYTE Reserved[20];
GROUP_AFFINITY GroupMask;
} CACHE_RELATIONSHIP, *PCACHE_RELATIONSHIP;
#endif
#ifndef HAVE_PROCESSOR_GROUP_INFO
typedef struct _PROCESSOR_GROUP_INFO {
BYTE MaximumProcessorCount;
BYTE ActiveProcessorCount;
BYTE Reserved[38];
KAFFINITY ActiveProcessorMask;
} PROCESSOR_GROUP_INFO, *PPROCESSOR_GROUP_INFO;
#endif
#ifndef HAVE_GROUP_RELATIONSHIP
typedef struct _GROUP_RELATIONSHIP {
WORD MaximumGroupCount;
WORD ActiveGroupCount;
ULONGLONG Reserved[2];
PROCESSOR_GROUP_INFO GroupInfo[ANYSIZE_ARRAY];
} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP;
#endif
#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX {
LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
DWORD Size;
_ANONYMOUS_UNION
union {
PROCESSOR_RELATIONSHIP Processor;
NUMA_NODE_RELATIONSHIP NumaNode;
CACHE_RELATIONSHIP Cache;
GROUP_RELATIONSHIP Group;
/* Odd: no member to tell the cpu mask of the package... */
} DUMMYUNIONNAME;
} SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX;
#endif
#ifndef HAVE_PSAPI_WORKING_SET_EX_BLOCK
typedef union _PSAPI_WORKING_SET_EX_BLOCK {
ULONG_PTR Flags;
struct {
unsigned Valid :1;
unsigned ShareCount :3;
unsigned Win32Protection :11;
unsigned Shared :1;
unsigned Node :6;
unsigned Locked :1;
unsigned LargePage :1;
};
} PSAPI_WORKING_SET_EX_BLOCK;
#endif
#ifndef HAVE_PSAPI_WORKING_SET_EX_INFORMATION
typedef struct _PSAPI_WORKING_SET_EX_INFORMATION {
PVOID VirtualAddress;
PSAPI_WORKING_SET_EX_BLOCK VirtualAttributes;
} PSAPI_WORKING_SET_EX_INFORMATION;
#endif
/* TODO: SetThreadIdealProcessor */
static int
hwloc_win_set_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t thread, hwloc_const_bitmap_t hwloc_set, int flags)
{
if (flags & HWLOC_CPUBIND_NOMEMBIND) {
errno = ENOSYS;
return -1;
}
/* TODO: groups SetThreadGroupAffinity */
/* The resulting binding is always strict */
DWORD mask = hwloc_bitmap_to_ulong(hwloc_set);
if (!SetThreadAffinityMask(thread, mask))
return -1;
return 0;
}
/* TODO: SetThreadGroupAffinity to get affinity */
static int
hwloc_win_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
{
return hwloc_win_set_thread_cpubind(topology, GetCurrentThread(), hwloc_set, flags);
}
static int
hwloc_win_set_thisthread_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
int ret;
hwloc_cpuset_t cpuset;
if ((policy != HWLOC_MEMBIND_DEFAULT && policy != HWLOC_MEMBIND_BIND)
|| flags & HWLOC_MEMBIND_NOCPUBIND) {
errno = ENOSYS;
return -1;
}
cpuset = hwloc_bitmap_alloc();
hwloc_cpuset_from_nodeset(topology, cpuset, nodeset);
ret = hwloc_win_set_thisthread_cpubind(topology, cpuset, flags & HWLOC_MEMBIND_STRICT?HWLOC_CPUBIND_STRICT:0);
hwloc_bitmap_free(cpuset);
return ret;
}
static int
hwloc_win_set_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t proc, hwloc_const_bitmap_t hwloc_set, int flags)
{
if (flags & HWLOC_CPUBIND_NOMEMBIND) {
errno = ENOSYS;
return -1;
}
/* TODO: groups, hard: has to manually bind all threads into the other group,
* and then bind the process inside the group */
/* The resulting binding is always strict */
DWORD mask = hwloc_bitmap_to_ulong(hwloc_set);
if (!SetProcessAffinityMask(proc, mask))
return -1;
return 0;
}
static int
hwloc_win_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
int ret;
hwloc_cpuset_t cpuset;
if ((policy != HWLOC_MEMBIND_DEFAULT && policy != HWLOC_MEMBIND_BIND)
|| flags & HWLOC_MEMBIND_NOCPUBIND) {
errno = ENOSYS;
return -1;
}
cpuset = hwloc_bitmap_alloc();
hwloc_cpuset_from_nodeset(topology, cpuset, nodeset);
ret = hwloc_win_set_proc_cpubind(topology, pid, cpuset, flags & HWLOC_MEMBIND_STRICT?HWLOC_CPUBIND_STRICT:0);
hwloc_bitmap_free(cpuset);
return ret;
}
static int
hwloc_win_get_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t proc, hwloc_bitmap_t hwloc_set, int flags)
{
DWORD_PTR proc_mask, sys_mask;
if (flags & HWLOC_CPUBIND_NOMEMBIND) {
errno = ENOSYS;
return -1;
}
/* TODO: groups, GetProcessGroupAffinity, or merge SetThreadGroupAffinity for all threads */
if (!GetProcessAffinityMask(proc, &proc_mask, &sys_mask))
return -1;
hwloc_bitmap_from_ulong(hwloc_set, proc_mask);
return 0;
}
static int
hwloc_win_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
{
int ret;
hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
ret = hwloc_win_get_proc_cpubind(topology, pid, cpuset, flags & HWLOC_MEMBIND_STRICT?HWLOC_CPUBIND_STRICT:0);
if (!ret) {
*policy = HWLOC_MEMBIND_BIND;
hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);
}
hwloc_bitmap_free(cpuset);
return ret;
}
static int
hwloc_win_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
{
return hwloc_win_set_proc_cpubind(topology, GetCurrentProcess(), hwloc_set, flags);
}
static int
hwloc_win_set_thisproc_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
return hwloc_win_set_proc_membind(topology, GetCurrentProcess(), nodeset, policy, flags);
}
static int
hwloc_win_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_cpuset, int flags)
{
return hwloc_win_get_proc_cpubind(topology, GetCurrentProcess(), hwloc_cpuset, flags);
}
static int
hwloc_win_get_thisproc_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
{
return hwloc_win_get_proc_membind(topology, GetCurrentProcess(), nodeset, policy, flags);
}
static LPVOID WINAPI (*VirtualAllocExNumaProc)(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSize, DWORD flAllocationType, DWORD flProtect, DWORD nndPreferred);
static BOOL WINAPI (*VirtualFreeExProc)(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSize, DWORD dwFreeType);
static BOOL WINAPI (*QueryWorkingSetExProc)(HANDLE hProcess, PVOID pv, DWORD cb);
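/* The NUMA allocation entry points are resolved lazily from kernel32.dll the
* first time they are needed; a sentinel value of (FARPROC) -1 remembers a
* failed lookup so that later calls fail immediately with ENOSYS instead of
* retrying LoadLibrary/GetProcAddress. */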
static int hwloc_win_get_VirtualAllocExNumaProc(void) {
if (VirtualAllocExNumaProc == NULL) {
FARPROC alloc_fun = NULL, free_fun = NULL;
HMODULE kernel32;
kernel32 = LoadLibrary("kernel32.dll");
if (kernel32) {
alloc_fun = GetProcAddress(kernel32, "VirtualAllocExNuma");
free_fun = GetProcAddress(kernel32, "VirtualFreeEx");
}
if (!alloc_fun || !free_fun) {
VirtualAllocExNumaProc = (FARPROC) -1;
errno = ENOSYS;
return -1;
}
VirtualAllocExNumaProc = alloc_fun;
VirtualFreeExProc = free_fun;
} else if ((FARPROC) VirtualAllocExNumaProc == (FARPROC)-1) {
errno = ENOSYS;
return -1;
}
return 0;
}
static void *
hwloc_win_alloc(hwloc_topology_t topology __hwloc_attribute_unused, size_t len) {
return VirtualAlloc(NULL, len, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE);
}
static void *
hwloc_win_alloc_membind(hwloc_topology_t topology __hwloc_attribute_unused, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) {
int node;
switch (policy) {
case HWLOC_MEMBIND_DEFAULT:
case HWLOC_MEMBIND_BIND:
break;
default:
errno = ENOSYS;
return hwloc_alloc_or_fail(topology, len, flags);
}
if (flags & HWLOC_MEMBIND_STRICT) {
errno = ENOSYS;
return NULL;
}
if (hwloc_bitmap_weight(nodeset) != 1) {
/* Not a single node, can't do this */
errno = EXDEV;
return hwloc_alloc_or_fail(topology, len, flags);
}
node = hwloc_bitmap_first(nodeset);
return VirtualAllocExNumaProc(GetCurrentProcess(), NULL, len, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE, node);
}
static int
hwloc_win_free_membind(hwloc_topology_t topology __hwloc_attribute_unused, void *addr, size_t len __hwloc_attribute_unused) {
if (!addr)
return 0;
if (!VirtualFreeExProc(GetCurrentProcess(), addr, 0, MEM_RELEASE))
return -1;
return 0;
}
static int hwloc_win_get_QueryWorkingSetExProc(void) {
if (QueryWorkingSetExProc == NULL) {
FARPROC fun = NULL;
HMODULE kernel32, psapi;
kernel32 = LoadLibrary("kernel32.dll");
if (kernel32)
fun = GetProcAddress(kernel32, "K32QueryWorkingSetEx");
if (!fun) {
psapi = LoadLibrary("psapi.dll");
if (psapi)
fun = GetProcAddress(psapi, "QueryWorkingSetEx");
}
if (!fun) {
QueryWorkingSetExProc = (FARPROC) -1;
errno = ENOSYS;
return -1;
}
QueryWorkingSetExProc = fun;
} else if ((FARPROC) QueryWorkingSetExProc == (FARPROC)-1) {
errno = ENOSYS;
return -1;
}
return 0;
}
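/* Deduce the binding of a memory area by querying, for each page it spans,
* the extended working-set information: the Node field of the returned
* VirtualAttributes tells which NUMA node currently backs that page. */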
static int
hwloc_win_get_area_membind(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
{
SYSTEM_INFO SystemInfo;
DWORD page_size;
GetSystemInfo(&SystemInfo);
page_size = SystemInfo.dwPageSize;
uintptr_t start = (((uintptr_t) addr) / page_size) * page_size;
unsigned nb = (((uintptr_t) addr + len - start) + page_size - 1) / page_size;
if (!nb)
nb = 1;
{
PSAPI_WORKING_SET_EX_INFORMATION pv[nb];
unsigned i;
for (i = 0; i < nb; i++)
pv[i].VirtualAddress = (void*) (start + i * page_size);
if (!QueryWorkingSetExProc(GetCurrentProcess(), &pv, sizeof(pv)))
return -1;
*policy = HWLOC_MEMBIND_BIND;
if (flags & HWLOC_MEMBIND_STRICT) {
unsigned node = pv[0].VirtualAttributes.Node;
for (i = 1; i < nb; i++) {
if (pv[i].VirtualAttributes.Node != node) {
errno = EXDEV;
return -1;
}
}
hwloc_bitmap_only(nodeset, node);
return 0;
}
hwloc_bitmap_zero(nodeset);
for (i = 0; i < nb; i++)
hwloc_bitmap_set(nodeset, pv[i].VirtualAttributes.Node);
return 0;
}
}
void
hwloc_look_windows(struct hwloc_topology *topology)
{
BOOL WINAPI (*GetLogicalProcessorInformationProc)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION Buffer, PDWORD ReturnLength);
BOOL WINAPI (*GetLogicalProcessorInformationExProc)(LOGICAL_PROCESSOR_RELATIONSHIP relationship, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, PDWORD ReturnLength);
BOOL WINAPI (*GetNumaAvailableMemoryNodeProc)(UCHAR Node, PULONGLONG AvailableBytes);
BOOL WINAPI (*GetNumaAvailableMemoryNodeExProc)(USHORT Node, PULONGLONG AvailableBytes);
SYSTEM_INFO SystemInfo;
DWORD length;
HMODULE kernel32;
GetSystemInfo(&SystemInfo);
kernel32 = LoadLibrary("kernel32.dll");
if (kernel32) {
GetLogicalProcessorInformationProc = GetProcAddress(kernel32, "GetLogicalProcessorInformation");
GetNumaAvailableMemoryNodeProc = GetProcAddress(kernel32, "GetNumaAvailableMemoryNode");
GetNumaAvailableMemoryNodeExProc = GetProcAddress(kernel32, "GetNumaAvailableMemoryNodeEx");
GetLogicalProcessorInformationExProc = GetProcAddress(kernel32, "GetLogicalProcessorInformationEx");
if (!GetLogicalProcessorInformationExProc && GetLogicalProcessorInformationProc) {
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION procInfo;
unsigned id;
unsigned i;
struct hwloc_obj *obj;
hwloc_obj_type_t type;
length = 0;
procInfo = NULL;
while (1) {
if (GetLogicalProcessorInformationProc(procInfo, &length))
break;
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
return;
procInfo = realloc(procInfo, length);
}
for (i = 0; i < length / sizeof(*procInfo); i++) {
/* Ignore non-data caches */
if (procInfo[i].Relationship == RelationCache
&& procInfo[i].Cache.Type != CacheUnified
&& procInfo[i].Cache.Type != CacheData)
continue;
id = -1;
switch (procInfo[i].Relationship) {
case RelationNumaNode:
type = HWLOC_OBJ_NODE;
id = procInfo[i].NumaNode.NodeNumber;
break;
case RelationProcessorPackage:
type = HWLOC_OBJ_SOCKET;
break;
case RelationCache:
type = HWLOC_OBJ_CACHE;
break;
case RelationProcessorCore:
type = HWLOC_OBJ_CORE;
break;
case RelationGroup:
default:
type = HWLOC_OBJ_GROUP;
break;
}
obj = hwloc_alloc_setup_object(type, id);
obj->cpuset = hwloc_bitmap_alloc();
hwloc_debug("%s#%u mask %lx\n", hwloc_obj_type_string(type), id, procInfo[i].ProcessorMask);
hwloc_bitmap_from_ulong(obj->cpuset, procInfo[i].ProcessorMask);
switch (type) {
case HWLOC_OBJ_NODE:
{
ULONGLONG avail;
obj->nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_set(obj->nodeset, id);
if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail))
|| (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail)))
obj->memory.local_memory = avail;
obj->memory.page_types_len = 2;
obj->memory.page_types = malloc(2 * sizeof(*obj->memory.page_types));
memset(obj->memory.page_types, 0, 2 * sizeof(*obj->memory.page_types));
obj->memory.page_types_len = 1;
obj->memory.page_types[0].size = SystemInfo.dwPageSize;
#ifdef HAVE__SC_LARGE_PAGESIZE
obj->memory.page_types_len++;
obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
break;
}
case HWLOC_OBJ_CACHE:
obj->attr->cache.size = procInfo[i].Cache.Size;
obj->attr->cache.linesize = procInfo[i].Cache.LineSize;
obj->attr->cache.depth = procInfo[i].Cache.Level;
break;
case HWLOC_OBJ_GROUP:
obj->attr->group.depth = procInfo[i].Relationship == RelationGroup;
break;
default:
break;
}
hwloc_insert_object_by_cpuset(topology, obj);
}
free(procInfo);
}
if (GetLogicalProcessorInformationExProc) {
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX procInfoTotal, procInfo;
unsigned id;
struct hwloc_obj *obj;
hwloc_obj_type_t type;
length = 0;
procInfoTotal = NULL;
while (1) {
if (GetLogicalProcessorInformationExProc(RelationAll, procInfoTotal, &length))
break;
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
return;
procInfoTotal = realloc(procInfoTotal, length);
}
for (procInfo = procInfoTotal;
(void*) procInfo < (void*) ((unsigned long) procInfoTotal + length);
procInfo = (void*) ((unsigned long) procInfo + procInfo->Size)) {
unsigned num, i;
GROUP_AFFINITY *GroupMask;
/* Ignore non-data caches */
if (procInfo->Relationship == RelationCache
&& procInfo->Cache.Type != CacheUnified
&& procInfo->Cache.Type != CacheData)
continue;
id = -1;
switch (procInfo->Relationship) {
case RelationNumaNode:
type = HWLOC_OBJ_NODE;
num = 1;
GroupMask = &procInfo->NumaNode.GroupMask;
id = procInfo->NumaNode.NodeNumber;
break;
case RelationProcessorPackage:
type = HWLOC_OBJ_SOCKET;
num = procInfo->Processor.GroupCount;
GroupMask = procInfo->Processor.GroupMask;
break;
case RelationCache:
type = HWLOC_OBJ_CACHE;
num = 1;
GroupMask = &procInfo->Cache.GroupMask;
break;
case RelationProcessorCore:
type = HWLOC_OBJ_CORE;
num = procInfo->Processor.GroupCount;
GroupMask = procInfo->Processor.GroupMask;
break;
case RelationGroup:
/* So strange an interface... */
for (id = 0; id < procInfo->Group.ActiveGroupCount; id++) {
KAFFINITY mask;
obj = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, id);
obj->cpuset = hwloc_bitmap_alloc();
mask = procInfo->Group.GroupInfo[id].ActiveProcessorMask;
hwloc_debug("group %u %d cpus mask %lx\n", id,
procInfo->Group.GroupInfo[id].ActiveProcessorCount, mask);
hwloc_bitmap_from_ith_ulong(obj->cpuset, id, mask);
hwloc_insert_object_by_cpuset(topology, obj);
}
continue;
default:
/* Don't know how to get the mask. */
hwloc_debug("unknown relation %d\n", procInfo->Relationship);
continue;
}
obj = hwloc_alloc_setup_object(type, id);
obj->cpuset = hwloc_bitmap_alloc();
for (i = 0; i < num; i++) {
hwloc_debug("%s#%u %d: mask %d:%lx\n", hwloc_obj_type_string(type), id, i, GroupMask[i].Group, GroupMask[i].Mask);
hwloc_bitmap_from_ith_ulong(obj->cpuset, GroupMask[i].Group, GroupMask[i].Mask);
}
switch (type) {
case HWLOC_OBJ_NODE:
{
ULONGLONG avail;
obj->nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_set(obj->nodeset, id);
if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail))
|| (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail)))
obj->memory.local_memory = avail;
obj->memory.page_types = malloc(2 * sizeof(*obj->memory.page_types));
memset(obj->memory.page_types, 0, 2 * sizeof(*obj->memory.page_types));
obj->memory.page_types_len = 1;
obj->memory.page_types[0].size = SystemInfo.dwPageSize;
#ifdef HAVE__SC_LARGE_PAGESIZE
obj->memory.page_types_len++;
obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
break;
}
case HWLOC_OBJ_CACHE:
obj->attr->cache.size = procInfo->Cache.CacheSize;
obj->attr->cache.linesize = procInfo->Cache.LineSize;
obj->attr->cache.depth = procInfo->Cache.Level;
break;
default:
break;
}
hwloc_insert_object_by_cpuset(topology, obj);
}
free(procInfoTotal);
}
}
/* add PU objects */
hwloc_setup_pu_level(topology, hwloc_fallback_nbprocessors(topology));
hwloc_add_object_info(topology->levels[0][0], "Backend", "Windows");
}
void
hwloc_set_windows_hooks(struct hwloc_topology *topology)
{
topology->set_proc_cpubind = hwloc_win_set_proc_cpubind;
topology->get_proc_cpubind = hwloc_win_get_proc_cpubind;
topology->set_thread_cpubind = hwloc_win_set_thread_cpubind;
topology->set_thisproc_cpubind = hwloc_win_set_thisproc_cpubind;
topology->get_thisproc_cpubind = hwloc_win_get_thisproc_cpubind;
topology->set_thisthread_cpubind = hwloc_win_set_thisthread_cpubind;
/* TODO: get_last_cpu_location: use GetCurrentProcessorNumber */
topology->set_proc_membind = hwloc_win_set_proc_membind;
topology->get_proc_membind = hwloc_win_get_proc_membind;
topology->set_thisproc_membind = hwloc_win_set_thisproc_membind;
topology->get_thisproc_membind = hwloc_win_get_thisproc_membind;
topology->set_thisthread_membind = hwloc_win_set_thisthread_membind;
if (!hwloc_win_get_VirtualAllocExNumaProc()) {
topology->alloc_membind = hwloc_win_alloc_membind;
topology->alloc = hwloc_win_alloc;
topology->free_membind = hwloc_win_free_membind;
topology->support.membind->bind_membind = 1;
}
if (!hwloc_win_get_QueryWorkingSetExProc())
topology->get_area_membind = hwloc_win_get_area_membind;
}
/*
* Copyright © 2010 INRIA. All rights reserved.
* Copyright © 2010-2011 Université Bordeaux 1
* Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*
*
* This backend is only used when the operating system does not export
* the necessary hardware topology information to user-space applications.
* Currently, only the FreeBSD backend relies on this x86 backend.
*
* Other backends such as Linux have their own way to retrieve various
* pieces of hardware topology information from the operating system
* on various architectures, without having to use this x86-specific code.
*/
#include <private/autogen/config.h>
#include <hwloc.h>
#include <private/private.h>
#include <private/debug.h>
#include <private/cpuid.h>
#include <private/misc.h>
struct cacheinfo {
unsigned type;
unsigned level;
unsigned nbthreads_sharing;
unsigned linesize;
unsigned linepart;
unsigned ways;
unsigned sets;
unsigned size;
};
struct procinfo {
unsigned present;
unsigned apicid;
unsigned max_log_proc;
unsigned max_nbcores;
unsigned max_nbthreads;
unsigned socketid;
unsigned logprocid;
unsigned threadid;
unsigned coreid;
unsigned *otherids;
unsigned levels;
unsigned numcaches;
struct cacheinfo *cache;
};
enum cpuid_type {
intel,
amd,
unknown
};
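/* Decode an L1/L2/L3 cache descriptor returned by cpuid, assuming the usual
* AMD encodings: the size sits in the top bits of the register (in KB for
* L1/L2, in 512KB units for L3), the associativity in the middle bits, and
* the line size in the low byte. */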
static void fill_amd_cache(struct procinfo *infos, unsigned level, unsigned cpuid)
{
struct cacheinfo *cache;
unsigned cachenum;
unsigned size = 0;
if (level == 1)
size = ((cpuid >> 24)) << 10;
else if (level == 2)
size = ((cpuid >> 16)) << 10;
else if (level == 3)
size = ((cpuid >> 18)) << 19;
if (!size)
return;
cachenum = infos->numcaches++;
infos->cache = realloc(infos->cache, infos->numcaches*sizeof(*infos->cache));
cache = &infos->cache[cachenum];
cache->type = 1;
cache->level = level;
if (level <= 2)
cache->nbthreads_sharing = 1;
else
cache->nbthreads_sharing = infos->max_log_proc;
cache->linesize = cpuid & 0xff;
cache->linepart = 0;
if (level == 1)
cache->ways = (cpuid >> 16) & 0xff;
else {
static const unsigned ways_tab[] = { 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0 };
unsigned ways = (cpuid >> 12) & 0xf;
cache->ways = ways_tab[ways];
}
cache->size = size;
cache->sets = 0;
hwloc_debug("cache L%u t%u linesize %u ways %u size %uKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10);
}
/* Fetch information from the processor itself using cpuid and store it in
* infos so that summarize() can later analyze it globally */
static void look_proc(struct procinfo *infos, unsigned highest_cpuid, unsigned highest_ext_cpuid, enum cpuid_type cpuid_type)
{
unsigned eax, ebx, ecx = 0, edx;
unsigned cachenum;
struct cacheinfo *cache;
infos->present = 1;
eax = 0x01;
hwloc_cpuid(&eax, &ebx, &ecx, &edx);
infos->apicid = ebx >> 24;
if (edx & (1 << 28))
infos->max_log_proc = 1 << hwloc_flsl(((ebx >> 16) & 0xff) - 1);
else
infos->max_log_proc = 1;
hwloc_debug("APIC ID 0x%02x max_log_proc %u\n", infos->apicid, infos->max_log_proc);
infos->socketid = infos->apicid / infos->max_log_proc;
infos->logprocid = infos->apicid % infos->max_log_proc;
infos->coreid = (unsigned) -1;
infos->threadid = (unsigned) -1;
hwloc_debug("phys %u thread %u\n", infos->socketid, infos->logprocid);
/* Intel doesn't actually provide 0x80000008 information */
if (cpuid_type != intel && highest_ext_cpuid >= 0x80000008) {
unsigned coreidsize;
eax = 0x80000008;
hwloc_cpuid(&eax, &ebx, &ecx, &edx);
coreidsize = (ecx >> 12) & 0xf;
hwloc_debug("core ID size: %u\n", coreidsize);
if (!coreidsize) {
infos->max_nbcores = (ecx & 0xff) + 1;
} else
infos->max_nbcores = 1 << coreidsize;
hwloc_debug("Thus max # of cores: %u\n", infos->max_nbcores);
/* Still no multithreaded AMD */
infos->max_nbthreads = 1 ;
hwloc_debug("and max # of threads: %u\n", infos->max_nbthreads);
infos->threadid = infos->logprocid % infos->max_nbthreads;
infos->coreid = infos->logprocid / infos->max_nbthreads;
hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
}
infos->numcaches = 0;
infos->cache = NULL;
/* Intel doesn't actually provide 0x80000005 information */
if (cpuid_type != intel && highest_ext_cpuid >= 0x80000005) {
eax = 0x80000005;
hwloc_cpuid(&eax, &ebx, &ecx, &edx);
fill_amd_cache(infos, 1, ecx);
}
/* Intel doesn't actually provide 0x80000006 information */
if (cpuid_type != intel && highest_ext_cpuid >= 0x80000006) {
eax = 0x80000006;
hwloc_cpuid(&eax, &ebx, &ecx, &edx);
fill_amd_cache(infos, 2, ecx);
fill_amd_cache(infos, 3, edx);
}
/* AMD doesn't actually provide 0x04 information */
if (cpuid_type != amd && highest_cpuid >= 0x04) {
cachenum = 0;
for (cachenum = 0; ; cachenum++) {
unsigned type;
eax = 0x04;
ecx = cachenum;
hwloc_cpuid(&eax, &ebx, &ecx, &edx);
type = eax & 0x1f;
hwloc_debug("cache %u type %u\n", cachenum, type);
if (type == 0)
break;
if (type == 2)
/* Instruction cache */
continue;
infos->numcaches++;
}
cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
for (cachenum = 0; ; cachenum++) {
unsigned linesize, linepart, ways, sets;
unsigned type;
eax = 0x04;
ecx = cachenum;
hwloc_cpuid(&eax, &ebx, &ecx, &edx);
type = eax & 0x1f;
if (type == 0)
break;
if (type == 2)
/* Instruction cache */
continue;
cache->type = type;
cache->level = (eax >> 5) & 0x7;
cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;
infos->max_nbcores = ((eax >> 26) & 0x3f) + 1;
cache->linesize = linesize = (ebx & 0xfff) + 1;
cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
cache->ways = ways = ((ebx >> 22) & 0x3ff) + 1;
cache->sets = sets = ecx + 1;
cache->size = linesize * linepart * ways * sets;
hwloc_debug("cache %u type %u L%u t%u c%u linesize %u linepart %u ways %u sets %u, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);
infos->max_nbthreads = infos->max_log_proc / infos->max_nbcores;
hwloc_debug("thus %u threads\n", infos->max_nbthreads);
infos->threadid = infos->logprocid % infos->max_nbthreads;
infos->coreid = infos->logprocid / infos->max_nbthreads;
hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
cache++;
}
}
if (cpuid_type == intel && highest_cpuid >= 0x0b) {
unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id;
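/* Each level of cpuid leaf 0x0b reports the shift (field width) it occupies
* in the x2APIC ID: the ID at a given level is the group of bits between this
* level's shift and the next one, and whatever remains above the last shift
* identifies the socket. */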
for (level = 0; ; level++) {
ecx = level;
eax = 0x0b;
hwloc_cpuid(&eax, &ebx, &ecx, &edx);
if (!eax && !ebx)
break;
}
if (level) {
infos->levels = level;
infos->otherids = malloc(level * sizeof(*infos->otherids));
for (level = 0; ; level++) {
ecx = level;
eax = 0x0b;
hwloc_cpuid(&eax, &ebx, &ecx, &edx);
if (!eax && !ebx)
break;
apic_nextshift = eax & 0x1f;
apic_number = ebx & 0xffff;
apic_type = (ecx & 0xff00) >> 8;
apic_id = edx;
id = (apic_id >> apic_shift) & ((1 << (apic_nextshift - apic_shift)) - 1);
hwloc_debug("x2APIC %08x %d: nextshift %d num %2d type %d id %2d\n", apic_id, level, apic_nextshift, apic_number, apic_type, id);
infos->apicid = apic_id;
infos->otherids[level] = UINT_MAX;
switch (apic_type) {
case 1:
infos->threadid = id;
break;
case 2:
infos->coreid = id;
break;
default:
hwloc_debug("x2APIC %d: unknown type %d\n", level, apic_type);
infos->otherids[level] = apic_id >> apic_shift;
break;
}
apic_shift = apic_nextshift;
}
infos->socketid = apic_id >> apic_shift;
hwloc_debug("x2APIC remainder: %d\n", infos->socketid);
} else
infos->otherids = NULL;
} else
infos->otherids = NULL;
}
/* Analyse information stored in infos, and build topology levels accordingly */
static void summarize(hwloc_topology_t topology, struct procinfo *infos, unsigned nbprocs)
{
hwloc_bitmap_t complete_cpuset = hwloc_bitmap_alloc();
unsigned i, j, l, level;
int one = -1;
for (i = 0; i < nbprocs; i++)
if (infos[i].present) {
hwloc_bitmap_set(complete_cpuset, i);
one = i;
}
if (one == -1)
return;
/* Look for sockets */
{
hwloc_bitmap_t sockets_cpuset = hwloc_bitmap_dup(complete_cpuset);
hwloc_bitmap_t socket_cpuset;
hwloc_obj_t sock;
while ((i = hwloc_bitmap_first(sockets_cpuset)) != (unsigned) -1) {
unsigned socketid = infos[i].socketid;
socket_cpuset = hwloc_bitmap_alloc();
for (j = i; j < nbprocs; j++) {
if (infos[j].socketid == socketid) {
hwloc_bitmap_set(socket_cpuset, j);
hwloc_bitmap_clr(sockets_cpuset, j);
}
}
sock = hwloc_alloc_setup_object(HWLOC_OBJ_SOCKET, socketid);
sock->cpuset = socket_cpuset;
hwloc_debug_1arg_bitmap("os socket %u has cpuset %s\n",
socketid, socket_cpuset);
hwloc_insert_object_by_cpuset(topology, sock);
}
hwloc_bitmap_free(sockets_cpuset);
}
/* Look for unknown objects */
if (infos[one].otherids) {
for (level = infos[one].levels-1; level <= infos[one].levels-1; level--) {
if (infos[one].otherids[level] != UINT_MAX) {
hwloc_bitmap_t unknowns_cpuset = hwloc_bitmap_dup(complete_cpuset);
hwloc_bitmap_t unknown_cpuset;
hwloc_obj_t unknown;
while ((i = hwloc_bitmap_first(unknowns_cpuset)) != (unsigned) -1) {
unsigned unknownid = infos[i].otherids[level];
unknown_cpuset = hwloc_bitmap_alloc();
for (j = i; j < nbprocs; j++) {
if (infos[j].otherids[level] == unknownid) {
hwloc_bitmap_set(unknown_cpuset, j);
hwloc_bitmap_clr(unknowns_cpuset, j);
}
}
unknown = hwloc_alloc_setup_object(HWLOC_OBJ_MISC, unknownid);
unknown->cpuset = unknown_cpuset;
unknown->os_level = level;
hwloc_debug_2args_bitmap("os unknown%d %u has cpuset %s\n",
level, unknownid, unknown_cpuset);
hwloc_insert_object_by_cpuset(topology, unknown);
}
hwloc_bitmap_free(unknowns_cpuset);
}
}
}
/* Look for cores */
{
hwloc_bitmap_t cores_cpuset = hwloc_bitmap_dup(complete_cpuset);
hwloc_bitmap_t core_cpuset;
hwloc_obj_t core;
while ((i = hwloc_bitmap_first(cores_cpuset)) != (unsigned) -1) {
unsigned socketid = infos[i].socketid;
unsigned coreid = infos[i].coreid;
if (coreid == (unsigned) -1) {
hwloc_bitmap_clr(cores_cpuset, i);
continue;
}
core_cpuset = hwloc_bitmap_alloc();
for (j = i; j < nbprocs; j++) {
if (infos[j].coreid == (unsigned) -1) {
hwloc_bitmap_clr(cores_cpuset, j);
continue;
}
if (infos[j].socketid == socketid && infos[j].coreid == coreid) {
hwloc_bitmap_set(core_cpuset, j);
hwloc_bitmap_clr(cores_cpuset, j);
}
}
core = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, coreid);
core->cpuset = core_cpuset;
hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n",
coreid, core_cpuset);
hwloc_insert_object_by_cpuset(topology, core);
}
hwloc_bitmap_free(cores_cpuset);
}
/* Look for caches */
/* First find max level */
level = 0;
for (i = 0; i < nbprocs; i++)
for (j = 0; j < infos[i].numcaches; j++)
if (infos[i].cache[j].level > level)
level = infos[i].cache[j].level;
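/* Two PUs are assumed to share a cache of this level when they are in the
* same socket and their APIC IDs fall in the same group of nbthreads_sharing
* consecutive IDs, i.e. apicid / nbthreads_sharing gives the cache ID. */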
while (level > 0) {
/* Look for caches at level level */
{
hwloc_bitmap_t caches_cpuset = hwloc_bitmap_dup(complete_cpuset);
hwloc_bitmap_t cache_cpuset;
hwloc_obj_t cache;
while ((i = hwloc_bitmap_first(caches_cpuset)) != (unsigned) -1) {
unsigned socketid = infos[i].socketid;
for (l = 0; l < infos[i].numcaches; l++) {
if (infos[i].cache[l].level == level)
break;
}
if (l == infos[i].numcaches) {
/* proc i has no cache at this level, odd */
hwloc_bitmap_clr(caches_cpuset, i);
continue;
}
{
unsigned cacheid = infos[i].apicid / infos[i].cache[l].nbthreads_sharing;
cache_cpuset = hwloc_bitmap_alloc();
for (j = i; j < nbprocs; j++) {
unsigned l2;
for (l2 = 0; l2 < infos[j].numcaches; l2++) {
if (infos[j].cache[l2].level == level)
break;
}
if (l2 == infos[j].numcaches) {
/* proc j has no cache at this level, odd */
hwloc_bitmap_clr(caches_cpuset, j);
continue;
}
if (infos[j].socketid == socketid && infos[j].apicid / infos[j].cache[l2].nbthreads_sharing == cacheid) {
hwloc_bitmap_set(cache_cpuset, j);
hwloc_bitmap_clr(caches_cpuset, j);
}
}
cache = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, cacheid);
cache->attr->cache.depth = level;
cache->attr->cache.size = infos[i].cache[l].size;
cache->attr->cache.linesize = infos[i].cache[l].linesize;
cache->cpuset = cache_cpuset;
hwloc_debug_2args_bitmap("os L%u cache %u has cpuset %s\n",
level, cacheid, cache_cpuset);
hwloc_insert_object_by_cpuset(topology, cache);
}
}
hwloc_bitmap_free(caches_cpuset);
}
level--;
}
for (i = 0; i < nbprocs; i++) {
free(infos[i].cache);
if (infos[i].otherids)
free(infos[i].otherids);
}
}
#define INTEL_EBX ('G' | ('e'<<8) | ('n'<<16) | ('u'<<24))
#define INTEL_EDX ('i' | ('n'<<8) | ('e'<<16) | ('I'<<24))
#define INTEL_ECX ('n' | ('t'<<8) | ('e'<<16) | ('l'<<24))
#define AMD_EBX ('A' | ('u'<<8) | ('t'<<16) | ('h'<<24))
#define AMD_EDX ('e' | ('n'<<8) | ('t'<<16) | ('i'<<24))
#define AMD_ECX ('c' | ('A'<<8) | ('M'<<16) | ('D'<<24))
void hwloc_look_x86(struct hwloc_topology *topology, unsigned nbprocs)
{
/* This function must always be here, but it's ok if it's empty. */
#if defined(HWLOC_HAVE_CPUID)
unsigned eax, ebx, ecx = 0, edx;
hwloc_bitmap_t orig_cpuset;
unsigned i;
unsigned highest_cpuid;
unsigned highest_ext_cpuid;
struct procinfo *infos = NULL;
enum cpuid_type cpuid_type = unknown;
if (!hwloc_have_cpuid())
return;
infos = malloc(sizeof(struct procinfo) * nbprocs);
if (NULL == infos) {
return;
}
eax = 0x00;
hwloc_cpuid(&eax, &ebx, &ecx, &edx);
highest_cpuid = eax;
if (ebx == INTEL_EBX && ecx == INTEL_ECX && edx == INTEL_EDX)
cpuid_type = intel;
if (ebx == AMD_EBX && ecx == AMD_ECX && edx == AMD_EDX)
cpuid_type = amd;
hwloc_debug("highest cpuid %x, cpuid type %u\n", highest_cpuid, cpuid_type);
if (highest_cpuid < 0x01) {
goto free;
}
eax = 0x80000000;
hwloc_cpuid(&eax, &ebx, &ecx, &edx);
highest_ext_cpuid = eax;
hwloc_debug("highest extended cpuid %x\n", highest_ext_cpuid);
orig_cpuset = hwloc_bitmap_alloc();
if (topology->get_thisthread_cpubind && topology->set_thisthread_cpubind) {
if (!topology->get_thisthread_cpubind(topology, orig_cpuset, HWLOC_CPUBIND_STRICT)) {
hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
for (i = 0; i < nbprocs; i++) {
hwloc_bitmap_only(cpuset, i);
if (topology->set_thisthread_cpubind(topology, cpuset, HWLOC_CPUBIND_STRICT))
continue;
look_proc(&infos[i], highest_cpuid, highest_ext_cpuid, cpuid_type);
}
hwloc_bitmap_free(cpuset);
topology->set_thisthread_cpubind(topology, orig_cpuset, 0);
hwloc_bitmap_free(orig_cpuset);
summarize(topology, infos, nbprocs);
goto free;
}
}
if (topology->get_thisproc_cpubind && topology->set_thisproc_cpubind) {
if (!topology->get_thisproc_cpubind(topology, orig_cpuset, HWLOC_CPUBIND_STRICT)) {
hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
for (i = 0; i < nbprocs; i++) {
hwloc_bitmap_only(cpuset, i);
if (topology->set_thisproc_cpubind(topology, cpuset, HWLOC_CPUBIND_STRICT))
continue;
look_proc(&infos[i], highest_cpuid, highest_ext_cpuid, cpuid_type);
}
hwloc_bitmap_free(cpuset);
topology->set_thisproc_cpubind(topology, orig_cpuset, 0);
hwloc_bitmap_free(orig_cpuset);
summarize(topology, infos, nbprocs);
goto free;
}
}
hwloc_add_object_info(topology->levels[0][0], "Backend", "x86");
free:
if (NULL != infos) {
free(infos);
}
#endif
}
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2011 INRIA. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux 1
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
#include <private/autogen/config.h>
#include <hwloc.h>
#include <private/private.h>
#include <private/debug.h>
#ifdef HWLOC_HAVE_XML
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <assert.h>
#include <strings.h>
int
hwloc_backend_xml_init(struct hwloc_topology *topology, const char *xmlpath, const char *xmlbuffer, int buflen)
{
xmlDoc *doc = NULL;
assert(topology->backend_type == HWLOC_BACKEND_NONE);
LIBXML_TEST_VERSION;
if (xmlpath)
doc = xmlReadFile(xmlpath, NULL, 0);
else if (xmlbuffer)
doc = xmlReadMemory(xmlbuffer, buflen, "", NULL, 0);
if (!doc)
return -1;
topology->backend_params.xml.doc = doc;
topology->is_thissystem = 0;
topology->backend_type = HWLOC_BACKEND_XML;
return 0;
}
void
hwloc_backend_xml_exit(struct hwloc_topology *topology)
{
assert(topology->backend_type == HWLOC_BACKEND_XML);
xmlFreeDoc((xmlDoc*)topology->backend_params.xml.doc);
topology->backend_type = HWLOC_BACKEND_NONE;
}
/******************************
********* XML import *********
******************************/
static void hwloc__xml_import_node(struct hwloc_topology *topology, struct hwloc_obj *parent, xmlNode *node, int depth);
static const xmlChar *
hwloc__xml_import_attr_value(xmlAttr *attr)
{
xmlNode *subnode;
/* use the first valid attribute content */
for (subnode = attr->children; subnode; subnode = subnode->next) {
if (subnode->type == XML_TEXT_NODE) {
if (subnode->content && subnode->content[0] != '\0' && subnode->content[0] != '\n')
return subnode->content;
} else {
fprintf(stderr, "ignoring unexpected xml attr node type %u\n", subnode->type);
}
}
return NULL;
}
static void
hwloc__xml_import_object_attr(struct hwloc_topology *topology __hwloc_attribute_unused, struct hwloc_obj *obj,
const xmlChar *_name, const xmlChar *_value)
{
const char *name = (const char *) _name;
const char *value = (const char *) _value;
if (!strcmp(name, "type")) {
/* already handled */
return;
}
else if (!strcmp(name, "os_level"))
obj->os_level = strtoul(value, NULL, 10);
else if (!strcmp(name, "os_index"))
obj->os_index = strtoul(value, NULL, 10);
else if (!strcmp(name, "cpuset")) {
obj->cpuset = hwloc_bitmap_alloc();
hwloc_bitmap_sscanf(obj->cpuset, value);
} else if (!strcmp(name, "complete_cpuset")) {
obj->complete_cpuset = hwloc_bitmap_alloc();
hwloc_bitmap_sscanf(obj->complete_cpuset,value);
} else if (!strcmp(name, "online_cpuset")) {
obj->online_cpuset = hwloc_bitmap_alloc();
hwloc_bitmap_sscanf(obj->online_cpuset, value);
} else if (!strcmp(name, "allowed_cpuset")) {
obj->allowed_cpuset = hwloc_bitmap_alloc();
hwloc_bitmap_sscanf(obj->allowed_cpuset, value);
} else if (!strcmp(name, "nodeset")) {
obj->nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_sscanf(obj->nodeset, value);
} else if (!strcmp(name, "complete_nodeset")) {
obj->complete_nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_sscanf(obj->complete_nodeset, value);
} else if (!strcmp(name, "allowed_nodeset")) {
obj->allowed_nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_sscanf(obj->allowed_nodeset, value);
} else if (!strcmp(name, "name"))
obj->name = strdup(value);
else if (!strcmp(name, "cache_size")) {
unsigned long long lvalue = strtoull(value, NULL, 10);
if (obj->type == HWLOC_OBJ_CACHE)
obj->attr->cache.size = lvalue;
else
fprintf(stderr, "ignoring cache_size attribute for non-cache object type\n");
}
else if (!strcmp(name, "cache_linesize")) {
unsigned long lvalue = strtoul(value, NULL, 10);
if (obj->type == HWLOC_OBJ_CACHE)
obj->attr->cache.linesize = lvalue;
else
fprintf(stderr, "ignoring cache_linesize attribute for non-cache object type\n");
}
else if (!strcmp(name, "local_memory"))
obj->memory.local_memory = strtoull(value, NULL, 10);
else if (!strcmp(name, "depth")) {
unsigned long lvalue = strtoul(value, NULL, 10);
switch (obj->type) {
case HWLOC_OBJ_CACHE:
obj->attr->cache.depth = lvalue;
break;
case HWLOC_OBJ_GROUP:
obj->attr->group.depth = lvalue;
break;
default:
fprintf(stderr, "ignoring depth attribute for object type without depth\n");
break;
}
}
/*************************
* deprecated (from 1.0)
*/
else if (!strcmp(name, "dmi_board_vendor")) {
hwloc_add_object_info(obj, "DMIBoardVendor", strdup(value));
}
else if (!strcmp(name, "dmi_board_name")) {
hwloc_add_object_info(obj, "DMIBoardName", strdup(value));
}
/*************************
* deprecated (from 0.9)
*/
else if (!strcmp(name, "memory_kB")) {
unsigned long long lvalue = strtoull(value, NULL, 10);
switch (obj->type) {
case HWLOC_OBJ_CACHE:
obj->attr->cache.size = lvalue << 10;
break;
case HWLOC_OBJ_NODE:
case HWLOC_OBJ_MACHINE:
case HWLOC_OBJ_SYSTEM:
obj->memory.local_memory = lvalue << 10;
break;
default:
fprintf(stderr, "ignoring memory_kB attribute for object type without memory\n");
break;
}
}
else if (!strcmp(name, "huge_page_size_kB")) {
unsigned long lvalue = strtoul(value, NULL, 10);
switch (obj->type) {
case HWLOC_OBJ_NODE:
case HWLOC_OBJ_MACHINE:
case HWLOC_OBJ_SYSTEM:
if (!obj->memory.page_types) {
obj->memory.page_types = malloc(sizeof(*obj->memory.page_types));
obj->memory.page_types_len = 1;
}
obj->memory.page_types[0].size = lvalue << 10;
break;
default:
fprintf(stderr, "ignoring huge_page_size_kB attribute for object type without huge pages\n");
break;
}
}
else if (!strcmp(name, "huge_page_free")) {
unsigned long lvalue = strtoul(value, NULL, 10);
switch (obj->type) {
case HWLOC_OBJ_NODE:
case HWLOC_OBJ_MACHINE:
case HWLOC_OBJ_SYSTEM:
if (!obj->memory.page_types) {
obj->memory.page_types = malloc(sizeof(*obj->memory.page_types));
obj->memory.page_types_len = 1;
}
obj->memory.page_types[0].count = lvalue;
break;
default:
fprintf(stderr, "ignoring huge_page_free attribute for object type without huge pages\n");
break;
}
}
/*
* end of deprecated (from 0.9)
*******************************/
else
fprintf(stderr, "ignoring unknown object attribute %s\n", name);
}
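/* Illustrative note (not part of the original source): given an attribute such as
* cache_size="32768" on a Cache object, the code above stores 32768 directly in
* obj->attr->cache.size, while the deprecated memory_kB="1024" form is shifted by
* 10 bits, i.e. 1024 << 10 = 1048576 bytes of cache size or local memory depending
* on the object type. */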
static void
hwloc__xml_import_object_node(struct hwloc_topology *topology, struct hwloc_obj *parent, struct hwloc_obj *obj, xmlNode *node, int depth)
{
xmlAttr *attr = NULL;
/* first determine the object type */
for (attr = node->properties; attr; attr = attr->next) {
if (attr->type == XML_ATTRIBUTE_NODE && !strcmp((const char*) attr->name, "type")) {
const xmlChar *value = hwloc__xml_import_attr_value(attr);
if (!value) {
fprintf(stderr, "ignoring xml object without type attr %s\n", (const char*) value);
return;
}
obj->type = hwloc_obj_type_of_string((const char*) value);
if (obj->type == (hwloc_obj_type_t)-1) {
fprintf(stderr, "ignoring unknown object type %s\n", (const char*) value);
return;
}
break;
} else {
fprintf(stderr, "ignoring unexpected xml attr type %u\n", attr->type);
}
}
if (obj->type == HWLOC_OBJ_TYPE_MAX) {
fprintf(stderr, "ignoring object without type\n");
return;
}
/* process attributes now that the type is known */
for (attr = node->properties; attr; attr = attr->next) {
if (attr->type == XML_ATTRIBUTE_NODE) {
const xmlChar *value = hwloc__xml_import_attr_value(attr);
if (value)
hwloc__xml_import_object_attr(topology, obj, attr->name, value);
} else {
fprintf(stderr, "ignoring unexpected xml object attr type %u\n", attr->type);
}
}
if (depth > 0) { /* root object is already in place */
/* add object */
hwloc_insert_object_by_parent(topology, parent, obj);
}
/* process children */
if (node->children)
hwloc__xml_import_node(topology, obj, node->children, depth+1);
}
static void
hwloc__xml_import_pagetype_node(struct hwloc_topology *topology __hwloc_attribute_unused, struct hwloc_obj *obj, xmlNode *node)
{
uint64_t size = 0, count = 0;
xmlAttr *attr = NULL;
for (attr = node->properties; attr; attr = attr->next) {
if (attr->type == XML_ATTRIBUTE_NODE) {
const xmlChar *value = hwloc__xml_import_attr_value(attr);
if (value) {
if (!strcmp((char *) attr->name, "size"))
size = strtoul((char *) value, NULL, 10);
else if (!strcmp((char *) attr->name, "count"))
count = strtoul((char *) value, NULL, 10);
else
fprintf(stderr, "ignoring unknown pagetype attribute %s\n", (char *) attr->name);
}
} else {
fprintf(stderr, "ignoring unexpected xml pagetype attr type %u\n", attr->type);
}
}
if (size) {
int idx = obj->memory.page_types_len;
obj->memory.page_types = realloc(obj->memory.page_types, (idx+1)*sizeof(*obj->memory.page_types));
obj->memory.page_types_len = idx+1;
obj->memory.page_types[idx].size = size;
obj->memory.page_types[idx].count = count;
} else
fprintf(stderr, "ignoring pagetype attribute without size\n");
}
static void
hwloc__xml_import_distances_node(struct hwloc_topology *topology __hwloc_attribute_unused, struct hwloc_obj *obj, xmlNode *node)
{
unsigned long reldepth = 0, nbobjs = 0;
float latbase = 0;
xmlAttr *attr = NULL;
xmlNode *subnode;
for (attr = node->properties; attr; attr = attr->next) {
if (attr->type == XML_ATTRIBUTE_NODE) {
const xmlChar *value = hwloc__xml_import_attr_value(attr);
if (value) {
if (!strcmp((char *) attr->name, "nbobjs"))
nbobjs = strtoul((char *) value, NULL, 10);
else if (!strcmp((char *) attr->name, "relative_depth"))
reldepth = strtoul((char *) value, NULL, 10);
else if (!strcmp((char *) attr->name, "latency_base"))
latbase = (float) atof((char *) value);
else
fprintf(stderr, "ignoring unknown distances attribute %s\n", (char *) attr->name);
} else
fprintf(stderr, "ignoring unexpected xml distances attr name `%s' with no value\n", (const char*) attr->name);
} else {
fprintf(stderr, "ignoring unexpected xml distances attr type %u\n", attr->type);
}
}
if (nbobjs && reldepth && latbase) {
int idx = obj->distances_count;
unsigned nbcells, i;
float *matrix, latmax = 0;
nbcells = 0;
if (node->children)
for(subnode = node->children; subnode; subnode = subnode->next)
if (subnode->type == XML_ELEMENT_NODE)
nbcells++;
if (nbcells != nbobjs*nbobjs) {
fprintf(stderr, "ignoring distances with %u cells instead of %lu\n", nbcells, nbobjs*nbobjs);
return;
}
obj->distances = realloc(obj->distances, (idx+1)*sizeof(*obj->distances));
obj->distances_count = idx+1;
obj->distances[idx] = malloc(sizeof(**obj->distances));
obj->distances[idx]->relative_depth = reldepth;
obj->distances[idx]->nbobjs = nbobjs;
obj->distances[idx]->latency = matrix = malloc(nbcells*sizeof(float));
obj->distances[idx]->latency_base = latbase;
i = 0;
for(subnode = node->children; subnode; subnode = subnode->next)
if (subnode->type == XML_ELEMENT_NODE) {
/* read one cell */
for (attr = subnode->properties; attr; attr = attr->next)
if (attr->type == XML_ATTRIBUTE_NODE) {
const xmlChar *value = hwloc__xml_import_attr_value(attr);
if (value) {
if (!strcmp((char *) attr->name, "value")) {
float val = (float) atof((char *) value);
matrix[i] = val;
if (val > latmax)
latmax = val;
} else
fprintf(stderr, "ignoring unknown distance attribute %s\n", (char *) attr->name);
} else
fprintf(stderr, "ignoring unexpected xml distance attr name `%s' with no value\n", (const char*) attr->name);
} else {
fprintf(stderr, "ignoring unexpected xml distance attr type %u\n", attr->type);
}
/* next matrix cell */
i++;
}
obj->distances[idx]->latency_max = latmax;
}
}
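/* Illustrative note: for nbobjs="2" the importer above requires exactly 4 child
* elements and reads them linearly into matrix[0..3] in document order; this
* mirrors the exporter further below, which writes the latency cells with a single
* index j running over nbobjs*nbobjs, so the i-th imported cell matches the i-th
* exported latency[] entry. */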
static void
hwloc__xml_import_info_node(struct hwloc_topology *topology __hwloc_attribute_unused, struct hwloc_obj *obj, xmlNode *node)
{
char *infoname = NULL;
char *infovalue = NULL;
xmlAttr *attr = NULL;
for (attr = node->properties; attr; attr = attr->next) {
if (attr->type == XML_ATTRIBUTE_NODE) {
const xmlChar *value = hwloc__xml_import_attr_value(attr);
if (value) {
if (!strcmp((char *) attr->name, "name"))
infoname = (char *) value;
else if (!strcmp((char *) attr->name, "value"))
infovalue = (char *) value;
else
fprintf(stderr, "ignoring unknown info attribute %s\n", (char *) attr->name);
}
} else {
fprintf(stderr, "ignoring unexpected xml info attr type %u\n", attr->type);
}
}
if (infoname)
/* empty strings are ignored by libxml */
hwloc_add_object_info(obj, infoname, infovalue ? infovalue : "");
else
fprintf(stderr, "ignoring info attribute without name\n");
}
static void
hwloc__xml_import_node(struct hwloc_topology *topology, struct hwloc_obj *parent, xmlNode *node, int depth)
{
for (; node; node = node->next) {
if (node->type == XML_ELEMENT_NODE) {
if (!strcmp((const char*) node->name, "object")) {
/* object attributes */
struct hwloc_obj *obj;
if (depth)
obj = hwloc_alloc_setup_object(HWLOC_OBJ_TYPE_MAX, -1);
else
obj = topology->levels[0][0];
hwloc__xml_import_object_node(topology, parent, obj, node, depth);
} else if (!strcmp((const char*) node->name, "page_type")) {
hwloc__xml_import_pagetype_node(topology, parent, node);
} else if (!strcmp((const char*) node->name, "info")) {
hwloc__xml_import_info_node(topology, parent, node);
} else if (!strcmp((const char*) node->name, "distances")) {
hwloc__xml_import_distances_node(topology, parent, node);
} else {
/* unknown class */
fprintf(stderr, "ignoring unexpected node class `%s'\n", (const char*) node->name);
continue;
}
} else if (node->type == XML_TEXT_NODE) {
if (node->content && node->content[0] != '\0' && node->content[0] != '\n')
fprintf(stderr, "ignoring object text content %s\n", (const char*) node->content);
} else {
fprintf(stderr, "ignoring unexpected xml node type %u\n", node->type);
}
}
}
static void
hwloc__xml_import_topology_node(struct hwloc_topology *topology, xmlNode *node)
{
xmlAttr *attr = NULL;
if (strcmp((const char *) node->name, "topology") && strcmp((const char *) node->name, "root")) {
/* root node should be in "topology" class (or "root" if importing from < 1.0) */
fprintf(stderr, "ignoring object of class `%s' not at the top the xml hierarchy\n", (const char *) node->name);
return;
}
/* process attributes */
for (attr = node->properties; attr; attr = attr->next) {
if (attr->type == XML_ATTRIBUTE_NODE) {
const xmlChar *value = hwloc__xml_import_attr_value(attr);
if (value) {
fprintf(stderr, "ignoring unknown root attribute %s\n", (char *) attr->name);
}
} else {
fprintf(stderr, "ignoring unexpected xml root attr type %u\n", attr->type);
}
}
/* process children */
if (node->children)
hwloc__xml_import_node(topology, NULL, node->children, 0);
}
void
hwloc_look_xml(struct hwloc_topology *topology)
{
xmlNode* root_node;
xmlDtd *dtd;
topology->support.discovery->pu = 1;
dtd = xmlGetIntSubset((xmlDoc*) topology->backend_params.xml.doc);
if (!dtd)
fprintf(stderr, "Loading XML topology without DTD\n");
else if (strcmp((char *) dtd->SystemID, "hwloc.dtd"))
fprintf(stderr, "Loading XML topology with wrong DTD SystemID (%s instead of %s)\n",
(char *) dtd->SystemID, "hwloc.dtd");
root_node = xmlDocGetRootElement((xmlDoc*) topology->backend_params.xml.doc);
hwloc__xml_import_topology_node(topology, root_node);
if (root_node->next)
fprintf(stderr, "ignoring non-first root nodes\n");
/* keep the "Backend" information intact */
/* we could add "BackendSource=XML" to notify that XML was used between the actual backend and here */
}
static void
hwloc_xml__check_distances(struct hwloc_topology *topology, hwloc_obj_t obj)
{
hwloc_obj_t child;
unsigned i=0;
while (i<obj->distances_count) {
unsigned depth = obj->depth + obj->distances[i]->relative_depth;
unsigned nbobjs = hwloc_get_nbobjs_inside_cpuset_by_depth(topology, obj->cpuset, depth);
if (nbobjs != obj->distances[i]->nbobjs) {
fprintf(stderr, "ignoring invalid distance matrix with %u objs instead of %u\n",
obj->distances[i]->nbobjs, nbobjs);
hwloc_free_logical_distances(obj->distances[i]);
memmove(&obj->distances[i], &obj->distances[i+1], (obj->distances_count-i-1)*sizeof(*obj->distances));
obj->distances_count--;
} else
i++;
}
child = obj->first_child;
while (child != NULL) {
hwloc_xml__check_distances(topology, child);
child = child->next_sibling;
}
}
void
hwloc_xml_check_distances(struct hwloc_topology *topology)
{
/* now that the topology tree has been properly set up,
* check that our distance matrix sizes make sense */
hwloc_xml__check_distances(topology, topology->levels[0][0]);
}
/******************************
********* XML export *********
******************************/
static void
hwloc__xml_export_object (hwloc_topology_t topology, hwloc_obj_t obj, xmlNodePtr root_node)
{
xmlNodePtr node = NULL, ptnode = NULL, dnode = NULL, dcnode = NULL;
char *cpuset = NULL;
char tmp[255];
unsigned i;
/* xmlNewChild() creates a new node, which is "attached" as child node
* of root_node node. */
node = xmlNewChild(root_node, NULL, BAD_CAST "object", NULL);
xmlNewProp(node, BAD_CAST "type", BAD_CAST hwloc_obj_type_string(obj->type));
sprintf(tmp, "%d", obj->os_level);
xmlNewProp(node, BAD_CAST "os_level", BAD_CAST tmp);
if (obj->os_index != (unsigned) -1) {
sprintf(tmp, "%u", obj->os_index);
xmlNewProp(node, BAD_CAST "os_index", BAD_CAST tmp);
}
if (obj->cpuset) {
hwloc_bitmap_asprintf(&cpuset, obj->cpuset);
xmlNewProp(node, BAD_CAST "cpuset", BAD_CAST cpuset);
free(cpuset);
}
if (obj->complete_cpuset) {
hwloc_bitmap_asprintf(&cpuset, obj->complete_cpuset);
xmlNewProp(node, BAD_CAST "complete_cpuset", BAD_CAST cpuset);
free(cpuset);
}
if (obj->online_cpuset) {
hwloc_bitmap_asprintf(&cpuset, obj->online_cpuset);
xmlNewProp(node, BAD_CAST "online_cpuset", BAD_CAST cpuset);
free(cpuset);
}
if (obj->allowed_cpuset) {
hwloc_bitmap_asprintf(&cpuset, obj->allowed_cpuset);
xmlNewProp(node, BAD_CAST "allowed_cpuset", BAD_CAST cpuset);
free(cpuset);
}
if (obj->nodeset && !hwloc_bitmap_isfull(obj->nodeset)) {
hwloc_bitmap_asprintf(&cpuset, obj->nodeset);
xmlNewProp(node, BAD_CAST "nodeset", BAD_CAST cpuset);
free(cpuset);
}
if (obj->complete_nodeset && !hwloc_bitmap_isfull(obj->complete_nodeset)) {
hwloc_bitmap_asprintf(&cpuset, obj->complete_nodeset);
xmlNewProp(node, BAD_CAST "complete_nodeset", BAD_CAST cpuset);
free(cpuset);
}
if (obj->allowed_nodeset && !hwloc_bitmap_isfull(obj->allowed_nodeset)) {
hwloc_bitmap_asprintf(&cpuset, obj->allowed_nodeset);
xmlNewProp(node, BAD_CAST "allowed_nodeset", BAD_CAST cpuset);
free(cpuset);
}
if (obj->name)
xmlNewProp(node, BAD_CAST "name", BAD_CAST obj->name);
switch (obj->type) {
case HWLOC_OBJ_CACHE:
sprintf(tmp, "%llu", (unsigned long long) obj->attr->cache.size);
xmlNewProp(node, BAD_CAST "cache_size", BAD_CAST tmp);
sprintf(tmp, "%u", obj->attr->cache.depth);
xmlNewProp(node, BAD_CAST "depth", BAD_CAST tmp);
sprintf(tmp, "%u", (unsigned) obj->attr->cache.linesize);
xmlNewProp(node, BAD_CAST "cache_linesize", BAD_CAST tmp);
break;
case HWLOC_OBJ_GROUP:
sprintf(tmp, "%u", obj->attr->group.depth);
xmlNewProp(node, BAD_CAST "depth", BAD_CAST tmp);
break;
default:
break;
}
if (obj->memory.local_memory) {
sprintf(tmp, "%llu", (unsigned long long) obj->memory.local_memory);
xmlNewProp(node, BAD_CAST "local_memory", BAD_CAST tmp);
}
for(i=0; i<obj->memory.page_types_len; i++) {
ptnode = xmlNewChild(node, NULL, BAD_CAST "page_type", NULL);
sprintf(tmp, "%llu", (unsigned long long) obj->memory.page_types[i].size);
xmlNewProp(ptnode, BAD_CAST "size", BAD_CAST tmp);
sprintf(tmp, "%llu", (unsigned long long) obj->memory.page_types[i].count);
xmlNewProp(ptnode, BAD_CAST "count", BAD_CAST tmp);
}
for(i=0; i<obj->infos_count; i++) {
ptnode = xmlNewChild(node, NULL, BAD_CAST "info", NULL);
xmlNewProp(ptnode, BAD_CAST "name", BAD_CAST obj->infos[i].name);
xmlNewProp(ptnode, BAD_CAST "value", BAD_CAST obj->infos[i].value);
}
for(i=0; i<obj->distances_count; i++) {
unsigned nbobjs = obj->distances[i]->nbobjs;
unsigned j;
dnode = xmlNewChild(node, NULL, BAD_CAST "distances", NULL);
sprintf(tmp, "%u", nbobjs);
xmlNewProp(dnode, BAD_CAST "nbobjs", BAD_CAST tmp);
sprintf(tmp, "%u", obj->distances[i]->relative_depth);
xmlNewProp(dnode, BAD_CAST "relative_depth", BAD_CAST tmp);
sprintf(tmp, "%f", obj->distances[i]->latency_base);
xmlNewProp(dnode, BAD_CAST "latency_base", BAD_CAST tmp);
for(j=0; j<nbobjs*nbobjs; j++) {
dcnode = xmlNewChild(dnode, NULL, BAD_CAST "latency", NULL);
sprintf(tmp, "%f", obj->distances[i]->latency[j]);
xmlNewProp(dcnode, BAD_CAST "value", BAD_CAST tmp);
}
}
if (obj->arity) {
unsigned x;
for (x=0; x<obj->arity; x++)
hwloc__xml_export_object (topology, obj->children[x], node);
}
}
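/* Illustrative sketch of the generated XML (attribute values made up): an object
* exported by the function above may look like
*   <object type="Core" os_level="0" os_index="2" cpuset="0x00000030">
*     <info name="SomeName" value="SomeValue"/>
*   </object>
* with page_type, distances and child object elements nested the same way,
* depending on which fields the object actually carries. */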
static void
hwloc__xml_export_topology_info (hwloc_topology_t topology __hwloc_attribute_unused, xmlNodePtr root_node __hwloc_attribute_unused)
{
}
static xmlDocPtr
hwloc__topology_prepare_export(hwloc_topology_t topology)
{
xmlDocPtr doc = NULL; /* document pointer */
xmlNodePtr root_node = NULL; /* root pointer */
xmlDtdPtr dtd = NULL; /* DTD pointer */
LIBXML_TEST_VERSION;
/* Creates a new document, a node and set it as a root node. */
doc = xmlNewDoc(BAD_CAST "1.0");
root_node = xmlNewNode(NULL, BAD_CAST "topology");
xmlDocSetRootElement(doc, root_node);
/* Creates a DTD declaration. Isn't mandatory. */
dtd = xmlCreateIntSubset(doc, BAD_CAST "topology", NULL, BAD_CAST "hwloc.dtd");
hwloc__xml_export_object (topology, hwloc_get_root_obj(topology), root_node);
hwloc__xml_export_topology_info (topology, root_node);
return doc;
}
void hwloc_topology_export_xml(hwloc_topology_t topology, const char *filename)
{
xmlDocPtr doc = hwloc__topology_prepare_export(topology);
xmlSaveFormatFileEnc(filename, doc, "UTF-8", 1);
xmlFreeDoc(doc);
}
void hwloc_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlbuffer, int *buflen)
{
xmlDocPtr doc = hwloc__topology_prepare_export(topology);
xmlDocDumpFormatMemoryEnc(doc, (xmlChar **)xmlbuffer, buflen, "UTF-8", 1);
xmlFreeDoc(doc);
}
#endif /* HWLOC_HAVE_XML */
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2011 INRIA. All rights reserved.
* Copyright © 2009-2010 Université Bordeaux 1
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
#include <private/autogen/config.h>
#define _ATFILE_SOURCE
#include <assert.h>
#include <sys/types.h>
#include <dirent.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <limits.h>
#include <float.h>
#include <hwloc.h>
#include <private/private.h>
#include <private/debug.h>
#ifdef HAVE_MACH_MACH_INIT_H
#include <mach/mach_init.h>
#endif
#ifdef HAVE_MACH_MACH_HOST_H
#include <mach/mach_host.h>
#endif
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#ifdef HAVE_SYS_SYSCTL_H
#include <sys/sysctl.h>
#endif
#ifdef HWLOC_WIN_SYS
#include <windows.h>
#endif
unsigned hwloc_get_api_version(void)
{
return HWLOC_API_VERSION;
}
void hwloc_report_os_error(const char *msg, int line)
{
static int reported = 0;
if (!reported) {
fprintf(stderr, "****************************************************************************\n");
fprintf(stderr, "* Hwloc has encountered what looks like an error from the operating system.\n");
fprintf(stderr, "*\n");
fprintf(stderr, "* %s\n", msg);
fprintf(stderr, "* Error occurred in topology.c line %d\n", line);
fprintf(stderr, "*\n");
fprintf(stderr, "* Please report this error message to the hwloc user's mailing list,\n");
fprintf(stderr, "* along with the output from the hwloc-gather-topology.sh script.\n");
fprintf(stderr, "****************************************************************************\n");
reported = 1;
}
}
static void
hwloc_topology_clear (struct hwloc_topology *topology);
#if defined(HAVE_SYSCTLBYNAME)
int hwloc_get_sysctlbyname(const char *name, int64_t *ret)
{
union {
int32_t i32;
int64_t i64;
} n;
size_t size = sizeof(n);
if (sysctlbyname(name, &n, &size, NULL, 0))
return -1;
switch (size) {
case sizeof(n.i32):
*ret = n.i32;
break;
case sizeof(n.i64):
*ret = n.i64;
break;
default:
return -1;
}
return 0;
}
#endif
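/* Illustrative note: the union above lets one call handle sysctls that report
* either a 32-bit or a 64-bit integer: after sysctlbyname() succeeds, `size'
* tells which member was filled (4 bytes -> i32, 8 bytes -> i64) and that value
* is widened into the int64_t result; any other size is rejected. */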
#if defined(HAVE_SYSCTL)
int hwloc_get_sysctl(int name[], unsigned namelen, int *ret)
{
int n;
size_t size = sizeof(n);
if (sysctl(name, namelen, &n, &size, NULL, 0))
return -1;
if (size != sizeof(n))
return -1;
*ret = n;
return 0;
}
#endif
/* Return the OS-provided number of processors. Unlike other methods such as
reading sysfs on Linux, this method is not virtualizable; thus it's only
used as a fall-back method, allowing `hwloc_set_fsroot ()' to
have the desired effect. */
unsigned
hwloc_fallback_nbprocessors(struct hwloc_topology *topology) {
int n;
#if HAVE_DECL__SC_NPROCESSORS_ONLN
n = sysconf(_SC_NPROCESSORS_ONLN);
#elif HAVE_DECL__SC_NPROC_ONLN
n = sysconf(_SC_NPROC_ONLN);
#elif HAVE_DECL__SC_NPROCESSORS_CONF
n = sysconf(_SC_NPROCESSORS_CONF);
#elif HAVE_DECL__SC_NPROC_CONF
n = sysconf(_SC_NPROC_CONF);
#elif defined(HAVE_HOST_INFO) && HAVE_HOST_INFO
struct host_basic_info info;
mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
host_info(mach_host_self(), HOST_BASIC_INFO, (integer_t*) &info, &count);
n = info.avail_cpus;
#elif defined(HAVE_SYSCTLBYNAME)
int64_t nn;
if (hwloc_get_sysctlbyname("hw.ncpu", &nn))
nn = -1;
n = nn;
#elif defined(HAVE_SYSCTL) && HAVE_DECL_CTL_HW && HAVE_DECL_HW_NCPU
static int name[2] = {CTL_HW, HW_NCPU};
if (hwloc_get_sysctl(name, sizeof(name)/sizeof(*name), &n))
n = -1;
#elif defined(HWLOC_WIN_SYS)
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
n = sysinfo.dwNumberOfProcessors;
#else
#ifdef __GNUC__
#warning No known way to discover number of available processors on this system
#warning hwloc_fallback_nbprocessors will default to 1
#endif
n = -1;
#endif
if (n >= 1)
topology->support.discovery->pu = 1;
else
n = 1;
return n;
}
/*
* Use the given number of processors to set up a PU level.
*/
void
hwloc_setup_pu_level(struct hwloc_topology *topology,
unsigned nb_pus)
{
struct hwloc_obj *obj;
unsigned oscpu,cpu;
hwloc_debug("%s", "\n\n * CPU cpusets *\n\n");
for (cpu=0,oscpu=0; cpu<nb_pus; oscpu++)
{
obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, oscpu);
obj->cpuset = hwloc_bitmap_alloc();
hwloc_bitmap_only(obj->cpuset, oscpu);
hwloc_debug_2args_bitmap("cpu %u (os %u) has cpuset %s\n",
cpu, oscpu, obj->cpuset);
hwloc_insert_object_by_cpuset(topology, obj);
cpu++;
}
}
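/* Illustrative note: with nb_pus == 4 the loop above creates four PU objects
* whose cpusets each contain exactly one bit (0x1, 0x2, 0x4, 0x8), since
* hwloc_bitmap_only() clears the set and keeps only the given OS index. */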
static void
print_object(struct hwloc_topology *topology, int indent __hwloc_attribute_unused, hwloc_obj_t obj)
{
char line[256], *cpuset = NULL;
hwloc_debug("%*s", 2*indent, "");
hwloc_obj_snprintf(line, sizeof(line), topology, obj, "#", 1);
hwloc_debug("%s", line);
if (obj->cpuset) {
hwloc_bitmap_asprintf(&cpuset, obj->cpuset);
hwloc_debug(" cpuset %s", cpuset);
free(cpuset);
}
if (obj->complete_cpuset) {
hwloc_bitmap_asprintf(&cpuset, obj->complete_cpuset);
hwloc_debug(" complete %s", cpuset);
free(cpuset);
}
if (obj->online_cpuset) {
hwloc_bitmap_asprintf(&cpuset, obj->online_cpuset);
hwloc_debug(" online %s", cpuset);
free(cpuset);
}
if (obj->allowed_cpuset) {
hwloc_bitmap_asprintf(&cpuset, obj->allowed_cpuset);
hwloc_debug(" allowed %s", cpuset);
free(cpuset);
}
if (obj->nodeset) {
hwloc_bitmap_asprintf(&cpuset, obj->nodeset);
hwloc_debug(" nodeset %s", cpuset);
free(cpuset);
}
if (obj->complete_nodeset) {
hwloc_bitmap_asprintf(&cpuset, obj->complete_nodeset);
hwloc_debug(" completeN %s", cpuset);
free(cpuset);
}
if (obj->allowed_nodeset) {
hwloc_bitmap_asprintf(&cpuset, obj->allowed_nodeset);
hwloc_debug(" allowedN %s", cpuset);
free(cpuset);
}
if (obj->arity)
hwloc_debug(" arity %u", obj->arity);
hwloc_debug("%s", "\n");
}
/* Just for debugging. */
static void
print_objects(struct hwloc_topology *topology __hwloc_attribute_unused, int indent __hwloc_attribute_unused, hwloc_obj_t obj __hwloc_attribute_unused)
{
#ifdef HWLOC_DEBUG
print_object(topology, indent, obj);
for (obj = obj->first_child; obj; obj = obj->next_sibling)
print_objects(topology, indent + 1, obj);
#endif
}
void
hwloc_add_object_info(hwloc_obj_t obj, const char *name, const char *value)
{
#define OBJECT_INFO_ALLOC 8
/* nothing allocated initially, (re-)allocate by multiple of 8 */
unsigned alloccount = (obj->infos_count + 1 + (OBJECT_INFO_ALLOC-1)) & ~(OBJECT_INFO_ALLOC-1);
if (obj->infos_count != alloccount)
obj->infos = realloc(obj->infos, alloccount*sizeof(*obj->infos));
obj->infos[obj->infos_count].name = strdup(name);
obj->infos[obj->infos_count].value = strdup(value);
obj->infos_count++;
}
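/* Illustrative note: the rounding above always requests a capacity that is a
* multiple of OBJECT_INFO_ALLOC: infos_count 0..7 all round (count+1) up to 8,
* counts 8..15 round up to 16, and so on, so the requested size only grows in
* blocks of 8 entries even though realloc() is invoked on each call. */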
static void
hwloc_clear_object_distances(hwloc_obj_t obj)
{
unsigned i;
for (i=0; i<obj->distances_count; i++)
hwloc_free_logical_distances(obj->distances[i]);
free(obj->distances);
obj->distances = NULL;
obj->distances_count = 0;
}
/* Free an object and all its content. */
void
hwloc_free_unlinked_object(hwloc_obj_t obj)
{
unsigned i;
switch (obj->type) {
default:
break;
}
for(i=0; i<obj->infos_count; i++) {
free(obj->infos[i].name);
free(obj->infos[i].value);
}
free(obj->infos);
hwloc_clear_object_distances(obj);
free(obj->memory.page_types);
free(obj->attr);
free(obj->children);
free(obj->name);
hwloc_bitmap_free(obj->cpuset);
hwloc_bitmap_free(obj->complete_cpuset);
hwloc_bitmap_free(obj->online_cpuset);
hwloc_bitmap_free(obj->allowed_cpuset);
hwloc_bitmap_free(obj->nodeset);
hwloc_bitmap_free(obj->complete_nodeset);
hwloc_bitmap_free(obj->allowed_nodeset);
free(obj);
}
/*
* How to compare objects based on types.
*
* Note that HIGHER/DEEPER is only a (consistent) heuristic, used to sort
* objects with same cpuset consistently.
* Only EQUAL / not EQUAL can be relied upon.
*/
enum hwloc_type_cmp_e {
HWLOC_TYPE_HIGHER,
HWLOC_TYPE_DEEPER,
HWLOC_TYPE_EQUAL
};
/* WARNING: The indexes of this array MUST match the ordering of
the obj_order_type[] array, below. Specifically, the values must
be laid out such that:
obj_order_type[obj_type_order[N]] = N
for all HWLOC_OBJ_* values of N. Put differently:
obj_type_order[A] = B
where the A values are in order of the hwloc_obj_type_t enum, and
the B values are the corresponding indexes of obj_order_type.
We can't use C99 designated initializers to set this up in a slightly safer manner
-- bummer. :-(
*************************************************************
*** DO NOT CHANGE THE ORDERING OF THIS ARRAY WITHOUT TRIPLE
*** CHECKING ITS CORRECTNESS!
*************************************************************
*/
static unsigned obj_type_order[] = {
/* first entry is HWLOC_OBJ_SYSTEM */ 0,
/* next entry is HWLOC_OBJ_MACHINE */ 1,
/* next entry is HWLOC_OBJ_NODE */ 3,
/* next entry is HWLOC_OBJ_SOCKET */ 4,
/* next entry is HWLOC_OBJ_CACHE */ 5,
/* next entry is HWLOC_OBJ_CORE */ 6,
/* next entry is HWLOC_OBJ_PU */ 7,
/* next entry is HWLOC_OBJ_GROUP */ 2,
/* next entry is HWLOC_OBJ_MISC */ 8,
};
static const hwloc_obj_type_t obj_order_type[] = {
HWLOC_OBJ_SYSTEM,
HWLOC_OBJ_MACHINE,
HWLOC_OBJ_GROUP,
HWLOC_OBJ_NODE,
HWLOC_OBJ_SOCKET,
HWLOC_OBJ_CACHE,
HWLOC_OBJ_CORE,
HWLOC_OBJ_PU,
HWLOC_OBJ_MISC,
};
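/* Illustrative check of the invariant described above (assuming the enum values
* follow the listing order used in obj_type_order[], starting at 0):
* HWLOC_OBJ_GROUP is the 8th enum value (7), obj_type_order[7] == 2 and
* obj_order_type[2] == HWLOC_OBJ_GROUP, so obj_order_type[obj_type_order[N]] == N
* holds for N == HWLOC_OBJ_GROUP; the same can be checked for every other type. */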
static unsigned __hwloc_attribute_const
hwloc_get_type_order(hwloc_obj_type_t type)
{
return obj_type_order[type];
}
#if !defined(NDEBUG)
static hwloc_obj_type_t hwloc_get_order_type(int order)
{
return obj_order_type[order];
}
#endif
int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2)
{
unsigned order1 = hwloc_get_type_order(type1);
unsigned order2 = hwloc_get_type_order(type2);
return order1 - order2;
}
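/* Illustrative note: the result is just the difference of the two order values,
* so hwloc_compare_types(HWLOC_OBJ_MACHINE, HWLOC_OBJ_CORE) computes 1 - 6 and
* comes out negative (Machine sorts above Core), while a positive result means
* the first type is normally found deeper than the second one. */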
static enum hwloc_type_cmp_e
hwloc_type_cmp(hwloc_obj_t obj1, hwloc_obj_t obj2)
{
if (hwloc_compare_types(obj1->type, obj2->type) > 0)
return HWLOC_TYPE_DEEPER;
if (hwloc_compare_types(obj1->type, obj2->type) < 0)
return HWLOC_TYPE_HIGHER;
/* Caches have the same types but can have different depths. */
if (obj1->type == HWLOC_OBJ_CACHE) {
if (obj1->attr->cache.depth < obj2->attr->cache.depth)
return HWLOC_TYPE_DEEPER;
else if (obj1->attr->cache.depth > obj2->attr->cache.depth)
return HWLOC_TYPE_HIGHER;
}
/* Group objects have the same types but can have different depths. */
if (obj1->type == HWLOC_OBJ_GROUP) {
if (obj1->attr->group.depth < obj2->attr->group.depth)
return HWLOC_TYPE_DEEPER;
else if (obj1->attr->group.depth > obj2->attr->group.depth)
return HWLOC_TYPE_HIGHER;
}
return HWLOC_TYPE_EQUAL;
}
/*
* How to compare objects based on cpusets.
*/
enum hwloc_obj_cmp_e {
HWLOC_OBJ_EQUAL, /**< \brief Equal */
HWLOC_OBJ_INCLUDED, /**< \brief Strictly included into */
HWLOC_OBJ_CONTAINS, /**< \brief Strictly contains */
HWLOC_OBJ_INTERSECTS, /**< \brief Intersects, but no inclusion! */
HWLOC_OBJ_DIFFERENT /**< \brief No intersection */
};
static int
hwloc_obj_cmp(hwloc_obj_t obj1, hwloc_obj_t obj2)
{
if (!obj1->cpuset || hwloc_bitmap_iszero(obj1->cpuset)
|| !obj2->cpuset || hwloc_bitmap_iszero(obj2->cpuset))
return HWLOC_OBJ_DIFFERENT;
if (hwloc_bitmap_isequal(obj1->cpuset, obj2->cpuset)) {
/* Same cpuset, subsort by type to have a consistent ordering. */
switch (hwloc_type_cmp(obj1, obj2)) {
case HWLOC_TYPE_DEEPER:
return HWLOC_OBJ_INCLUDED;
case HWLOC_TYPE_HIGHER:
return HWLOC_OBJ_CONTAINS;
case HWLOC_TYPE_EQUAL:
if (obj1->type == HWLOC_OBJ_MISC) {
/* Misc objects may vary by name */
int res = strcmp(obj1->name, obj2->name);
if (res < 0)
return HWLOC_OBJ_INCLUDED;
if (res > 0)
return HWLOC_OBJ_CONTAINS;
if (res == 0)
return HWLOC_OBJ_EQUAL;
}
/* Same level cpuset and type! Let's hope it's coherent. */
return HWLOC_OBJ_EQUAL;
}
/* For dumb compilers */
abort();
} else {
/* Different cpusets, sort by inclusion. */
if (hwloc_bitmap_isincluded(obj1->cpuset, obj2->cpuset))
return HWLOC_OBJ_INCLUDED;
if (hwloc_bitmap_isincluded(obj2->cpuset, obj1->cpuset))
return HWLOC_OBJ_CONTAINS;
if (hwloc_bitmap_intersects(obj1->cpuset, obj2->cpuset))
return HWLOC_OBJ_INTERSECTS;
return HWLOC_OBJ_DIFFERENT;
}
}
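/* Illustrative note: for a Socket with cpuset 0x0000000f and a Core with cpuset
* 0x00000003, the comparison above returns HWLOC_OBJ_CONTAINS when called as
* (socket, core) and HWLOC_OBJ_INCLUDED when called as (core, socket); when both
* cpusets are identical it falls back to the type comparison, so a Core covering
* the same PUs as a Socket is still considered INCLUDED in it. */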
/*
* How to insert objects into the topology.
*
* Note: during detection, only the first_child and next_sibling pointers are
* kept up to date. Others are computed only once topology detection is
* complete.
*/
#define merge_index(new, old, field, type) \
if ((old)->field == (type) -1) \
(old)->field = (new)->field;
#define merge_sizes(new, old, field) \
if (!(old)->field) \
(old)->field = (new)->field;
#ifdef HWLOC_DEBUG
#define check_sizes(new, old, field) \
if ((new)->field) \
assert((old)->field == (new)->field)
#else
#define check_sizes(new, old, field)
#endif
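/* Illustrative expansion (not part of the original source): merge_sizes(obj,
* child, attr->cache.size) becomes
*   if (!(child)->attr->cache.size)
*     (child)->attr->cache.size = (obj)->attr->cache.size;
* i.e. the already-inserted object only inherits a value it does not have yet,
* while check_sizes() turns into an assert() comparing the two in debug builds. */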
/* Try to insert OBJ in CUR, recurse if needed */
static int
hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur, hwloc_obj_t obj,
hwloc_report_error_t report_error)
{
hwloc_obj_t child, container, *cur_children, *obj_children, next_child = NULL;
int put;
/* Make sure we haven't gone too deep. */
if (!hwloc_bitmap_isincluded(obj->cpuset, cur->cpuset)) {
fprintf(stderr,"recursion has gone too deep?!\n");
return -1;
}
/* Check whether OBJ is included in some child. */
container = NULL;
for (child = cur->first_child; child; child = child->next_sibling) {
switch (hwloc_obj_cmp(obj, child)) {
case HWLOC_OBJ_EQUAL:
merge_index(obj, child, os_level, signed);
if (obj->os_level != child->os_level) {
fprintf(stderr, "Different OS level\n");
return -1;
}
merge_index(obj, child, os_index, unsigned);
if (obj->os_index != child->os_index) {
fprintf(stderr, "Different OS indexes\n");
return -1;
}
switch(obj->type) {
case HWLOC_OBJ_NODE:
/* Do not check these, they may change between calls */
merge_sizes(obj, child, memory.local_memory);
merge_sizes(obj, child, memory.total_memory);
/* if both objects have a page_types array, just keep the biggest one for now */
if (obj->memory.page_types_len && child->memory.page_types_len)
hwloc_debug("%s", "merging page_types by keeping the biggest one only\n");
if (obj->memory.page_types_len < child->memory.page_types_len) {
free(obj->memory.page_types);
} else {
free(child->memory.page_types);
child->memory.page_types_len = obj->memory.page_types_len;
child->memory.page_types = obj->memory.page_types;
obj->memory.page_types = NULL;
obj->memory.page_types_len = 0;
}
break;
case HWLOC_OBJ_CACHE:
merge_sizes(obj, child, attr->cache.size);
check_sizes(obj, child, attr->cache.size);
merge_sizes(obj, child, attr->cache.linesize);
check_sizes(obj, child, attr->cache.linesize);
break;
default:
break;
}
/* Already present, no need to insert. */
return -1;
case HWLOC_OBJ_INCLUDED:
if (container) {
if (report_error)
report_error("object included in several different objects!", __LINE__);
/* We can't handle that. */
return -1;
}
/* This child contains OBJ. */
container = child;
break;
case HWLOC_OBJ_INTERSECTS:
if (report_error)
report_error("object intersection without inclusion!", __LINE__);
/* We can't handle that. */
return -1;
case HWLOC_OBJ_CONTAINS:
/* OBJ will be above CHILD. */
break;
case HWLOC_OBJ_DIFFERENT:
/* OBJ will be alongside CHILD. */
break;
}
}
if (container) {
/* OBJ is strictly contained in some child of CUR, go deeper. */
return hwloc___insert_object_by_cpuset(topology, container, obj, report_error);
}
/*
* Children of CUR are either completely different from or contained into
* OBJ. Take those that are contained (keeping sorting order), and sort OBJ
* along those that are different.
*/
/* OBJ is not put yet. */
put = 0;
/* These always point to the slot where the next child will be appended (the last next_sibling link). */
cur_children = &cur->first_child;
obj_children = &obj->first_child;
/* Construct CUR's and OBJ's children list. */
/* Iteration with prefetching to be completely safe against CHILD removal. */
for (child = cur->first_child, child ? next_child = child->next_sibling : NULL;
child;
child = next_child, child ? next_child = child->next_sibling : NULL) {
switch (hwloc_obj_cmp(obj, child)) {
case HWLOC_OBJ_DIFFERENT:
/* Leave CHILD in CUR. */
if (!put && hwloc_bitmap_compare_first(obj->cpuset, child->cpuset) < 0) {
/* Sort children by cpuset: put OBJ before CHILD in CUR's children. */
*cur_children = obj;
cur_children = &obj->next_sibling;
put = 1;
}
/* Now put CHILD in CUR's children. */
*cur_children = child;
cur_children = &child->next_sibling;
break;
case HWLOC_OBJ_CONTAINS:
/* OBJ contains CHILD, put the latter in the former. */
*obj_children = child;
obj_children = &child->next_sibling;
break;
case HWLOC_OBJ_EQUAL:
case HWLOC_OBJ_INCLUDED:
case HWLOC_OBJ_INTERSECTS:
/* Shouldn't ever happen as we have handled them above. */
abort();
}
}
/* Put OBJ last in CUR's children if not already done so. */
if (!put) {
*cur_children = obj;
cur_children = &obj->next_sibling;
}
/* Close children lists. */
*obj_children = NULL;
*cur_children = NULL;
return 0;
}
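/* Illustrative walk-through: inserting a Core with cpuset 0x03 under a root whose
* children are Socket A (cpuset 0x0f) and Socket B (cpuset 0xf0): the first loop
* above finds hwloc_obj_cmp(core, socketA) == HWLOC_OBJ_INCLUDED, so socketA
* becomes the container and the function recurses into it; if socketA has no
* children yet, the second loop does nothing and the core is simply appended as
* its first child. */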
/* insertion routine that lets you change the error reporting callback */
int
hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj,
hwloc_report_error_t report_error)
{
int ret;
/* Start at the top. */
/* Add the cpuset to the top */
hwloc_bitmap_or(topology->levels[0][0]->complete_cpuset, topology->levels[0][0]->complete_cpuset, obj->cpuset);
if (obj->nodeset)
hwloc_bitmap_or(topology->levels[0][0]->complete_nodeset, topology->levels[0][0]->complete_nodeset, obj->nodeset);
ret = hwloc___insert_object_by_cpuset(topology, topology->levels[0][0], obj, report_error);
if (ret < 0)
hwloc_free_unlinked_object(obj);
return ret;
}
/* the default insertion routine warns in case of error.
* it's used by most backends */
void
hwloc_insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj)
{
hwloc__insert_object_by_cpuset(topology, obj, hwloc_report_os_error);
}
void
hwloc_insert_object_by_parent(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_obj_t obj)
{
hwloc_obj_t child, next_child = obj->first_child;
hwloc_obj_t *current;
/* Append to the end of the list */
for (current = &parent->first_child; *current; current = &(*current)->next_sibling)
;
*current = obj;
obj->next_sibling = NULL;
obj->first_child = NULL;
/* Use the new object to insert children */
parent = obj;
/* Recursively insert children below */
while (next_child) {
child = next_child;
next_child = child->next_sibling;
hwloc_insert_object_by_parent(topology, parent, child);
}
}
static void
hwloc_connect_children(hwloc_obj_t parent);
/* Adds a misc object _after_ detection, and thus has to reconnect all the pointers */
hwloc_obj_t
hwloc_topology_insert_misc_object_by_cpuset(struct hwloc_topology *topology, hwloc_const_bitmap_t cpuset, const char *name)
{
hwloc_obj_t obj, child;
int err;
if (hwloc_bitmap_iszero(cpuset))
return NULL;
if (!hwloc_bitmap_isincluded(cpuset, hwloc_topology_get_complete_cpuset(topology)))
return NULL;
obj = hwloc_alloc_setup_object(HWLOC_OBJ_MISC, -1);
if (name)
obj->name = strdup(name);
obj->cpuset = hwloc_bitmap_dup(cpuset);
/* initialize default cpusets, we'll adjust them later */
obj->complete_cpuset = hwloc_bitmap_dup(cpuset);
obj->allowed_cpuset = hwloc_bitmap_dup(cpuset);
obj->online_cpuset = hwloc_bitmap_dup(cpuset);
err = hwloc__insert_object_by_cpuset(topology, obj, NULL /* do not show errors on stdout */);
if (err < 0)
return NULL;
hwloc_connect_children(topology->levels[0][0]);
if ((child = obj->first_child) != NULL && child->cpuset) {
/* keep the main cpuset untouched, but update other cpusets and nodesets from children */
obj->nodeset = hwloc_bitmap_alloc();
obj->complete_nodeset = hwloc_bitmap_alloc();
obj->allowed_nodeset = hwloc_bitmap_alloc();
while (child) {
if (child->complete_cpuset)
hwloc_bitmap_or(obj->complete_cpuset, obj->complete_cpuset, child->complete_cpuset);
if (child->allowed_cpuset)
hwloc_bitmap_or(obj->allowed_cpuset, obj->allowed_cpuset, child->allowed_cpuset);
if (child->online_cpuset)
hwloc_bitmap_or(obj->online_cpuset, obj->online_cpuset, child->online_cpuset);
if (child->nodeset)
hwloc_bitmap_or(obj->nodeset, obj->nodeset, child->nodeset);
if (child->complete_nodeset)
hwloc_bitmap_or(obj->complete_nodeset, obj->complete_nodeset, child->complete_nodeset);
if (child->allowed_nodeset)
hwloc_bitmap_or(obj->allowed_nodeset, obj->allowed_nodeset, child->allowed_nodeset);
child = child->next_sibling;
}
} else {
/* copy the parent nodesets */
obj->nodeset = hwloc_bitmap_dup(obj->parent->nodeset);
obj->complete_nodeset = hwloc_bitmap_dup(obj->parent->complete_nodeset);
obj->allowed_nodeset = hwloc_bitmap_dup(obj->parent->allowed_nodeset);
}
return obj;
}
hwloc_obj_t
hwloc_topology_insert_misc_object_by_parent(struct hwloc_topology *topology, hwloc_obj_t parent, const char *name)
{
hwloc_obj_t obj = hwloc_alloc_setup_object(HWLOC_OBJ_MISC, -1);
if (name)
obj->name = strdup(name);
hwloc_insert_object_by_parent(topology, parent, obj);
hwloc_connect_children(topology->levels[0][0]);
/* no need to hwloc_connect_levels() since misc objects are not in levels */
return obj;
}
/* Traverse children of a parent in a safe way: reread the next pointer as
* appropriate to prevent crash on child deletion: */
#define for_each_child_safe(child, parent, pchild) \
for (pchild = &(parent)->first_child, child = *pchild; \
child; \
/* Check whether the current child was not dropped. */ \
(*pchild == child ? pchild = &(child->next_sibling) : NULL), \
/* Get pointer to next child. */ \
child = *pchild)
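/* Illustrative usage (should_drop() is a hypothetical predicate;
* unlink_and_free_single_object() is defined further below):
*   hwloc_obj_t child, *pchild;
*   for_each_child_safe(child, parent, pchild)
*     if (should_drop(child))
*       unlink_and_free_single_object(pchild);
* If the body unlinks *pchild, the macro notices that *pchild no longer equals
* child, keeps pchild in place and resumes at whatever object replaced it. */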
static int hwloc_memory_page_type_compare(const void *_a, const void *_b)
{
const struct hwloc_obj_memory_page_type_s *a = _a;
const struct hwloc_obj_memory_page_type_s *b = _b;
/* consider 0 as larger so that 0-size page_types go to the end */
if (a->size == b->size)
return 0;
if (!b->size)
return -1;
if (!a->size)
return 1;
/* compare the uint64_t sizes explicitly instead of casting their difference to int */
return a->size < b->size ? -1 : 1;
}
/* Propagate memory counts */
static void
propagate_total_memory(hwloc_obj_t obj)
{
hwloc_obj_t *temp, child;
unsigned i;
/* reset total before counting local and children memory */
obj->memory.total_memory = 0;
/* Propagate memory up */
for_each_child_safe(child, obj, temp) {
propagate_total_memory(child);
obj->memory.total_memory += child->memory.total_memory;
}
obj->memory.total_memory += obj->memory.local_memory;
/* By the way, sort the page_type array.
* Cannot do it on insert since some backends (e.g. XML) add page_types after inserting the object.
*/
qsort(obj->memory.page_types, obj->memory.page_types_len, sizeof(*obj->memory.page_types), hwloc_memory_page_type_compare);
/* Ignore 0-size page_types, they are at the end */
for(i=obj->memory.page_types_len; i>=1; i--)
if (obj->memory.page_types[i-1].size)
break;
obj->memory.page_types_len = i;
}
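/* Illustrative note: for a machine with two NUMA nodes of 2GB local memory each
* and no memory attached to the machine object itself, the recursion above leaves
* total_memory at 2GB on each node and 4GB on the machine; page_type arrays end up
* sorted by increasing size with trailing zero-size entries truncated off. */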
/* Collect the cpuset of all the PU objects. */
static void
collect_proc_cpuset(hwloc_obj_t obj, hwloc_obj_t sys)
{
hwloc_obj_t child, *temp;
if (sys) {
/* We are already given a pointer to a system object */
if (obj->type == HWLOC_OBJ_PU)
hwloc_bitmap_or(sys->cpuset, sys->cpuset, obj->cpuset);
} else {
if (obj->cpuset) {
/* This object is the root of a machine */
sys = obj;
/* Assume no PU for now */
hwloc_bitmap_zero(obj->cpuset);
}
}
for_each_child_safe(child, obj, temp)
collect_proc_cpuset(child, sys);
}
/* While traversing down and up, propagate the offline/disallowed cpus by
* and'ing them to and from the first object that has a cpuset */
static void
propagate_unused_cpuset(hwloc_obj_t obj, hwloc_obj_t sys)
{
hwloc_obj_t child, *temp;
if (obj->cpuset) {
if (sys) {
/* We are already given a pointer to a system object, update it and update ourselves */
hwloc_bitmap_t mask = hwloc_bitmap_alloc();
/* Apply the topology cpuset */
hwloc_bitmap_and(obj->cpuset, obj->cpuset, sys->cpuset);
/* Update complete cpuset down */
if (obj->complete_cpuset) {
hwloc_bitmap_and(obj->complete_cpuset, obj->complete_cpuset, sys->complete_cpuset);
} else {
obj->complete_cpuset = hwloc_bitmap_dup(sys->complete_cpuset);
hwloc_bitmap_and(obj->complete_cpuset, obj->complete_cpuset, obj->cpuset);
}
/* Update online cpusets */
if (obj->online_cpuset) {
/* Update ours */
hwloc_bitmap_and(obj->online_cpuset, obj->online_cpuset, sys->online_cpuset);
/* Update the given cpuset, but only what we know */
hwloc_bitmap_copy(mask, obj->cpuset);
hwloc_bitmap_not(mask, mask);
hwloc_bitmap_or(mask, mask, obj->online_cpuset);
hwloc_bitmap_and(sys->online_cpuset, sys->online_cpuset, mask);
} else {
/* Just take it as such */
obj->online_cpuset = hwloc_bitmap_dup(sys->online_cpuset);
hwloc_bitmap_and(obj->online_cpuset, obj->online_cpuset, obj->cpuset);
}
/* Update allowed cpusets */
if (obj->allowed_cpuset) {
/* Update ours */
hwloc_bitmap_and(obj->allowed_cpuset, obj->allowed_cpuset, sys->allowed_cpuset);
/* Update the given cpuset, but only what we know */
hwloc_bitmap_copy(mask, obj->cpuset);
hwloc_bitmap_not(mask, mask);
hwloc_bitmap_or(mask, mask, obj->allowed_cpuset);
hwloc_bitmap_and(sys->allowed_cpuset, sys->allowed_cpuset, mask);
} else {
/* Just take it as such */
obj->allowed_cpuset = hwloc_bitmap_dup(sys->allowed_cpuset);
hwloc_bitmap_and(obj->allowed_cpuset, obj->allowed_cpuset, obj->cpuset);
}
hwloc_bitmap_free(mask);
} else {
/* This object is the root of a machine */
sys = obj;
/* Apply complete cpuset to cpuset, online_cpuset and allowed_cpuset, it
* will automatically be applied below */
if (obj->complete_cpuset)
hwloc_bitmap_and(obj->cpuset, obj->cpuset, obj->complete_cpuset);
else
obj->complete_cpuset = hwloc_bitmap_dup(obj->cpuset);
if (obj->online_cpuset)
hwloc_bitmap_and(obj->online_cpuset, obj->online_cpuset, obj->complete_cpuset);
else
obj->online_cpuset = hwloc_bitmap_dup(obj->complete_cpuset);
if (obj->allowed_cpuset)
hwloc_bitmap_and(obj->allowed_cpuset, obj->allowed_cpuset, obj->complete_cpuset);
else
obj->allowed_cpuset = hwloc_bitmap_dup(obj->complete_cpuset);
}
}
for_each_child_safe(child, obj, temp)
propagate_unused_cpuset(child, sys);
}
/* Force full nodeset for non-NUMA machines */
static void
add_default_object_sets(hwloc_obj_t obj, int parent_has_sets)
{
hwloc_obj_t child, *temp;
if (parent_has_sets || obj->cpuset) {
/* if the parent has non-NULL sets, or if the object has non-NULL cpusets,
* it must have non-NULL nodesets
*/
assert(obj->cpuset);
assert(obj->online_cpuset);
assert(obj->complete_cpuset);
assert(obj->allowed_cpuset);
if (!obj->nodeset)
obj->nodeset = hwloc_bitmap_alloc_full();
if (!obj->complete_nodeset)
obj->complete_nodeset = hwloc_bitmap_alloc_full();
if (!obj->allowed_nodeset)
obj->allowed_nodeset = hwloc_bitmap_alloc_full();
} else {
/* parent has no sets and object has NULL cpusets,
* it must have NULL nodesets
*/
assert(!obj->nodeset);
assert(!obj->complete_nodeset);
assert(!obj->allowed_nodeset);
}
for_each_child_safe(child, obj, temp)
add_default_object_sets(child, obj->cpuset != NULL);
}
/* Propagate nodesets up and down */
static void
propagate_nodeset(hwloc_obj_t obj, hwloc_obj_t sys)
{
hwloc_obj_t child, *temp;
hwloc_bitmap_t parent_nodeset = NULL;
int parent_weight = 0;
if (!sys && obj->nodeset) {
sys = obj;
if (!obj->complete_nodeset)
obj->complete_nodeset = hwloc_bitmap_dup(obj->nodeset);
if (!obj->allowed_nodeset)
obj->allowed_nodeset = hwloc_bitmap_dup(obj->complete_nodeset);
}
if (sys) {
if (obj->nodeset) {
/* Some existing nodeset coming from above, to possibly propagate down */
parent_nodeset = obj->nodeset;
parent_weight = hwloc_bitmap_weight(parent_nodeset);
} else
obj->nodeset = hwloc_bitmap_alloc();
}
for_each_child_safe(child, obj, temp) {
/* Propagate singleton nodesets down */
if (parent_weight == 1) {
if (!child->nodeset)
child->nodeset = hwloc_bitmap_dup(obj->nodeset);
else if (!hwloc_bitmap_isequal(child->nodeset, parent_nodeset)) {
hwloc_debug_bitmap("Oops, parent nodeset %s", parent_nodeset);
hwloc_debug_bitmap(" is different from child nodeset %s, ignoring the child one\n", child->nodeset);
hwloc_bitmap_copy(child->nodeset, parent_nodeset);
}
}
/* Recurse */
propagate_nodeset(child, sys);
/* Propagate children nodesets up */
if (sys && child->nodeset)
hwloc_bitmap_or(obj->nodeset, obj->nodeset, child->nodeset);
}
}
/* Propagate allowed and complete nodesets */
static void
propagate_nodesets(hwloc_obj_t obj)
{
hwloc_bitmap_t mask = hwloc_bitmap_alloc();
hwloc_obj_t child, *temp;
for_each_child_safe(child, obj, temp) {
if (obj->nodeset) {
/* Update complete nodesets down */
if (child->complete_nodeset) {
hwloc_bitmap_and(child->complete_nodeset, child->complete_nodeset, obj->complete_nodeset);
} else if (child->nodeset) {
child->complete_nodeset = hwloc_bitmap_dup(obj->complete_nodeset);
hwloc_bitmap_and(child->complete_nodeset, child->complete_nodeset, child->nodeset);
} /* else the child doesn't have nodeset information, we can not provide a complete nodeset */
/* Update allowed nodesets down */
if (child->allowed_nodeset) {
hwloc_bitmap_and(child->allowed_nodeset, child->allowed_nodeset, obj->allowed_nodeset);
} else if (child->nodeset) {
child->allowed_nodeset = hwloc_bitmap_dup(obj->allowed_nodeset);
hwloc_bitmap_and(child->allowed_nodeset, child->allowed_nodeset, child->nodeset);
}
}
propagate_nodesets(child);
if (obj->nodeset) {
/* Update allowed nodesets up */
if (child->nodeset && child->allowed_nodeset) {
hwloc_bitmap_copy(mask, child->nodeset);
hwloc_bitmap_andnot(mask, mask, child->allowed_nodeset);
hwloc_bitmap_andnot(obj->allowed_nodeset, obj->allowed_nodeset, mask);
}
}
}
hwloc_bitmap_free(mask);
if (obj->nodeset) {
/* Apply complete nodeset to nodeset and allowed_nodeset */
if (obj->complete_nodeset)
hwloc_bitmap_and(obj->nodeset, obj->nodeset, obj->complete_nodeset);
else
obj->complete_nodeset = hwloc_bitmap_dup(obj->nodeset);
if (obj->allowed_nodeset)
hwloc_bitmap_and(obj->allowed_nodeset, obj->allowed_nodeset, obj->complete_nodeset);
else
obj->allowed_nodeset = hwloc_bitmap_dup(obj->complete_nodeset);
}
}
static void
apply_nodeset(hwloc_obj_t obj, hwloc_obj_t sys)
{
unsigned i;
hwloc_obj_t child, *temp;
if (sys) {
if (obj->type == HWLOC_OBJ_NODE && obj->os_index != (unsigned) -1 &&
!hwloc_bitmap_isset(sys->allowed_nodeset, obj->os_index)) {
hwloc_debug("Dropping memory from disallowed node %u\n", obj->os_index);
obj->memory.local_memory = 0;
obj->memory.total_memory = 0;
for(i=0; i<obj->memory.page_types_len; i++)
obj->memory.page_types[i].count = 0;
}
} else {
if (obj->allowed_nodeset) {
sys = obj;
}
}
for_each_child_safe(child, obj, temp)
apply_nodeset(child, sys);
}
static void
remove_unused_cpusets(hwloc_obj_t obj)
{
hwloc_obj_t child, *temp;
if (obj->cpuset) {
hwloc_bitmap_and(obj->cpuset, obj->cpuset, obj->online_cpuset);
hwloc_bitmap_and(obj->cpuset, obj->cpuset, obj->allowed_cpuset);
}
for_each_child_safe(child, obj, temp)
remove_unused_cpusets(child);
}
/* Remove an object from its parent and free it.
* Only updates next_sibling/first_child pointers,
* so may only be used during early discovery.
* Children are inserted where the object was.
*/
static void
unlink_and_free_single_object(hwloc_obj_t *pparent)
{
hwloc_obj_t parent = *pparent;
hwloc_obj_t child = parent->first_child;
/* Replace object with its list of children */
if (child) {
*pparent = child;
while (child->next_sibling)
child = child->next_sibling;
child->next_sibling = parent->next_sibling;
} else
*pparent = parent->next_sibling;
/* Remove ignored object */
hwloc_free_unlinked_object(parent);
}
/* Remove all ignored objects. */
static void
remove_ignored(hwloc_topology_t topology, hwloc_obj_t *pparent)
{
hwloc_obj_t parent = *pparent, child, *pchild;
for_each_child_safe(child, parent, pchild)
remove_ignored(topology, pchild);
if (parent != topology->levels[0][0] &&
topology->ignored_types[parent->type] == HWLOC_IGNORE_TYPE_ALWAYS) {
hwloc_debug("%s", "\nDropping ignored object ");
print_object(topology, 0, parent);
unlink_and_free_single_object(pparent);
}
}
/* Remove an object and its children from its parent and free them.
* Only updates next_sibling/first_child pointers,
* so may only be used during early discovery.
*/
static void
unlink_and_free_object_and_children(hwloc_obj_t *pobj)
{
hwloc_obj_t obj = *pobj, child, *pchild;
for_each_child_safe(child, obj, pchild)
unlink_and_free_object_and_children(pchild);
*pobj = obj->next_sibling;
hwloc_free_unlinked_object(obj);
}
/* Remove all children whose cpuset is empty, except NUMA nodes
* since we want to keep memory information. */
static void
remove_empty(hwloc_topology_t topology, hwloc_obj_t *pobj)
{
hwloc_obj_t obj = *pobj, child, *pchild;
for_each_child_safe(child, obj, pchild)
remove_empty(topology, pchild);
if (obj->type != HWLOC_OBJ_NODE
&& obj->cpuset /* FIXME: needed for PCI devices? */
&& hwloc_bitmap_iszero(obj->cpuset)) {
/* Remove empty children */
hwloc_debug("%s", "\nRemoving empty object ");
print_object(topology, 0, obj);
unlink_and_free_object_and_children(pobj);
}
}
/* adjust object cpusets according to the given droppedcpuset,
* drop objects whose cpuset becomes empty,
* and mark dropped nodes in droppednodeset
*/
static void
restrict_object(hwloc_topology_t topology, unsigned long flags, hwloc_obj_t *pobj, hwloc_const_cpuset_t droppedcpuset, hwloc_nodeset_t droppednodeset, int droppingparent)
{
hwloc_obj_t obj = *pobj, child, *pchild;
int dropping;
int modified = obj->complete_cpuset && hwloc_bitmap_intersects(obj->complete_cpuset, droppedcpuset);
hwloc_clear_object_distances(obj);
if (obj->cpuset)
hwloc_bitmap_andnot(obj->cpuset, obj->cpuset, droppedcpuset);
if (obj->complete_cpuset)
hwloc_bitmap_andnot(obj->complete_cpuset, obj->complete_cpuset, droppedcpuset);
if (obj->online_cpuset)
hwloc_bitmap_andnot(obj->online_cpuset, obj->online_cpuset, droppedcpuset);
if (obj->allowed_cpuset)
hwloc_bitmap_andnot(obj->allowed_cpuset, obj->allowed_cpuset, droppedcpuset);
if (obj->type == HWLOC_OBJ_MISC) {
dropping = droppingparent && !(flags & HWLOC_RESTRICT_FLAG_ADAPT_MISC);
} else {
dropping = droppingparent || (obj->cpuset && hwloc_bitmap_iszero(obj->cpuset));
}
if (modified)
for_each_child_safe(child, obj, pchild)
restrict_object(topology, flags, pchild, droppedcpuset, droppednodeset, dropping);
if (dropping) {
hwloc_debug("%s", "\nRemoving object during restrict");
print_object(topology, 0, obj);
if (obj->type == HWLOC_OBJ_NODE)
hwloc_bitmap_set(droppednodeset, obj->os_index);
/* remove the object from the tree (no need to remove from levels, they will be entirely rebuilt by the caller) */
unlink_and_free_single_object(pobj);
/* do not remove children. if they were to be removed, they would have been already */
}
}
/* adjust object nodesets according to the given droppednodeset
*/
static void
restrict_object_nodeset(hwloc_topology_t topology, hwloc_obj_t *pobj, hwloc_nodeset_t droppednodeset)
{
hwloc_obj_t obj = *pobj, child, *pchild;
/* if this object isn't modified, don't bother looking at children */
if (obj->complete_nodeset && !hwloc_bitmap_intersects(obj->complete_nodeset, droppednodeset))
return;
if (obj->nodeset)
hwloc_bitmap_andnot(obj->nodeset, obj->nodeset, droppednodeset);
if (obj->complete_nodeset)
hwloc_bitmap_andnot(obj->complete_nodeset, obj->complete_nodeset, droppednodeset);
if (obj->allowed_nodeset)
hwloc_bitmap_andnot(obj->allowed_nodeset, obj->allowed_nodeset, droppednodeset);
for_each_child_safe(child, obj, pchild)
restrict_object_nodeset(topology, pchild, droppednodeset);
}
/*
* Merge with the only child if either the parent or the child has a type to be
* ignored while keeping structure
*/
static void
merge_useless_child(hwloc_topology_t topology, hwloc_obj_t *pparent)
{
hwloc_obj_t parent = *pparent, child, *pchild;
for_each_child_safe(child, parent, pchild)
merge_useless_child(topology, pchild);
child = parent->first_child;
if (!child || child->next_sibling)
/* There are either no children or several of them; keep the structure as is. */
return;
/* TODO: have a preference order? */
if (topology->ignored_types[parent->type] == HWLOC_IGNORE_TYPE_KEEP_STRUCTURE) {
/* Parent can be ignored in favor of the child. */
hwloc_debug("%s", "\nIgnoring parent ");
print_object(topology, 0, parent);
*pparent = child;
child->next_sibling = parent->next_sibling;
hwloc_free_unlinked_object(parent);
} else if (topology->ignored_types[child->type] == HWLOC_IGNORE_TYPE_KEEP_STRUCTURE) {
/* Child can be ignored in favor of the parent. */
hwloc_debug("%s", "\nIgnoring child ");
print_object(topology, 0, child);
parent->first_child = child->first_child;
hwloc_free_unlinked_object(child);
}
}
/*
* Initialize handy pointers in the whole topology.
* The topology only had first_child and next_sibling pointers.
* When this function returns, all parent/children pointers are initialized.
* The remaining fields (levels, cousins, logical_index, depth, ...) will
* be setup later in hwloc_connect_levels().
*/
static void
hwloc_connect_children(hwloc_obj_t parent)
{
unsigned n;
hwloc_obj_t child, prev_child = NULL;
for (n = 0, child = parent->first_child;
child;
n++, prev_child = child, child = child->next_sibling) {
child->parent = parent;
child->sibling_rank = n;
child->prev_sibling = prev_child;
}
parent->last_child = prev_child;
parent->arity = n;
free(parent->children);
if (!n) {
parent->children = NULL;
return;
}
parent->children = malloc(n * sizeof(*parent->children));
for (n = 0, child = parent->first_child;
child;
n++, child = child->next_sibling) {
parent->children[n] = child;
hwloc_connect_children(child);
}
}
/*
* Check whether there is an object below ROOT that has the same type as OBJ
*/
static int
find_same_type(hwloc_obj_t root, hwloc_obj_t obj)
{
hwloc_obj_t child;
if (hwloc_type_cmp(root, obj) == HWLOC_TYPE_EQUAL)
return 1;
for (child = root->first_child; child; child = child->next_sibling)
if (find_same_type(child, obj))
return 1;
return 0;
}
static int
hwloc_levels_ignore_object(hwloc_obj_t obj)
{
return obj->type != HWLOC_OBJ_MISC;
}
/* traverse the array of current objects and compare them with top_obj.
* if equal, take the object and put its children into the remaining objs.
* if not equal, put the object into the remaining objs.
*/
static int
hwloc_level_take_objects(hwloc_obj_t top_obj,
hwloc_obj_t *current_objs, unsigned n_current_objs,
hwloc_obj_t *taken_objs, unsigned n_taken_objs __hwloc_attribute_unused,
hwloc_obj_t *remaining_objs, unsigned n_remaining_objs __hwloc_attribute_unused)
{
unsigned taken_i = 0;
unsigned new_i = 0;
unsigned ignored = 0;
unsigned i, j;
for (i = 0; i < n_current_objs; i++)
if (hwloc_type_cmp(top_obj, current_objs[i]) == HWLOC_TYPE_EQUAL) {
/* Take it, add children. */
taken_objs[taken_i++] = current_objs[i];
for (j = 0; j < current_objs[i]->arity; j++) {
hwloc_obj_t obj = current_objs[i]->children[j];
if (hwloc_levels_ignore_object(obj))
remaining_objs[new_i++] = obj;
else
ignored++;
}
} else {
/* Leave it. */
hwloc_obj_t obj = current_objs[i];
if (hwloc_levels_ignore_object(obj))
remaining_objs[new_i++] = obj;
else
ignored++;
}
#ifdef HWLOC_DEBUG
/* Make sure we didn't mess up. */
assert(taken_i == n_taken_objs);
assert(new_i + ignored == n_current_objs - n_taken_objs + n_remaining_objs);
#endif
return new_i;
}
/*
* Do the remaining work that hwloc_connect_children() did not do earlier.
*/
static int
hwloc_connect_levels(hwloc_topology_t topology)
{
unsigned l, i=0;
hwloc_obj_t *objs, *taken_objs, *new_objs, top_obj;
unsigned n_objs, n_taken_objs, n_new_objs;
/* reset non-root levels (root was initialized during init and will not change here) */
for(l=1; l<HWLOC_DEPTH_MAX; l++)
free(topology->levels[l]);
memset(topology->levels+1, 0, (HWLOC_DEPTH_MAX-1)*sizeof(*topology->levels));
memset(topology->level_nbobjects+1, 0, (HWLOC_DEPTH_MAX-1)*sizeof(*topology->level_nbobjects));
topology->nb_levels = 1;
/* don't touch next_group_depth, the Group objects are still here */
/* initialize all depth to unknown */
for (l=1; l < HWLOC_OBJ_TYPE_MAX; l++)
topology->type_depth[l] = HWLOC_TYPE_DEPTH_UNKNOWN;
topology->type_depth[topology->levels[0][0]->type] = 0;
/* Start with children of the whole system. */
l = 0;
n_objs = topology->levels[0][0]->arity;
objs = malloc(n_objs * sizeof(objs[0]));
if (!objs) {
errno = ENOMEM;
hwloc_topology_clear(topology);
return -1;
}
{
hwloc_obj_t dummy_taken_objs;
/* copy all root children that must go into levels,
* root will go into dummy_taken_objs but we don't need it anyway
* because it stays alone in first level.
*/
n_objs = hwloc_level_take_objects(topology->levels[0][0],
topology->levels[0], 1,
&dummy_taken_objs, 1,
objs, n_objs);
#ifdef HWLOC_DEBUG
assert(dummy_taken_objs == topology->levels[0][0]);
#endif
}
/* Keep building levels while there are objects left in OBJS. */
while (n_objs) {
/* First find which type of object is the topmost.
* Don't use PU if there are other types since we want to keep PU at the bottom.
*/
for (i = 0; i < n_objs; i++)
if (objs[i]->type != HWLOC_OBJ_PU)
break;
top_obj = i == n_objs ? objs[0] : objs[i];
for (i = 0; i < n_objs; i++) {
if (hwloc_type_cmp(top_obj, objs[i]) != HWLOC_TYPE_EQUAL) {
if (find_same_type(objs[i], top_obj)) {
/* OBJS[i] is strictly above an object of the same type as TOP_OBJ, so it
* is above TOP_OBJ. */
top_obj = objs[i];
}
}
}
/* Now pick all objects of the same type, build a level with them and
* replace them with their children. */
/* First count them. */
n_taken_objs = 0;
n_new_objs = 0;
for (i = 0; i < n_objs; i++)
if (hwloc_type_cmp(top_obj, objs[i]) == HWLOC_TYPE_EQUAL) {
n_taken_objs++;
n_new_objs += objs[i]->arity;
}
/* New level. */
taken_objs = malloc((n_taken_objs + 1) * sizeof(taken_objs[0]));
/* New list of pending objects. */
new_objs = malloc((n_objs - n_taken_objs + n_new_objs) * sizeof(new_objs[0]));
n_new_objs = hwloc_level_take_objects(top_obj,
objs, n_objs,
taken_objs, n_taken_objs,
new_objs, n_new_objs);
/* Ok, put numbers in the level. */
for (i = 0; i < n_taken_objs; i++) {
taken_objs[i]->depth = topology->nb_levels;
taken_objs[i]->logical_index = i;
if (i) {
taken_objs[i]->prev_cousin = taken_objs[i-1];
taken_objs[i-1]->next_cousin = taken_objs[i];
}
}
/* One more level! */
if (top_obj->type == HWLOC_OBJ_CACHE)
hwloc_debug("--- Cache level depth %u", top_obj->attr->cache.depth);
else
hwloc_debug("--- %s level", hwloc_obj_type_string(top_obj->type));
hwloc_debug(" has number %u\n\n", topology->nb_levels);
if (topology->type_depth[top_obj->type] == HWLOC_TYPE_DEPTH_UNKNOWN)
topology->type_depth[top_obj->type] = topology->nb_levels;
else
topology->type_depth[top_obj->type] = HWLOC_TYPE_DEPTH_MULTIPLE; /* the type exists at several depths */
taken_objs[n_taken_objs] = NULL;
topology->level_nbobjects[topology->nb_levels] = n_taken_objs;
topology->levels[topology->nb_levels] = taken_objs;
topology->nb_levels++;
free(objs);
objs = new_objs;
n_objs = n_new_objs;
}
/* It's empty now. */
free(objs);
return 0;
}
/*
* Empty binding hooks always returning success
*/
static int dontset_return_complete_cpuset(hwloc_topology_t topology, hwloc_cpuset_t set)
{
hwloc_const_cpuset_t cpuset = hwloc_topology_get_complete_cpuset(topology);
if (cpuset) {
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
return 0;
} else
return -1;
}
static int dontset_thisthread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
return 0;
}
static int dontget_thisthread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, int flags __hwloc_attribute_unused)
{
return dontset_return_complete_cpuset(topology, set);
}
static int dontset_thisproc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
return 0;
}
static int dontget_thisproc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, int flags __hwloc_attribute_unused)
{
return dontset_return_complete_cpuset(topology, set);
}
static int dontset_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
return 0;
}
static int dontget_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_bitmap_t cpuset, int flags __hwloc_attribute_unused)
{
return dontset_return_complete_cpuset(topology, cpuset);
}
#ifdef hwloc_thread_t
static int dontset_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t tid __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
return 0;
}
static int dontget_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t tid __hwloc_attribute_unused, hwloc_bitmap_t cpuset, int flags __hwloc_attribute_unused)
{
return dontset_return_complete_cpuset(topology, cpuset);
}
#endif
static int dontset_return_complete_nodeset(hwloc_topology_t topology, hwloc_nodeset_t set, hwloc_membind_policy_t *policy)
{
hwloc_const_nodeset_t nodeset = hwloc_topology_get_complete_nodeset(topology);
if (nodeset) {
hwloc_bitmap_copy(set, hwloc_topology_get_complete_nodeset(topology));
*policy = HWLOC_MEMBIND_DEFAULT;
return 0;
} else
return -1;
}
static int dontset_thisproc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
return 0;
}
static int dontget_thisproc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused)
{
return dontset_return_complete_nodeset(topology, set, policy);
}
static int dontset_thisthread_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
return 0;
}
static int dontget_thisthread_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused)
{
return dontset_return_complete_nodeset(topology, set, policy);
}
static int dontset_proc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
return 0;
}
static int dontget_proc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused)
{
return dontset_return_complete_nodeset(topology, set, policy);
}
static int dontset_area_membind(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr __hwloc_attribute_unused, size_t size __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
return 0;
}
static int dontget_area_membind(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr __hwloc_attribute_unused, size_t size __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused)
{
return dontset_return_complete_nodeset(topology, set, policy);
}
static void * dontalloc_membind(hwloc_topology_t topology __hwloc_attribute_unused, size_t size __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
return malloc(size);
}
static int dontfree_membind(hwloc_topology_t topology __hwloc_attribute_unused, void *addr __hwloc_attribute_unused, size_t size __hwloc_attribute_unused)
{
free(addr);
return 0;
}
static void alloc_cpusets(hwloc_obj_t obj)
{
obj->cpuset = hwloc_bitmap_alloc_full();
obj->complete_cpuset = hwloc_bitmap_alloc();
obj->online_cpuset = hwloc_bitmap_alloc_full();
obj->allowed_cpuset = hwloc_bitmap_alloc_full();
obj->nodeset = hwloc_bitmap_alloc();
obj->complete_nodeset = hwloc_bitmap_alloc();
obj->allowed_nodeset = hwloc_bitmap_alloc_full();
}
/* Main discovery loop */
static int
hwloc_discover(struct hwloc_topology *topology)
{
if (topology->backend_type == HWLOC_BACKEND_SYNTHETIC) {
alloc_cpusets(topology->levels[0][0]);
hwloc_look_synthetic(topology);
#ifdef HWLOC_HAVE_XML
} else if (topology->backend_type == HWLOC_BACKEND_XML) {
hwloc_look_xml(topology);
#endif
} else {
/* Raw detection, from coarser levels to finer levels for more efficiency. */
/* hwloc_look_* functions should use hwloc_obj_add to add objects initialized
 * through hwloc_alloc_setup_object. For node levels, nodeset, memory_kB and
* huge_page_free must be initialized. For cache levels, memory_kB and
* attr->cache.depth must be initialized. For misc levels, attr->misc.depth
* must be initialized.
*/
/* There must be at least a PU object for each logical processor, at worst
* produced by hwloc_setup_pu_level()
*/
/* To be able to just use hwloc_insert_object_by_cpuset to insert the object
* in the topology according to the cpuset, the cpuset field must be
* initialized.
*/
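/* As an illustrative sketch only (not tied to any particular backend), adding
 * one PU per detected logical processor could look like:
 *   obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, os_index);
 *   obj->cpuset = hwloc_bitmap_alloc();
 *   hwloc_bitmap_set(obj->cpuset, os_index);
 *   hwloc_insert_object_by_cpuset(topology, obj);
 */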
/* A priori, all processors are visible in the topology, online, and allowed
* for the application.
*
* - If some processors exist but topology information is unknown for them
* (and thus the backend couldn't create objects for them), they should be
 * added to the complete_cpuset field of the lowest object where they could
 * reside.
*
* - If some processors are not online, they should be dropped from the
* online_cpuset field.
*
* - If some processors are not allowed for the application (e.g. for
* administration reasons), they should be dropped from the allowed_cpuset
* field.
*
 * The same applies to the node sets complete_nodeset and allowed_nodeset.
*
 * If such a field doesn't exist yet, it can be allocated and initialized to
* zero (for complete), or to full (for online and allowed). The values are
* automatically propagated to the whole tree after detection.
*
* Here, we only allocate cpusets for the root object.
*/
alloc_cpusets(topology->levels[0][0]);
/* Each OS type should also fill the bind functions pointers, at least the
* set_cpubind one
*/
# ifdef HWLOC_LINUX_SYS
# define HAVE_OS_SUPPORT
hwloc_look_linux(topology);
# endif /* HWLOC_LINUX_SYS */
# ifdef HWLOC_AIX_SYS
# define HAVE_OS_SUPPORT
hwloc_look_aix(topology);
# endif /* HWLOC_AIX_SYS */
# ifdef HWLOC_OSF_SYS
# define HAVE_OS_SUPPORT
hwloc_look_osf(topology);
# endif /* HWLOC_OSF_SYS */
# ifdef HWLOC_SOLARIS_SYS
# define HAVE_OS_SUPPORT
hwloc_look_solaris(topology);
# endif /* HWLOC_SOLARIS_SYS */
# ifdef HWLOC_WIN_SYS
# define HAVE_OS_SUPPORT
hwloc_look_windows(topology);
# endif /* HWLOC_WIN_SYS */
# ifdef HWLOC_DARWIN_SYS
# define HAVE_OS_SUPPORT
hwloc_look_darwin(topology);
# endif /* HWLOC_DARWIN_SYS */
# ifdef HWLOC_FREEBSD_SYS
# define HAVE_OS_SUPPORT
hwloc_look_freebsd(topology);
# endif /* HWLOC_FREEBSD_SYS */
# ifdef HWLOC_HPUX_SYS
# define HAVE_OS_SUPPORT
hwloc_look_hpux(topology);
# endif /* HWLOC_HPUX_SYS */
# ifndef HAVE_OS_SUPPORT
hwloc_setup_pu_level(topology, hwloc_fallback_nbprocessors(topology));
# endif /* Unsupported OS */
# ifndef HWLOC_LINUX_SYS
if (topology->is_thissystem) {
/* gather uname info, except for Linux, which does it internally depending on load options */
hwloc_add_uname_info(topology);
}
# endif
}
/*
* Now that backends have detected objects, sort them and establish pointers.
*/
print_objects(topology, 0, topology->levels[0][0]);
/*
* Group levels by distances
*/
hwloc_convert_distances_indexes_into_objects(topology);
hwloc_group_by_distances(topology);
/* First tweak a bit to clean the topology. */
hwloc_debug("%s", "\nRestrict topology cpusets to existing PU and NODE objects\n");
collect_proc_cpuset(topology->levels[0][0], NULL);
hwloc_debug("%s", "\nPropagate offline and disallowed cpus down and up\n");
propagate_unused_cpuset(topology->levels[0][0], NULL);
if (topology->levels[0][0]->complete_nodeset && hwloc_bitmap_iszero(topology->levels[0][0]->complete_nodeset)) {
/* No nodeset, drop all of them */
hwloc_bitmap_free(topology->levels[0][0]->nodeset);
topology->levels[0][0]->nodeset = NULL;
hwloc_bitmap_free(topology->levels[0][0]->complete_nodeset);
topology->levels[0][0]->complete_nodeset = NULL;
hwloc_bitmap_free(topology->levels[0][0]->allowed_nodeset);
topology->levels[0][0]->allowed_nodeset = NULL;
}
hwloc_debug("%s", "\nPropagate nodesets\n");
propagate_nodeset(topology->levels[0][0], NULL);
propagate_nodesets(topology->levels[0][0]);
print_objects(topology, 0, topology->levels[0][0]);
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) {
hwloc_debug("%s", "\nRemoving unauthorized and offline cpusets from all cpusets\n");
remove_unused_cpusets(topology->levels[0][0]);
hwloc_debug("%s", "\nRemoving disallowed memory according to nodesets\n");
apply_nodeset(topology->levels[0][0], NULL);
print_objects(topology, 0, topology->levels[0][0]);
}
hwloc_debug("%s", "\nRemoving ignored objects\n");
remove_ignored(topology, &topology->levels[0][0]);
print_objects(topology, 0, topology->levels[0][0]);
hwloc_debug("%s", "\nRemoving empty objects except numa nodes and PCI devices\n");
remove_empty(topology, &topology->levels[0][0]);
if (!topology->levels[0][0]) {
fprintf(stderr, "Topology became empty, aborting!\n");
abort();
}
print_objects(topology, 0, topology->levels[0][0]);
hwloc_debug("%s", "\nRemoving objects whose type has HWLOC_IGNORE_TYPE_KEEP_STRUCTURE and have only one child or are the only child\n");
merge_useless_child(topology, &topology->levels[0][0]);
print_objects(topology, 0, topology->levels[0][0]);
hwloc_debug("%s", "\nAdd default object sets\n");
add_default_object_sets(topology->levels[0][0], 0);
hwloc_debug("%s", "\nOk, finished tweaking, now connect\n");
/* Now connect handy pointers. */
hwloc_connect_children(topology->levels[0][0]);
print_objects(topology, 0, topology->levels[0][0]);
/* Explore the resulting topology level by level. */
if (hwloc_connect_levels(topology) < 0)
return -1;
/* accumulate children memory in total_memory fields (only once parent is set) */
hwloc_debug("%s", "\nPropagate total memory up\n");
propagate_total_memory(topology->levels[0][0]);
/*
* Now that objects are numbered, take distance matrices from backends and put them in the main topology
*/
hwloc_finalize_logical_distances(topology);
# ifdef HWLOC_HAVE_XML
if (topology->backend_type == HWLOC_BACKEND_XML)
/* make sure the XML-imported distances are ok now that the tree is properly setup */
hwloc_xml_check_distances(topology);
# endif
/*
* Now set binding hooks.
* If the represented system is actually not this system, use dummy binding
* hooks.
*/
if (topology->flags & HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)
topology->is_thissystem = 1;
if (topology->is_thissystem) {
# ifdef HWLOC_LINUX_SYS
hwloc_set_linux_hooks(topology);
# endif /* HWLOC_LINUX_SYS */
# ifdef HWLOC_AIX_SYS
hwloc_set_aix_hooks(topology);
# endif /* HWLOC_AIX_SYS */
# ifdef HWLOC_OSF_SYS
hwloc_set_osf_hooks(topology);
# endif /* HWLOC_OSF_SYS */
# ifdef HWLOC_SOLARIS_SYS
hwloc_set_solaris_hooks(topology);
# endif /* HWLOC_SOLARIS_SYS */
# ifdef HWLOC_WIN_SYS
hwloc_set_windows_hooks(topology);
# endif /* HWLOC_WIN_SYS */
# ifdef HWLOC_DARWIN_SYS
hwloc_set_darwin_hooks(topology);
# endif /* HWLOC_DARWIN_SYS */
# ifdef HWLOC_FREEBSD_SYS
hwloc_set_freebsd_hooks(topology);
# endif /* HWLOC_FREEBSD_SYS */
# ifdef HWLOC_HPUX_SYS
hwloc_set_hpux_hooks(topology);
# endif /* HWLOC_HPUX_SYS */
} else {
topology->set_thisproc_cpubind = dontset_thisproc_cpubind;
topology->get_thisproc_cpubind = dontget_thisproc_cpubind;
topology->set_thisthread_cpubind = dontset_thisthread_cpubind;
topology->get_thisthread_cpubind = dontget_thisthread_cpubind;
topology->set_proc_cpubind = dontset_proc_cpubind;
topology->get_proc_cpubind = dontget_proc_cpubind;
#ifdef hwloc_thread_t
topology->set_thread_cpubind = dontset_thread_cpubind;
topology->get_thread_cpubind = dontget_thread_cpubind;
#endif
topology->get_thisproc_last_cpu_location = dontget_thisproc_cpubind; /* cpubind instead of last_cpu_location is ok */
topology->get_thisthread_last_cpu_location = dontget_thisthread_cpubind; /* cpubind instead of last_cpu_location is ok */
topology->get_proc_last_cpu_location = dontget_proc_cpubind; /* cpubind instead of last_cpu_location is ok */
/* TODO: get_thread_last_cpu_location */
topology->set_thisproc_membind = dontset_thisproc_membind;
topology->get_thisproc_membind = dontget_thisproc_membind;
topology->set_thisthread_membind = dontset_thisthread_membind;
topology->get_thisthread_membind = dontget_thisthread_membind;
topology->set_proc_membind = dontset_proc_membind;
topology->get_proc_membind = dontget_proc_membind;
topology->set_area_membind = dontset_area_membind;
topology->get_area_membind = dontget_area_membind;
topology->alloc_membind = dontalloc_membind;
topology->free_membind = dontfree_membind;
}
/* if not is_thissystem, set_cpubind is fake
* and get_cpubind returns the whole system cpuset,
* so don't report that set/get_cpubind as supported
*/
if (topology->is_thissystem) {
#define DO(which,kind) \
if (topology->kind) \
topology->support.which##bind->kind = 1;
DO(cpu,set_thisproc_cpubind);
DO(cpu,get_thisproc_cpubind);
DO(cpu,set_proc_cpubind);
DO(cpu,get_proc_cpubind);
DO(cpu,set_thisthread_cpubind);
DO(cpu,get_thisthread_cpubind);
DO(cpu,set_thread_cpubind);
DO(cpu,get_thread_cpubind);
DO(cpu,get_thisproc_last_cpu_location);
DO(cpu,get_proc_last_cpu_location);
DO(cpu,get_thisthread_last_cpu_location);
DO(mem,set_thisproc_membind);
DO(mem,get_thisproc_membind);
DO(mem,set_thisthread_membind);
DO(mem,get_thisthread_membind);
DO(mem,set_proc_membind);
DO(mem,get_proc_membind);
DO(mem,set_area_membind);
DO(mem,get_area_membind);
DO(mem,alloc_membind);
}
return 0;
}
/* To be called before discovery is actually launched;
 * resets everything in case a previous load initialized some stuff.
*/
static void
hwloc_topology_setup_defaults(struct hwloc_topology *topology)
{
struct hwloc_obj *root_obj;
/* reset support */
topology->set_thisproc_cpubind = NULL;
topology->get_thisproc_cpubind = NULL;
topology->set_thisthread_cpubind = NULL;
topology->get_thisthread_cpubind = NULL;
topology->set_proc_cpubind = NULL;
topology->get_proc_cpubind = NULL;
#ifdef hwloc_thread_t
topology->set_thread_cpubind = NULL;
topology->get_thread_cpubind = NULL;
#endif
topology->set_thisproc_membind = NULL;
topology->get_thisproc_membind = NULL;
topology->set_thisthread_membind = NULL;
topology->get_thisthread_membind = NULL;
topology->set_proc_membind = NULL;
topology->get_proc_membind = NULL;
topology->set_area_membind = NULL;
topology->get_area_membind = NULL;
topology->alloc = NULL;
topology->alloc_membind = NULL;
topology->free_membind = NULL;
memset(topology->support.discovery, 0, sizeof(*topology->support.discovery));
memset(topology->support.cpubind, 0, sizeof(*topology->support.cpubind));
memset(topology->support.membind, 0, sizeof(*topology->support.membind));
/* Only the System object on top by default */
topology->nb_levels = 1; /* there's at least SYSTEM */
topology->next_group_depth = 0;
topology->levels[0] = malloc (sizeof (struct hwloc_obj));
topology->level_nbobjects[0] = 1;
/* NULLify other levels so that we can detect and free old ones in hwloc_connect_levels() if needed */
memset(topology->levels+1, 0, (HWLOC_DEPTH_MAX-1)*sizeof(*topology->levels));
/* Create the actual machine object, but don't touch its attributes yet
* since the OS backend may still change the object into something else
* (for instance System)
*/
root_obj = hwloc_alloc_setup_object(HWLOC_OBJ_MACHINE, 0);
root_obj->depth = 0;
root_obj->logical_index = 0;
root_obj->sibling_rank = 0;
topology->levels[0][0] = root_obj;
}
int
hwloc_topology_init (struct hwloc_topology **topologyp)
{
struct hwloc_topology *topology;
int i;
topology = malloc (sizeof (struct hwloc_topology));
if(!topology)
return -1;
/* Setup topology context */
topology->is_loaded = 0;
topology->flags = 0;
topology->is_thissystem = 1;
topology->backend_type = HWLOC_BACKEND_NONE; /* backend not specified by default */
topology->pid = 0;
topology->support.discovery = malloc(sizeof(*topology->support.discovery));
topology->support.cpubind = malloc(sizeof(*topology->support.cpubind));
topology->support.membind = malloc(sizeof(*topology->support.membind));
/* Only ignore useless cruft by default */
for(i=0; i< HWLOC_OBJ_TYPE_MAX; i++)
topology->ignored_types[i] = HWLOC_IGNORE_TYPE_NEVER;
topology->ignored_types[HWLOC_OBJ_GROUP] = HWLOC_IGNORE_TYPE_KEEP_STRUCTURE;
hwloc_topology_distances_init(topology);
/* Make the topology look like something coherent but empty */
hwloc_topology_setup_defaults(topology);
*topologyp = topology;
return 0;
}
static void
hwloc_backend_exit(struct hwloc_topology *topology)
{
switch (topology->backend_type) {
#ifdef HWLOC_LINUX_SYS
case HWLOC_BACKEND_SYSFS:
hwloc_backend_sysfs_exit(topology);
break;
#endif
#ifdef HWLOC_HAVE_XML
case HWLOC_BACKEND_XML:
hwloc_backend_xml_exit(topology);
break;
#endif
case HWLOC_BACKEND_SYNTHETIC:
hwloc_backend_synthetic_exit(topology);
break;
default:
break;
}
assert(topology->backend_type == HWLOC_BACKEND_NONE);
}
int
hwloc_topology_set_fsroot(struct hwloc_topology *topology, const char *fsroot_path __hwloc_attribute_unused)
{
/* cleanup existing backend */
hwloc_backend_exit(topology);
#ifdef HWLOC_LINUX_SYS
if (hwloc_backend_sysfs_init(topology, fsroot_path) < 0)
return -1;
#endif /* HWLOC_LINUX_SYS */
return 0;
}
int
hwloc_topology_set_pid(struct hwloc_topology *topology __hwloc_attribute_unused,
hwloc_pid_t pid __hwloc_attribute_unused)
{
#ifdef HWLOC_LINUX_SYS
topology->pid = pid;
return 0;
#else /* HWLOC_LINUX_SYS */
errno = ENOSYS;
return -1;
#endif /* HWLOC_LINUX_SYS */
}
int
hwloc_topology_set_synthetic(struct hwloc_topology *topology, const char *description)
{
/* cleanup existing backend */
hwloc_backend_exit(topology);
return hwloc_backend_synthetic_init(topology, description);
}
int
hwloc_topology_set_xml(struct hwloc_topology *topology __hwloc_attribute_unused,
const char *xmlpath __hwloc_attribute_unused)
{
#ifdef HWLOC_HAVE_XML
/* cleanup existing backend */
hwloc_backend_exit(topology);
return hwloc_backend_xml_init(topology, xmlpath, NULL, 0);
#else /* HWLOC_HAVE_XML */
errno = ENOSYS;
return -1;
#endif /* !HWLOC_HAVE_XML */
}
int
hwloc_topology_set_xmlbuffer(struct hwloc_topology *topology __hwloc_attribute_unused,
const char *xmlbuffer __hwloc_attribute_unused,
int size __hwloc_attribute_unused)
{
#ifdef HWLOC_HAVE_XML
/* cleanup existing backend */
hwloc_backend_exit(topology);
return hwloc_backend_xml_init(topology, NULL, xmlbuffer, size);
#else /* HWLOC_HAVE_XML */
errno = ENOSYS;
return -1;
#endif /* !HWLOC_HAVE_XML */
}
int
hwloc_topology_set_flags (struct hwloc_topology *topology, unsigned long flags)
{
topology->flags = flags;
return 0;
}
int
hwloc_topology_ignore_type(struct hwloc_topology *topology, hwloc_obj_type_t type)
{
if (type >= HWLOC_OBJ_TYPE_MAX) {
errno = EINVAL;
return -1;
}
if (type == HWLOC_OBJ_PU) {
/* we need the PU level */
errno = EINVAL;
return -1;
}
topology->ignored_types[type] = HWLOC_IGNORE_TYPE_ALWAYS;
return 0;
}
int
hwloc_topology_ignore_type_keep_structure(struct hwloc_topology *topology, hwloc_obj_type_t type)
{
if (type >= HWLOC_OBJ_TYPE_MAX) {
errno = EINVAL;
return -1;
}
if (type == HWLOC_OBJ_PU) {
/* we need the PU level */
errno = EINVAL;
return -1;
}
topology->ignored_types[type] = HWLOC_IGNORE_TYPE_KEEP_STRUCTURE;
return 0;
}
int
hwloc_topology_ignore_all_keep_structure(struct hwloc_topology *topology)
{
unsigned type;
for(type=0; type<HWLOC_OBJ_TYPE_MAX; type++)
if (type != HWLOC_OBJ_PU)
topology->ignored_types[type] = HWLOC_IGNORE_TYPE_KEEP_STRUCTURE;
return 0;
}
static void
hwloc_topology_clear_tree (struct hwloc_topology *topology, struct hwloc_obj *root)
{
unsigned i;
for(i=0; i<root->arity; i++)
hwloc_topology_clear_tree (topology, root->children[i]);
hwloc_free_unlinked_object (root);
}
static void
hwloc_topology_clear (struct hwloc_topology *topology)
{
unsigned l;
hwloc_topology_distances_clear(topology);
hwloc_topology_clear_tree (topology, topology->levels[0][0]);
for (l=0; l<topology->nb_levels; l++)
free(topology->levels[l]);
}
void
hwloc_topology_destroy (struct hwloc_topology *topology)
{
hwloc_topology_clear(topology);
hwloc_topology_distances_destroy(topology);
hwloc_backend_exit(topology);
free(topology->support.discovery);
free(topology->support.cpubind);
free(topology->support.membind);
free(topology);
}
int
hwloc_topology_load (struct hwloc_topology *topology)
{
char *local_env;
int err;
if (topology->is_loaded) {
hwloc_topology_clear(topology);
hwloc_topology_setup_defaults(topology);
topology->is_loaded = 0;
}
/* enforce backend anyway if a FORCE variable was given */
#ifdef HWLOC_LINUX_SYS
{
char *fsroot_path_env = getenv("HWLOC_FORCE_FSROOT");
if (fsroot_path_env) {
hwloc_backend_exit(topology);
hwloc_backend_sysfs_init(topology, fsroot_path_env);
}
}
#endif
#ifdef HWLOC_HAVE_XML
{
char *xmlpath_env = getenv("HWLOC_FORCE_XMLFILE");
if (xmlpath_env) {
hwloc_backend_exit(topology);
hwloc_backend_xml_init(topology, xmlpath_env, NULL, 0);
}
}
#endif
/* only apply non-FORCE variables if we have not changed the backend yet */
#ifdef HWLOC_LINUX_SYS
if (topology->backend_type == HWLOC_BACKEND_NONE) {
char *fsroot_path_env = getenv("HWLOC_FSROOT");
if (fsroot_path_env)
hwloc_backend_sysfs_init(topology, fsroot_path_env);
}
#endif
#ifdef HWLOC_HAVE_XML
if (topology->backend_type == HWLOC_BACKEND_NONE) {
char *xmlpath_env = getenv("HWLOC_XMLFILE");
if (xmlpath_env)
hwloc_backend_xml_init(topology, xmlpath_env, NULL, 0);
}
#endif
/* always apply non-FORCE THISSYSTEM since it was explicitly designed to override setups from other backends */
local_env = getenv("HWLOC_THISSYSTEM");
if (local_env)
topology->is_thissystem = atoi(local_env);
/* if we haven't chosen the backend, set the OS-specific one if needed */
if (topology->backend_type == HWLOC_BACKEND_NONE) {
#ifdef HWLOC_LINUX_SYS
if (hwloc_backend_sysfs_init(topology, "/") < 0)
return -1;
#endif
}
/* get distance matrices from the environment and store them (as indexes) in the topology.
 * indexes will be converted into objects later once the tree is filled
*/
hwloc_store_distances_from_env(topology);
/* actual topology discovery */
err = hwloc_discover(topology);
if (err < 0)
return err;
/* enforce THISSYSTEM if given in a FORCE variable */
local_env = getenv("HWLOC_FORCE_THISSYSTEM");
if (local_env)
topology->is_thissystem = atoi(local_env);
#ifndef HWLOC_DEBUG
if (getenv("HWLOC_DEBUG_CHECK"))
#endif
hwloc_topology_check(topology);
topology->is_loaded = 1;
return 0;
}
int
hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cpuset, unsigned long flags)
{
hwloc_bitmap_t droppedcpuset, droppednodeset;
/* make sure we'll keep something in the topology */
if (!hwloc_bitmap_intersects(cpuset, topology->levels[0][0]->cpuset)) {
errno = EINVAL;
return -1;
}
droppedcpuset = hwloc_bitmap_alloc();
droppednodeset = hwloc_bitmap_alloc();
/* drop object based on the reverse of cpuset, and fill the 'dropped' nodeset */
hwloc_bitmap_not(droppedcpuset, cpuset);
restrict_object(topology, flags, &topology->levels[0][0], droppedcpuset, droppednodeset, 0 /* root cannot be removed */);
/* update nodesets according to dropped nodeset */
restrict_object_nodeset(topology, &topology->levels[0][0], droppednodeset);
hwloc_bitmap_free(droppedcpuset);
hwloc_bitmap_free(droppednodeset);
hwloc_connect_children(topology->levels[0][0]);
hwloc_connect_levels(topology);
propagate_total_memory(topology->levels[0][0]);
hwloc_restrict_distances(topology, flags);
hwloc_convert_distances_indexes_into_objects(topology);
hwloc_finalize_logical_distances(topology);
return 0;
}
int
hwloc_topology_is_thissystem(struct hwloc_topology *topology)
{
return topology->is_thissystem;
}
unsigned
hwloc_topology_get_depth(struct hwloc_topology *topology)
{
return topology->nb_levels;
}
/* check the children of a parent object */
static void
hwloc__check_children(struct hwloc_obj *parent)
{
hwloc_bitmap_t remaining_parent_set;
unsigned j;
if (!parent->arity) {
/* check whether that parent has no children for real */
assert(!parent->children);
assert(!parent->first_child);
assert(!parent->last_child);
return;
}
/* check whether that parent has children for real */
assert(parent->children);
assert(parent->first_child);
assert(parent->last_child);
/* first child specific checks */
assert(parent->first_child->sibling_rank == 0);
assert(parent->first_child == parent->children[0]);
assert(parent->first_child->prev_sibling == NULL);
/* last child specific checks */
assert(parent->last_child->sibling_rank == parent->arity-1);
assert(parent->last_child == parent->children[parent->arity-1]);
assert(parent->last_child->next_sibling == NULL);
if (parent->cpuset) {
remaining_parent_set = hwloc_bitmap_dup(parent->cpuset);
for(j=0; j<parent->arity; j++) {
if (!parent->children[j]->cpuset)
continue;
/* check that child cpuset is included in the parent */
assert(hwloc_bitmap_isincluded(parent->children[j]->cpuset, remaining_parent_set));
#if !defined(NDEBUG)
/* check that children are correctly ordered (see below), empty ones may be anywhere */
if (!hwloc_bitmap_iszero(parent->children[j]->cpuset)) {
int firstchild = hwloc_bitmap_first(parent->children[j]->cpuset);
int firstparent = hwloc_bitmap_first(remaining_parent_set);
assert(firstchild == firstparent);
}
#endif
/* clear previously used parent cpuset bits so that the check above actually
 * verifies that children cpusets do not intersect and are ordered properly
*/
hwloc_bitmap_andnot(remaining_parent_set, remaining_parent_set, parent->children[j]->cpuset);
}
assert(hwloc_bitmap_iszero(remaining_parent_set));
hwloc_bitmap_free(remaining_parent_set);
}
/* checks for all children */
for(j=1; j<parent->arity; j++) {
assert(parent->children[j]->sibling_rank == j);
assert(parent->children[j-1]->next_sibling == parent->children[j]);
assert(parent->children[j]->prev_sibling == parent->children[j-1]);
}
}
/* check a whole topology structure */
void
hwloc_topology_check(struct hwloc_topology *topology)
{
struct hwloc_obj *obj;
hwloc_obj_type_t type;
unsigned i, j, depth;
/* check type orders */
for (type = HWLOC_OBJ_SYSTEM; type < HWLOC_OBJ_TYPE_MAX; type++) {
assert(hwloc_get_order_type(hwloc_get_type_order(type)) == type);
}
for (i = hwloc_get_type_order(HWLOC_OBJ_SYSTEM);
i <= hwloc_get_type_order(HWLOC_OBJ_CORE); i++) {
assert(i == hwloc_get_type_order(hwloc_get_order_type(i)));
}
/* check that last level is PU */
assert(hwloc_get_depth_type(topology, hwloc_topology_get_depth(topology)-1) == HWLOC_OBJ_PU);
/* check that other levels are not PU */
for(i=1; i<hwloc_topology_get_depth(topology)-1; i++)
assert(hwloc_get_depth_type(topology, i) != HWLOC_OBJ_PU);
/* top-level specific checks */
assert(hwloc_get_nbobjs_by_depth(topology, 0) == 1);
obj = hwloc_get_root_obj(topology);
assert(obj);
depth = hwloc_topology_get_depth(topology);
/* check each level */
for(i=0; i<depth; i++) {
unsigned width = hwloc_get_nbobjs_by_depth(topology, i);
struct hwloc_obj *prev = NULL;
/* check each object of the level */
for(j=0; j<width; j++) {
obj = hwloc_get_obj_by_depth(topology, i, j);
/* check that the object is correctly placed horizontally and vertically */
assert(obj);
assert(obj->depth == i);
assert(obj->logical_index == j);
/* check that all objects in the level have the same type */
if (prev) {
assert(hwloc_type_cmp(obj, prev) == HWLOC_TYPE_EQUAL);
assert(prev->next_cousin == obj);
assert(obj->prev_cousin == prev);
}
if (obj->complete_cpuset) {
if (obj->cpuset)
assert(hwloc_bitmap_isincluded(obj->cpuset, obj->complete_cpuset));
if (obj->online_cpuset)
assert(hwloc_bitmap_isincluded(obj->online_cpuset, obj->complete_cpuset));
if (obj->allowed_cpuset)
assert(hwloc_bitmap_isincluded(obj->allowed_cpuset, obj->complete_cpuset));
}
if (obj->complete_nodeset) {
if (obj->nodeset)
assert(hwloc_bitmap_isincluded(obj->nodeset, obj->complete_nodeset));
if (obj->allowed_nodeset)
assert(hwloc_bitmap_isincluded(obj->allowed_nodeset, obj->complete_nodeset));
}
/* check children */
hwloc__check_children(obj);
prev = obj;
}
/* check first object of the level */
obj = hwloc_get_obj_by_depth(topology, i, 0);
assert(obj);
assert(!obj->prev_cousin);
/* check type */
assert(hwloc_get_depth_type(topology, i) == obj->type);
assert(i == (unsigned) hwloc_get_type_depth(topology, obj->type) ||
HWLOC_TYPE_DEPTH_MULTIPLE == hwloc_get_type_depth(topology, obj->type));
/* check last object of the level */
obj = hwloc_get_obj_by_depth(topology, i, width-1);
assert(obj);
assert(!obj->next_cousin);
/* check last+1 object of the level */
obj = hwloc_get_obj_by_depth(topology, i, width);
assert(!obj);
}
/* check bottom objects */
assert(hwloc_get_nbobjs_by_depth(topology, depth-1) > 0);
for(j=0; j<hwloc_get_nbobjs_by_depth(topology, depth-1); j++) {
obj = hwloc_get_obj_by_depth(topology, depth-1, j);
assert(obj);
/* bottom-level object must always be PU */
assert(obj->type == HWLOC_OBJ_PU);
}
}
const struct hwloc_topology_support *
hwloc_topology_get_support(struct hwloc_topology * topology)
{
return &topology->support;
}
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2011 INRIA. All rights reserved.
* Copyright © 2009-2010 Université Bordeaux 1
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
#include <private/autogen/config.h>
#include <hwloc.h>
#include <private/private.h>
#include <private/misc.h>
#include <private/debug.h>
int
hwloc_get_type_depth (struct hwloc_topology *topology, hwloc_obj_type_t type)
{
return topology->type_depth[type];
}
hwloc_obj_type_t
hwloc_get_depth_type (hwloc_topology_t topology, unsigned depth)
{
if (depth >= topology->nb_levels)
return (hwloc_obj_type_t) -1;
return topology->levels[depth][0]->type;
}
unsigned
hwloc_get_nbobjs_by_depth (struct hwloc_topology *topology, unsigned depth)
{
if (depth >= topology->nb_levels)
return 0;
return topology->level_nbobjects[depth];
}
struct hwloc_obj *
hwloc_get_obj_by_depth (struct hwloc_topology *topology, unsigned depth, unsigned idx)
{
if (depth >= topology->nb_levels)
return NULL;
if (idx >= topology->level_nbobjects[depth])
return NULL;
return topology->levels[depth][idx];
}
unsigned hwloc_get_closest_objs (struct hwloc_topology *topology, struct hwloc_obj *src, struct hwloc_obj **objs, unsigned max)
{
struct hwloc_obj *parent, *nextparent, **src_objs;
int i,src_nbobjects;
unsigned stored = 0;
if (!src->cpuset)
return 0;
src_nbobjects = topology->level_nbobjects[src->depth];
src_objs = topology->levels[src->depth];
parent = src;
while (stored < max) {
while (1) {
nextparent = parent->parent;
if (!nextparent)
goto out;
if (!nextparent->cpuset || !hwloc_bitmap_isequal(parent->cpuset, nextparent->cpuset))
break;
parent = nextparent;
}
if (!nextparent->cpuset)
break;
/* traverse src's objects and find those that are in nextparent and were not in parent */
for(i=0; i<src_nbobjects; i++) {
if (hwloc_bitmap_isincluded(src_objs[i]->cpuset, nextparent->cpuset)
&& !hwloc_bitmap_isincluded(src_objs[i]->cpuset, parent->cpuset)) {
objs[stored++] = src_objs[i];
if (stored == max)
goto out;
}
}
parent = nextparent;
}
out:
return stored;
}
static int
hwloc__get_largest_objs_inside_cpuset (struct hwloc_obj *current, hwloc_const_bitmap_t set,
struct hwloc_obj ***res, int *max)
{
int gotten = 0;
unsigned i;
/* the caller must ensure this */
if (*max <= 0)
return 0;
if (hwloc_bitmap_isequal(current->cpuset, set)) {
**res = current;
(*res)++;
(*max)--;
return 1;
}
for (i=0; i<current->arity; i++) {
hwloc_bitmap_t subset = hwloc_bitmap_dup(set);
int ret;
/* split out the cpuset part corresponding to this child and see if there's anything to do */
if (current->children[i]->cpuset) {
hwloc_bitmap_and(subset, subset, current->children[i]->cpuset);
if (hwloc_bitmap_iszero(subset)) {
hwloc_bitmap_free(subset);
continue;
}
}
ret = hwloc__get_largest_objs_inside_cpuset (current->children[i], subset, res, max);
gotten += ret;
hwloc_bitmap_free(subset);
/* if no more room to store remaining objects, return what we got so far */
if (!*max)
break;
}
return gotten;
}
int
hwloc_get_largest_objs_inside_cpuset (struct hwloc_topology *topology, hwloc_const_bitmap_t set,
struct hwloc_obj **objs, int max)
{
struct hwloc_obj *current = topology->levels[0][0];
if (!current->cpuset || !hwloc_bitmap_isincluded(set, current->cpuset))
return -1;
if (max <= 0)
return 0;
return hwloc__get_largest_objs_inside_cpuset (current, set, &objs, &max);
}
const char *
hwloc_obj_type_string (hwloc_obj_type_t obj)
{
switch (obj)
{
case HWLOC_OBJ_SYSTEM: return "System";
case HWLOC_OBJ_MACHINE: return "Machine";
case HWLOC_OBJ_MISC: return "Misc";
case HWLOC_OBJ_GROUP: return "Group";
case HWLOC_OBJ_NODE: return "NUMANode";
case HWLOC_OBJ_SOCKET: return "Socket";
case HWLOC_OBJ_CACHE: return "Cache";
case HWLOC_OBJ_CORE: return "Core";
case HWLOC_OBJ_PU: return "PU";
default: return "Unknown";
}
}
hwloc_obj_type_t
hwloc_obj_type_of_string (const char * string)
{
if (!strcasecmp(string, "System")) return HWLOC_OBJ_SYSTEM;
if (!strcasecmp(string, "Machine")) return HWLOC_OBJ_MACHINE;
if (!strcasecmp(string, "Misc")) return HWLOC_OBJ_MISC;
if (!strcasecmp(string, "Group")) return HWLOC_OBJ_GROUP;
if (!strcasecmp(string, "NUMANode") || !strcasecmp(string, "Node")) return HWLOC_OBJ_NODE;
if (!strcasecmp(string, "Socket")) return HWLOC_OBJ_SOCKET;
if (!strcasecmp(string, "Cache")) return HWLOC_OBJ_CACHE;
if (!strcasecmp(string, "Core")) return HWLOC_OBJ_CORE;
if (!strcasecmp(string, "PU") || !strcasecmp(string, "proc") /* backward compatiliby with 0.9 */) return HWLOC_OBJ_PU;
return (hwloc_obj_type_t) -1;
}
#define hwloc_memory_size_printf_value(_size, _verbose) \
((_size) < (10ULL<<20) || _verbose ? (((_size)>>9)+1)>>1 : (_size) < (10ULL<<30) ? (((_size)>>19)+1)>>1 : (((_size)>>29)+1)>>1)
#define hwloc_memory_size_printf_unit(_size, _verbose) \
((_size) < (10ULL<<20) || _verbose ? "KB" : (_size) < (10ULL<<30) ? "MB" : "GB")
int
hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj, int verbose)
{
hwloc_obj_type_t type = obj->type;
switch (type) {
case HWLOC_OBJ_MISC:
case HWLOC_OBJ_SYSTEM:
case HWLOC_OBJ_MACHINE:
case HWLOC_OBJ_NODE:
case HWLOC_OBJ_SOCKET:
case HWLOC_OBJ_CORE:
case HWLOC_OBJ_PU:
return hwloc_snprintf(string, size, "%s", hwloc_obj_type_string(type));
case HWLOC_OBJ_CACHE:
return hwloc_snprintf(string, size, "L%u%s", obj->attr->cache.depth, verbose ? hwloc_obj_type_string(type): "");
case HWLOC_OBJ_GROUP:
/* TODO: more pretty presentation? */
return hwloc_snprintf(string, size, "%s%u", hwloc_obj_type_string(type), obj->attr->group.depth);
default:
if (size > 0)
*string = '\0';
return 0;
}
}
int
hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj, const char * separator, int verbose)
{
const char *prefix = "";
char *tmp = string;
ssize_t tmplen = size;
int ret = 0;
int res;
/* make sure we output at least an empty string */
if (size)
*string = '\0';
/* print memory attributes */
res = 0;
if (verbose) {
if (obj->memory.local_memory)
res = hwloc_snprintf(tmp, tmplen, "%slocal=%lu%s%stotal=%lu%s",
prefix,
(unsigned long) hwloc_memory_size_printf_value(obj->memory.total_memory, verbose),
hwloc_memory_size_printf_unit(obj->memory.total_memory, verbose),
separator,
(unsigned long) hwloc_memory_size_printf_value(obj->memory.local_memory, verbose),
hwloc_memory_size_printf_unit(obj->memory.local_memory, verbose));
else if (obj->memory.total_memory)
res = hwloc_snprintf(tmp, tmplen, "%stotal=%lu%s",
prefix,
(unsigned long) hwloc_memory_size_printf_value(obj->memory.total_memory, verbose),
hwloc_memory_size_printf_unit(obj->memory.total_memory, verbose));
} else {
if (obj->memory.total_memory)
res = hwloc_snprintf(tmp, tmplen, "%s%lu%s",
prefix,
(unsigned long) hwloc_memory_size_printf_value(obj->memory.total_memory, verbose),
hwloc_memory_size_printf_unit(obj->memory.total_memory, verbose));
}
if (res < 0)
return -1;
ret += res;
if (ret > 0)
prefix = separator;
if (res >= tmplen)
res = tmplen>0 ? tmplen - 1 : 0;
tmp += res;
tmplen -= res;
/* printf type-specific attributes */
res = 0;
switch (obj->type) {
case HWLOC_OBJ_CACHE:
if (verbose)
res = hwloc_snprintf(tmp, tmplen, "%s%lu%s%sline=%u",
prefix,
(unsigned long) hwloc_memory_size_printf_value(obj->attr->cache.size, verbose),
hwloc_memory_size_printf_unit(obj->attr->cache.size, verbose),
separator, obj->attr->cache.linesize);
else
res = hwloc_snprintf(tmp, tmplen, "%s%lu%s",
prefix,
(unsigned long) hwloc_memory_size_printf_value(obj->attr->cache.size, verbose),
hwloc_memory_size_printf_unit(obj->attr->cache.size, verbose));
break;
default:
break;
}
if (res < 0)
return -1;
ret += res;
if (ret > 0)
prefix = separator;
if (res >= tmplen)
res = tmplen>0 ? tmplen - 1 : 0;
tmp += res;
tmplen -= res;
/* printf infos */
if (verbose) {
unsigned i;
for(i=0; i<obj->infos_count; i++) {
if (strchr(obj->infos[i].value, ' '))
res = hwloc_snprintf(tmp, tmplen, "%s%s=\"%s\"",
prefix,
obj->infos[i].name, obj->infos[i].value);
else
res = hwloc_snprintf(tmp, tmplen, "%s%s=%s",
prefix,
obj->infos[i].name, obj->infos[i].value);
if (res < 0)
return -1;
ret += res;
if (res >= tmplen)
res = tmplen>0 ? tmplen - 1 : 0;
tmp += res;
tmplen -= res;
if (ret > 0)
prefix = separator;
}
}
return ret;
}
int
hwloc_obj_snprintf(char *string, size_t size,
struct hwloc_topology *topology __hwloc_attribute_unused, struct hwloc_obj *l, const char *_indexprefix, int verbose)
{
const char *indexprefix = _indexprefix ? _indexprefix : "#";
char os_index[12] = "";
char type[64];
char attr[128];
int attrlen;
if (l->os_index != (unsigned) -1) {
hwloc_snprintf(os_index, 12, "%s%u", indexprefix, l->os_index);
}
hwloc_obj_type_snprintf(type, sizeof(type), l, verbose);
attrlen = hwloc_obj_attr_snprintf(attr, sizeof(attr), l, " ", verbose);
if (attrlen > 0)
return hwloc_snprintf(string, size, "%s%s(%s)", type, os_index, attr);
else
return hwloc_snprintf(string, size, "%s%s", type, os_index);
}
int hwloc_obj_cpuset_snprintf(char *str, size_t size, size_t nobj, struct hwloc_obj * const *objs)
{
hwloc_bitmap_t set = hwloc_bitmap_alloc();
int res;
unsigned i;
hwloc_bitmap_zero(set);
for(i=0; i<nobj; i++)
if (objs[i]->cpuset)
hwloc_bitmap_or(set, set, objs[i]->cpuset);
res = hwloc_bitmap_snprintf(str, size, set);
hwloc_bitmap_free(set);
return res;
}
PLPA Authors
============
The IDs in parentheses are those used in Subversion commit notices.
Current Authors
---------------
Indiana University:
- Jeff Squyres (jsquyres)
Lawrence Berkeley National Lab:
- Paul Hargrove (phargrov)
Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
University Research and Technology
Corporation. All rights reserved.
Copyright (c) 2004-2005 The Regents of the University of California.
All rights reserved.
Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
Portions copyright:
Copyright (c) 2004-2005 The University of Tennessee and The University
of Tennessee Research Foundation. All rights
reserved.
Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
University of Stuttgart. All rights reserved.
Copyright (c) 2006, 2007 Advanced Micro Devices, Inc.
All rights reserved.
$COPYRIGHT$
Additional copyrights may follow
$HEADER$
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer listed
in this license in the documentation and/or other materials
provided with the distribution.
- Neither the name of the copyright holders nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
The copyright holders provide no reassurances that the source code
provided does not infringe any patent, copyright, or any other
intellectual property rights of third parties. The copyright holders
disclaim any liability to any recipient for claims brought against
recipient by any third party for infringement of that parties
intellectual property rights.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
SUBDIRS = src
DIST_SUBDIRS = $(SUBDIRS)
EXTRA_DIST = README VERSION LICENSE AUTHORS
Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
University Research and Technology
Corporation. All rights reserved.
Copyright (c) 2004-2005 The Regents of the University of California.
All rights reserved.
Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved.
$COPYRIGHT$
Additional copyrights may follow
$HEADER$
===========================================================================
This file contains the main features as well as overviews of specific
bug fixes (and other actions) for each version of PLPA since
version 1.0.
1.3.2
-----
- Removed a clause from AMD's license notice in plpa_map.c by
direction from Barry S. Newberger, Ph.D., Assistant General Counsel,
Advanced Micro Devices, Inc. per a Red Hat licensing concern
(initiated at https://bugzilla.redhat.com/show_bug.cgi?id=530230).
Removing this clause alleviates Red Hat's concern, even though this
resolution is not shown on the above-cited bugzilla ticket.
- Julian Seward noticed that PLPA's use of the Valgrind API in the
affinity API probe was incorrect and suggested an improvement. Even
though we're ramping down PLPA development, I couldn't really ignore
the Valgrind author's suggestion!
1.3.1
-----
- Fixed bug identified by Eugene Loh at Sun; plpa-taskset -c ranges
were accidentally excluding the processor ID at the top of the
range.
- For standalone PLPA builds, the .so library is now versioned
according to the GNU Libtool guidelines.
1.3
---
- Added --with-valgrind support that allows building support for
Valgrind into PLPA (i.e., tell Valgrind to not warn about things
that we know are ok).
1.2
---
- Allow mapping from simple/contiguous processor, socket, and core
numbers to the corresponding back-end Linux ID (and clean up
documentation references between "X number" and "X ID", where X is
one of processor, socket, core).
- Support CPU hot plugging.
- Add explicit control over the PLPA topology information cache.
1.1.1
-----
- Ensure that --enable-included also disables executables by default.
Thanks to Brian Curtis for identifying the problem and supplying a
patch.
- Add ENABLE_EXECUTABLES m4 macro to allow behavior in m4
configuration analogous to "--enable-included --enable-executables".
1.1
---
- New command: plpa-taskset. It is intended as a drop-in replacement
for the "taskset" command, except it also understands sockets and
cores. See "plpa-taskset --help" for more details.
- Renamed "plpa_info" to "plpa-info".
- Added PLPA_{MAJOR|MINOR|RELEASE}_VERSION integer macros in plpa.h.
This release, they have the values of 1, 1, and 0, respectively.
- Add new API functions to map from (socket,core) back and forth from
the Linux virtual processor ID. Thanks to AMD for the initial code
contribution that made this possible. See the documentation for
plpa_map_to_processor_id() as a starting point for more
information.
- Various fixes to "included" mode.
1.0.5
-----
- Fix an issue where the PLPA get/set affinity functions would only
operate on the current process; the PID argument to these functions
was effectively ignored.
1.0.4
-----
- Fix some 64 bit issues. Thanks to David Creasy for reporting the
problems.
- Fix plpa.h to be C++-friendly. Thanks to Emmanuel Paris for
pointing out this problem.
1.0.3
-----
- Resolve some sizing / units ambiguities that mistakenly did not make
it into 1.0.1. Thanks to Bert Wesarg for pointing these problems out.
1.0.2
-----
- Ensure that plpa_sched_[set|get]affinity() returns 0 upon success.
Thanks to David Creasy for bringing this to our attention.
1.0.1
-----
- Specify that cpusetsize should be in units of bytes and add some
missing "* 8"'s in plpa_dispatch.c.
1.0
---
- Initial release.
0.9a2
-----
- Change the back-end type of the plpa_bitmask_t to unsigned long
instead of unsigned char to avoid an endian problem.
- No longer attempt to set the affinity in plpa_api_probe() to avoid a
possible race condition (setting it after getting it).
- Fix PLPA_CPU_ZERO macro. Thanks to Bogdan Costescu for pointing
this out.
0.9a1
-----
Initial public release.
Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
University Research and Technology
Corporation. All rights reserved.
Copyright (c) 2004-2005 The Regents of the University of California.
All rights reserved.
Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
$COPYRIGHT$
See LICENSE file for a rollup of all copyright notices.
$HEADER$
===========================================================================
This is the Portable Linux Processor Affinity (PLPA) package
(pronounced "pli-pa"). The PLPA has evolved over time to provide the
following capabilities:
1. Provide a stable API on Linux for processor affinity (Linux has
provided three different API signatures over time).
2. Provide a simple API that translates between Linux processor ID and
(socket ID, core ID) tuples, and allows querying processor topology
information on supported platforms.
3. Provide a command-line executable (plpa-taskset(1)) that provides
all the same functionality as the venerable taskset(1) command, and
several extensions, including the ability to bind processes to
specific (socket, core) tuples on supported platforms.
Note that the PLPA is fully embeddable, meaning that it can be wholly
contained in larger software packages that wish to have a single,
stable version of processor affinity API functionality. See below for
more details on embedding.
Also note that PLPA's socket/core and other topology information is
only available on certain platforms. Specifically, PLPA reads the
/sys filesystem to glean its information; if your system does not
export processor topology information through /sys, the PLPA cannot
provide that information. For example, AMD/Intel processor topology
support was included in Linux kernel v2.6.16, but POWER processor
topology information is not yet supported as of Linux kernel v2.6.26.
In a world where the processor counts in hosts are [again] increasing,
particularly where at least some of them are NUMA-based architectures,
processor affinity is becoming more important. We hope that the PLPA
is helpful to you. Enjoy.
Note that if you're looking into processor affinity, and if you're on
a NUMA machine, you probably also want to look into libnuma:
ftp://ftp.suse.com/pub/people/ak/numa/
If you are a developer, keep reading. If you are a system
administrator or other end-user, you're probably more interested in
using the plpa-info(1) and plpa-taskset(1) executable commands; see
the output of "plpa-info" and "plpa-taskset --help" for more
information.
===========================================================================
The following text is specific technical information about the
original problem that PLPA was created to solve.
The original intent for the PLPA was for developers who wished to use
Linux processor affinity via the sched_setaffinity() and
sched_getaffinity() library calls, but don't want to wade through the
morass of 3 different APIs that have been offered through the life of
these calls in various Linux distributions and glibc versions.
Specifically, to compile for any given Linux system, you need some
complex compile-time tests to figure out which of the 3 APIs to use.
And if you want your application to be binary portable across
different Linux distributions, more complex run-time tests (and horrid
compile-time trickery) are required to figure out which API the system
you are running on uses.
These problems all stem from the fact that the same 2 symbols have had
three different APIs (with different numbers and types of
parameters) throughout their life in Linux. Ick.
The PLPA is an attempt to solve this problem by providing a single API
that developers can write to. It provides three things:
1. A single API that developers can write to, regardless of what
back-end API the system you are compiling on has.
2. A run-time test and dispatch that will invoke the Right back-end
API depending on what back-end API the system you are running on
has.
3. Mapping information between (socket ID, core ID) tuples and Linux
virtual processor IDs.
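As a minimal sketch of the first point (the exact prototypes below are
assumptions for illustration; plpa.h is the authoritative reference), binding
the calling process to Linux processor ID 0 looks the same on every supported
glibc/kernel combination:

    #include <unistd.h>
    #include <plpa.h>

    void bind_to_processor_0(void)
    {
        plpa_cpu_set_t mask;
        PLPA_CPU_ZERO(&mask);
        PLPA_CPU_SET(0, &mask);
        /* PLPA dispatches to whichever back-end variant the running
           kernel provides; ENOSYS means no affinity support at all. */
        plpa_sched_setaffinity(getpid(), sizeof(mask), &mask);
    }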
===========================================================================
What, exactly, is the problem? History.
----------------------------------------
There are at least 3 different ways that sched_setaffinity is
implemented in glibc (only one of which is documented in the
sched_setaffinity(2) man page), and some corresponding changes
to what the kernel considers to be valid arguments:
1. int sched_setaffinity(pid_t pid, unsigned int len, unsigned
long *mask);
This originated in the time period of 2.5 kernels and some distros
back-ported it to their 2.4 kernels and libraries. It's unknown if
this version was ever packaged with any 2.6 kernels.
2. int sched_setaffinity (pid_t __pid, size_t __cpusetsize,
const cpu_set_t *__cpuset);
This appears to be in recent distros using 2.6 kernels. We don't
know exactly when #1 changed into #2. However, this prototype is nice
because the cpu_set_t type is accompanied by fdset-like CPU_ZERO(),
CPU_SET(), CPU_ISSET(), etc. macros.
3. int sched_setaffinity (pid_t __pid, const cpu_set_t *__mask);
(note the missing len parameter) This is in at least some Linux
distros (e.g., MDK 10.0 with a 2.6.3 kernel, and SGI Altix, even
though the Altix uses a 2.4-based kernel and therefore likely
back-ported the 2.5 work or originated it in the first place).
Similar to #2, the cpu_set_t type is accompanied by fdset-like
CPU_ZERO(), CPU_SET(), CPU_ISSET(), etc. macros.
But wait, it gets worse.
Remember that getting/setting processor affinity has to involve the
kernel. The sched_[sg]etaffinity() glibc functions typically do a
little error checking and then make a syscall down into the kernel to
actually do the work. There are multiple possibilities for problems
here as the amount of checking has changed:
1. The glibc may support the affinity functions, but the kernel may
not (and vice versa).
This is typically only an issue with slightly older Linux distributions.
Mandrake 9.2 is an example of this. PLPA can detect this at run-time
and turn its internal functions into no-ops and return appropriate error
codes (ENOSYS).
2. The glibc affinity functions may be buggy (i.e., they pass bad data
down to the syscall).
This is fortunately restricted to some older versions of glibc, and
is relatively easy to check for at run-time. PLPA reliably detects
this situation at run-time and returns appropriate error codes
(ENOSYS).
The original SuSE 9.1 version seems to have this problem, but it was
fixed somewhere in the SuSE patching history (it is unknown exactly
when). Specifically, updating to the latest SuSE 9.1 patch level
(as of Dec 2005) seems to fix the problem.
3. The CPU_* macros for manipulating cpu_set_t bitmasks may not
compile because of typo bugs in system header files.
PLPA avoids this problem by providing its own PLPA_CPU_* macros for
manipulating CPU bitmasks. See "How do I use PLPA?", below, for
more details.
The PLPA avoids all the glibc issues by using syscall() to directly
access the kernel set and get affinity functions. This is described
below.
===========================================================================
How does PLPA work?
-------------------
Jeff Squyres initially sent a mail to the Open MPI developer's mailing
list explaining the Linux processor affinity problems and asking for
help coming up with a solution (particularly for binary
compatibility):
http://www.open-mpi.org/community/lists/devel/2005/11/0558.php
Discussion on that thread and others eventually resulted in the
run-time tests that form the heart of the PLPA. Many thanks to Paul
Hargrove and Bogdan Costescu for their time and effort to get these
tests right.
PLPA was written so that other developers who want to use processor
affinity in Linux don't have to go through this mess. The PLPA
provides a single interface that can be used on any platform,
regardless of which back-end API variant it has. This includes both
the sched_setaffinity() and sched_getaffinity() calls as well as the
CPU_*() macros.
The PLPA avoids glibc altogether -- although tests were developed that
could *usually* figure out which glibc variant to use at run time,
there were still some cases where it was either impossible to
determine or the glibc interface itself was buggy. Hence, it was
decided that a simpler approach was simply to use syscall() to invoke
the back-end kernel functions directly.
The kernel functions have gone through a few changes as well, so the
PLPA does a few run-time tests to determine which variant to use
before actually invoking the back-end functions with the
user-specified arguments.
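For a rough feel of the approach (this is an illustrative sketch, not
PLPA's actual code), invoking the kernel's get-affinity call directly via
syscall() on a Linux system that defines __NR_sched_getaffinity looks
something like this:
---------------------------------------------------------------------------
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void) {
    unsigned long mask[16] = { 0 };
    /* The raw syscall takes (pid, length in bytes, pointer to mask);
       pid 0 means "the calling process".  A negative return value
       indicates failure. */
    long ret = syscall(__NR_sched_getaffinity, 0, sizeof(mask), mask);
    if (ret < 0)
        perror("sched_getaffinity syscall");
    else
        printf("first word of affinity mask: 0x%lx\n", mask[0]);
    return 0;
}
---------------------------------------------------------------------------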
NOTE: The run-time tests that the PLPA performs involve getting the
current affinity for the process in question and then attempting to
set them back to the same value. By definition, this introduces a
race condition (there is no atomic get-and-set functionality for
processor affinity). The PLPA cannot guarantee consistent results if
multiple entities (such as multiple threads or multiple processes) are
setting the affinity for a process at the same time. In a worst case
scenario, the PLPA may conclude that it cannot determine the
kernel variant at run time if another entity modifies a process'
affinity while PLPA is executing its run-time tests.
===========================================================================
Does PLPA make truly portable binaries?
---------------------------------------
As much as Linux binaries are portable, yes. That is, even if it is
within your power to make a binary that is runnable on several
different Linux distributions/versions/etc., you may still run into
problems with the Linux processor affinity functions. PLPA attempts
to solve this problem for you by *also* making the Linux processor
affinity calls be binary portable.
Hence, you need to start with something that is already binary
portable (perhaps linking everything statically) -- then PLPA will be
of help to you. Do not fall into the misconception that PLPA will
magically make your executable be binary portable between different
Linux variants.
===========================================================================
How do I use PLPA?
------------------
There are three main uses of the PLPA:
1. Using the plpa-info(1) executable to check whether your system supports
processor affinity and whether the PLPA can determine which API variant
to use at run-time.
2. Developers using the PLPA library both to enable source and binary
Linux processor affinity portability, and to write
processor-topology-aware applications.
3. Using the plpa-taskset(1) executable to bind arbitrary executables
to Linux virtual processor IDs and/or specific socket/core tuples.
In more detail:
1. The plpa-info(1) executable makes a few simple calls into the PLPA
library to check which API variant the system it is running on
has. If the kernel supports processor affinity and the PLPA is
able to figure out which API variant to use, it prints "Kernel
affinity support: yes". Other responses indicate an error. The
"--topo" switch will print out basic topology information about
your system, if supported.
Since the PLPA library abstracts this kind of problem away, this is
more a diagnostic tool than anything else.
See "plpa-info --help" for more information. A man page does not
yet exist, unfortunately.
Note that plpa-info is *only* compiled and installed if PLPA is
installed as a standalone package (see below).
2. Developers can use this package by including the <plpa.h> header
file and using the following prototypes for setting and getting
processor affinity:
int plpa_sched_setaffinity(pid_t pid, size_t cpusetsize,
const plpa_cpu_set_t *cpuset);
int plpa_sched_getaffinity(pid_t pid, size_t cpusetsize,
plpa_cpu_set_t *cpuset);
These functions perform run-time tests to determine which back-end
API variant exists on the system and then dispatch to it correctly.
The unit of cpusetsize is bytes. This should normally
just be sizeof(*cpuset), but is made available as a parameter to
allow for future expansion of the PLPA (stay tuned).
The observant reader will notice that this is remarkably similar to
one of the Linux APIs (the function names are different and
the CPU set type is different). PLPA also provides several macros
for manipulating the plpa_cpu_set_t bitmask, quite similar to FDSET
macros (see "What, Exactly, Is the Problem?" above for a
description of problems with the native CPU_* macros):
- PLPA_CPU_ZERO(&cpuset): Sets all bits in a plpa_cpu_set_t to
zero.
- PLPA_CPU_SET(num, &cpuset): Sets bit <num> of <cpuset> to one.
- PLPA_CPU_CLR(num, &cpuset): Sets bit <num> of <cpuset> to zero.
- PLPA_CPU_ISSET(num, &cpuset): Returns one if bit <num> of
<cpuset> is one; returns zero otherwise.
Note that all four macros take a *pointer* to a plpa_cpu_set_t, as
denoted by "&cpuset" in the descriptions above. (A complete usage
sketch appears after this numbered list.)
Also note that the PLPA distinguishes between Linux processor,
socket, and core IDs and processor, socket, and core numbers. The
*Linux IDs* are kernel-assigned integer values that do not
necessarily start with zero and are not necessarily contiguous.
The *numbers* start with 0 and run contiguously up to (N-1). The
numbers are therefore mainly a human convenience; they may or may
not exactly correspond to the Linux IDs; it is safest to assume
that they do not.
The following API functions are also available on supported
platforms with kernels that support topology information (e.g.,
AMD/Intel platforms with Linux kernel v2.6.16 or later). The list
below is a summary only; see plpa.h for a specific list of function
signatures:
- plpa_have_topology_information()
Will return 1 if the PLPA is able to provide topology
information, 0 otherwise. If 0 is returned, all the functions
below will return a negative value to signify a graceful failure.
- plpa_map_to_processor_id()
Take a (socket ID, core ID) tuple and map it to a Linux processor
ID
- plpa_map_to_socket_core()
Take a Linux processor ID and map it to a (socket ID, core ID)
tuple
- plpa_get_processor_info()
Return the number of processors and the max Linux processor ID
- plpa_get_processor_id()
Return the Linux processor ID for the Nth processor (starting
with 0)
- plpa_get_processor_flags()
Return whether a Linux processor ID exists, and if so, if it is
online
- plpa_get_socket_info()
Return the number of sockets and the max Linux socket ID
- plpa_get_socket_id()
Return the Linux socket ID for the Nth socket (starting with 0)
- plpa_get_core_info()
For a given socket ID, return the number of cores and the max
Linux core ID
- plpa_get_core_id()
For a given socket ID, return the Linux core ID of the Nth core
(starting with 0)
- plpa_get_core_flags()
Return whether a (socket ID,core ID) tuple exists, and if so, if
it is online
- plpa_set_cache_behavior()
Tell PLPA to use (or not) a local cache for the topology
information, or to refresh the cache right now
- plpa_finalize()
Release all internal resources allocated and maintained by the
PLPA. It is permissible to invoke other PLPA functions after
plpa_finalize(), but if you want to release PLPA's resources, you
will need to invoke plpa_finalize() again. Note that it is not
necessary (but harmless) to invoke plpa_finalize() on systems
where plpa_have_topology_information() returns that the topology
information is not supported.
*** NOTE: Topology information (i.e., (socket ID, core ID) tuples)
may not be reported for offline processors. Hence, if any
processors are offline, the socket/core values returned by PLPA
will likely change once the processor is brought back online.
Sorry; this is how the Linux kernel works -- there's nothing
PLPA can do about it.
The above functions are slightly more documented in plpa.h.
Contributions of real man pages would be greatly appreciated.
3. The plpa-taskset(1) executable represents an evolution of the
venerable "taskset(1)" command. It allows binding of arbitrary
processes to specific Linux processor IDs and/or specific (socket
ID, core ID) tuples. It supports all the same command line syntax
as the taskset(1) command, but also supports additional syntax for
specifying socket and core IDs. Hence, you can launch
processor-bound jobs without needing to modify their source code to
call the PLPA library. See "plpa-taskset --help" for more
information on the command line options available, and brief
examples of usage. A man page does not yet exist, unfortunately.
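Here is the usage sketch referenced in item 2 above. It is a minimal,
untested example that uses only the prototypes and PLPA_CPU_* macros
documented above; it assumes a non-zero return value indicates failure and
binds the calling process to Linux processor ID 0:
---------------------------------------------------------------------------
#include <stdio.h>
#include <unistd.h>
#include <plpa.h>

int main(void) {
    plpa_cpu_set_t set;

    PLPA_CPU_ZERO(&set);            /* clear all bits */
    PLPA_CPU_SET(0, &set);          /* allow only Linux processor ID 0 */
    if (plpa_sched_setaffinity(getpid(), sizeof(set), &set) != 0) {
        printf("plpa_sched_setaffinity failed\n");
        return 1;
    }

    PLPA_CPU_ZERO(&set);
    if (plpa_sched_getaffinity(getpid(), sizeof(set), &set) == 0 &&
        PLPA_CPU_ISSET(0, &set)) {
        printf("Bound to Linux processor ID 0\n");
    }
    return 0;
}
---------------------------------------------------------------------------
Compile and link it as shown in "How can I tell if PLPA is working?"
below (i.e., with the appropriate -I/-L flags and -lplpa).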
===========================================================================
How do I compile / install the PLPA as a standalone package?
------------------------------------------------------------
The PLPA uses the standard GNU Autoconf/Automake/Libtool toolset to
build and install itself. This means that generally, the following
works:
shell$ ./configure --prefix=/where/you/want/to/install
[...lots of output...]
shell$ make all
[...lots of output...]
shell$ make install
Depending on your --prefix, you may need to run the "make install"
step as root or some other privileged user.
There are a few noteworthy configure options listed below. The
enable/disable options are shown in their non-default form. For
example, if --enable-foo is shown below, it is because --disable-foo
is the default.
--enable-emulate: allow using PLPA on platforms that do not have
__NR_sched_setaffinity (e.g., OS X); usually only useful in
development / testing scenarios.
--disable-executables: do not build the PLPA executables; only build
the library.
--enable-included-mode: build PLPA in the "included" mode (see
below).
--enable-debug: this option is probably only helpful for PLPA
developers.
--with-plpa-symbol-prefix=STRING: a string prefix to add to all public
PLPA symbols. This is usually only useful in included mode (see
below).
--with-valgrind(=DIR): require building PLPA with Valgrind support
(requires finding include/valgrind/memcheck.h). This will add a
small number of Valgrind annotations in the PLPA code base that
remove false/irrelevant Valgrind warnings. The =DIR clause is only
necessary if Valgrind's header files cannot be found by the
preprocessor's default search path.
"make install" will install the following:
- <plpa.h> in $includedir (typically $prefix/include)
- libplpa.la and libplpa.a and/or libplpa.so in $libdir (typically
$prefix/lib)
- plpa-info(1) executable in $bindir (typically $prefix/bin)
- plpa-taskset(1) executable in $bindir (typically $prefix/bin)
Note that since PLPA builds itself with GNU Libtool, it can be built
as a static or shared library (or both). The default is to build a
shared library. You can enable building a static library by supplying
the "--enable-static" argument to configure; you can disable building
the shared library by supplying the "--disable-shared" argument to
configure. "make install" will install whichever library was built
(or both).
"make uninstall" will fully uninstall PLPA from the prefix directory
(again, depending on filesystem permissions, you may need to run this
as root or some privileged user).
===========================================================================
How do I include/embed PLPA in my software package?
---------------------------------------------------
It can be desirable to include PLPA in a larger software package
(be sure to check out the LICENSE file) so that users don't have to
separately download and install it before installing your software
(after all, PLPA is a tiny little project -- why make users bother
with it?).
When used in "included" mode, PLPA will:
- not install any header files
- not build or install any executables
- not build libplpa.* -- instead, it will build libplpa_included.*
There are two ways to put PLPA into "included" mode. From the
configure command line:
shell$ ./configure --enable-included-mode ...
Or by directly integrating PLPA's m4 configure macro in your configure
script and invoking a specific macro to enable the included mode.
Every project is different, and there are many different ways of
integrating PLPA into yours. What follows is *one* example of how to
do it.
Copy the PLPA directory into your source tree and include the plpa.m4
file in your configure script -- perhaps with the following line in
acinclude.m4 (assuming the use of Automake):
m4_include(path/to/plpa.m4)
The following macros can then be used from your configure script (only
PLPA_INIT *must* be invoked if using the m4 macros):
- PLPA_STANDALONE
Force the building of PLPA in standalone mode. Overrides the
--enable-included-mode command line switch.
- PLPA_INCLUDED
Force the building of PLPA in included mode.
- PLPA_SET_SYMBOL_PREFIX(foo)
Tells the PLPA to prefix all types and public symbols with "foo"
instead of "plpa_". This is recommended behavior if you are
including PLPA in a larger project -- it is possible that your
software will be combined with other software that also includes
PLPA. If you both use different symbol prefixes, there will be no
type/symbol clashes, and everything will compile and link
successfully. If you both include PLPA and do not change the symbol
prefix, it is likely that you will get multiple symbol definitions
when linking if an external PLPA is linked against your library /
application. Note that the PLPA_CPU_*() macros are *NOT* prefixed
(because they are only used when compiling and therefore present no
link/run-time conflicts), but all other types, enum values, and
symbols are. Enum values are prefixed with an upper-case
translation of the prefix supplied. For example,
PLPA_SET_SYMBOL_PREFIX(foo_) will result in foo_init() and
FOO_PROBE_OK. Tip: It might be good to include "plpa" in the
prefix, just for clarity.
- PLPA_DISABLE_EXECUTABLES
Provides the same result as the --disable-executables configure
flag, and is implicit in included mode.
- PLPA_ENABLE_EXECUTABLES
Provides the same result as the --enable-executables configure flag.
If used in conjunction with PLPA_INCLUDED, it must be specified
*after* PLPA_INCLUDED to have effect, as PLPA_INCLUDED *disables*
executables.
- PLPA_INIT(config-prefix, action-upon-success, action-upon-failure)
Invoke the PLPA tests and set up the PLPA to build. A traversal of
"make" into the PLPA directory should build everything (it is safe
to list the PLPA directory in the SUBDIRS of a higher-level
Makefile.am, for example). ***PLPA_INIT must be invoked after the
STANDALONE, INCLUDED, SET_SYMBOL_PREFIX, DISABLE_EXECUTABLES, and
ENABLE_EXECUTABLES macros.*** The first argument is the prefix to
use for AC_OUTPUT files. Hence, if your embedded PLPA is located in
the source tree at contrib/plpa, you should pass [contrib/plpa] as
the first argument.
- PLPA_DO_AM_CONDITIONALS
If you embed PLPA in a larger project and build it conditionally
(e.g., if PLPA_INIT is in a conditional), you must unconditionally
invoke PLPA_DO_AM_CONDITIONALS to avoid warnings from Automake (for
the cases where PLPA is not selected to be built). This macro is
necessary because PLPA uses some AM_CONDITIONALs to build itself;
AM_CONDITIONALs cannot be defined conditionally. It is safe (but
unnecessary) to call PLPA_DO_AM_CONDITIONALS even if PLPA_INIT is
invoked unconditionally.
Here's an example of integrating with a larger project named sandbox:
----------
shell$ cd sandbox
shell$ cp -r /somewhere/else/plpa-<version> plpa
shell$ edit acinclude.m4
...add the line "m4_include(plpa/config/plpa.m4)"...
shell$ edit Makefile.am
...add "plpa" to SUBDIRS...
...add "$(top_builddir)/plpa/src/libplpa/libplpa_included.la" to
my executable's LDADD line...
...add "-I$(top_builddir)/plpa/src/libplpa" to AM_CPPFLAGS
shell$ edit configure.ac
...add "PLPA_INCLUDED" line...
...add "PLPA_SET_SYMBOL_PREFIX(sandbox_plpa_)" line...
...add "PLPA_INIT([./plpa], [plpa_happy=yes], [plpa_happy=no])" line...
...add error checking for plpa_happy=no case...
shell$ edit src/my_program.c
...add #include <plpa.h>...
...add calls to sandbox_plpa_sched_setaffinity()...
shell$ aclocal
shell$ autoconf
shell$ libtoolize --automake
shell$ automake -a
shell$ ./configure
...lots of output...
shell$ make
...lots of output...
----------
===========================================================================
How can I tell if PLPA is working?
----------------------------------
Run plpa-info; if it says "Kernel affinity support: yes", then PLPA is
working properly.
If you want to compile your own test program to verify it, try
compiling and running the following:
---------------------------------------------------------------------------
#include <stdio.h>
#include <plpa.h>
int main(int argc, char* argv[]) {
plpa_api_type_t p;
if (0 == plpa_api_probe(&p) && PLPA_PROBE_OK == p) {
printf("All is good!\n");
} else {
printf("Looks like PLPA is not working\n");
}
return 0;
}
---------------------------------------------------------------------------
You may need to supply appropriate -I and -L arguments to the
compiler/linker, respectively, to tell it where to find the PLPA
header and library files. Also don't forget to supply -lplpa to link
in the PLPA library itself. For example, if you configured PLPA with:
shell$ ./configure --prefix=$HOME/my-plpa-install
Then you would compile the above program with:
shell$ gcc my-plpa-test.c \
-I$HOME/my-plpa-install/include \
-L$HOME/my-plpa-install/lib -lplpa \
-o my-plpa-test
shell$ ./my-plpa-test
If it compiles, links, runs, and prints "All is good!", then all
should be well.
===========================================================================
What license does PLPA use?
---------------------------
This package is distributed under the BSD license (see the LICENSE
file in the top-level directory of a PLPA distribution). The
copyrights of several institutions appear throughout the code base
because some of the code was directly derived from the Open MPI
project (http://www.open-mpi.org/), which is also distributed under
the BSD license.
===========================================================================
How do I get involved in PLPA?
------------------------------
The PLPA continues to evolve, particularly as core counts increase and
internal host topology becomes more important. We want to hear your
opinions.
The best way to report bugs, send comments, or ask questions is to
sign up on the user's mailing list:
plpa-users@open-mpi.org
Because of spam, only subscribers are allowed to post to this list
(ensure that you subscribe with and post from exactly the same e-mail
address -- joe@example.com is considered different than
joe@mycomputer.example.com!). Visit this page to subscribe to the
list:
http://www.open-mpi.org/mailman/listinfo.cgi/plpa-users
Thanks for your time.
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Main PLPA m4 macro, to be invoked by the user
#
# Expects two or three parameters:
# 1. Configuration prefix (optional; if not specified, "." is assumed)
# 2. What to do upon success
# 3. What to do upon failure
#
AC_DEFUN([PLPA_INIT],[
# The old 2-parameter variant of PLPA_INIT was removed starting with
# PLPA v1.2; if only 2 arguments are given, error out below.
# Otherwise, the config prefix is $1.
m4_ifval([$3],
[_PLPA_INIT_COMPAT([$1], [$2], [$3])],
[AC_MSG_WARN([The 2-argument form of the PLPA INIT m4 macro is deprecated])
AC_MSG_WARN([It was removed starting with PLPA v1.2])
AC_MSG_ERROR([Cannot continue])])
])dnl
#-----------------------------------------------------------------------
# Do the main work for PLPA_INIT
#
# Expects three parameters:
# 1. Configuration prefix
# 2. What to do upon success
# 3. What to do upon failure
#
AC_DEFUN([_PLPA_INIT_COMPAT],[
AC_REQUIRE([_PLPA_INTERNAL_SETUP])
AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([AM_PROG_LEX])
AC_REQUIRE([AC_PROG_YACC])
m4_define([plpa_config_prefix],[$1])
# Check for syscall()
AC_CHECK_FUNC([syscall], [plpa_config_happy=1], [plpa_config_happy=0])
# Look for syscall.h
if test "$plpa_config_happy" = 1; then
AC_CHECK_HEADER([sys/syscall.h], [plpa_config_happy=1], [plpa_config_happy=0])
fi
# Look for unistd.h
if test "$plpa_config_happy" = 1; then
AC_CHECK_HEADER([unistd.h], [plpa_config_happy=1], [plpa_config_happy=0])
fi
# Check for __NR_sched_setaffinity
if test "$plpa_config_happy" = 1; then
AC_MSG_CHECKING([for __NR_sched_setaffinity])
if test "$plpa_emulate" = "yes"; then
AC_MSG_RESULT([emulated])
AC_DEFINE([__NR_sched_setaffinity], [0], [Emulated value])
else
AC_TRY_COMPILE([#include <syscall.h>
#include <unistd.h>], [#ifndef __NR_sched_setaffinity
#error __NR_sched_setaffinity not found!
#endif
int i = 1;],
[AC_MSG_RESULT([yes])
plpa_config_happy=1],
[AC_MSG_RESULT([no])
plpa_config_happy=0])
fi
fi
# Check for __NR_sched_getaffinity (probably overkill, but what
# the heck?)
if test "$plpa_config_happy" = 1; then
AC_MSG_CHECKING([for __NR_sched_getaffinity])
if test "$plpa_emulate" = "yes"; then
AC_MSG_RESULT([emulated])
AC_DEFINE([__NR_sched_getaffinity], [0], [Emulated value])
else
AC_TRY_COMPILE([#include <syscall.h>
#include <unistd.h>], [#ifndef __NR_sched_getaffinity
#error __NR_sched_getaffinity not found!
#endif
int i = 1;],
[AC_MSG_RESULT([yes])
plpa_config_happy=1],
[AC_MSG_RESULT([no])
plpa_config_happy=0])
fi
fi
# If all was good, do the real init
AS_IF([test "$plpa_config_happy" = "1"],
[_PLPA_INIT($2, $3)],
[$3])
PLPA_DO_AM_CONDITIONALS
AC_CONFIG_FILES(
plpa_config_prefix[/Makefile]
plpa_config_prefix[/src/Makefile]
plpa_config_prefix[/src/libplpa/Makefile]
)
# Cleanup
unset plpa_config_happy
])dnl
#-----------------------------------------------------------------------
# Build PLPA as a standalone package
AC_DEFUN([PLPA_STANDALONE],[
AC_REQUIRE([_PLPA_INTERNAL_SETUP])
plpa_mode=standalone
])dnl
#-----------------------------------------------------------------------
# Build PLPA as an included package
AC_DEFUN([PLPA_INCLUDED],[
m4_ifval([$1],
[AC_MSG_WARN([The 1-argument form of the PLPA INCLUDED m4 macro is deprecated])
AC_MSG_WARN([It was removed starting with PLPA v1.2])
AC_MSG_ERROR([Cannot continue])])
AC_REQUIRE([_PLPA_INTERNAL_SETUP])
plpa_mode=included
PLPA_DISABLE_EXECUTABLES
])dnl
#-----------------------------------------------------------------------
dnl JMS: No fortran bindings yet
dnl # Set whether the fortran bindings will be built or not
dnl AC_DEFUN([PLPA_FORTRAN],[
dnl AC_REQUIRE([_PLPA_INTERNAL_SETUP])
dnl
dnl # Need [] around entire following line to escape m4 properly
dnl [plpa_tmp=`echo $1 | tr '[:upper:]' '[:lower:]'`]
dnl if test "$1" = "0" -o "$1" = "n"; then
dnl plpa_fortran=no
dnl elif test "$1" = "1" -o "$1" = "y"; then
dnl plpa_fortran=yes
dnl else
dnl AC_MSG_WARN([Did not understand PLPA_FORTRAN argument ($1) -- ignored])
dnl fi
dnl ])dnl
#-----------------------------------------------------------------------
# Disable building the executables
AC_DEFUN([PLPA_DISABLE_EXECUTABLES],[
AC_REQUIRE([_PLPA_INTERNAL_SETUP])
plpa_executables=no
])dnl
#-----------------------------------------------------------------------
# Enable building the executables
AC_DEFUN([PLPA_ENABLE_EXECUTABLES],[
AC_REQUIRE([_PLPA_INTERNAL_SETUP])
plpa_executables=yes
])dnl
#-----------------------------------------------------------------------
# Specify the symbol prefix
AC_DEFUN([PLPA_SET_SYMBOL_PREFIX],[
AC_REQUIRE([_PLPA_INTERNAL_SETUP])
plpa_symbol_prefix_value=$1
])dnl
#-----------------------------------------------------------------------
# Internals
AC_DEFUN([_PLPA_INTERNAL_SETUP],[
AC_ARG_ENABLE([emulate],
AC_HELP_STRING([--enable-emulate],
[Emulate __NR_sched_setaffinity and __NR_sched_getaffinity, to allow building on non-Linux systems (for testing)]))
if test "$enable_emulate" = "yes"; then
plpa_emulate=yes
else
plpa_emulate=no
fi
# Build and install the executables or no?
AC_ARG_ENABLE([executables],
AC_HELP_STRING([--disable-executables],
[Using --disable-executables disables building and installing the PLPA executables]))
if test "$enable_executables" = "yes" -o "$enable_executables" = ""; then
plpa_executables=yes
else
plpa_executables=no
fi
# Included mode, or standalone?
AC_ARG_ENABLE([included-mode],
AC_HELP_STRING([--enable-included-mode],
[Using --enable-included-mode puts the PLPA into "included" mode. The default is --disable-included-mode, meaning that the PLPA is in "standalone" mode.]))
if test "$enable_included_mode" = "yes"; then
plpa_mode=included
if test "$enable_executables" = ""; then
plpa_executables=no
fi
else
plpa_mode=standalone
fi
dnl JMS: No fortran bindings yet
dnl # Fortran bindings, or no?
dnl AC_ARG_ENABLE([fortran],
dnl AC_HELP_STRING([--disable-fortran],
dnl [Using --disable-fortran disables building the Fortran PLPA API bindings]))
dnl if test "$enable_fortran" = "yes" -o "$enable_fortran" = ""; then
dnl plpa_fortran=yes
dnl else
dnl plpa_fortran=no
dnl fi
# Change the symbol prefix?
AC_ARG_WITH([plpa-symbol-prefix],
AC_HELP_STRING([--with-plpa-symbol-prefix=STRING],
[STRING can be any valid C symbol name. It will be prefixed to all public PLPA symbols. Default: "plpa_"]))
if test "$with_plpa_symbol_prefix" = ""; then
plpa_symbol_prefix_value=plpa_
else
plpa_symbol_prefix_value=$with_plpa_symbol_prefix
fi
# Debug mode?
AC_ARG_ENABLE([debug],
AC_HELP_STRING([--enable-debug],
[Using --enable-debug enables various maintainer-level debugging controls. This option is not recommended for end users.]))
if test "$enable_debug" = "yes"; then
plpa_debug=1
plpa_debug_msg="enabled"
elif test "$enable_debug" = "" -a -d .svn; then
plpa_debug=1
plpa_debug_msg="enabled (SVN checkout default)"
else
plpa_debug=0
plpa_debug_msg="disabled"
fi
])dnl
#-----------------------------------------------------------------------
# Internals for PLPA_INIT
AC_DEFUN([_PLPA_INIT],[
AC_REQUIRE([_PLPA_INTERNAL_SETUP])
# Are we building as standalone or included?
AC_MSG_CHECKING([for PLPA building mode])
AC_MSG_RESULT([$plpa_mode])
# Debug mode?
AC_MSG_CHECKING([if want PLPA maintainer support])
AC_DEFINE_UNQUOTED(PLPA_DEBUG, [$plpa_debug], [Whether we are in debugging mode or not])
AC_MSG_RESULT([$plpa_debug_msg])
# We need to set a path for header, etc. files depending on whether
# we're standalone or included. This is taken care of by PLPA_INCLUDED.
AC_MSG_CHECKING([for PLPA config prefix])
AC_MSG_RESULT(plpa_config_prefix)
# Note that plpa_config.h *MUST* be listed first so that it
# becomes the "main" config header file. Any AM_CONFIG_HEADERs
# after that (plpa.h) will only have selective #defines replaced,
# not the entire file.
AM_CONFIG_HEADER(plpa_config_prefix[/src/libplpa/plpa_config.h])
AM_CONFIG_HEADER(plpa_config_prefix[/src/libplpa/plpa.h])
# What prefix are we using?
AC_MSG_CHECKING([for PLPA symbol prefix])
AC_DEFINE_UNQUOTED(PLPA_SYM_PREFIX, [$plpa_symbol_prefix_value],
[The PLPA symbol prefix])
# Ensure to [] escape the whole next line so that we can get the
# proper tr tokens
[plpa_symbol_prefix_value_caps="`echo $plpa_symbol_prefix_value | tr '[:lower:]' '[:upper:]'`"]
AC_DEFINE_UNQUOTED(PLPA_SYM_PREFIX_CAPS, [$plpa_symbol_prefix_value_caps],
[The PLPA symbol prefix in all caps])
AC_MSG_RESULT([$plpa_symbol_prefix_value])
# Build with valgrind support if we can find it, unless it was
# explicitly disabled
AC_ARG_WITH([valgrind],
[AC_HELP_STRING([--with-valgrind(=DIR)],
[Directory where the valgrind software is installed])])
CPPFLAGS_save="$CPPFLAGS"
valgrind_happy=no
AS_IF([test "$with_valgrind" != "no"],
[AS_IF([test ! -z "$with_valgrind" -a "$with_valgrind" != "yes"],
[CPPFLAGS="$CPPFLAGS -I$with_valgrind/include"])
AC_CHECK_HEADERS([valgrind/valgrind.h],
[AC_MSG_CHECKING([for VALGRIND_CHECK_MEM_IS_ADDRESSABLE])
AC_LINK_IFELSE(AC_LANG_PROGRAM([[
#include "valgrind/memcheck.h"
]],
[[char buffer = 0xff;
VALGRIND_CHECK_MEM_IS_ADDRESSABLE(&buffer, sizeof(buffer));]]),
[AC_MSG_RESULT([yes])
valgrind_happy=yes],
[AC_MSG_RESULT([no])
AC_MSG_WARN([Need Valgrind version 3.2.0 or later.])],
[AC_MSG_RESULT([cross-compiling; assume yes...?])
AC_MSG_WARN([PLPA will fail to compile if you do not have Valgrind version 3.2.0 or later])
valgrind_happy=yes]),
],
[AC_MSG_WARN([valgrind.h not found])])
AS_IF([test "$valgrind_happy" = "no" -a "x$with_valgrind" != "x"],
[AC_MSG_WARN([Valgrind support requested but not possible])
AC_MSG_ERROR([Cannot continue])])])
AS_IF([test "$valgrind_happy" = "no"],
[CPPFLAGS="$CPPFLAGS_save"
valgrind_define=0],
[valgrind_define=1])
AC_DEFINE_UNQUOTED([PLPA_WANT_VALGRIND_SUPPORT], [$valgrind_define],
[Whether we want Valgrind support or not])
dnl JMS: No fortran bindings yet
dnl # Check for fortran
dnl AC_MSG_CHECKING([whether to build PLPA Fortran API])
dnl AC_MSG_RESULT([$plpa_fortran])
# Check whether to build the executables or not
AC_MSG_CHECKING([whether to build PLPA executables])
AC_MSG_RESULT([$plpa_executables])
# If we're building executables, we need some things for plpa-taskset
if test "$plpa_executables" = "yes"; then
AC_C_INLINE
fi
# Success
$1
])dnl
#-----------------------------------------------------------------------
# This must be a standalone routine so that it can be called both by
# PLPA_INIT and an external caller (if PLPA_INIT is not invoked).
AC_DEFUN([PLPA_DO_AM_CONDITIONALS],[
if test "$plpa_did_am_conditionals" != "yes"; then
AM_CONDITIONAL([PLPA_BUILD_STANDALONE], [test "$plpa_mode" = "standalone"])
dnl JMS: No fortran bindings yet
dnl AM_CONDITIONAL(PLPA_BUILD_FORTRAN, [test "$plpa_fortran" = "yes"])
AM_CONDITIONAL(PLPA_BUILD_EXECUTABLES, [test "$plpa_executables" = "yes"])
fi
plpa_did_am_conditionals=yes
])dnl
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
SUBDIRS = libplpa
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Note that plpa.h is generated by configure, so we don't want to
# ship it in the tarball. Hence the "nodist_" prefixes to the HEADERS
# macros, below.
public_headers = plpa.h
noinst_LTLIBRARIES = libplpa_included.la
nodist_noinst_HEADERS = $(public_headers)
# The sources
plpa_sources = \
plpa_internal.h \
plpa_api_probe.c \
plpa_dispatch.c \
plpa_runtime.c \
plpa_map.c
libplpa_included_la_SOURCES = $(plpa_sources)