Thread System 0.3.1
High-performance C++20 thread pool with work stealing and DAG scheduling
Loading...
Searching...
No Matches
numa_topology.cpp
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2024, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
6
7#include <algorithm>
8#include <string>
9#include <thread>
10
11#ifdef __linux__
12#include <dirent.h>
13#include <fstream>
14#include <sstream>
15#include <unistd.h>
16#endif
17
18namespace kcenon::thread
19{
20
22{
23#ifdef __linux__
24 return detect_linux();
25#else
26 // macOS, Windows, and other platforms: fallback to single-node topology
27 return create_fallback();
28#endif
29}
30
31auto numa_topology::get_node_for_cpu(int cpu_id) const -> int
32{
33 if (cpu_id < 0 || static_cast<std::size_t>(cpu_id) >= cpu_to_node_.size())
34 {
35 return -1;
36 }
37 return cpu_to_node_[static_cast<std::size_t>(cpu_id)];
38}
39
40auto numa_topology::get_distance(int node1, int node2) const -> int
41{
42 if (node1 < 0 || node2 < 0 ||
43 static_cast<std::size_t>(node1) >= distances_.size() ||
44 static_cast<std::size_t>(node2) >= distances_.size())
45 {
46 return -1;
47 }
48
49 const auto& row = distances_[static_cast<std::size_t>(node1)];
50 if (static_cast<std::size_t>(node2) >= row.size())
51 {
52 return -1;
53 }
54
55 return row[static_cast<std::size_t>(node2)];
56}
57
58auto numa_topology::is_same_node(int cpu1, int cpu2) const -> bool
59{
60 int node1 = get_node_for_cpu(cpu1);
61 int node2 = get_node_for_cpu(cpu2);
62
63 if (node1 < 0 || node2 < 0)
64 {
65 return false;
66 }
67
68 return node1 == node2;
69}
70
72{
73 return nodes_.size() > 1;
74}
75
76auto numa_topology::node_count() const -> std::size_t
77{
78 return nodes_.size();
79}
80
81auto numa_topology::cpu_count() const -> std::size_t
82{
83 return total_cpus_;
84}
85
86auto numa_topology::get_nodes() const -> const std::vector<numa_node>&
87{
88 return nodes_;
89}
90
91auto numa_topology::get_cpus_for_node(int node_id) const -> std::vector<int>
92{
93 for (const auto& node : nodes_)
94 {
95 if (node.node_id == node_id)
96 {
97 return node.cpu_ids;
98 }
99 }
100 return {};
101}
102
103#ifdef __linux__
105{
106 numa_topology topology;
107
108 // Check if /sys/devices/system/node exists
109 DIR* node_dir = opendir("/sys/devices/system/node");
110 if (!node_dir)
111 {
112 return create_fallback();
113 }
114
115 // Find all NUMA nodes
116 std::vector<int> node_ids;
117 struct dirent* entry = nullptr;
118 while ((entry = readdir(node_dir)) != nullptr)
119 {
120 std::string name = entry->d_name;
121 if (name.substr(0, 4) == "node" && name.length() > 4)
122 {
123 try
124 {
125 int node_id = std::stoi(name.substr(4));
126 node_ids.push_back(node_id);
127 }
128 catch (...)
129 {
130 // Skip invalid entries
131 }
132 }
133 }
134 closedir(node_dir);
135
136 if (node_ids.empty())
137 {
138 return create_fallback();
139 }
140
141 std::sort(node_ids.begin(), node_ids.end());
142
143 // Determine maximum CPU ID for cpu_to_node_ sizing
144 unsigned int hw_concurrency = std::thread::hardware_concurrency();
145 if (hw_concurrency == 0)
146 {
147 hw_concurrency = 1;
148 }
149 topology.cpu_to_node_.resize(hw_concurrency, -1);
150
151 // Parse each node's information
152 for (int node_id : node_ids)
153 {
154 numa_node node;
155 node.node_id = node_id;
156
157 // Read CPUs for this node
158 std::string cpulist_path = "/sys/devices/system/node/node" +
159 std::to_string(node_id) + "/cpulist";
160 std::ifstream cpulist_file(cpulist_path);
161 if (cpulist_file.is_open())
162 {
163 std::string line;
164 if (std::getline(cpulist_file, line))
165 {
166 // Parse CPU list format: "0-3,8-11" or "0,1,2,3"
167 std::istringstream iss(line);
168 std::string token;
169 while (std::getline(iss, token, ','))
170 {
171 // Check for range (e.g., "0-3")
172 auto dash_pos = token.find('-');
173 if (dash_pos != std::string::npos)
174 {
175 try
176 {
177 int start = std::stoi(token.substr(0, dash_pos));
178 int end = std::stoi(token.substr(dash_pos + 1));
179 for (int cpu = start; cpu <= end; ++cpu)
180 {
181 node.cpu_ids.push_back(cpu);
182 if (static_cast<std::size_t>(cpu) < topology.cpu_to_node_.size())
183 {
184 topology.cpu_to_node_[static_cast<std::size_t>(cpu)] = node_id;
185 }
186 }
187 }
188 catch (...)
189 {
190 // Skip invalid entries
191 }
192 }
193 else
194 {
195 try
196 {
197 int cpu = std::stoi(token);
198 node.cpu_ids.push_back(cpu);
199 if (static_cast<std::size_t>(cpu) < topology.cpu_to_node_.size())
200 {
201 topology.cpu_to_node_[static_cast<std::size_t>(cpu)] = node_id;
202 }
203 }
204 catch (...)
205 {
206 // Skip invalid entries
207 }
208 }
209 }
210 }
211 }
212
213 // Read memory size for this node
214 std::string meminfo_path = "/sys/devices/system/node/node" +
215 std::to_string(node_id) + "/meminfo";
216 std::ifstream meminfo_file(meminfo_path);
217 if (meminfo_file.is_open())
218 {
219 std::string line;
220 while (std::getline(meminfo_file, line))
221 {
222 if (line.find("MemTotal:") != std::string::npos)
223 {
224 std::istringstream iss(line);
225 std::string dummy;
226 std::size_t mem_kb = 0;
227 iss >> dummy >> dummy >> mem_kb;
228 node.memory_size_bytes = mem_kb * 1024;
229 break;
230 }
231 }
232 }
233
234 topology.nodes_.push_back(std::move(node));
235 topology.total_cpus_ += topology.nodes_.back().cpu_ids.size();
236 }
237
238 // Read inter-node distances
239 topology.distances_.resize(node_ids.size());
240 for (std::size_t i = 0; i < node_ids.size(); ++i)
241 {
242 topology.distances_[i].resize(node_ids.size(), 10);
243
244 std::string distance_path = "/sys/devices/system/node/node" +
245 std::to_string(node_ids[i]) + "/distance";
246 std::ifstream distance_file(distance_path);
247 if (distance_file.is_open())
248 {
249 std::string line;
250 if (std::getline(distance_file, line))
251 {
252 std::istringstream iss(line);
253 for (std::size_t j = 0; j < node_ids.size(); ++j)
254 {
255 int dist = 10;
256 if (iss >> dist)
257 {
258 topology.distances_[i][j] = dist;
259 }
260 }
261 }
262 }
263 }
264
265 if (topology.nodes_.empty())
266 {
267 return create_fallback();
268 }
269
270 return topology;
271}
272#endif
273
275{
276 numa_topology topology;
277
278 unsigned int hw_concurrency = std::thread::hardware_concurrency();
279 if (hw_concurrency == 0)
280 {
281 hw_concurrency = 1;
282 }
283
284 // Create single NUMA node with all CPUs
285 numa_node node;
286 node.node_id = 0;
287 for (unsigned int i = 0; i < hw_concurrency; ++i)
288 {
289 node.cpu_ids.push_back(static_cast<int>(i));
290 }
291 node.memory_size_bytes = 0; // Unknown
292
293 topology.nodes_.push_back(std::move(node));
294 topology.total_cpus_ = hw_concurrency;
295
296 // Initialize cpu_to_node mapping
297 topology.cpu_to_node_.resize(hw_concurrency, 0);
298
299 // Single node has distance 10 to itself
300 topology.distances_ = {{10}};
301
302 return topology;
303}
304
305} // namespace kcenon::thread
NUMA (Non-Uniform Memory Access) topology information.
std::size_t total_cpus_
Total CPU count.
auto get_nodes() const -> const std::vector< numa_node > &
Get all NUMA nodes.
static auto detect() -> numa_topology
Detect and return the system's NUMA topology.
auto get_cpus_for_node(int node_id) const -> std::vector< int >
Get CPUs belonging to a specific node.
auto get_distance(int node1, int node2) const -> int
Get the distance between two NUMA nodes.
auto get_node_for_cpu(int cpu_id) const -> int
Get the NUMA node for a given CPU.
std::vector< std::vector< int > > distances_
Inter-node distances.
std::vector< numa_node > nodes_
All NUMA nodes.
auto is_numa_available() const -> bool
Check if NUMA is available on this system.
static auto detect_linux() -> numa_topology
Detect topology on Linux using sysfs.
std::vector< int > cpu_to_node_
CPU ID -> NUMA node mapping.
auto cpu_count() const -> std::size_t
Get the total number of CPUs.
static auto create_fallback() -> numa_topology
Create fallback single-node topology.
auto node_count() const -> std::size_t
Get the number of NUMA nodes.
auto is_same_node(int cpu1, int cpu2) const -> bool
Check if two CPUs are on the same NUMA node.
Core threading foundation of the thread system library.
Definition thread_impl.h:17
STL namespace.
NUMA node topology detection and information.
Information about a single NUMA node.
std::vector< int > cpu_ids
CPUs belonging to this node.
std::size_t memory_size_bytes
Total memory on this node.
int node_id
NUMA node identifier.