$cat /proc/cmdline BOOT_IMAGE=/boot/vmlinuz-4.8.0-27-generic root=UUID=58c66f12-0588-442b-9bb8-1d2dd833efe2 ro quiet splash isolcpus=3,7 irqaffinity=0-2,4-6 vt.handoff=7
$stress -c 24 stress: info: [31717] dispatching hogs: 24 cpu, 0 io, 0 vm, 0 hdd
top - 18:39:07 up 2 days, 20:48, 18 users, load average: 23,15, 10,46, 4,53 Tasks: 457 total, 26 running, 431 sleeping, 0 stopped, 0 zombie %Cpu0 :100,0 us, 0,0 sy, 0,0 ni, 0,0 id, 0,0 wa, 0,0 hi, 0,0 si, 0,0 st %Cpu1 : 98,7 us, 1,3 sy, 0,0 ni, 0,0 id, 0,0 wa, 0,0 hi, 0,0 si, 0,0 st %Cpu2 : 99,3 us, 0,7 sy, 0,0 ni, 0,0 id, 0,0 wa, 0,0 hi, 0,0 si, 0,0 st %Cpu3 : 0,0 us, 0,0 sy, 0,0 ni,100,0 id, 0,0 wa, 0,0 hi, 0,0 si, 0,0 st %Cpu4 : 95,7 us, 4,3 sy, 0,0 ni, 0,0 id, 0,0 wa, 0,0 hi, 0,0 si, 0,0 st %Cpu5 : 98,0 us, 2,0 sy, 0,0 ni, 0,0 id, 0,0 wa, 0,0 hi, 0,0 si, 0,0 st %Cpu6 : 98,7 us, 1,3 sy, 0,0 ni, 0,0 id, 0,0 wa, 0,0 hi, 0,0 si, 0,0 st %Cpu7 : 0,0 us, 0,0 sy, 0,0 ni,100,0 id, 0,0 wa, 0,0 hi, 0,0 si, 0,0 st KiB Mem : 7855176 total, 385736 free, 5891280 used, 1578160 buff/cache KiB Swap: 15624188 total, 10414520 free, 5209668 used. 626872 avail Mem
CPU 3和7处于空闲状态,而其他6个CPU处于完全忙碌状态。很好。
- It uses two int buffers of the same size
- It reads one-by-one all the values of the first buffer
- each value is a random index in the second buffer
- It reads the value at the index in the second buffer
- It sums all the values taken from the second buffer
- It repeats all the previous steps for bigger and bigger buffers
- At the end, I print the number of voluntary and involuntary CPU context switches
$./TestCpuset ### launch on any non-isolated CPU
$taskset -c 7 ./TestCpuset ### launch on isolated CPU 7
当在隔离的CPU上启动时,上下文切换的数量几乎是恒定的(在10到20之间),即使我并行启动“stress -c 24”.(看起来很正常)
I have a hypothesis, which is that the “isolcpus” option isolates a
CPU from any process (unless the process is given a CPU affinity,
as is done with “taskset”) but not from kernel tasks.
However, I found no documentation about it.
仅供参考,这个问题与我之前开设的另一个问题相关:Cannot allocate exclusively a CPU for my process
#include <limits.h> #include <iostream> #include <unistd.h> #include <sys/time.h> #include <sys/resource.h> const unsigned int BUFFER_SIZE = 4096; using namespace std; class TimedSumComputer { public: TimedSumComputer() : sum(0), bufferSize(0), valueBuffer(0), indexBuffer(0) {} public: virtual ~TimedSumComputer() { resetBuffers(); } public: void init(unsigned int bufferSize) { this->bufferSize = bufferSize; resetBuffers(); initValueBuffer(); initIndexBuffer(); } private: void resetBuffers() { delete [] valueBuffer; delete [] indexBuffer; valueBuffer = 0; indexBuffer = 0; } void initValueBuffer() { valueBuffer = new unsigned int[bufferSize]; for (unsigned int i = 0 ; i < bufferSize ; i++) { valueBuffer[i] = randomUint(); } } static unsigned int randomUint() { int value = rand() % UINT_MAX; return value; } protected: void initIndexBuffer() { indexBuffer = new unsigned int[bufferSize]; for (unsigned int i = 0 ; i < bufferSize ; i++) { indexBuffer[i] = rand() % bufferSize; } } public: unsigned int getSum() const { return sum; } unsigned int computeTimeInMicroSeconds() { struct timeval startTime, endTime; gettimeofday(&startTime, NULL); unsigned int sum = computeSum(); gettimeofday(&endTime, NULL); return ((endTime.tv_sec - startTime.tv_sec) * 1000 * 1000) + (endTime.tv_usec - startTime.tv_usec); } unsigned int computeSum() { sum = 0; for (unsigned int i = 0 ; i < bufferSize ; i++) { unsigned int index = indexBuffer[i]; sum += valueBuffer[index]; } return sum; } protected: unsigned int sum; unsigned int bufferSize; unsigned int * valueBuffer; unsigned int * indexBuffer; }; unsigned int runTestForBufferSize(TimedSumComputer & timedComputer, unsigned int bufferSize) { timedComputer.init(bufferSize); unsigned int timeInMicroSec = timedComputer.computeTimeInMicroSeconds(); cout << "bufferSize = " << bufferSize << " - time (in micro-sec) = " << timeInMicroSec << endl; return timedComputer.getSum(); } void runTest(TimedSumComputer & timedComputer) { unsigned int result = 0; 
for (unsigned int i = 1 ; i < 10 ; i++) { result += runTestForBufferSize(timedComputer, BUFFER_SIZE * i); } unsigned int factor = 1; for (unsigned int i = 2 ; i <= 6 ; i++) { factor *= 10; result += runTestForBufferSize(timedComputer, BUFFER_SIZE * factor); } cout << "result = " << result << endl; } void printPid() { cout << "###############################" << endl; cout << "Pid = " << getpid() << endl; cout << "###############################" << endl; } void printNbContextSwitch() { struct rusage usage; getrusage(RUSAGE_THREAD, &usage); cout << "Number of voluntary context switch: " << usage.ru_nvcsw << endl; cout << "Number of involuntary context switch: " << usage.ru_nivcsw << endl; } int main() { printPid(); TimedSumComputer timedComputer; runTest(timedComputer); printNbContextSwitch(); return 0; }今天,我获得了更多关于我的问题的线索
> Ftrace Linux Kernel Tracing
> ftrace – Function Tracer
# sudo bash
# cd /sys/kernel/debug/tracing
# echo 1 > options/function-trace ; echo function_graph > current_tracer ; echo 1 > tracing_on ; echo 0 > tracing_max_latency ; taskset -c 7 [path-to-my-program]/TestCpuset ; echo 0 > tracing_on
# cat trace
由于我的程序是在CPU 7上启动的(taskset -c 7),我必须对“trace”的输出进行过滤
# grep " 7)" trace
# grep " 7)" trace | grep "=>" ... 7) TestCpu-4753 => kworker-5866 7) kworker-5866 => TestCpu-4753 7) TestCpu-4753 => watchdo-26 7) watchdo-26 => TestCpu-4753 7) TestCpu-4753 => kworker-5866 7) kworker-5866 => TestCpu-4753 7) TestCpu-4753 => kworker-5866 7) kworker-5866 => TestCpu-4753 7) TestCpu-4753 => kworker-5866 7) kworker-5866 => TestCpu-4753 ...
> kworker
>这些进程/线程到底是什么? (看来它们是由内核处理的)