data:image/s3,"s3://crabby-images/ee574/ee5744bae85591d9eb564ba2e870d2bdf819f935" alt="频繁设置CGroup触发linux内核bug导致CGroup running task不调度"
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#include <exception>
#include <iostream>
#include <string>

// Stress program: starts 10 CPU-burning threads, moves the process into the
// cpu cgroup below, and then rewrites that cgroup's cpu.cfs_period_us and
// cpu.cfs_quota_us in an endless loop.

// Directory of the cpu cgroup this process creates (if needed) and joins.
const std::string sub_cgroup_dir("/sys/fs/cgroup/cpu/test");

// ---------------------------------------------------------------- common lib

// Returns true when `path` can be stat()ed and refers to a directory.
bool is_dir(const std::string& path) {
    struct stat statbuf;
    return stat(path.c_str(), &statbuf) == 0 && S_ISDIR(statbuf.st_mode);
}

// Writes the decimal text of `num` to `file_path`, truncating the file.
// Returns false if the file cannot be opened, or if writing or closing it
// reports an error.
bool write_file(const std::string& file_path, int num) {
    FILE* fp = fopen(file_path.c_str(), "w");
    // Must be a comparison: an assignment here would null the handle and the
    // calls below would then operate on a null stream.
    if (fp == nullptr) {
        return false;
    }

    const std::string write_data = std::to_string(num);
    const bool written = fputs(write_data.c_str(), fp) != EOF;
    // stdio buffers the data, so a rejected write may only be reported when
    // fclose() flushes the stream; always close, and check its result too.
    const bool closed = fclose(fp) == 0;
    return written && closed;
}

// Returns the current wall-clock time in milliseconds (from gettimeofday).
long get_ms_timestamp() {
    timeval tv;
    gettimeofday(&tv, nullptr);
    return tv.tv_sec * 1000 + tv.tv_usec / 1000;
}

// -------------------------------------------------------------------- cgroup

// Creates sub_cgroup_dir when it is not already a directory, prints the
// current pid, and writes that pid to the directory's cgroup.procs file.
// Returns false if the directory cannot be created or the write fails.
bool create_cgroup() {
    if (!is_dir(sub_cgroup_dir)) {
        if (mkdir(sub_cgroup_dir.c_str(), S_IRWXU | S_IRGRP) != 0) {
            std::cout << "mkdir cgroup dir fail" << std::endl;
            return false;
        }
    }

    const int pid = getpid();
    std::cout << "pid is " << pid << std::endl;

    const std::string procs_path = sub_cgroup_dir + "/cgroup.procs";
    return write_file(procs_path, pid);
}

// Writes `period` to cpu.cfs_period_us of the cgroup; returns write_file()'s result.
bool set_period(int period) {
    const std::string period_path = sub_cgroup_dir + "/cpu.cfs_period_us";
    return write_file(period_path, period);
}

// Writes `quota` to cpu.cfs_quota_us of the cgroup; returns write_file()'s result.
bool set_quota(int quota) {
    const std::string quota_path = sub_cgroup_dir + "/cpu.cfs_quota_us";
    return write_file(quota_path, quota);
}

// -------------------------------------------------------------------- thread

// Thread entry point that burns CPU forever.
// `param` carries an integer interval in milliseconds, smuggled through the
// void* argument. Every 1000 loop iterations the clock is read; once at least
// `interval` ms have passed since the last pause, the thread sleeps for 1 ms.
void* thread_func(void* param) {
    const int interval = static_cast<int>(reinterpret_cast<intptr_t>(param));
    long last = get_ms_timestamp();

    // Counts 0..999 and restarts, so the counter can never overflow no matter
    // how long the thread spins.
    int spins = 0;
    while (true) {
        if (++spins < 1000) {
            continue;
        }
        spins = 0;

        const long current = get_ms_timestamp();
        if ((current - last) >= interval) {
            usleep(1000);
            last = current;
        }
    }

    return nullptr;  // not reached
}

// Starts 10 threads running thread_func with intervals of 1..10 ms and
// reports success or failure of each creation. The threads are never joined.
void test_thread() {
    const int k_thread_num = 10;
    pthread_t pthreads[k_thread_num];

    for (int i = 0; i < k_thread_num; i++) {
        // Widen to intptr_t first so the int -> pointer conversion is well sized.
        void* arg = reinterpret_cast<void*>(static_cast<intptr_t>(i + 1));
        if (pthread_create(&pthreads[i], nullptr, thread_func, arg) != 0) {
            std::cout << "create thread fail" << std::endl;
        } else {
            std::cout << "create thread success,tid is " << pthreads[i] << std::endl;
        }
    }
}

// argv[1] : period (written to cpu.cfs_period_us)
// argv[2] : quota  (written to cpu.cfs_quota_us)
// Returns -1 on bad arguments or when the cgroup cannot be set up; otherwise
// loops forever.
int main(int argc, char* argv[]) {
    if (argc < 3) {
        std::cout << "usage : ./inactive_timer $period $quota" << std::endl;
        return -1;
    }

    int period = 0;
    int quota = 0;
    try {
        period = std::stoi(argv[1]);
        quota = std::stoi(argv[2]);
    } catch (const std::exception&) {
        // std::stoi throws on non-numeric or out-of-range input.
        std::cout << "usage : ./inactive_timer $period $quota" << std::endl;
        return -1;
    }

    std::cout << "period is " << period << std::endl;
    std::cout << "quota is " << quota << std::endl;

    test_thread();

    if (!create_cgroup()) {
        std::cout << "create cgroup fail" << std::endl;
        return -1;
    }

    bool warned = false;  // report a failed cgroup write once, not every pass
    int i = 0;
    while (true) {
        // The delay between rewrites cycles through 1..21 ms.
        if (i > 20) {
            i = 0;
        }
        i++;

        long current = get_ms_timestamp();
        const long last = current;
        while ((current - last) < i) {
            usleep(1000);
            current = get_ms_timestamp();
        }

        const bool period_ok = set_period(period);
        const bool quota_ok = set_quota(quota);
        if (!(period_ok && quota_ok) && !warned) {
            std::cout << "set cgroup period/quota fail" << std::endl;
            warned = true;
        }
    }

    return 0;
}
View Code
自由互联热门推荐:PDF电子发票识别软件,一键识别电子发票并导入到Excel中!10大顶级数据挖掘软件!人工智能的十大作用!
2.1.2 编译
g++ -std=c++11 -lpthread trigger_cgroup_timer_inactive.cpp -o inactive_timer
2.1.3 在CentOS7.0~7.5的系统上执行程序
./inactive_timer 100000 10000
2.1.4 上述代码主要干了3件事 1> 将自身进程加入CGroup,由CGroup控制其cpu使用 2> 反复设置CGroup的cpu.cfs_period_us和cpu.cfs_quota_us 3> 起10个线程消耗cpu 2.1.5《极简组调度-CGroup如何限制cpu》已经讲过CGroup限制cpu的原理: CGroup控制cpu是通过cfs_period_us指定的一个时间周期内,CGroup下的进程,能使用cfs_quota_us时间长度的cpu,如果在该周期内使用的cpu超过了cfs_quota_us设定的值,则将其throttled,即将其从公平调度运行队列中移出,然后等待定时器触发下个周期unthrottle后再移入,从而达到控制cpu的效果。 2.2 现象
data:image/s3,"s3://crabby-images/ee574/ee5744bae85591d9eb564ba2e870d2bdf819f935" alt="频繁设置CGroup触发linux内核bug导致CGroup running task不调度"
data:image/s3,"s3://crabby-images/ee574/ee5744bae85591d9eb564ba2e870d2bdf819f935" alt="频繁设置CGroup触发linux内核bug导致CGroup running task不调度"
data:image/s3,"s3://crabby-images/ee574/ee5744bae85591d9eb564ba2e870d2bdf819f935" alt="频繁设置CGroup触发linux内核bug导致CGroup running task不调度"
data:image/s3,"s3://crabby-images/ee574/ee5744bae85591d9eb564ba2e870d2bdf819f935" alt="频繁设置CGroup触发linux内核bug导致CGroup running task不调度"
data:image/s3,"s3://crabby-images/ee574/ee5744bae85591d9eb564ba2e870d2bdf819f935" alt="频繁设置CGroup触发linux内核bug导致CGroup running task不调度"
data:image/s3,"s3://crabby-images/ee574/ee5744bae85591d9eb564ba2e870d2bdf819f935" alt="频繁设置CGroup触发linux内核bug导致CGroup running task不调度"
data:image/s3,"s3://crabby-images/ee574/ee5744bae85591d9eb564ba2e870d2bdf819f935" alt="频繁设置CGroup触发linux内核bug导致CGroup running task不调度"
tg_set_cfs_quota() tg_set_cfs_bandwidth() /* restart the period timer (if active) to handle new period expiry */ if (runtime_enabled && cfs_b->timer_active) { /* force a reprogram */ cfs_b->timer_active = 0; __start_cfs_bandwidth(cfs_b); } 仔细观察上述代码,设想如下场景: 1> 在线程A设置CGroup的quota或者period时,将cfs_b->timer_active设为0,调用__start_cfs_bandwidth()后,在未执行到__start_cfs_bandwidth()代码580行hrtimer_cancel()之前,cpu切换到B线程 2> 线程B也调用__start_cfs_bandwidth(),执行完后将cfs_b->timer_active设为1,并调用start_bandwidth_timer()激活timer,此时cpu切换到线程A 3> 线程A恢复并继续执行,调用hrtimer_cancel()让period_timer失效,然后执行到__start_cfs_bandwidth()代码585行后,发现cfs_b->timer_active为1,直接return,而不再将period_timer激活
data:image/s3,"s3://crabby-images/ee574/ee5744bae85591d9eb564ba2e870d2bdf819f935" alt="频繁设置CGroup触发linux内核bug导致CGroup running task不调度"
data:image/s3,"s3://crabby-images/ee574/ee5744bae85591d9eb564ba2e870d2bdf819f935" alt="频繁设置CGroup触发linux内核bug导致CGroup running task不调度"
2> 当定时器失效后,由于3.2中线程B已将cfs_b->timer_active设为1,所以即使下次时钟中断执行到assign_cfs_rq_runtime()中时,由于误判timer是active的,也不会调用__start_cfs_bandwidth()再次激活timer,这样被throttle的group se永远不会被unthrottle投入rq调度了
data:image/s3,"s3://crabby-images/ee574/ee5744bae85591d9eb564ba2e870d2bdf819f935" alt="频繁设置CGroup触发linux内核bug导致CGroup running task不调度"
data:image/s3,"s3://crabby-images/ee574/ee5744bae85591d9eb564ba2e870d2bdf819f935" alt="频繁设置CGroup触发linux内核bug导致CGroup running task不调度"