一、前言
android中应用进程可以在多个cpu上运行,当操作系统在cpu之间频繁切换应用时,可能会引起缓存失效、降低缓存命中率,导致cpu的使用效率下降。此外,手机soc芯片通常是大小核架构,相同的任务,大核的处理时间要小于小核的处理时间。因此,为了追求更好的性能表现,android中经常需要在某些时候把一些关键线程/进程绑定在某个或某些指定的cpu上运行,这种操作俗称“绑核”。
常见的cpu绑定方式,按照原理的不同,可分为设置亲和性、设置uclamp和cpuset子系统三种方式。但是根据具体的native/java接口的不同、以及设置对象(线程/进程)的不同,可以再进一步细分为setThreadGroup、setThreadGroupAndCpuset、setProcessGroup、SetTaskProfiles、SetProcessProfiles、sched_setaffinity、sched_setattr以及taskset命令这几种方式。
二、java接口
setThreadGroup、setThreadGroupAndCpuset和setProcessGroup这三种是java层调用的接口,原理比较类似或者说极具可比性。严格来说,setThreadGroup并没有绑定cpu的作用,但是为了搞清楚和另外两个接口的区别,也值得去研究一下。
1.setThreadGroup
设置线程调度组时,上层可以直接调用setThreadGroup这个接口来实现。可以看到底层是通过调用SetTaskProfiles接口来实现的,需要传入对应的profile_name,对应格式为:SCHED_SP_XXX。
frameworks/base/core/java/android/os/Process.java
1038 /**
1039 * Sets the scheduling group for a thread.
1040 * @hide
1041 * @param tid The identifier of the thread to change.
1042 * @param group The target group for this thread from THREAD_GROUP_*.
1043 *
1052 * Does not set cpuset for some historical reason, just calls
1053 * libcutils::set_sched_policy().
1054 */
1055 public static final native void setThreadGroup(int tid, int group)
1056 throws IllegalArgumentException, SecurityException;
frameworks/base/core/jni/android_util_Process.cpp
207 void android_os_Process_setThreadGroup(JNIEnv* env, jobject clazz, int tid, jint grp)
208 {
209 ALOGV("%s tid=%d grp=%" PRId32, __func__, tid, grp);
210 if (!verifyGroup(env, grp)) {
211 return;
212 }
213
>>通过get_sched_policy_profile_name方法获取对应的profile_name
>>然后调用SetTaskProfiles去执行后续流程
214 int res = SetTaskProfiles(tid, {get_sched_policy_profile_name((SchedPolicy)grp)}, true) ? 0 : -1;
215
216 if (res != NO_ERROR) {
217 signalExceptionForGroupError(env, -res, tid);
218 }
219 }
/system/core/libprocessgroup/sched_policy.cpp
265 const char* get_sched_policy_profile_name(SchedPolicy policy) {
...
274 static constexpr const char* kSchedProfiles[SP_CNT + 1] = {
275 "SCHED_SP_DEFAULT", "SCHED_SP_BACKGROUND", "SCHED_SP_FOREGROUND",
276 "SCHED_SP_SYSTEM", "SCHED_SP_FOREGROUND", "SCHED_SP_FOREGROUND",
277 "SCHED_SP_TOP_APP", "SCHED_SP_RT_APP", "SCHED_SP_DEFAULT"};
278 if (policy < SP_DEFAULT || policy >= SP_CNT) {
279 return nullptr;
280 }
>>获取profile_name: SCHED_SP_XXX
281 return kSchedProfiles[policy + 1];
282 }
2.setThreadGroupAndCpuset
从setThreadGroupAndCpuset方法的代码注释来看,该方法是可以设置线程调度组和cpuset组的,也就意味着具有了绑定cpu的能力。底层也是通过SetTaskProfiles来实现,但和setThreadGroup不同的是,此时是通过函数get_cpuset_policy_profile_name来获取对应的profile_name,对应格式为:CPUSET_SP_XXX。
/frameworks/base/core/java/android/os/Process.java
1055 public static final native void setThreadGroup(int tid, int group)
1056 throws IllegalArgumentException, SecurityException;
1057
1058 /**
1059 * Sets the scheduling group and the corresponding cpuset group
1060 * @hide
1061 * @param tid The identifier of the thread to change.
1062 * @param group The target group for this thread from THREAD_GROUP_*.
...
1069 */
1070 public static final native void setThreadGroupAndCpuset(int tid, int group)
1071 throws IllegalArgumentException, SecurityException;
frameworks/base/core/jni/android_util_Process.cpp
221 void android_os_Process_setThreadGroupAndCpuset(JNIEnv* env, jobject clazz, int tid, jint grp)
222 {
223 ALOGV("%s tid=%d grp=%" PRId32, __func__, tid, grp);
224 if (!verifyGroup(env, grp)) {
225 return;
226 }
227
>>通过get_cpuset_policy_profile_name获取对应的profile_name
228 int res = SetTaskProfiles(tid, {get_cpuset_policy_profile_name((SchedPolicy)grp)}, true) ? 0 : -1;
229
230 if (res != NO_ERROR) {
231 signalExceptionForGroupError(env, -res, tid);
232 }
233 }
/system/core/libprocessgroup/sched_policy.cpp
246 const char* get_cpuset_policy_profile_name(SchedPolicy policy) {
...
255 static constexpr const char* kCpusetProfiles[SP_CNT + 1] = {
256 "CPUSET_SP_DEFAULT", "CPUSET_SP_BACKGROUND", "CPUSET_SP_FOREGROUND",
257 "CPUSET_SP_SYSTEM", "CPUSET_SP_FOREGROUND", "CPUSET_SP_FOREGROUND",
258 "CPUSET_SP_TOP_APP", "CPUSET_SP_DEFAULT", "CPUSET_SP_RESTRICTED"};
259 if (policy < SP_DEFAULT || policy >= SP_CNT) {
260 return nullptr;
261 }
>>获取profile_name: CPUSET_SP_XXX
262 return kCpusetProfiles[policy + 1];
263 }
3.setProcessGroup
根据代码注释,这个接口是针对进程中的所有线程生效的。底层原理是通过SetProcessProfiles接口来实现的,这和前面提到的setThreadGroupAndCpuset(底层调用SetTaskProfiles)不同。而获取profile_name的函数同样是get_cpuset_policy_profile_name,这一点与setThreadGroupAndCpuset是相同的。
1073 /**
1074 * Sets the scheduling group for a process and all child threads
1075 * @hide
1076 * @param pid The identifier of the process to change.
1077 * @param group The target group for this process from THREAD_GROUP_*.
...
1091 * Always sets cpusets.
1092 */
1093 @UnsupportedAppUsage
1094 public static final native void setProcessGroup(int pid, int group)
1095 throws IllegalArgumentException, SecurityException;
1096
235 void android_os_Process_setProcessGroup(JNIEnv* env, jobject clazz, int pid, jint grp)
236 {
237 ALOGV("%s pid=%d grp=%" PRId32, __func__, pid, grp);
238 char proc_path[255];
...
277
>>通过函数get_cpuset_policy_profile_name来获取profile_name
>>然后调用SetProcessProfileCached接口
278 if (!SetProcessProfilesCached(0, pid, {get_cpuset_policy_profile_name((SchedPolicy)grp)}))
279 signalExceptionForGroupError(env, errno ? errno : EPERM, pid);
280 }
/system/core/libprocessgroup/processgroup.cpp
154 bool SetProcessProfilesCached(uid_t uid, pid_t pid, const std::vector<std::string>& profiles) {
>>最终调用SetProcessProfiles实现
155 return TaskProfiles::GetInstance().SetProcessProfiles(uid, pid, profiles, true);
156 }
三、native接口
native接口主要有SetTaskProfiles和SetProcessProfiles这两种,分别针对线程与进程生效。前面提到的java接口底层也是通过调用这两个native接口去实现对应功能的。
1.SetTaskProfiles
cpuset是cgroup子系统,本质上是一个虚拟的文件系统、挂载在/dev/cpuset目录,主要用于操作线程/进程与cpu/memory相关节点的绑定。而SetTaskProfiles主要作用就是将目标tid/pid写入cpuset对应的文件节点中,从而实现线程/进程的绑核操作。
下面将以CPUSET_SP_BACKGROUND这种profile为例来梳理SetTaskProfiles代码逻辑以及实现指定线程绑定cpu的过程。
1.1 TaskProfile定义
首先,在task_profiles.json文件中对CPUSET_SP_BACKGROUND定义了几种能力,基本上对应着cgroup机制中的几种controller,如cpu、cpuset、blkio等。其中,与cpuset相关的profile为"ProcessCapacityLow",根据它的详细描述,可以看到"ProcessCapacityLow"对应的controller正是cpuset、其对应的path为"background"。
/system/core/libprocessgroup/profiles/task_profiles.json
661 {
662 "Name": "CPUSET_SP_BACKGROUND",
663 "Profiles": [ "HighEnergySaving", "ProcessCapacityLow", "LowIoPriority", "TimerSlackHigh" ]
664 },
345 {
346 "Name": "ProcessCapacityLow",
347 "Actions": [
348 {
349 "Name": "JoinCgroup",
350 "Params":
351 {
352 "Controller": "cpuset",
353 "Path": "background"
354 }
355 }
356 ]
357 },
1.2 获取TaskProfile
根据传入的参数profiles从profiles_中获取对应的TaskProfile。其中profiles_是已经加载好的所有profiles的集合,可以通过name获取对应的TaskProfile。
828 bool TaskProfiles::SetTaskProfiles(int tid, const std::vector<std::string>& profiles,
829 bool use_fd_cache) {
830 bool success = true;
>>profiles为profile_name的集合
>>以"CPUSET_SP_BACKGROUND"为例,这里传入的profiles为{"CPUSET_SP_BACKGROUND"}
831 for (const auto& name : profiles) {
>>根据name获取对应的TaskProfile
832 TaskProfile* profile = GetProfile(name);
833 if (profile != nullptr) {
834 if (use_fd_cache) {
835 profile->EnableResourceCaching(ProfileAction::RCT_TASK);
836 }
>>执行当前profile对应的ExecuteForTask方法,传入tid参数,主要作用是将相关参数写入对应文件节点
837 if (!profile->ExecuteForTask(tid)) {
838 PLOG(WARNING) << "Failed to apply " << name << " task profile";
839 success = false;
840 }
841 } else {
842 PLOG(WARNING) << "Failed to find " << name << " task profile";
843 success = false;
844 }
845 }
>>根据profile_name获取对应的TaskProfile
789 TaskProfile* TaskProfiles::GetProfile(const std::string& name) const {
>>profiles_为TaskProfiles读取的task_profiles.json中的所有profile信息
>>这里将以"CPUSET_SP_BACKGROUND"为key从profiles_获取对应的TaskProfile
790 auto iter = profiles_.find(name);
791
792 if (iter != profiles_.end()) {
793 return iter->second.get();
794 }
795 return nullptr;
796 }
1.3 执行ProfileAction.ExecuteForTask
ExecuteForTask函数是实现ProfileAction对应功能的核心逻辑。
542 bool TaskProfile::ExecuteForTask(int tid) const {
543 if (tid == 0) {
544 tid = GetThreadId();
545 }
>>这里的element为ProfileAction类型
>>对于"CPUSET_SP_BACKGROUND",其对应的element为ApplyProfileAction,也是ProfileAction中的一种
>>执行对应ProfileAction的ExecuteForTask
546 for (const auto& element : elements_) {
>>执行对应类型的ProfileAction的ExecuteForTask方法
547 if (!element->ExecuteForTask(tid)) {
548 LOG(VERBOSE) << "Applying profile action " << element->Name() << " failed";
549 return false;
550 }
551 }
552 return true;
553 }
/system/core/libprocessgroup/task_profiles.h
169 class TaskProfile {
170 public:
171 TaskProfile(const std::string& name) : name_(name), res_cached_(false) {}
...
182 private:
183 const std::string name_;
184 bool res_cached_;
>>elements_中的元素为ProfileAction类型
185 std::vector<std::unique_ptr<ProfileAction>> elements_;
1.4 ProfileAction子类
通过执行具体类型的ProfileAction所对应的ExecuteForTask方法将相关参数写入对应文件节点。ProfileAction被很多子类继承,比如SetClampsAction、SetCgroupAction等,当然,也包括前面提到的ApplyProfileAction。ApplyProfileAction相对比较特殊,相当于自身包含了多种action,而SetCgroupAction这种只对应着具体某一种action。
/system/core/libprocessgroup/task_profiles.h
80 // Profile actions
81 class SetClampsAction : public ProfileAction {
123 // Set cgroup profile element
124 class SetCgroupAction : public ProfileAction {
188 // Set aggregate profile element
189 class ApplyProfileAction : public ProfileAction {
1.5 SetCgroupAction
每一种ProfileAction都有各自的ExecuteForTask逻辑。比如SetCgroupAction可以将tid加入到对应的thread group,而对应的thread group有指定可运行的cpu,从而实现绑定cpu的目的。
/system/core/libprocessgroup/task_profiles.cpp
318 bool SetCgroupAction::ExecuteForTask(int tid) const {
319 CacheUseResult result = UseCachedFd(ProfileAction::RCT_TASK, tid);
320 if (result != ProfileAction::UNUSED) {
321 return result == ProfileAction::SUCCESS;
322 }
323
324 // fd was not cached or cached fd can't be used
>>通过对应的controller去获取路径
>>对于"CPUSET_SP_BACKGROUND",其获取的操作路径为"/dev/cpuset/background/tasks"
325 std::string tasks_path = controller()->GetTasksFilePath(path_);
>>根据tasks_path打开对应的file
326 unique_fd tmp_fd(TEMP_FAILURE_RETRY(open(tasks_path.c_str(), O_WRONLY | O_CLOEXEC)));
327 if (tmp_fd < 0) {
328 PLOG(WARNING) << "Failed to open " << tasks_path;
329 return false;
330 }
>>通过AddTidToCgroup方法实现将tid加入到指定路径的thread group中
331 if (!AddTidToCgroup(tid, tmp_fd, controller()->name())) {
332 LOG(ERROR) << "Failed to add task into cgroup";
333 return false;
334 }
335
336 return true;
337 }
235 bool SetCgroupAction::AddTidToCgroup(int tid, int fd, const char* controller_name) {
236 if (tid <= 0) {
237 return true;
238 }
239
240 std::string value = std::to_string(tid);
241
>>将tid写入对应文件节点
>>对于"CPUSET_SP_BACKGROUND",则是将tid写入"/dev/cpuset/background/tasks"中,这样线程将只能在background组
>>指定的cpu上运行,一般会被限制在小核(例如cpu0-3)上运行,具体范围取决于各厂商的cpuset配置
242 if (TEMP_FAILURE_RETRY(write(fd, value.c_str(), value.length())) == value.length()) {
243 return true;
244 }
1.6 获取操作路径
//GetTasksFilePath
/system/core/libprocessgroup/cgroup_map.cpp
50 static constexpr const char* CGROUP_PROCS_FILE = "/cgroup.procs";
51 static constexpr const char* CGROUP_TASKS_FILE = "/tasks";
52 static constexpr const char* CGROUP_TASKS_FILE_V2 = "/cgroup.tasks";
88 std::string CgroupController::GetTasksFilePath(const std::string& rel_path) const {
>>这里的path()返回的是controller对应的操作路径
>>对于"CPUSET_SP_BACKGROUND",对应的controller为cpuset,其对应路径为"/dev/cpuset"
89 std::string tasks_path = path();
90
91 if (!rel_path.empty()) {
>>rel_path为action对应的路径
>>对于"CPUSET_SP_BACKGROUND",rel_path="background", 所以此时tasks_path="/dev/cpuset/background"
92 tasks_path += "/" + rel_path;
93 }
>>如果version=1,则文件路径为"/tasks",对应完整路径为"/dev/cpuset/background/tasks"
>>如果version=2,则文件路径为"/cgroup.tasks",对应完整路径为"/dev/cpuset/background/cgroup.tasks"
94 return (version() == 1) ? tasks_path + CGROUP_TASKS_FILE : tasks_path + CGROUP_TASKS_FILE_V2;
95 }
1.7 TaskProfile的加载
task_profiles.json文件中定义了各种TaskProfile,每个TaskProfile表示拥有某一种或者某些能力。而task_profiles.json文件中所有TaskProfile的读取与处理是在task_profiles.cpp文件中的Load函数中,便于后续直接根据profile_name获取对应的TaskProfile。
/system/core/libprocessgroup/profiles/task_profiles.json
>>task_profiles.json文件中定义着各种profile
628 "AggregateProfiles": [
629 {
630 "Name": "SCHED_SP_DEFAULT",
631 "Profiles": [ "TimerSlackNormal" ]
632 },
...
661 {
662 "Name": "CPUSET_SP_BACKGROUND",
663 "Profiles": [ "HighEnergySaving", "ProcessCapacityLow", "LowIoPriority", "TimerSlackHigh" ]
664 },
665 {
666 "Name": "CPUSET_SP_FOREGROUND",
667 "Profiles": [ "HighPerformance", "ProcessCapacityHigh", "HighIoPriority", "TimerSlackNormal" ]
668 },
...
689 ]
/system/core/libprocessgroup/task_profiles.cpp
618 bool TaskProfiles::Load(const CgroupMap& cg_map, const std::string& file_name) {
...
>>根据file_name确定要读取的文件
621 if (!android::base::ReadFileToString(file_name, &json_doc)) {
622 LOG(ERROR) << "Failed to read task profiles from " << file_name;
623 return false;
624 }
...
628 Json::Value root;
629 std::string errorMessage;
>>root即为task_profiles.json
630 if (!reader->parse(&*json_doc.begin(), &*json_doc.end(), &root, &errorMessage)) {
...
>>根据key="Profiles"对应的各个profile
661 const Json::Value& profiles_val = root["Profiles"];
662 for (Json::Value::ArrayIndex i = 0; i < profiles_val.size(); ++i) {
663 const Json::Value& profile_val = profiles_val[i];
664
665 std::string profile_name = profile_val["Name"].asString();
>>获取profile对应的所有action, 每个profile有1个或多个action
666 const Json::Value& actions = profile_val["Actions"];
667 auto profile = std::make_shared<TaskProfile>(profile_name);
668
>>遍历当前profile对应的所有action
669 for (Json::Value::ArrayIndex act_idx = 0; act_idx < actions.size(); ++act_idx) {
670 const Json::Value& action_val = actions[act_idx];
671 std::string action_name = action_val["Name"].asString();
672 const Json::Value& params_val = action_val["Params"];
673 if (action_name == "JoinCgroup") {
674 std::string controller_name = params_val["Controller"].asString();
675 std::string path = params_val["Path"].asString();
676
677 auto controller = cg_map.FindController(controller_name);
678 if (controller.HasValue()) {
>>如果action_name是"JoinCgroup",则初始化SetCgroupAction,并传入对应的controller和path
>>比如对于profile="ProcessCapacityLow",其对应controller为cpuset、对应path为"background"
679 profile->Add(std::make_unique<SetCgroupAction>(controller, path));
680 } else {
681 LOG(WARNING) << "JoinCgroup: controller " << controller_name << " is not found";
682 }
683 } else if (action_name == "SetTimerSlack") {
>>aggregateprofiles_val=[{...}, {...}, {...}, ...]
755 const Json::Value& aggregateprofiles_val = root["AggregateProfiles"];
756 for (Json::Value::ArrayIndex i = 0; i < aggregateprofiles_val.size(); ++i) {
>>aggregateprofile_val为aggregateprofiles_val中的item之一,比如{"Name":"CPUSET_SP_BACKGROUND","Profiles":[...]}
757 const Json::Value& aggregateprofile_val = aggregateprofiles_val[i];
758
>>获取对应Name,比如"CPUSET_SP_BACKGROUND"
759 std::string aggregateprofile_name = aggregateprofile_val["Name"].asString();
>>获取对应Profiles,比如:[ "HighEnergySaving", "ProcessCapacityLow", "LowIoPriority", "TimerSlackHigh" ]
760 const Json::Value& aggregateprofiles = aggregateprofile_val["Profiles"];
761 std::vector<std::shared_ptr<TaskProfile>> profiles;
762 bool ret = true;
763
>>遍历aggregateprofiles列表中的每一个Profile
764 for (Json::Value::ArrayIndex pf_idx = 0; pf_idx < aggregateprofiles.size(); ++pf_idx) {
765 std::string profile_name = aggregateprofiles[pf_idx].asString();
766
>>如果当前遍历到的profile_name与aggregateprofile_name相同,则置ret = false、且break
767 if (profile_name == aggregateprofile_name) {
768 LOG(WARNING) << "AggregateProfiles: recursive profile name: " << profile_name;
769 ret = false;
770 break;
>>如果当前profile_name不存在于profiles_中,说明对应profile不存在,则置ret = false、且break
771 } else if (profiles_.find(profile_name) == profiles_.end()) {
772 LOG(WARNING) << "AggregateProfiles: undefined profile name: " << profile_name;
773 ret = false;
774 break;
775 } else {
>>把当前profile_name对应的profile添加到profiles中
776 profiles.push_back(profiles_[profile_name]);
777 }
778 }
>>若前面无异常,则ret=true
779 if (ret) {
>>以aggregateprofile_name(比如"CPUSET_SP_BACKGROUND")为例、初始化对应TaskProfile
780 auto profile = std::make_shared<TaskProfile>(aggregateprofile_name);
>>执行TaskProfile.Add方法添加ApplyProfileAction,并将前面收集到的profiles列表传入
781 profile->Add(std::make_unique<ApplyProfileAction>(profiles));
>>以aggregateprofile_name为key,添加到总的profiles容器中
782 profiles_[aggregateprofile_name] = profile;
783 }
784 }
/system/core/libprocessgroup/task_profiles.h
169 class TaskProfile {
170 public:
>>初始化TaskProfile时给name_赋值(比如"CPUSET_SP_BACKGROUND")
171 TaskProfile(const std::string& name) : name_(name), res_cached_(false) {}
172
173 const std::string& Name() const { return name_; }
>>通过Add方法把前面初始化的ApplyProfileAction添加到elements_中
174 void Add(std::unique_ptr<ProfileAction> e) { elements_.push_back(std::move(e)); }
1.8 ApplyProfileAction
ApplyProfileAction也是ProfileAction的子类,但是比较特殊,其他的子类本身只具有单一功能(比如SetCgroupAction),但是ApplyProfileAction像是多个ProfileAction的组合体,同时拥有多个功能,有对应的profiles列表。因此其ExecuteForTask逻辑与其他类别的ProfileAction有很大不同。
188 // Set aggregate profile element
189 class ApplyProfileAction : public ProfileAction {
190 public:
>>初始化ApplyProfileAction时给profiles_赋值,即前面收集到的TaskProfile列表,例如"HighEnergySaving"、"ProcessCapacityLow"、"LowIoPriority"、"TimerSlackHigh"各自对应的TaskProfile
191 ApplyProfileAction(const std::vector<std::shared_ptr<TaskProfile>>& profiles)
192 : profiles_(profiles) {}
/system/core/libprocessgroup/task_profiles.cpp
508 bool ApplyProfileAction::ExecuteForTask(int tid) const {
>>对于ApplyProfileAction,其ExecuteForTask方法逻辑与其他类型的ProfileAction不同,这里会遍历profiles_中的每一个profile
>>并执行每一个profile对应的ExecuteForTask方法
>>还说以"CPUSET_SP_BACKGROUND"为例,对应profiles_列表为:[ "HighEnergySaving", "ProcessCapacityLow", "LowIoPriority", "TimerSlackHigh" ]
509 for (const auto& profile : profiles_) {
510 profile->ExecuteForTask(tid);
511 }
512 return true;
513 }
对于其中的"ProcessCapacityLow",该profile对应的action为"JoinCgroup",则其ExecuteForTask逻辑对应着SetCgroupAction::ExecuteForTask。
345 {
346 "Name": "ProcessCapacityLow",
347 "Actions": [
348 {
349 "Name": "JoinCgroup",
350 "Params":
351 {
352 "Controller": "cpuset",
353 "Path": "background"
354 }
355 }
356 ]
357 },
2.SetProcessProfiles
SetProcessProfiles主要是针对进程生效,其代码逻辑整体上与SetTaskProfiles相似,只是有一些细节上的区别:
1)SetProcessProfiles中要求执行对应TaskProfile的ExecuteForProcess方法,而SetTaskProfiles则是执行ExecuteForTask方法;
2)SetProcessProfiles中是通过GetProcsFilePath方法获取操作路径,而SetTaskProfiles则是通过GetTaskFilePath方法获取操
作路径;
3)SetProcessProfiles中获取的操作路径以"/cgroup.procs"结尾,而SetTaskProfiles中获取的路径是以"/tasks"结尾。
/system/core/libprocessgroup/task_profiles.cpp
807 bool TaskProfiles::SetProcessProfiles(uid_t uid, pid_t pid,
808 const std::vector<std::string>& profiles, bool use_fd_cache) {
809 bool success = true;
810 for (const auto& name : profiles) {
811 TaskProfile* profile = GetProfile(name);
812 if (profile != nullptr) {
813 if (use_fd_cache) {
814 profile->EnableResourceCaching(ProfileAction::RCT_PROCESS);
815 }
>>执行对应profile的ExecuteForProcess方法
>>传入的参数为uid与pid
816 if (!profile->ExecuteForProcess(uid, pid)) {
817 PLOG(WARNING) << "Failed to apply " << name << " process profile";
818 success = false;
819 }
820 } else {
821 PLOG(WARNING) << "Failed to find " << name << " process profile";
822 success = false;
823 }
824 }
825 return success;
826 }
297 bool SetCgroupAction::ExecuteForProcess(uid_t uid, pid_t pid) const {
298 CacheUseResult result = UseCachedFd(ProfileAction::RCT_PROCESS, pid);
299 if (result != ProfileAction::UNUSED) {
300 return result == ProfileAction::SUCCESS;
301 }
302
303 // fd was not cached or cached fd can't be used
>>通过GetProcsFilePath方法获取操作路径
304 std::string procs_path = controller()->GetProcsFilePath(path_, uid, pid);
305 unique_fd tmp_fd(TEMP_FAILURE_RETRY(open(procs_path.c_str(), O_WRONLY | O_CLOEXEC)));
306 if (tmp_fd < 0) {
307 PLOG(WARNING) << "Failed to open " << procs_path;
308 return false;
309 }
>>同样是通过AddTidToCgroup方法将pid写入对应文件节点
310 if (!AddTidToCgroup(pid, tmp_fd, controller()->name())) {
311 LOG(ERROR) << "Failed to add task into cgroup";
312 return false;
313 }
314
315 return true;
316 }
/system/core/libprocessgroup/cgroup_map.cpp
50 static constexpr const char* CGROUP_PROCS_FILE = "/cgroup.procs";
97 std::string CgroupController::GetProcsFilePath(const std::string& rel_path, uid_t uid,
98 pid_t pid) const {
99 std::string proc_path(path());
100 proc_path.append("/").append(rel_path);
>>把proc_path中的"<pid>"替换为传入的pid、将"<uid>"替换为传入的uid
101 proc_path = regex_replace(proc_path, std::regex("<uid>"), std::to_string(uid));
102 proc_path = regex_replace(proc_path, std::regex("<pid>"), std::to_string(pid));
103
>>对于CPUSET_SP_BACKGROUND,proc_path中并无"<pid>"、"<uid>"
>>CGROUP_PROCS_FILE="/cgroup.procs"
>>所以最终proc_path="/dev/cpuset/background/cgroup.procs"
104 return proc_path.append(CGROUP_PROCS_FILE);
105 }
四、底层逻辑
1.sched_setaffinity
android中可以通过设置cpu亲和性来指定task在哪些cpu上运行,从而达到提升性能或节约能耗的目的。而设置cpu亲和性的底层逻辑,则是用户空间通过系统调用执行kernel中的sched_setaffinity函数来设置线程对应task_struct的cpus_mask,并迁移task到cpus_mask指定的cpu上运行(前提是当前task运行的cpu不在cpus_mask允许的范围)。在设置cpus_mask前,用户空间传入的new_mask还会与cpuset子系统的mask进行位与运算取交集,使得new_mask需要同时满足cpuset的限制。
/kernel/sched/core.c
/**
* sys_sched_setaffinity - set the CPU affinity of a process
* @pid: pid of the process
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to the new CPU mask
*
* Return: 0 on success. An error code otherwise.
*/
SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
unsigned long __user *, user_mask_ptr)
{
cpumask_var_t new_mask;
int retval;
if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
return -ENOMEM;
retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
if (retval == 0)
>>从用户空间经syscall执行到kernel中对应方法
retval = sched_setaffinity(pid, new_mask);
free_cpumask_var(new_mask);
return retval;
}
long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
{
cpumask_var_t cpus_allowed, new_mask;
struct task_struct *p;
int retval;
rcu_read_lock();
>>根据pid获取对应的task_struct
>>这里的pid实则为tid,线程在kernel中的存在形式为task_struct
p = find_process_by_pid(pid);
if (!p) {
rcu_read_unlock();
return -ESRCH;
}
...
>>取出cpuset中task对应的cpumask
cpuset_cpus_allowed(p, cpus_allowed);
>>将cpuset中的cpumask与userspace下发的cpumask进行与运算(取交集)得到new_mask
cpumask_and(new_mask, in_mask, cpus_allowed);
...
#endif
again:
>>将最终的cpumask(new_mask)传入,执行__set_cpus_allowed_ptr逻辑去设置task对应的cpu
retval = __set_cpus_allowed_ptr(p, new_mask, true);
if (!retval) {
/*
* Change a given task's CPU affinity. Migrate the thread to a
* proper CPU and schedule it away if the CPU it's executing on
* is removed from the allowed bitmask.
*
* NOTE: the caller must have a valid reference to the task, the
* task must not exit() & deallocate itself prematurely. The
* call is not atomic; no spinlocks may be held.
*/
static int __set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask, bool check)
{
const struct cpumask *cpu_valid_mask = cpu_active_mask;
unsigned int dest_cpu;
struct rq_flags rf;
struct rq *rq;
int ret = 0;
>>获取task_struct对应的running queue
rq = task_rq_lock(p, &rf);
update_rq_clock(rq);
if (p->flags & PF_KTHREAD) {
/*
* Kernel threads are allowed on online && !active CPUs
*/
cpu_valid_mask = cpu_online_mask;
}
/*
* Must re-check here, to close a race against __kthread_bind(),
* sched_setaffinity() is not guaranteed to observe the flag.
*/
>>如果不允许改变当前task_struct所占的cpu资源,则直接退出
if (check && (p->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
goto out;
}
>>如果要求设置的new_mask与原有的cpus_mask相同,则直接退出
if (cpumask_equal(&p->cpus_mask, new_mask))
goto out;
dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
if (dest_cpu >= nr_cpu_ids) {
ret = -EINVAL;
goto out;
}
>>将new_mask设置到task_struct对应的cpus_mask中
do_set_cpus_allowed(p, new_mask);
if (p->flags & PF_KTHREAD) {
/*
* For kernel threads that do indeed end up on online &&
* !active we want to ensure they are strict per-CPU threads.
*/
WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
!cpumask_intersects(new_mask, cpu_active_mask) &&
p->nr_cpus_allowed != 1);
}
/* Can the task run on the task's current CPU? If so, we're done */
>>如果task当前正运行在new_mask所允许的cpu上,则已经完成affinity的设置,直接退出
if (cpumask_test_cpu(task_cpu(p), new_mask))
goto out;
>>运行到此处,说明当前task没有运行在new_mask所允许的cpu上,需要根据当前task的状态去做一些动作
>>如果当前task正在running或者wake
if (task_running(rq, p) || p->state == TASK_WAKING) {
struct migration_arg arg = { p, dest_cpu };
/* Need help from migration thread: drop lock and wait. */
task_rq_unlock(rq, p, &rf);
>>通过stop_one_cpu停止并迁移task到允许的cpu上
>>其中,cpu_of(rq)为将要stop的cpu,migration_cpu_stop是将要执行、用于迁移cpu的函数
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
return 0;
} else if (task_on_rq_queued(p)) {
/*
* OK, since we're going to drop the lock immediately
* afterwards anyway.
*/
>>如果当前task仍在runqueue中排队(即可运行但未在运行),则直接迁移task到允许的cpu上
rq = move_queued_task(rq, &rf, p, dest_cpu);
}
out:
task_rq_unlock(rq, p, &rf);
return ret;
}
void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
struct rq *rq = task_rq(p);
bool queued, running;
lockdep_assert_held(&p->pi_lock);
queued = task_on_rq_queued(p);
running = task_current(rq, p);
...
>>执行set_cpus_allowed函数修改task对应的属性
p->sched_class->set_cpus_allowed(p, new_mask);
if (queued)
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
if (running)
set_next_task(rq, p);
}
sched_class是task对应的调度策略(比如rt、cfs等),一般调度策略对应的set_cpus_allowed方法即为set_cpus_allowed_common。
void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
{
>>将传入的new_mask复制给task对应的cpus_mask
cpumask_copy(&p->cpus_mask, new_mask);
>>同时更新属性值nr_cpus_allowed
p->nr_cpus_allowed = cpumask_weight(new_mask);
}
2.sched_setattr
kernel中给task分配运行的cpu时会参考task对应的utilization,如果该值越大,则kernel会判断该task是一个较"大"的task,会倾向于将该task分配给最大运行频率更高的大核上;反之,如果该值越小,则kernel会判断该task是一个较"小"的task,那么为了功耗等方面的考虑,会倾向于将task分配给最大运行频率更低的小核上去运行。简单来说就是task对应的utilization越大,就越倾向于上大核, 而uclamp主要是用来钳制或截断系统计算出来的task对应的utilization,使其utilization能够受到用户空间的调控。其中,uclamp.min是用于设置utilization的下限,而uclamp.max是用于设置utilization的上限。一般情况下,想要将某个线程在小核上运行,则去修改降低线程对应的uclamp.max;如果想要增加在大核上运行的概率,则修改增加线程对应的uclamp.min值。而android中一般是通过系统调用去执行kernel中的sched_setattr函数来设置线程对应的uclamp参数。
需要说明的是,uclamp机制原本并不用于绑核,只是如果将uclamp.min与uclamp.max均设置为1024(1024为用户空间最大值,kernel中会归一化为0-100的区间),实际上也能实现接近于绑核的效果。
/kernel/sched/core.c
/**
* sys_sched_setattr - same as above, but with extended sched_attr
* @pid: the pid in question.
* @uattr: structure containing the extended parameters.
* @flags: for future extension.
*/
SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
unsigned int, flags)
{
struct sched_attr attr;
struct task_struct *p;
int retval;
...
rcu_read_lock();
retval = -ESRCH;
>>根据pid获取对应的task_struct
p = find_process_by_pid(pid);
if (likely(p))
get_task_struct(p);
rcu_read_unlock();
if (likely(p)) {
>>从用户空间经syscall执行到kernel中对应方法sched_setattr
retval = sched_setattr(p, &attr);
put_task_struct(p);
}
return retval;
}
int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
{
>>进一步执行__sched_setscheduler函数
return __sched_setscheduler(p, attr, true, true);
}
static int __sched_setscheduler(struct task_struct *p,
const struct sched_attr *attr,
bool user, bool pi)
{
int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
MAX_RT_PRIO - 1 - attr->sched_priority;
...
__setscheduler(rq, p, attr, pi);
>>通过函数__setscheduler_uclamp来设置task对应的uclamp值
__setscheduler_uclamp(p, attr);
static void __setscheduler_uclamp(struct task_struct *p,
const struct sched_attr *attr)
{
enum uclamp_id clamp_id;
...
>>如果从用户空间传来的参数attr中不包含SCHED_FLAG_UTIL_CLAMP这个flag,则直接return
if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
return;
>>如果flag中包含SCHED_FLAG_UTIL_CLAMP_MIN,则允许设置用户空间传来的uclamp.min值
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
attr->sched_util_min, true);
}
>>如果flag中包含SCHED_FLAG_UTIL_CLAMP_MAX,则允许设置用户空间传来的uclamp.max值
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
attr->sched_util_max, true);
}
}
值得一提的是,根据上面的代码逻辑,用户空间设置uclamp值一般需要同时设置SCHED_FLAG_UTIL_CLAMP这个flag。如果只是设置task对应的uclamp.min值,则也可以只添加SCHED_FLAG_UTIL_CLAMP_MIN这个flag;如果只是设置uclamp.max,则也可以通过设置SCHED_FLAG_UTIL_CLAMP_MAX这个flag来代替SCHED_FLAG_UTIL_CLAMP。而当设置了SCHED_FLAG_UTIL_CLAMP这个flag时,则说明同时修改task对应的uclamp.min与uclamp.max值。
/include/uapi/linux/sched.h
#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
SCHED_FLAG_KEEP_PARAMS)
#define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
SCHED_FLAG_UTIL_CLAMP_MAX)
/* Actually do priority change: must hold pi & rq lock. */
static void __setscheduler(struct rq *rq, struct task_struct *p,
const struct sched_attr *attr, bool keep_boost)
{
/*
* If params can't change scheduling class changes aren't allowed
* either.
*/
>>如果flag中包含SCHED_FLAG_KEEP_PARAMS,则不再执行后续的__setscheduler_params逻辑
if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)
return;
__setscheduler_params(p, attr);
五、taskset
taskset命令可以用来设置线程/进程对应的cpu亲核性,这个可以用于开发者提前验证绑核方案的有效性,其底层原理也是通过系统调用执行kernel中sched_setaffinity函数逻辑去设置task_struct对应的cpus_mask。常用的操作与命令如下:
设置tid对应线程的cpus_mask,其中cpus_mask为十六进制:
taskset -p [cpus_mask] [tid]
获取tid对应线程的cpus_mask:
taskset -p [tid]
设置pid对应进程中所有线程的cpus_mask,其中参数a表示对进程中的所有线程生效:
taskset -pa [cpus_mask] [pid]