小弟最近研究了一段時間的ARM Linux,想把進程管理方面的感受跟大家交流下,不對的地方多多指點
Process Creation and Termination
Process Scheduling and Dispatching
Process Switching
Process Synchronization and support for interprocess communication
Management of process control block
--------from <Operating system:internals and design principles>
進程調度
Linux2.4.x是一個基於非搶佔式的多任務的分時操作系統,雖然在用戶進程的調度上採用搶佔式策略,但在內核中還是採用了輪轉的方法,如果有個內核態的線程惡性佔有CPU不釋放,那系統無法從中解脫出來,所以實時性並不是很強。這種情況有望在Linux 2.6版本中得到改善,在2.6版本中採用了搶佔式的調度策略。
內核中根據任務的實時程度提供了三種調度策略:
需要說明的是,SCHED_FIFO和SCHED_RR兩種調度策略之間沒有優先順序上的區別,主要的區別是任務的大小上。另外,task_struct結構中的policy中還包含了一個SCHED_YIELD位,置位時表示該進程主動放棄CPU。
在上述三種調度策略的基礎上,進程依照優先順序的高低被系統分別調度。優先順序是一些簡單的整數,它代表了為決定應該允許哪一個進程使用CPU的資源時判斷方便而賦予進程的權值——優先順序越高,它得到CPU時間的機會也就越大。
在Linux中,非實時進程有兩種優先順序,一種是靜態優先順序,另一種是動態優先順序。實時進程又增加了第三種優先順序,實時優先順序。
在每個tick到來的時候(也就是時鐘中斷發生),系統減小當前佔有CPU的進程的counter,如果counter減小到0,則將need_resched置1,中斷返回過程中進行調度。update_process_times()為時鐘中斷處理程序調用的一個子函數:
/*
 * Per-tick time accounting for the process currently on this CPU,
 * called from the timer interrupt handler.  Decrements the running
 * process's time-slice counter; when it reaches zero, need_resched
 * is set so that schedule() runs on the way back from the interrupt.
 *
 * user_tick: 1 if the tick was taken while executing in user mode,
 *            0 if taken in kernel (system) mode.
 */
void update_process_times(int user_tick)
{
struct task_struct *p = current;
/* system is the complement of user_tick: 1 when the tick hit kernel mode */
int cpu = smp_processor_id(), system = user_tick ^ 1;
update_one_process(p, user_tick, system, cpu);
if (p->pid) { /* pid 0 is the idle task: no time-slice accounting for it */
if (--p->counter <= 0) {
p->counter = 0;
p->need_resched = 1; /* slice used up: reschedule on interrupt return */
}
/* charge the tick to nice or user statistics depending on priority */
if (p->nice > 0)
kstat.per_cpu_nice[cpu] += user_tick;
else
kstat.per_cpu_user[cpu] += user_tick;
kstat.per_cpu_system[cpu] += system;
} else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
/* idle task: only count system time spent in softirq/nested irq */
kstat.per_cpu_system[cpu] += system;
}
Linux中進程的調度是在schedule()函數中實現的,該函數在下面的ARM彙編片斷中被調用到:
/*
 * This is the fast syscall return path. We do as little as
 * possible here, and this includes saving r0 back into the SVC
 * stack.
 */
ret_fast_syscall:
ldr r1, [tsk, #TSK_NEED_RESCHED]
ldr r2, [tsk, #TSK_SIGPENDING]
teq r1, #0 @ need_resched || sigpending
teqeq r2, #0
bne slow
fast_restore_user_regs
/*
 * Ok, we need to do extra processing, enter the slow path.
 */
slow: str r0, [sp, #S_R0+S_OFF]! @ returned r0
b 1f
/*
 * "slow" syscall return path. "why" tells us if this was a real syscall.
 */
reschedule:
bl SYMBOL_NAME(schedule)
ENTRY(ret_to_user)
ret_slow_syscall:
ldr r1, [tsk, #TSK_NEED_RESCHED]
ldr r2, [tsk, #TSK_SIGPENDING]
1: teq r1, #0 @ need_resched => schedule()
bne reschedule @ call schedule() if a reschedule is needed
teq r2, #0 @ sigpending => do_signal()
blne __do_signal
restore_user_regs
而這段代碼在中斷返回或者系統調用返回中反覆被調用到。
1. 進程狀態轉換時: 如進程終止,睡眠等,當進程要調用sleep()或exit()等函數使進程狀態發生改變時,這些函數會主動調用schedule()轉入進程調度。
2. 可運行隊列中增加新的進程時;
@ Entry point for a newly forked task the first time it is scheduled.
ENTRY(ret_from_fork)
bl SYMBOL_NAME(schedule_tail)
get_current_task tsk
ldr ip, [tsk, #TSK_PTRACE] @ check for syscall tracing
mov why, #1
tst ip, #PT_TRACESYS @ are we tracing syscalls?
beq ret_slow_syscall
mov r1, sp
mov r0, #1 @ trace exit [IP = 1]
bl SYMBOL_NAME(syscall_trace)
b ret_slow_syscall @ jump to the code fragment above
3. 在時鐘中斷到來後:Linux初始化時,設定系統定時器的周期為10毫秒。當時鐘中斷發生時,時鐘中斷服務程序timer_interrupt立即調用時鐘處理函數do_timer( ),在do_timer()會將當前進程的counter減1,如果counter為0則置need_resched標誌,在從時鐘中斷返回的過程中會調用schedule.
4. 進程從系統調用返回到用戶態時;判斷need_resched標誌是否置位,若是則轉入執行schedule()。系統調用實際上就是通過軟中斷實現的,下面是ARM平台下軟中斷處理代碼。
@ SWI (software interrupt) entry: system calls from user space land here.
.align 5
ENTRY(vector_swi)
save_user_regs
zero_fp
get_scno
enable_irqs ip
str r4, [sp, #-S_OFF]! @ push fifth arg
get_current_task tsk
ldr ip, [tsk, #TSK_PTRACE] @ check for syscall tracing
bic scno, scno, #0xff000000 @ mask off SWI op-code
eor scno, scno, #OS_NUMBER << 20 @ check OS number
adr tbl, sys_call_table @ load syscall table pointer
tst ip, #PT_TRACESYS @ are we tracing syscalls?
bne __sys_trace
adrsvc al, lr, ret_fast_syscall @ load the return address so that after the
@ call below we return to ret_fast_syscall in the code fragment above
cmp scno, #NR_syscalls @ check upper syscall limit
ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine
add r1, sp, #S_OFF
2: mov why, #0 @ no longer a real syscall
cmp scno, #ARMSWI_OFFSET
eor r0, scno, #OS_NUMBER << 20 @ put OS number back
bcs SYMBOL_NAME(arm_syscall)
b SYMBOL_NAME(sys_ni_syscall) @ not private func
5. 內核處理完中斷後,進程返回到用戶態。
6. 進程主動調用schedule()請求進行進程調度。
schedule()函數分析:
/*
 * 'schedule()' is the scheduler function. It's a very simple and nice
 * scheduler: it's not perfect, but certainly works for most things.
 *
 * The goto is "interesting".
 *
 * NOTE!! Task 0 is the 'idle' task, which gets called when no other
 * tasks can run. It can not be killed, and it cannot sleep. The 'state'
 * information in task[0] is never used.
 */
asmlinkage void schedule(void)
{
	struct schedule_data * sched_data;
	struct task_struct *prev, *next, *p;
	struct list_head *tmp;
	int this_cpu, c;

	spin_lock_prefetch(&runqueue_lock);

	if (!current->active_mm) BUG();
need_resched_back:
	prev = current;
	this_cpu = prev->processor;

	/* Scheduling from interrupt context is a bug. */
	if (unlikely(in_interrupt())) {
		printk("Scheduling in interrupt\n");
		BUG();
	}

	release_kernel_lock(prev, this_cpu);

	/*
	 * 'sched_data' is protected by the fact that we can run
	 * only one process per CPU.
	 */
	sched_data = & aligned_data[this_cpu].schedule_data;

	spin_lock_irq(&runqueue_lock);

	/* move an exhausted RR process to be last.. */
	if (unlikely(prev->policy == SCHED_RR))
		/*
		 * Round-robin policy: if the time slice (counter) has run
		 * out, refill it from the task's nice value and move the
		 * task to the tail of the run queue.
		 */
		if (!prev->counter) {
			prev->counter = NICE_TO_TICKS(prev->nice);
			move_last_runqueue(prev);
		}

	switch (prev->state) {
	case TASK_INTERRUPTIBLE:
		/*
		 * A TASK_INTERRUPTIBLE task with a signal already pending
		 * is effectively woken up: set it back to TASK_RUNNING so
		 * it stays on the run queue.
		 */
		if (signal_pending(prev)) {
			prev->state = TASK_RUNNING;
			break;
		}
	default:
		del_from_runqueue(prev);
	case TASK_RUNNING:;
	}
	prev->need_resched = 0;

	/*
	 * this is the scheduler proper:
	 */
repeat_schedule:
	/*
	 * Default process to select..
	 */
	next = idle_task(this_cpu);
	c = -1000;
	list_for_each(tmp, &runqueue_head) {
		/*
		 * Walk the run queue looking for the task with the highest
		 * goodness; that task will get the CPU.
		 */
		p = list_entry(tmp, struct task_struct, run_list);
		if (can_schedule(p, this_cpu)) {
			/*
			 * In goodness(), a real-time task gets
			 * weight = 1000 + p->rt_priority, so real-time tasks
			 * always outrank non-real-time ones.
			 */
			int weight = goodness(p, this_cpu, prev->active_mm);
			/* strictly ">", not ">=": on a tie the earlier task wins */
			if (weight > c)
				c = weight, next = p;
		}
	}

	/* Do we need to re-calculate counters? */
	if (unlikely(!c)) {
		/*
		 * Every runnable task has exhausted its time slice:
		 * recompute the counter of every task in the system.
		 */
		struct task_struct *p;

		spin_unlock_irq(&runqueue_lock);
		read_lock(&tasklist_lock);
		for_each_task(p)
			p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
		read_unlock(&tasklist_lock);
		spin_lock_irq(&runqueue_lock);
		goto repeat_schedule;
	}

	/*
	 * from this point on nothing can prevent us from
	 * switching to the next task, save this fact in sched_data.
	 */
	sched_data->curr = next;
	task_set_cpu(next, this_cpu);
	spin_unlock_irq(&runqueue_lock);

	if (unlikely(prev == next)) {
		/* We won't go through the normal tail, so do this by hand */
		prev->policy &= ~SCHED_YIELD;
		goto same_process;
	}

	kstat.context_swtch++;
	/*
	 * there are 3 processes which are affected by a context switch:
	 *
	 * prev == .... ==> (last => next)
	 *
	 * It's the 'much more previous' 'prev' that is on next's stack,
	 * but prev is set to (the just run) 'last' process by switch_to().
	 * This might sound slightly confusing but makes tons of sense.
	 */
	prepare_to_switch();
	{
		struct mm_struct *mm = next->mm;
		struct mm_struct *oldmm = prev->active_mm;
		if (!mm) {
			/* kernel thread: borrow the old mm, no page-table switch */
			if (next->active_mm) BUG();
			next->active_mm = oldmm;
			atomic_inc(&oldmm->mm_count);
			enter_lazy_tlb(oldmm, next, this_cpu);
		} else {
			if (next->active_mm != mm) BUG();
			/* user process: switch the page tables */
			switch_mm(oldmm, mm, next, this_cpu);
		}

		if (!prev->mm) {
			prev->active_mm = NULL;
			mmdrop(oldmm);
		}
	}

	/*
	 * This just switches the register state and the stack.
	 */
	switch_to(prev, next, prev);
	__schedule_tail(prev);

same_process:
	reacquire_kernel_lock(current);
	if (current->need_resched)
		goto need_resched_back;
	return;
}
switch_mm中是進行頁表的切換,即將下一個的pgd的開始物理地址放入CP15中的C2寄存器。進程的pgd的虛擬地址存放在task_struct結構中的pgd指針中,通過__virt_to_phys宏可以轉變成物理地址。
/*
 * Switch the MMU to the next task's address space by loading its page
 * directory into the hardware translation-table base.  When the old and
 * new mm are the same object (e.g. a kernel thread borrowing the
 * previous task's mm) there is nothing to do.
 */
static inline void
switch_mm(struct mm_struct *prev, struct mm_struct *next,
	  struct task_struct *tsk, unsigned int cpu)
{
	if (prev == next)
		return;
	cpu_switch_mm(next->pgd, tsk);
}
/*
 * cpu_switch_mm(): install a process's page directory in the MMU.
 * pgd is a kernel virtual address, so it is first converted to a
 * physical address with __virt_to_phys before being handed to
 * cpu_set_pgd (which writes the translation-table base register).
 */
#define cpu_switch_mm(pgd,tsk) cpu_set_pgd(__virt_to_phys((unsigned long)(pgd)))
/*
 * cpu_get_pgd(): read the current translation-table base from CP15
 * register c2, mask off the low 14 bits to get the aligned base
 * address, and convert it back to a kernel virtual pgd_t pointer.
 */
#define cpu_get_pgd() \
({ \
unsigned long pg; \
__asm__("mrc p15, 0, %0, c2, c0, 0" \
: "=r" (pg)); \
pg &= ~0x3fff; \
(pgd_t *)phys_to_virt(pg); \
})
switch_to()完成進程上下文的切換,通過調用彙編函數__switch_to完成,其實現比較簡單,也就是保存prev進程的上下文信息,該上下文信息由context_save_struct結構描述,包括主要的寄存器,然後將next的上下文信息讀出。信息保存在task_struct中的thread.save中,TSS_SAVE標識了thread.save在task_struct中的位置。
/*
 * Register switch for ARMv3 and ARMv4 processors
 * r0 = previous, r1 = next, return previous.
 * previous and next are guaranteed not to be the same.
 *
 * The layout pushed on the SVC stack here matches
 * struct context_save_struct below: cpsr at the lowest address,
 * then r4-sl, fp, and the return address (lr, restored into pc).
 */
ENTRY(__switch_to)
stmfd sp!, {r4 - sl, fp, lr} @ Store most regs on stack
mrs ip, cpsr
str ip, [sp, #-4]! @ Save cpsr_SVC
str sp, [r0, #TSS_SAVE] @ Save sp_SVC
ldr sp, [r1, #TSS_SAVE] @ Get saved sp_SVC
ldr r2, [r1, #TSS_DOMAIN] @ next task's domain access value
ldr ip, [sp], #4 @ pop the cpsr saved for the next task
mcr p15, 0, r2, c3, c0 @ Set domain register
msr spsr, ip @ Save tasks CPSR into SPSR for this return
ldmfd sp!, {r4 - sl, fp, pc}^ @ Load all regs saved previously
/*
 * Register context saved on the kernel (SVC) stack by __switch_to
 * above.  The field order must match the push order there: cpsr is
 * pushed last and so sits at the lowest address, followed by r4-sl,
 * fp, and the saved return address.
 */
struct context_save_struct {
unsigned long cpsr; /* saved CPSR of the suspended task */
unsigned long r4;
unsigned long r5;
unsigned long r6;
unsigned long r7;
unsigned long r8;
unsigned long r9;
unsigned long sl;
unsigned long fp;
unsigned long pc; /* saved lr: address execution resumes at on switch-in */
};
[admin via 研發互助社區 ] ARM Linux進程調度已經有5277次圍觀
http://cocdig.com/docs/show-post-42436.html