Linux Process

EEEFFEE·2023년 11월 27일
0

Armv8 Architecture

목록 보기
4/15

23.11.23 최초 작성

1. 프로세스 자료구조

1.1 stack 공간

  • 64bit Armv8 기준 프로세스의 커널 스택 크기는 0x4000(16KB)

  • IRQ Stack : 인터럽트를 처리하기 위한 프로세스의 스택 공간
    (크기 0x4000보다 클 수 있음)

1.1.1 Issue

  • Stack Overflow, out-of-bound, use-after-free

  • 대응 : Stack canary, compile option, 디버깅, 코드 리뷰, 스택 내용 분석

1.2 current macro

  • 현재 실행 중인 프로세스의 task_struct 주소를 알 수 있는 매크로

  • get_current : sp_el0 레지스터에 저장된 값(커널 모드에서는 현재 프로세스의 task_struct 주소)을 struct task_struct 포인터로 캐스팅해 반환

  • Context switching시 X1은 앞으로 CPU를 점유하며 실행할 프로세스의 task_struct주소 저장

  • sp_el0는 이제부터 CPU를 점유하며 실행되는 프로세스의 task_struct 주소로 업데이트


//https://elixir.bootlin.com/linux/v5.15.30/source/arch/arm64/include/asm/current.h#L15

#ifndef __ASM_CURRENT_H
#define __ASM_CURRENT_H

#include <linux/compiler.h>

#ifndef __ASSEMBLY__

struct task_struct;

/*
 * get_current() - return the task_struct pointer of the running task.
 *
 * Reads the SP_EL0 system register and returns its value cast to
 * struct task_struct *, i.e. the register is expected to already hold
 * that pointer when this runs.
 *
 * We don't use read_sysreg() as we want the compiler to cache the value where
 * possible.
 */
static __always_inline struct task_struct *get_current(void)
{
	unsigned long sp_el0;

	/* Plain (non-volatile) asm: the compiler may reuse an earlier read. */
	asm ("mrs %0, sp_el0" : "=r" (sp_el0));

	return (struct task_struct *)sp_el0;
}

/* 'current' expands to a get_current() call at every use site. */
#define current get_current()

#endif /* __ASSEMBLY__ */

#endif /* __ASM_CURRENT_H */

  • :


1.2 task_struct

  • 프로세스의 정보를 저장하는 자료구조로 task descriptor라고 함 (아키텍처와 무관)
  • 이름, pid, 관계 등의 정보 저장
  • 프로세스 생성 시 커널이 프로세스에 부여

//https://elixir.bootlin.com/linux/v5.15.30/source/include/linux/sched.h

/*
 * Task descriptor (excerpt). Only the leading fields are shown; the
 * trailing "..." stands for the fields omitted from this excerpt.
 */
struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/*
	 * For reasons of header soup (see current_thread_info()), this
	 * must be the first element of task_struct.
	 */
	struct thread_info		thread_info;
#endif
	/* Task state word — NOTE(review): value encoding is not shown here. */
	unsigned int			__state;

#ifdef CONFIG_PREEMPT_RT
	/* saved state for "spinlock sleepers" */
	unsigned int			saved_state;
#endif

	/*
	 * This begins the randomizable portion of task_struct. Only
	 * scheduling-critical items should be added above here.
	 */
	randomized_struct_fields_start

	/* Presumably points at this task's kernel stack — TODO confirm. */
	void				*stack;
	/* Reference count for this structure (see get_task_struct() users). */
	refcount_t			usage;
	/* Per task flags (PF_*), defined further below: */
	unsigned int			flags;
	unsigned int			ptrace;

#ifdef CONFIG_SMP
	int				on_cpu;
	struct __call_single_node	wake_entry;
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/* Current CPU: */
	unsigned int			cpu;
#endif
	/* Wakeup bookkeeping; exact semantics are not visible in this excerpt. */
	unsigned int			wakee_flips;
	unsigned long			wakee_flip_decay_ts;
	struct task_struct		*last_wakee;
	...
}

1.3 thread_info / thread_struct

  • 스레드의 정보를 저장하는 자료구조 (아키텍처에 의존적)
  • thread_info는 task_struct의 첫 번째 필드이므로 task_struct의 시작 주소에 위치 (CONFIG_THREAD_INFO_IN_TASK 설정 시)

//https://elixir.bootlin.com/linux/v5.15.30/source/arch/arm64/include/asm/processor.h

/*
 * Architecture-specific per-thread state (excerpt). The trailing "..."
 * stands for the members omitted from this excerpt.
 */
struct thread_struct {
	struct cpu_context	cpu_context;	/* cpu context */
	/*
	 * Whitelisted fields for hardened usercopy:
	 * Maintainers must ensure manually that this contains no
	 * implicit padding.
	 */
	struct {
		unsigned long	tp_value;	/* TLS register */
		unsigned long	tp2_value;
		/* FP/SIMD register state — presumably the user-visible copy. */
		struct user_fpsimd_state fpsimd_state;
	} uw;
    ...
}

/*
 * Register snapshot embedded in struct thread_struct as 'cpu_context'.
 * Holds x19-x28 plus fp, sp and pc — presumably the registers that must be
 * preserved across a context switch (x19-x28 are the AAPCS64 callee-saved
 * registers); TODO confirm against the switch code.
 */
struct cpu_context {
	unsigned long x19;
	unsigned long x20;
	unsigned long x21;
	unsigned long x22;
	unsigned long x23;
	unsigned long x24;
	unsigned long x25;
	unsigned long x26;
	unsigned long x27;
	unsigned long x28;
	unsigned long fp;	/* frame pointer */
	unsigned long sp;	/* stack pointer */
	unsigned long pc;	/* program counter */
};

2. 프로세스 생성 흐름

2.1 User mode & Kernel mode


// https://elixir.bootlin.com/linux/v5.10.60/source/arch/arm64/include/asm/ptrace.h
// register 상태 확인해 (pstate) user_mode, kernel_mode 구분

/*
 * user_mode(regs): non-zero when the mode bits of the saved pstate
 * (regs->pstate & PSR_MODE_MASK) equal PSR_MODE_EL0t.
 */
#define user_mode(regs)	\
	(((regs)->pstate & PSR_MODE_MASK) == PSR_MODE_EL0t)
/* processor_mode(regs): the raw mode bits of the saved pstate. */
#define processor_mode(regs) \
	((regs)->pstate & PSR_MODE_MASK)

//https://elixir.bootlin.com/linux/v5.10.60/source/arch/arm64/kernel/traps.c

/*
 * dump_kernel_instr() - print a "Code:" line for the faulting context.
 * @lvl:  prefix placed in front of the printk format (printed via "%s").
 * @regs: saved register state; the instruction pointer is taken from it.
 *
 * Does nothing when @regs describes user mode. The part of the body that
 * fills 'str' is omitted ("...") in this excerpt.
 */
static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
{
	unsigned long addr = instruction_pointer(regs);
	/*
	 * sizeof("00000000 ") is 10 (9 characters + NUL), so the buffer is
	 * 10 * 5 + 2 + 1 = 53 bytes: five hex-word fields plus spare bytes.
	 */
	char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
	int i;

	/* Only kernel-mode contexts are dumped. */
	if (user_mode(regs))
		return;

	...
	printk("%sCode: %s\n", lvl, str);
}

2.2 생성 흐름


// User mode
fork()

// Kernel mode
__arm64_sys_clone
kernel_clone
copy_process

  • __arm64_sys_clone : kernel_clone 함수 호출
//https://elixir.bootlin.com/linux/v5.15.30/source/kernel/fork.c

/*
 * clone() system call entry: packs the five user arguments into a
 * struct kernel_clone_args and returns whatever kernel_clone() returns.
 */
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
int __user *, parent_tidptr,
unsigned long, tls,
int __user *, child_tidptr)
{
	struct kernel_clone_args args = {
		/* Low 32 bits of clone_flags with the CSIGNAL bits removed. */
		.flags = (lower_32_bits(clone_flags) & ~CSIGNAL),
		/* pidfd and parent_tid are both taken from parent_tidptr. */
		.pidfd = parent_tidptr,
		.child_tid = child_tidptr,
		.parent_tid = parent_tidptr,
		/* The CSIGNAL bits of clone_flags carry the exit signal. */
		.exit_signal = (lower_32_bits(clone_flags) & CSIGNAL),
		.stack = newsp,
		.tls = tls,
		};
	return kernel_clone(&args);			// result of kernel_clone() is the syscall return value
}

  • kernel_clone : 프로세스를 생성하고 pid를 반환

https://elixir.bootlin.com/linux/v5.15.30/source/kernel/fork.c

/*
 * kernel_clone() - create a new task from @args and start it.
 * @args: clone parameters (flags, exit signal, tid pointers, ...).
 *
 * Returns the value of pid_vnr() for the new task, -EINVAL for the
 * conflicting CLONE_PIDFD/CLONE_PARENT_SETTID case checked below, or the
 * error encoded in the pointer returned by copy_process().
 */
pid_t kernel_clone(struct kernel_clone_args *args)
{
	u64 clone_flags = args->flags;
	struct completion vfork;
	struct pid *pid;
	struct task_struct *p;
	int trace = 0;
	pid_t nr;

	/*
	 * For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument
	 * to return the pidfd. Hence, CLONE_PIDFD and CLONE_PARENT_SETTID are
	 * mutually exclusive. With clone3() CLONE_PIDFD has grown a separate
	 * field in struct clone_args and it still doesn't make sense to have
	 * them both point at the same memory location. Performing this check
	 * here has the advantage that we don't need to have a separate helper
	 * to check for legacy clone().
	 */
	if ((args->flags & CLONE_PIDFD) &&
	    (args->flags & CLONE_PARENT_SETTID) &&
	    (args->pidfd == args->parent_tid))
		return -EINVAL;

	/*
	 * Determine whether and which event to report to ptracer.  When
	 * called from kernel_thread or CLONE_UNTRACED is explicitly
	 * requested, no event is reported; otherwise, report if the event
	 * for the type of forking is enabled.
	 */
	if (!(clone_flags & CLONE_UNTRACED)) {
		if (clone_flags & CLONE_VFORK)
			trace = PTRACE_EVENT_VFORK;
		else if (args->exit_signal != SIGCHLD)
			trace = PTRACE_EVENT_CLONE;
		else
			trace = PTRACE_EVENT_FORK;

		/* Drop the event again when it is not enabled for current. */
		if (likely(!ptrace_event_enabled(current, trace)))
			trace = 0;
	}

	/* Build the new task; p is an ERR_PTR-style value on failure. */
	p = copy_process(NULL, trace, NUMA_NO_NODE, args);
	add_latent_entropy();

	if (IS_ERR(p))
		return PTR_ERR(p);

	/*
	 * Do this prior waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	trace_sched_process_fork(current, p);

	/* Take the pid reference now; it is released by put_pid() below. */
	pid = get_task_pid(p, PIDTYPE_PID);
	nr = pid_vnr(pid);

	/* Store the new task's id at the user address in args->parent_tid. */
	if (clone_flags & CLONE_PARENT_SETTID)
		put_user(nr, args->parent_tid);

	/* vfork: set up the completion before the child can start running. */
	if (clone_flags & CLONE_VFORK) {
		p->vfork_done = &vfork;
		init_completion(&vfork);
		get_task_struct(p);
	}

	wake_up_new_task(p);

	/* forking complete and child started to run, tell ptracer */
	if (unlikely(trace))
		ptrace_event_pid(trace, pid);

	/* vfork: wait on the completion, then report VFORK_DONE. */
	if (clone_flags & CLONE_VFORK) {
		if (!wait_for_vfork_done(p, &vfork))
			ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
	}

	put_pid(pid);
	return nr;
}

  • copy_process : 프로세스의 정보를 복사하는 함수

https://elixir.bootlin.com/linux/v5.15.30/source/kernel/fork.c

/*
 * copy_process() - build a new task as a copy of the current one (excerpt).
 * @pid:   struct pid pointer supplied by the caller (kernel_clone passes NULL).
 * @trace: ptrace event selected by the caller.
 * @node:  node id forwarded to dup_task_struct().
 * @args:  clone parameters; args->flags selects what each copy_*() does.
 *
 * The "..." lines mark code omitted from this excerpt, including the
 * fork_out / bad_fork_cleanup_* labels targeted by the gotos below and the
 * final return. NOTE(review): the labels presumably undo the earlier steps
 * in reverse order — confirm against the full source.
 */
static __latent_entropy struct task_struct *copy_process(
					struct pid *pid,
					int trace,
					int node,
					struct kernel_clone_args *args)
{
	int pidfd = -1, retval;
	struct task_struct *p;
	struct multiprocess_signals delayed;
	struct file *pidfile = NULL;
	u64 clone_flags = args->flags;
	struct nsproxy *nsp = current->nsproxy;

	...

	/*
	 * Force any signals received before this point to be delivered
	 * before the fork happens.  Collect up signals sent to multiple
	 * processes that happen during the fork and delay them so that
	 * they appear to happen after the fork.
	 */
	sigemptyset(&delayed.signal);
	INIT_HLIST_NODE(&delayed.node);

	spin_lock_irq(&current->sighand->siglock);
	/* 'delayed' is only queued when not creating a thread (no CLONE_THREAD). */
	if (!(clone_flags & CLONE_THREAD))
		hlist_add_head(&delayed.node, &current->signal->multiprocess);
	recalc_sigpending();
	spin_unlock_irq(&current->sighand->siglock);
	/* A pending signal aborts the fork with -ERESTARTNOINTR. */
	retval = -ERESTARTNOINTR;
	if (task_sigpending(current))
		goto fork_out;

	/* Duplicate current's task_struct; a NULL result means -ENOMEM. */
	retval = -ENOMEM;
	p = dup_task_struct(current, node);
	if (!p)
		goto fork_out;
	if (args->io_thread) {
		/*
		 * Mark us an IO worker, and block any signal that isn't
		 * fatal or STOP
		 */
		p->flags |= PF_IO_WORKER;
		siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
	}


	/* Perform scheduler related setup. Assign this task to a CPU. */
	retval = sched_fork(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_policy;

	retval = perf_event_init_task(p, clone_flags);
	if (retval)
		goto bad_fork_cleanup_policy;
	retval = audit_alloc(p);
	if (retval)
		goto bad_fork_cleanup_perf;
	/* copy all the process information */
	/*
	 * Each step below returns non-zero on failure and jumps to the label
	 * named after the previously completed step.
	 */
	shm_init_task(p);
	retval = security_task_alloc(p, clone_flags);
	if (retval)
		goto bad_fork_cleanup_audit;
	retval = copy_semundo(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_security;
	retval = copy_files(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_semundo;
	retval = copy_fs(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_files;
	retval = copy_sighand(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_fs;
	retval = copy_signal(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_sighand;
	retval = copy_mm(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_signal;
	retval = copy_namespaces(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_mm;
	retval = copy_io(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_namespaces;
	/* Architecture-specific thread setup: stack, stack size and TLS. */
	retval = copy_thread(clone_flags, args->stack, args->stack_size, p, args->tls);
	if (retval)
		goto bad_fork_cleanup_io;
	
    ...
}

3. 프로세스 종료

3.1 프로세스 종료 흐름

  • exit 호출 시 종료 흐름

// user mode
exit

// kernel mode
__arm64_sys_exit_group
do_group_exit
do_exit

  • kill 시그널 수신 시 종료 흐름

//user mode

//kernel mode
el0_svc
do_notify_resume
do_signal
get_signal
do_group_exit
do_exit

  • __arm64_sys_exit_group, do_group_exit : do_exit 함수 호출

//https://elixir.bootlin.com/linux/v5.15.30/source/kernel/exit.c

/*
 * exit_group() system call entry. Keeps the low 8 bits of error_code,
 * shifts them left by 8 and passes the result to do_group_exit().
 */
SYSCALL_DEFINE1(exit_group, int, error_code)
{
	do_group_exit((error_code & 0xff) << 8);
	/* NOTREACHED */
	/* Present only to satisfy the syscall's return type. */
	return 0;
}

/*
 * do_group_exit() - group-exit path that ends in do_exit() (excerpt).
 * @exit_code: exit code forwarded to do_exit().
 *
 * Declared void: the definition had no return type, and implicit int is
 * not valid since C99. Control is not expected to come back from do_exit().
 * The "..." marks code omitted from this excerpt.
 */
void do_group_exit(int exit_code)
{
	struct signal_struct *sig = current->signal;
	...
	do_exit(exit_code);
	/* NOTREACHED */
}

  • do_exit : 프로세스를 종료하고 리소스를 반환하는 함수

//https://elixir.bootlin.com/linux/v5.15.30/source/kernel/exit.c

/*
 * do_exit() - terminate the current task and release its resources (excerpt).
 * @code: exit code stored in tsk->exit_code.
 *
 * Marked __noreturn; the last visible call is do_task_dead(). The "..."
 * lines mark code omitted from this excerpt (group_dead, for example, is
 * assigned in an omitted part).
 */
void __noreturn do_exit(long code)
{
	struct task_struct *tsk = current;
	int group_dead;
	/*
	 * We can get here from a kernel oops, sometimes with preemption off.
	 * Start by checking for critical errors.
	 * Then fix up important state like USER_DS and preemption.
	 * Then do everything else.
	 */
	WARN_ON(blk_needs_flush_plug(tsk));
    // do_exit() must not be entered from interrupt context: panic if it is
    
	if (unlikely(in_interrupt()))			
		panic("Aiee, killing interrupt handler!");
	...
	/* PF_EXITING already set: do_exit() was re-entered for this task. */
	if (unlikely(tsk->flags & PF_EXITING)) {
		pr_alert("Fixing recursive fault but reboot is needed!\n");
		futex_exit_recursive(tsk);
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule();
	}
	
	io_uring_files_cancel();
	exit_signals(tsk);  /* sets PF_EXITING */

	/* sync mm's RSS info before statistics gathering */
	if (tsk->mm)
		sync_mm_rss(tsk->mm);
	acct_update_integrals(tsk);
    
	...
	tsk->exit_code = code;
	taskstats_exit(tsk, group_dead);
    
    ...
	/* Release the task's resources: mm, then sem, shm, files, fs. */
	exit_mm();

	if (group_dead)
		acct_process();
	trace_sched_process_exit(tsk);
	
	exit_sem(tsk);
	exit_shm(tsk);
	exit_files(tsk);
	exit_fs(tsk);
	...
	do_task_dead();
}

4. kernel thread

  • 커널 공간에서만 실행되는 프로세스

  • 백그라운드에서 실행되며 시스템 메모리나 전원 제어

  • 유저 공간과 상호작용하지 않으며 모든 동작을 커널에서 직접 관리

  • 시스템이 부팅될 때 생성되고 시스템이 종료될 때 소멸

4.1 워커 스레드 생성


//https://elixir.bootlin.com/linux/v5.10.60/source/kernel/workqueue.c
/*
 * create_worker() - create a worker and its kernel thread for @pool (excerpt).
 * @pool: worker pool; pool->node is forwarded to kthread_create_on_node().
 *
 * The "..." lines mark code omitted from this excerpt, including the
 * allocation of 'worker', the setup of id_buf and the return.
 */
static struct worker *create_worker(struct worker_pool *pool)
{
	struct worker *worker = NULL;
	...
	worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
					      "kworker/%s", id_buf);
                          // worker_thread is passed as the thread function, with worker as its argument
	...
}

static int worker_thread(void *__worker)
{
	struct worker *worker = __worker;
	struct worker_pool *pool = worker->pool;

	/* tell the scheduler that this is a workqueue worker */
	set_pf_worker(true);
woke_up:
	raw_spin_lock_irq(&pool->lock);

4.2 생성 과정


//커널 스레드 생성 요청

kthread_create
kthread_create_on_node
__kthread_create_on_node

//스케줄링
//kthreadd 프로세스

kthreadd
create_kthread
kernel_thread
kernel_clone

4.3 커널 스레드 종류

  • kthreadd 프로세스
    • 모든 커널 스레드의 부모 프로세스
    • kthreadd : 핸들러 함수이며 커널 스레드 생성함
  • ksoftirqd 프로세스
    • Soft IRQ를 위한 프로세스
    • run_ksoftirqd : Soft IRQ 서비스 실행, __do_softirq() 함수에서 깨움
  • threaded IRQ
    • 인터럽트 후반부 처리를 위한 프로세스

0개의 댓글