
Architecture(ISA : Instruction set architecture)
: The parts of a processor design that one needs to understand for writing assembly/machine code
Microarchitecture
: Implementation of the architecture
ex). cache size, core frequency
Machine code
: The byte-level programs that a processor executes
Assembly code
: A text representation of machine code
movq src, dst
: move "quad-word(64 bits)" from src to dst
(source와 destination은 register가 될 수도, memory가 될 수도 있다)
addq src, dst
: dst += src
subq src, dst
: dst -= src
movq Source, Dest
movb, movw, movl, movq
(move byte, word, double word, quad word)
1, 2, 4, 8 bytes
8, 16, 32, 64 bits
movq src, dst
// Imm, Reg
movq $0x4, %rax			// temp = 0x4;
// Imm, Mem
movq $-147, (%rax)		// *p = -147
// Reg, Reg
movq %rax, %rdx			// temp2 = temp1
// Reg, Mem
movq %rax, (%rdx)		// *p = temp
// Mem, Reg
movq (%rax), %rdx		// temp = *p
주소를 통해 그 값에 접근하는 것을 역참조(dereference)라고 합니다
(Displacement form D(R): D is an integer constant; adding D to the address in R accesses another location, Mem[Reg[R] + D])
// void whatAmI(long *xp, long *yp)
// Exchanges the two 8-byte values that xp and yp point to; this is the
// assembly form of the C function swap().
// In:  %rdi = xp, %rsi = yp
// Overwrites: %rax, %rdx
whatAmI:
    movq    (%rdi), %rax        // t0 = *xp
    movq    (%rsi), %rdx        // t1 = *yp
    movq    %rdx, (%rdi)        // *xp = t1
    movq    %rax, (%rsi)        // *yp = t0
    ret                         // return to the caller instead of falling through
/* Exchange the two long values that xp and yp point to. */
void swap(long *xp, long *yp) {
    /* Read both values first, then write them back crosswise. */
    const long from_x = *xp;
    const long from_y = *yp;

    *xp = from_y;
    *yp = from_x;
}
// arguments: xp in %rdi, yp in %rsi (the pointers themselves, not *xp / *yp)
(Rb, Ri) / Mem[Reg[Rb] + Reg[Ri]]
D(Rb, Ri) / Mem[Reg[Rb] + Reg[Ri] + D]
(Rb, Ri, S) / Mem[Reg[Rb] + S * Reg[Ri]]
%rdx : 0xf000
%rcx : 0x0100
일 때,
1). 0x8 (%rdx)
= 0xf000 + 0x8
= 0xf008
2). (%rdx, %rcx)
= 0xf000 + 0x100
= 0xf100
3). (%rdx, %rcx, 4)
= 0xf000 + 4 * 0x100
= 0xf400
4). 0x80(, %rdx, 2)
= 2 * 0xf000 + 0x80
= 0x1e080
leaq Src, Dst
/* Return x + 4*y + 12*z. */
long scale(long x, long y, long z) {
    return x + 4*y + 12*z;
}
x in %rdi, y in %rsi, z in %rdx
// long scale(long x, long y, long z)  -- returns x + 4*y + 12*z
// In:  %rdi = x, %rsi = y, %rdx = z
// Out: %rax
// Overwrites: %rdx (reused to hold 3z)
scale:
    leaq    (%rdx, %rdx, 2), %rdx   // %rdx = z + 2*z = 3z
    leaq    (%rdi, %rsi, 4), %rax   // %rax = x + 4y
    leaq    (%rax, %rdx, 4), %rax   // %rax = (x + 4y) + 4*(3z)
    ret
p in %rbx, q in %rdx
Q.
(1). leaq 9(%rdx), %rax
(2). leaq (%rdx, %rbx), %rax
(3). leaq (%rdx, %rbx, 3), %rax
(4). leaq 2(%rbx, %rbx, 7), %rax
(5). leaq 0xE(, %rbx, 3), %rax
(6). leaq 6(%rbx, %rdx, 7), %rax
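A. (Note: these are arithmetic exercises. On real x86-64 hardware the scale factor must be 1, 2, 4, or 8, so (3)-(6) would not assemble as written.)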
O (1). t1 = q + 9
O (2). t1 = p + q
O (3). t1 = q + 3 * p
O (4). t1 = (p + 7 * p) + 2		 -> 8p + 2
O (5). t1 = (0 + p * 3) + 0xE	 -> 3p + 14
O (6). t1 = (p + q * 7) + 6
addq	src, dst	// dst = dst + src
subq	src, dst	// dst = dst - src
imulq	src, dst	// dst = dst * src
salq	src, dst	// dst = dst << src
sarq	src, dst	// dst = dst >> src (arithmetic shift: fills with the sign bit)
shrq	src, dst	// dst = dst >> src (logical shift: fills with zeros)
xorq	src, dst	// dst = dst ^ src
andq	src, dst	// dst = dst & src
orq		src, dst	// dst = dst | src
incq	dst		// dst = dst + 1
decq	dst		// dst = dst - 1
negq	dst		// dst = -dst
notq	dst 	// dst = ~dst
/* Return (x + y + z) * ((x + 4) + 48*y). */
long arith(long x, long y, long z) {
    /* Left factor: z added to the sum of x and y. */
    const long sum_xyz = z + (x + y);
    /* Right factor: x offset by 4, plus y scaled by 48. */
    const long offset_plus_scaled = (x + 4) + (y * 48);

    return sum_xyz * offset_plus_scaled;
}
x in %rdi, y in  %rsi, (z, t4) in %rdx, (t1, t2, rval) in %rax, t5 in %rcx
// long arith(long x, long y, long z)
// Returns (x + y + z) * ((x + 4) + 48*y).
// In:  %rdi = x, %rsi = y, %rdx = z
// Out: %rax = rval
// Overwrites: %rcx (t5), %rdx (reused for t4 once z has been consumed)
arith:
    leaq    (%rdi, %rsi), %rax      // t1 = x + y
    addq    %rdx, %rax              // t2 = t1 + z
    leaq    (%rsi, %rsi, 2), %rdx   // %rdx = 3y (z is no longer needed)
    salq    $4, %rdx                // t4 = 16 * (3y) = 48y
    leaq    4(%rdi, %rdx), %rcx     // t5 = (x + 4) + t4
    imulq   %rcx, %rax              // rval = t2 * t5
    ret                             // return rval in %rax
다음과 같이 컴파일한다: gcc -Og p1.c p2.c -o p
C code (sum.c)
long plus(long x, long y);
/* Store the result of plus(x, y) into the long that dest points to. */
void sumstore(long x, long y, long *dest) {
    *dest = plus(x, y);
}
Generated x86-64 Assembly
// void sumstore(long x, long y, long *dest)  -- *dest = plus(x, y)
// In:  %rdi = x, %rsi = y, %rdx = dest
// %rbx is saved on entry and restored before returning.
sumstore:
    pushq   %rbx            // save %rbx on the program stack
    movq    %rdx, %rbx      // keep dest in %rbx across the call
    call    plus            // x and y are still in %rdi/%rsi; result returns in %rax
    movq    %rax, (%rbx)    // *dest = t (64-bit base register %rbx, not %bx)
    popq    %rbx            // restore the saved %rbx
    ret
*dest = t; in C code
: Store value t where designated by dest
movq %rax, (%rbx)
: Move 8-byte value to memory (quad words)
 0x40059e : 48 89 03
: 3-byte instruction (48 89 03)
stored at address 0x40059e