Architecture(ISA : Instruction set architecture)
: The parts of a processor design that one needs to understand for writing assembly/machine code
Microarchitecture
: Implementation of the architecture
ex). cache size, core frequency
Machine code
: The byte-level programs that a processor executes
Assembly code
: A text representation of machine code
movq src, dst
: move "quad-word(64 bits)" from src to dst
(source와 destination은 register가 될 수도, memory가 될 수도 있다)
addq src, dst
: dst += src
subq src, dst
: dst -= src
movq Source, Dest
movb, movw, movl, movq
(move byte, word, double word, quad word)
1, 2, 4, 8 bytes
8, 16, 32, 64 bits
movq src, dst
// Imm, Reg
movq $0x4, %rax // temp = 0x4;
// Imm, Mem
movq $-147, (%rax) // *p = -147
// Reg, Reg
movq %rax, %rdx // temp2 = temp1
// Reg, Mem
movq %rax, (%rdx) // *p = temp
// Mem, Reg
movq (%rax), %rdx // temp = *p
주소를 통해 그 값에 접근하는 것을 역참조(dereference)라고 합니다
(want to jump. D : integer. +D means 'other location')
whatAmI :
movq (%rdi), %rax
movq (%rsi), %rdx
movq %rdx, (%rdi)
movq %rax, (%rsi)
void swap(long *xp, long *yp) {
long t0 = *xp;
long t1 = *yp;
*xp = t1;
*yp = t0;
}
// argument xp = %rdi, yp = %rsi (registers hold the pointers; (%rdi) and (%rsi) dereference them)
(Rb, Ri) / Mem[Reg[Rb] + Reg[Ri]]
D(Rb, Ri) / Mem[Reg[Rb] + Reg[Ri] + D]
(Rb, Ri, S) / Mem[Reg[Rb] + S * Reg[Ri]]
%rdx : 0xf000
%rcx : 0x0100
일 때,
1). 0x8 (%rdx)
= 0xf000 + 0x8
= 0xf008
2). (%rdx, %rcx)
= 0xf000 + 0x100
= 0xf100
3). (%rdx, %rcx, 4)
= 0xf000 + 4 * 0x100
= 0xf400
4). 0x80(, %rdx, 2)
= 2 * 0xf000 + 0x80
= 0x1e080
leaq Src, Dst
long scale(long x, long y, long z) {
long t = x + 4*y + 12*z;
return t;
}
x in %rdi, y in %rsi, z in %rdx
scale :
leaq (%rdi, %rsi, 4), %rax // x + 4y를 %rax에 저장
leaq (%rdx, %rdx, 2), %rdx // z + 2z를 z에 저장
leaq (%rax, %rdx, 4), %rax // (x+4y) + 4*(3z)를 %rax에 저장
ret
p in %rbx, q in %rdx
Q.
(1). leaq 9(%rdx), %rax
(2). leaq (%rdx, %rbx), %rax
(3). leaq (%rdx, %rbx, 3), %rax
(4). leaq 2(%rbx, %rbx, 7), %rax
(5). leaq 0xE(, %rbx, 3), %rax
(6). leaq 6(%rbx, %rdx, 7), %rax
A.
O (1). t1 = q + 9
O (2). t1 = p + q
O (3). t1 = q + 3 * p
O (4). t1 = (p + 7 * p) + 2 -> 8p + 2
O (5). t1 = (0 + p * 3) + 0xE -> 3p + 14
O (6). t1 = (p + q * 7) + 6
addq src, dst // dst = dst + src
subq src, dst // dst = dst - src
imulq src, dst // dst = dst * src
salq src, dst // dst = dst << src
sarq src, dst // dst = dst >> src (arithmetic shift: fills with the sign bit)
shrq src, dst // dst = dst >> src (logical shift: fills with zeros)
xorq src, dst // dst = dst ^ src
andq src, dst // dst = dst & src
orq src, dst // dst = dst | src
incq dst // dst = dst + 1
decq dst // dst = dst - 1
negq dst // dst = -dst
notq dst // dst = ~dst
long arith(long x, long y, long z) {
long t1 = x + y;
long t2 = z + t1;
long t3 = x + 4;
long t4 = y * 48;
long t5 = t3 + t4;
long rval = t2 * t5;
return rval;
}
x in %rdi, y in %rsi, (z, t4) in %rdx, (t1, t2, rval) in %rax, t5 in %rcx
arith :
leaq (%rdi, %rsi), %rax // t1(%rax)에 x + y 저장
addq %rdx, %rax // t2(%rax) = t1 + z
leaq (%rsi, %rsi, 2), %rdx // z에 일단 3y 저장.
salq $4, %rdx // z = 16 * (3y) = 48 * y
leaq 4(%rdi, %rdx), %rcx // t5 = x+4[t3] + 48y[t4]
imulq %rcx, %rax // t2 * t5
gcc -Og p1.c p2.c -o p
다음과 같이 컴파일한다. C code (sum.c)
long plus(long x, long y);
void sumstore(long x, long y, long *dest) {
long t = plus(x, y);
*dest = t;
}
Generated x86-64 Assembly
sumstore :
pushq %rbx // %rbx가 프로그램 스택에 저장(push)되어야 한다는 걸 의미한다
movq %rdx, %rbx
call plus
movq %rax, (%rbx)
popq %rbx
ret
*dest = t;
in C code
: Store value t where designated by dest
movq %rax, (%rbx)
: Move 8-byte value to memory (quad words)
0x40059e : 48 89 03
: 3 byte instruction
stored at address 0x40059e