
本記事ではPythonで簡単なx86エミュレータを作成します。
前回 ではサブルーチンを呼び出す命令であるcall/retについて学びました。
今回は条件分岐命令で使用されるeflagsの使い方について学んでいきます。
eflagsとは
前回 ではサブルーチンを呼び出す命令であるcall/retについて学びました。
今回は、変数の状態により処理を分岐させる条件分岐命令を行うために使用されるeflagsというフラグレジスタについて学んでいきます。
eflagsは以下のような32ビットの各ビットをそれぞれフラグとして使用します。

上記の中で今回は以下の4つを実装します。
- CF(キャリーフラグ):演算結果で繰り上がりや繰り下がりがあれば1をセット
- ZF(ゼロフラグ):演算結果が0であれば1にセット
- SF(サインフラグ):演算結果で符号が負になれば1をセット
- OF(オーバーフローフラグ):符号付き演算結果で桁あふれが発生したら1をセット
なお、上記の条件に当てはまらなければ0にセット(クリア)されます。
まず、以下のC言語プログラムについて見ていきます。
// sum.c
/* Return the sum of the integers 1..n; the loop body never runs for n < 1,
 * so the result is 0 in that case. */
int sum(int n){
    int sum = 0;    /* running total (kept at [ebp-0x4] in the disassembly) */
    int i;          /* loop counter (kept at [ebp-0x8] in the disassembly) */
    /* The i <= n test is what compiles to the cmp / jng (jle) pair. */
    for (i = 1; i <= n; i++){
        sum += i;
    }
    return sum;
}
/* Entry point: returns sum(5), i.e. 1 + 2 + 3 + 4 + 5 = 15. */
int main(){
    return sum(5);
}
上記は1から与えられた引数までの合計を計算し返すプログラムです。
これを逆アセンブルしたものを以下に記します。
00000000  E831000000        call 0x36
00000005  E9F683FFFF        jmp 0xffff8400
0000000A  55                push ebp
0000000B  89E5              mov ebp,esp
0000000D  83EC10            sub esp,byte +0x10
00000010  C745FC00000000    mov dword [ebp-0x4],0x0
00000017  C745F801000000    mov dword [ebp-0x8],0x1
0000001E  EB09              jmp short 0x29
00000020  8B45F8            mov eax,[ebp-0x8]
00000023  0145FC            add [ebp-0x4],eax
00000026  FF45F8            inc dword [ebp-0x8]
00000029  8B45F8            mov eax,[ebp-0x8]
0000002C  3B4508            cmp eax,[ebp+0x8]
0000002F  7EEF              jng 0x20
00000031  8B45FC            mov eax,[ebp-0x4]
00000034  C9                leave
00000035  C3                ret
00000036  55                push ebp
00000037  89E5              mov ebp,esp
00000039  6A05              push byte +0x5
0000003B  E8CAFFFFFF        call 0xa
00000040  83C404            add esp,byte +0x4
00000043  C9                leave
00000044  C3                ret
上記では0x0A~0x35番地がsum()の処理にあたり、スタック上の[ebp-0x8]に変数i、[ebp-0x4]に変数sum、[ebp+0x8]に引数nがそれぞれ格納されています。
まず、0x29番地にジャンプし、引数と変数iの値をコピーしたeaxを0x2C番地のcmp命令で比較し、0x2F番地のjng(=jle)で二つの値の大小を確認します。
jngは、ZFに1がセットされているか、またはSFとOFの値が異なる場合(ZF=1 または SF≠OF)にジャンプします。
なお、jngは"Jump if Not Greater"の略で、jleは"Jump if Less or Equal"の略になり、これらのJ**命令(jzやjnzなど)では、各フラグレジスタの状態(0か1か)に応じてジャンプします。
Pythonによるスクリプトの作成
それでは、Pythonでeflagsによる条件分岐命令を実装していきます。
# emulator.py
class ModRM:
    """Holder for the fields decoded from one ModR/M byte and its trailers.

    The single attribute ``modrm`` is a dict with the keys ``mod``,
    ``opecode``, ``reg_index``, ``rm``, ``sib``, ``disp8`` and ``disp32``.
    Every field starts at zero; ``Emulator.parse_modrm`` fills them in.
    """

    def __init__(self):
        # Key order is kept the same as the field list above.
        field_names = (
            "mod",
            "opecode",
            "reg_index",
            "rm",
            "sib",
            "disp8",
            "disp32",
        )
        self.modrm = dict.fromkeys(field_names, 0x00)
class Emulator:
    """Tiny 32-bit x86 emulator: eight registers, EFLAGS, flat byte memory.

    Opcode handlers are looked up in ``self.instructions`` (indexed by the
    first opcode byte) and each handler advances ``self.eip`` itself.

    Memory cells may hold either ``int`` values or single-byte ``bytes``
    objects (the loader in this file stores the latter); every read goes
    through ``_to_int`` so both are accepted.

    The flag masks ``CARRY_FLAG``, ``ZERO_FLAG``, ``SIGN_FLAG`` and
    ``OVERFLOW_FLAG`` and the ``ModRM`` class are module-level names that
    are looked up when the corresponding methods run.

    NOTE: only the SUB/CMP handlers update EFLAGS; ADD and INC leave the
    flags untouched in this implementation.
    """

    # Register values, addresses and EIP all wrap at 32 bits.
    _MASK32 = 0xffffffff

    def __init__(self):
        self.register_name = ["EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI"]
        self.registers = {name: 0x00 for name in self.register_name}
        self.eflags = 0x00
        self.memory = None
        self.eip = None
        self.instructions = [None] * 256

    # ------------------------------------------------------------------
    # Setup and diagnostics
    # ------------------------------------------------------------------
    def init_instructions(self):
        """Fill the opcode dispatch table with the implemented handlers."""
        table = self.instructions
        table[0x01] = self.add_rm32_r32
        table[0x3b] = self.cmp_r32_rm32
        table[0x40] = self.inc_eax
        for i in range(8):
            table[0x50 + i] = self.push_r32
            table[0x58 + i] = self.pop_r32
        table[0x68] = self.push_imm32
        table[0x6a] = self.push_imm8
        table[0x70] = self.jo
        table[0x71] = self.jno
        table[0x72] = self.jc
        table[0x73] = self.jnc
        table[0x74] = self.jz
        table[0x75] = self.jnz
        table[0x78] = self.js
        table[0x79] = self.jns
        table[0x7c] = self.jl
        table[0x7e] = self.jle
        table[0x83] = self.code_83
        table[0x89] = self.mov_rm32_r32
        table[0x8b] = self.mov_r32_rm32
        for i in range(8):
            table[0xb8 + i] = self.mov_r32_imm32
        table[0xc3] = self.ret
        table[0xc7] = self.mov_rm32_imm32
        table[0xc9] = self.leave
        table[0xe8] = self.call_rel32
        table[0xe9] = self.near_jump
        table[0xeb] = self.short_jump
        table[0xff] = self.code_ff

    def create_emu(self, size, eip, esp):
        """Allocate ``size`` zeroed memory bytes and set initial EIP/ESP."""
        self.eip = eip
        self.registers["ESP"] = esp
        self.memory = [0x00 for _ in range(size)]

    def dump_registers(self):
        """Print the general-purpose registers and EIP as 8-digit hex."""
        for name in self.register_name:
            print("{} = 0x{:08x}".format(name, self.registers[name]))
        print("EIP = 0x{:08x}".format(self.eip))

    @staticmethod
    def _abort(message, status=1):
        """Print ``message`` and terminate the process with ``status``."""
        # Local import: the file has no top-level import block to extend.
        import sys
        print(message)
        sys.exit(status)

    @staticmethod
    def _to_int(cell):
        """Return a memory cell as an int (cells may be int or bytes)."""
        if isinstance(cell, int):
            return cell
        return int.from_bytes(cell, 'little')

    # ------------------------------------------------------------------
    # Instruction-stream readers (offsets are relative to EIP)
    # ------------------------------------------------------------------
    def get_code8(self, index):
        """Return the unsigned byte at ``EIP + index``."""
        return self._to_int(self.memory[self.eip + index])

    def get_sign_code8(self, index):
        """Return the byte at ``EIP + index`` as a signed value (-128..127)."""
        code = self.get_code8(index) & 0xff
        return code - 0x100 if code & 0x80 else code

    def get_code32(self, index):
        """Return the unsigned little-endian dword at ``EIP + index``."""
        ret = 0x00
        for i in range(4):
            ret |= self.get_code8(index + i) << (i * 8)
        return ret

    def get_sign_code32(self, index):
        """Return the little-endian dword at ``EIP + index`` as a signed int."""
        code = self.get_code32(index)
        return code - 0x100000000 if code & 0x80000000 else code

    # ------------------------------------------------------------------
    # Register and memory access
    # ------------------------------------------------------------------
    def get_register32(self, index):
        """Return the value of the register with the given index (0..7)."""
        return self.registers[self.register_name[index]]

    def set_register32(self, index, value):
        """Store ``value`` (wrapped to 32 bits) in the register ``index``."""
        self.registers[self.register_name[index]] = value & self._MASK32

    def get_memory8(self, address):
        """Return the byte stored at ``address`` as an int."""
        return self._to_int(self.memory[address])

    def get_memory32(self, address):
        """Return the little-endian dword stored at ``address``."""
        ret = 0
        for i in range(4):
            ret |= self.get_memory8(address + i) << (8 * i)
        return ret

    def set_memory8(self, address, value):
        """Store the low 8 bits of ``value`` at ``address``."""
        self.memory[address] = value & 0xff

    def set_memory32(self, address, value):
        """Store the low 32 bits of ``value`` at ``address``, little-endian."""
        for i in range(4):
            self.set_memory8(address + i, value >> (i * 8))

    # ------------------------------------------------------------------
    # ModR/M decoding and operand access
    # ------------------------------------------------------------------
    def parse_modrm(self):
        """Decode the ModR/M byte at EIP plus any SIB/displacement bytes.

        EIP must already point at the ModR/M byte; on return it points
        just past everything consumed. ``disp8`` is stored signed;
        ``disp32`` is stored unsigned for mod 0/2 (the address computation
        wraps at 32 bits) and mirrors ``disp8`` for mod 1.
        """
        m = ModRM()
        code = self.get_code8(0)
        mod = (code & 0xc0) >> 6
        rm = code & 0x07
        m.modrm["mod"] = mod
        m.modrm["opecode"] = m.modrm["reg_index"] = (code & 0x38) >> 3
        m.modrm["rm"] = rm
        self.eip += 1
        if mod != 3 and rm == 4:
            m.modrm["sib"] = self.get_code8(0)
            self.eip += 1
        if (mod == 0 and rm == 5) or mod == 2:
            m.modrm["disp32"] = self.get_code32(0)
            m.modrm["disp8"] = m.modrm["disp32"] & 0xff
            self.eip += 4
        elif mod == 1:
            m.modrm["disp8"] = m.modrm["disp32"] = self.get_sign_code8(0)
            self.eip += 1
        return m

    def calc_memory_address(self, m):
        """Return the 32-bit effective address described by ``m``.

        SIB addressing (rm == 4) and mod == 3 are not supported; those
        cases print a message and exit the process.
        """
        mod = m.modrm["mod"]
        rm = m.modrm["rm"]
        if mod == 3:
            self._abort("not implemented ModRM mod = 3")
        if rm == 4:
            self._abort("not implemented ModRM mod = {}, rm = 4".format(mod))
        if mod == 0:
            if rm == 5:
                return m.modrm["disp32"] & self._MASK32
            return self.get_register32(rm)
        if mod == 1:
            return (self.get_register32(rm) + m.modrm["disp8"]) & self._MASK32
        return (self.get_register32(rm) + m.modrm["disp32"]) & self._MASK32

    def get_rm32(self, m):
        """Read the r/m32 operand (register when mod == 3, else memory)."""
        if m.modrm["mod"] == 3:
            return self.get_register32(m.modrm["rm"])
        return self.get_memory32(self.calc_memory_address(m))

    def set_rm32(self, m, value):
        """Write ``value`` to the r/m32 operand."""
        if m.modrm["mod"] == 3:
            self.set_register32(m.modrm["rm"], value)
        else:
            self.set_memory32(self.calc_memory_address(m), value)

    def get_r32(self, m):
        """Read the register selected by the ModR/M reg field."""
        return self.get_register32(m.modrm["reg_index"])

    def set_r32(self, m, value):
        """Write ``value`` to the register selected by the reg field."""
        self.set_register32(m.modrm["reg_index"], value)

    # ------------------------------------------------------------------
    # Data movement
    # ------------------------------------------------------------------
    def mov_r32_imm32(self):
        """B8+r: mov r32, imm32."""
        reg = self.get_code8(0) - 0xb8
        self.set_register32(reg, self.get_code32(1))
        self.eip = (self.eip + 5) & self._MASK32

    def mov_rm32_imm32(self):
        """C7 /0: mov r/m32, imm32."""
        self.eip += 1
        m = self.parse_modrm()
        value = self.get_code32(0)
        self.eip += 4
        self.set_rm32(m, value)

    def mov_rm32_r32(self):
        """89 /r: mov r/m32, r32."""
        self.eip += 1
        m = self.parse_modrm()
        self.set_rm32(m, self.get_r32(m))

    def mov_r32_rm32(self):
        """8B /r: mov r32, r/m32."""
        self.eip += 1
        m = self.parse_modrm()
        self.set_r32(m, self.get_rm32(m))

    # ------------------------------------------------------------------
    # Arithmetic and comparison
    # ------------------------------------------------------------------
    def add_rm32_r32(self):
        """01 /r: add r/m32, r32 (flags are not updated)."""
        self.eip += 1
        m = self.parse_modrm()
        r32 = self.get_r32(m)
        rm32 = self.get_rm32(m)
        self.set_rm32(m, rm32 + r32)

    def add_rm32_imm8(self, m):
        """83 /0: add r/m32, sign-extended imm8 (flags are not updated)."""
        rm32 = self.get_rm32(m)
        imm8 = self.get_sign_code8(0)
        self.eip += 1
        self.set_rm32(m, rm32 + imm8)

    def sub_rm32_imm8(self, m):
        """83 /5: sub r/m32, sign-extended imm8; updates CF/ZF/SF/OF."""
        rm32 = self.get_rm32(m)
        # Sign-extend to 32 bits, then view as unsigned for the flag math.
        imm = self.get_sign_code8(0) & self._MASK32
        self.eip += 1
        result = rm32 - imm
        self.set_rm32(m, result)
        self.update_eflags_sub(rm32, imm, result)

    def cmp_r32_rm32(self):
        """3B /r: cmp r32, r/m32; updates CF/ZF/SF/OF only."""
        self.eip += 1
        m = self.parse_modrm()
        r32 = self.get_r32(m)
        rm32 = self.get_rm32(m)
        self.update_eflags_sub(r32, rm32, r32 - rm32)

    def cmp_rm32_imm8(self, m):
        """83 /7: cmp r/m32, sign-extended imm8; updates CF/ZF/SF/OF only."""
        rm32 = self.get_rm32(m)
        imm = self.get_sign_code8(0) & self._MASK32
        self.eip += 1
        self.update_eflags_sub(rm32, imm, rm32 - imm)

    def code_83(self):
        """Dispatch opcode 0x83 on the ModR/M opcode-extension field."""
        self.eip += 1
        m = self.parse_modrm()
        ext = m.modrm["opecode"]
        if ext == 0:
            self.add_rm32_imm8(m)
        elif ext == 5:
            self.sub_rm32_imm8(m)
        elif ext == 7:
            self.cmp_rm32_imm8(m)
        else:
            self._abort("not implemented: 83 /{}".format(ext))

    def inc_rm32(self, m):
        """FF /0: inc r/m32 (flags are not updated)."""
        self.set_rm32(m, self.get_rm32(m) + 1)

    def inc_eax(self):
        """40: inc eax (flags are not updated)."""
        self.registers["EAX"] = (self.registers["EAX"] + 1) & self._MASK32
        self.eip += 1

    def code_ff(self):
        """Dispatch opcode 0xFF on the ModR/M opcode-extension field."""
        self.eip += 1
        m = self.parse_modrm()
        ext = m.modrm["opecode"]
        if ext == 0:
            self.inc_rm32(m)
        else:
            self._abort("not implemented: FF /{}".format(ext))

    # ------------------------------------------------------------------
    # Stack, call and return
    # ------------------------------------------------------------------
    def push32(self, value):
        """Decrement ESP by 4 and store ``value`` at the new top of stack."""
        esp = self.register_name.index("ESP")
        address = (self.get_register32(esp) - 4) & self._MASK32
        self.set_register32(esp, address)
        self.set_memory32(address, value)

    def pop32(self):
        """Return the dword at the top of stack and increment ESP by 4."""
        esp = self.register_name.index("ESP")
        address = self.get_register32(esp)
        ret = self.get_memory32(address)
        self.set_register32(esp, address + 4)
        return ret

    def push_r32(self):
        """50+r: push r32."""
        reg = self.get_code8(0) - 0x50
        self.push32(self.get_register32(reg))
        self.eip += 1

    def pop_r32(self):
        """58+r: pop r32."""
        reg = self.get_code8(0) - 0x58
        self.set_register32(reg, self.pop32())
        self.eip += 1

    def push_imm8(self):
        """6A ib: push the imm8 sign-extended to 32 bits."""
        self.push32(self.get_sign_code8(1))
        self.eip += 2

    def push_imm32(self):
        """68 id: push imm32."""
        self.push32(self.get_code32(1))
        self.eip += 5

    def call_rel32(self):
        """E8 cd: push the return address, then jump by the signed rel32."""
        diff = self.get_sign_code32(1)
        self.push32((self.eip + 5) & self._MASK32)
        self.eip = (self.eip + diff + 5) & self._MASK32

    def ret(self):
        """C3: pop the return address into EIP."""
        self.eip = self.pop32()

    def leave(self):
        """C9: restore ESP from EBP, then pop the saved EBP."""
        ebp = self.register_name.index("EBP")
        esp = self.register_name.index("ESP")
        self.set_register32(esp, self.get_register32(ebp))
        self.set_register32(ebp, self.pop32())
        self.eip += 1

    # ------------------------------------------------------------------
    # Unconditional jumps
    # ------------------------------------------------------------------
    def short_jump(self):
        """EB cb: jump by a signed 8-bit displacement."""
        diff = self.get_sign_code8(1)
        self.eip = (self.eip + diff + 2) & self._MASK32

    def near_jump(self):
        """E9 cd: jump by a signed 32-bit displacement."""
        diff = self.get_sign_code32(1)
        self.eip = (self.eip + diff + 5) & self._MASK32

    # ------------------------------------------------------------------
    # EFLAGS
    # ------------------------------------------------------------------
    def update_eflags_sub(self, v1, v2, result):
        """Set CF/ZF/SF/OF for the subtraction ``v1 - v2``.

        ``v1`` and ``v2`` are treated as 32-bit patterns; ``result`` is the
        unwrapped difference of the two unsigned operands, so bits above
        bit 31 being set indicates a borrow.
        """
        v1 &= self._MASK32
        v2 &= self._MASK32
        sign1 = v1 >> 31
        sign2 = v2 >> 31
        signr = (result >> 31) & 1
        self.set_carry((result >> 32) != 0)
        self.set_zero((result & self._MASK32) == 0)
        self.set_sign(signr)
        # Signed overflow: operands differ in sign and the result's sign
        # differs from the minuend's.
        self.set_overflow(sign1 != sign2 and sign1 != signr)

    def _set_flag(self, mask, enabled):
        """Set or clear the EFLAGS bits selected by ``mask``."""
        if enabled:
            self.eflags |= mask
        else:
            self.eflags &= ~mask

    def set_carry(self, is_carry):
        """Set CF when ``is_carry`` is truthy, otherwise clear it."""
        self._set_flag(CARRY_FLAG, is_carry)

    def set_zero(self, is_zero):
        """Set ZF when ``is_zero`` is truthy, otherwise clear it."""
        self._set_flag(ZERO_FLAG, is_zero)

    def set_sign(self, is_sign):
        """Set SF when ``is_sign`` is truthy, otherwise clear it."""
        self._set_flag(SIGN_FLAG, is_sign)

    def set_overflow(self, is_overflow):
        """Set OF when ``is_overflow`` is truthy, otherwise clear it."""
        self._set_flag(OVERFLOW_FLAG, is_overflow)

    def is_carry(self):
        """Return True when CF is set."""
        return (self.eflags & CARRY_FLAG) != 0

    def is_zero(self):
        """Return True when ZF is set."""
        return (self.eflags & ZERO_FLAG) != 0

    def is_sign(self):
        """Return True when SF is set."""
        return (self.eflags & SIGN_FLAG) != 0

    def is_overflow(self):
        """Return True when OF is set."""
        return (self.eflags & OVERFLOW_FLAG) != 0

    # ------------------------------------------------------------------
    # Conditional jumps (all are two bytes: opcode + signed rel8)
    # ------------------------------------------------------------------
    def _branch_rel8(self, taken):
        """Advance past a rel8 jump, adding the displacement if ``taken``."""
        diff = self.get_sign_code8(1) if taken else 0
        self.eip = (self.eip + diff + 2) & self._MASK32

    def j(func):
        """Decorator: make a handler that jumps when ``func(self)`` is true."""
        from functools import wraps

        @wraps(func)
        def wrapper(self, *args, **kwargs):
            self._branch_rel8(bool(func(self, *args, **kwargs)))
        return wrapper

    def jn(func):
        """Decorator: make a handler that jumps when ``func(self)`` is false."""
        from functools import wraps

        @wraps(func)
        def wrapper(self, *args, **kwargs):
            self._branch_rel8(not func(self, *args, **kwargs))
        return wrapper

    @j
    def jc(self):
        return self.is_carry()

    @jn
    def jnc(self):
        return self.is_carry()

    @j
    def js(self):
        return self.is_sign()

    @jn
    def jns(self):
        return self.is_sign()

    @j
    def jz(self):
        return self.is_zero()

    @jn
    def jnz(self):
        return self.is_zero()

    @j
    def jo(self):
        return self.is_overflow()

    @jn
    def jno(self):
        return self.is_overflow()

    @j
    def jl(self):
        # Less (signed): SF != OF.
        return self.is_sign() != self.is_overflow()

    @j
    def jle(self):
        # Less or equal (signed): ZF set, or SF != OF.
        return self.is_zero() or self.is_sign() != self.is_overflow()
# EFLAGS bit masks used by Emulator: CF is bit 0, ZF bit 6, SF bit 7,
# OF bit 11.
CARRY_FLAG = 1
ZERO_FLAG = 1 << 6
SIGN_FLAG = 1 << 7
OVERFLOW_FLAG = 1 << 11

# 1 MiB of emulated memory; both EIP and ESP start at 0x7c00.
mem_size = 1024 * 1024
emu = Emulator()
emu.create_emu(mem_size, 0x7c00, 0x7c00)

# Read the whole image in one call; the context manager closes the file
# even if reading fails.
with open('sum.bin', 'rb') as binary:
    image = binary.read()

# Copy the image to 0x7c00 one cell per byte. Each cell is stored as a
# single-byte ``bytes`` object, which is the form the emulator's code
# readers convert with ``int.from_bytes``.
offset = 0x7c00
for i in range(len(image)):
    emu.memory[offset] = image[i:i + 1]
    offset += 1

emu.init_instructions()

# Fetch/dispatch loop: stop on an unimplemented opcode, or once EIP
# reaches 0 (the program ends by jumping to address 0).
while emu.eip < mem_size:
    code = emu.get_code8(0)
    print("EIP = 0x{:02x}, Code = 0x{:02x}".format(emu.eip, code))
    handler = emu.instructions[code]
    if handler is None:
        print("\n\nNot Implemented: 0x{:02x}".format(code))
        break
    handler()
    if emu.eip == 0x00:
        print("\n\nend of program.\n\n")
        break

emu.dump_registers()
動作確認
それでは、上記で作成したスクリプトを実行してみます。
なお、事前にアセンブリ言語のプログラムはbinファイルとしてビルドしておきます。
> python emulator.py
EIP = 0x7c00, Code = 0xe8
EIP = 0x7c36, Code = 0x55
EIP = 0x7c37, Code = 0x89
EIP = 0x7c39, Code = 0x6a
EIP = 0x7c3b, Code = 0xe8
EIP = 0x7c0a, Code = 0x55
EIP = 0x7c0b, Code = 0x89
EIP = 0x7c0d, Code = 0x83
EIP = 0x7c10, Code = 0xc7
EIP = 0x7c17, Code = 0xc7
EIP = 0x7c1e, Code = 0xeb
EIP = 0x7c29, Code = 0x8b
EIP = 0x7c2c, Code = 0x3b
EIP = 0x7c2f, Code = 0x7e
EIP = 0x7c20, Code = 0x8b
EIP = 0x7c23, Code = 0x01
EIP = 0x7c26, Code = 0xff
EIP = 0x7c29, Code = 0x8b
EIP = 0x7c2c, Code = 0x3b
EIP = 0x7c2f, Code = 0x7e
EIP = 0x7c20, Code = 0x8b
EIP = 0x7c23, Code = 0x01
EIP = 0x7c26, Code = 0xff
EIP = 0x7c29, Code = 0x8b
EIP = 0x7c2c, Code = 0x3b
EIP = 0x7c2f, Code = 0x7e
EIP = 0x7c20, Code = 0x8b
EIP = 0x7c23, Code = 0x01
EIP = 0x7c26, Code = 0xff
EIP = 0x7c29, Code = 0x8b
EIP = 0x7c2c, Code = 0x3b
EIP = 0x7c2f, Code = 0x7e
EIP = 0x7c20, Code = 0x8b
EIP = 0x7c23, Code = 0x01
EIP = 0x7c26, Code = 0xff
EIP = 0x7c29, Code = 0x8b
EIP = 0x7c2c, Code = 0x3b
EIP = 0x7c2f, Code = 0x7e
EIP = 0x7c20, Code = 0x8b
EIP = 0x7c23, Code = 0x01
EIP = 0x7c26, Code = 0xff
EIP = 0x7c29, Code = 0x8b
EIP = 0x7c2c, Code = 0x3b
EIP = 0x7c2f, Code = 0x7e
EIP = 0x7c31, Code = 0x8b
EIP = 0x7c34, Code = 0xc9
EIP = 0x7c35, Code = 0xc3
EIP = 0x7c40, Code = 0x83
EIP = 0x7c43, Code = 0xc9
EIP = 0x7c44, Code = 0xc3
EIP = 0x7c05, Code = 0xe9


end of program.


EAX = 0x0000000f
ECX = 0x00000000
EDX = 0x00000000
EBX = 0x00000000
ESP = 0x00007c00
EBP = 0x00000000
ESI = 0x00000000
EDI = 0x00000000
EIP = 0x00000000
問題なく条件分岐が実行され、eaxに合計値の15(0x0f)が格納できたことが確認できました。