Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding label support to assembler #36

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 73 additions & 11 deletions pyevmasm/evmasm.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from bisect import bisect
from binascii import hexlify, unhexlify
from builtins import map, next, range, object
from future.builtins import next, bytes
from builtins import next, bytes
import copy

DEFAULT_FORK = "petersburg"
Expand Down Expand Up @@ -181,8 +181,7 @@ def operand(self):
@operand.setter
def operand(self, value):
if self.operand_size != 0 and value is not None:
mask = (1 << self.operand_size * 8) - 1
if ~mask & value:
if value.bit_length() > self.operand_size * 8:
raise ValueError("operand should be %d bits long" % (self.operand_size * 8))
self._operand = value

Expand Down Expand Up @@ -329,7 +328,17 @@ def is_arithmetic(self):
'ADD', 'MUL', 'SUB', 'DIV', 'SDIV', 'MOD', 'SMOD', 'ADDMOD', 'MULMOD', 'EXP', 'SIGNEXTEND', 'SHL', 'SHR', 'SAR'}


def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK):
def is_push(instr):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

alternative instr.semantics == "PUSH"

Also if you really need an is_push make one like is_branch (you can use opcodes instead of semantics if you want)

pyevmasm/pyevmasm/evmasm.py

Lines 305 to 308 in 0933d39

@property
def is_branch(self):
""" True if the instruction is a jump """
return self.semantics in {'JUMP', 'JUMPI'}

return (instr._opcode >= 0x60) and (instr._opcode <= 0x6F)

def is_digit(operand):
try:
int(operand, 0)
return True
except:
return False

def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK, fillins={}):
""" Assemble one EVM instruction from its textual representation.

:param asmcode: assembly code for one instruction
Expand All @@ -355,13 +364,25 @@ def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK):
instr.pc = pc
if instr.operand_size > 0:
assert len(asmcode) == 2
instr.operand = int(asmcode[1], 0)
operand = asmcode[1].strip()
if is_push(instr) and not is_digit(operand):
# instantiating a label, fill it with zeros instead
instr.operand = 0
if operand in fillins:
fillins[operand].append(pc)
else:
fillins[operand] = [pc]
else:
instr.operand = int(asmcode[1], 0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Idea:

Not sure we need the fillings dict?
Maybe just write whatever there is to the instruction.operand. An int or label(string).
And then consider the instructions with a string operand to need fixup. ?

Instruction will not be able to generate a bytecode until it is "fixedup"

return instr
except:
raise AssembleError("Something wrong at pc %d" % pc)

def fixup_instr(instr, label_offset):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Being it so simple I vote to do this inline so we do not need to document, maintain, etc and we save 1 func call.

assert is_push(instr)
instr.operand = label_offset

def assemble_all(asmcode, pc=0, fork=DEFAULT_FORK):
def assemble_all(asmcode, pc=1, fork=DEFAULT_FORK):
""" Assemble a sequence of textual representation of EVM instructions

:param asmcode: assembly code for any number of instructions
Expand Down Expand Up @@ -390,13 +411,54 @@ def assemble_all(asmcode, pc=0, fork=DEFAULT_FORK):
"""
asmcode = asmcode.split('\n')
asmcode = iter(asmcode)

# we use a dictionary to record label locations:
labels = {}
# another dictionary to record which instruction
# we need to fill in.
fillins = {}
# we have to traverse the generated instruction twice
# so no use of generator here
instrs = []

for line in asmcode:
if not line.strip():
line = line.strip()

# skip empty lines
if not line:
continue
instr = assemble_one(line, pc=pc, fork=fork)
yield instr

# remove comments
index = line.find("#")
if index is not -1:
line = line[:index]

# skip directives:
if line.find(".") is 0:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

line.statrswith ?

continue

# handle labels
if line.endswith(":"):
# this is a label, record it with location (PC)
labels[line[:-1]] = pc
continue

instr = assemble_one(line, pc=pc, fork=fork, fillins=fillins)
instrs.append(instr)
pc += instr.size

# fixup instructions
for label in labels:
if label not in fillins.keys():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no need for keys(). label not in fillins is enough I think, no?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here you can check if the instruction has any operan referring to a label and fix them.

Add an Instruction.uses_labels() that encapsulates that?

continue
for instr in instrs:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of iterating over al linstructions for each label that qualifies maybe just rework all this do a single iteration over the instructions and fix the ones that needs it ? The instructions with Instruction.uses_labels() true

if instr._pc in fillins[label]:
label_pc = labels[label]
fixup_instr(instr, label_pc)

# to keep it compatible with existing APIs
for instr in instrs:
yield instr

def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK):
""" Disassemble a single instruction from a bytecode
Expand Down Expand Up @@ -443,7 +505,7 @@ def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK):
return instruction


def disassemble_all(bytecode, pc=0, fork=DEFAULT_FORK):
def disassemble_all(bytecode, pc=1, fork=DEFAULT_FORK):
""" Disassemble all instructions in bytecode

:param bytecode: an evm bytecode (binary)
Expand Down Expand Up @@ -513,7 +575,7 @@ def disassemble(bytecode, pc=0, fork=DEFAULT_FORK):
return '\n'.join(map(str, disassemble_all(bytecode, pc=pc, fork=fork)))


def assemble(asmcode, pc=0, fork=DEFAULT_FORK):
def assemble(asmcode, pc=1, fork=DEFAULT_FORK):
""" Assemble an EVM program

:param asmcode: an evm assembler program
Expand Down
24 changes: 24 additions & 0 deletions tests/test_EVMAssembler.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,30 @@ def test_ADD_1(self):
asmcode = EVMAsm.disassemble_hex('0x608040526002610100')
self.assertEqual(asmcode, '''PUSH1 0x80\nBLOCKHASH\nMSTORE\nPUSH1 0x2\nPUSH2 0x100''')

def test_label(self):
bytecode = EVMAsm.assemble_hex("""
Start:
PUSH1 Return
PUSH1 0x11
PUSH1 0x22
PUSH2 Function
JUMP
Return:
JUMPDEST
PUSH1 0x00
MSTORE
PUSH1 0x20
PUSH1 0x00
RETURN
Function:
JUMPDEST
ADD
SWAP1
JUMP
""")
self.assertEqual(bytecode,
'0x600a60116022610013565b60005260206000f35b019056')

def test_STOP(self):
insn = EVMAsm.disassemble_one(b'\x00')
self.assertTrue(insn.mnemonic == 'STOP')
Expand Down