-
Notifications
You must be signed in to change notification settings - Fork 50
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding label support to assembler #36
base: master
Are you sure you want to change the base?
Changes from 7 commits
3ac47c4
dd51af3
56cc71e
1908a60
0b1def2
e1057e1
31503e4
f0f2d55
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
from bisect import bisect | ||
from binascii import hexlify, unhexlify | ||
from builtins import map, next, range, object | ||
from future.builtins import next, bytes | ||
from builtins import next, bytes | ||
import copy | ||
|
||
DEFAULT_FORK = "petersburg" | ||
|
@@ -181,8 +181,7 @@ def operand(self): | |
@operand.setter | ||
def operand(self, value): | ||
if self.operand_size != 0 and value is not None: | ||
mask = (1 << self.operand_size * 8) - 1 | ||
if ~mask & value: | ||
if value.bit_length() > self.operand_size * 8: | ||
raise ValueError("operand should be %d bits long" % (self.operand_size * 8)) | ||
self._operand = value | ||
|
||
|
@@ -329,7 +328,17 @@ def is_arithmetic(self): | |
'ADD', 'MUL', 'SUB', 'DIV', 'SDIV', 'MOD', 'SMOD', 'ADDMOD', 'MULMOD', 'EXP', 'SIGNEXTEND', 'SHL', 'SHR', 'SAR'} | ||
|
||
|
||
def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK): | ||
def is_push(instr): | ||
return (instr._opcode >= 0x60) and (instr._opcode <= 0x6F) | ||
|
||
def is_digit(operand): | ||
try: | ||
int(operand, 0) | ||
return True | ||
except: | ||
return False | ||
|
||
def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK, fillins={}): | ||
""" Assemble one EVM instruction from its textual representation. | ||
|
||
:param asmcode: assembly code for one instruction | ||
|
@@ -355,13 +364,25 @@ def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK): | |
instr.pc = pc | ||
if instr.operand_size > 0: | ||
assert len(asmcode) == 2 | ||
instr.operand = int(asmcode[1], 0) | ||
operand = asmcode[1].strip() | ||
if is_push(instr) and not is_digit(operand): | ||
# instantiating a label, fill it with zeros instead | ||
instr.operand = 0 | ||
if operand in fillins: | ||
fillins[operand].append(pc) | ||
else: | ||
fillins[operand] = [pc] | ||
else: | ||
instr.operand = int(asmcode[1], 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Idea: I'm not sure we need the `fillins` dict. An instruction will not be able to generate bytecode until it is "fixed up". |
||
return instr | ||
except: | ||
raise AssembleError("Something wrong at pc %d" % pc) | ||
|
||
def fixup_instr(instr, label_offset): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since this is so simple, I vote to do it inline so that we do not need to document and maintain it, and we save one function call. |
||
assert is_push(instr) | ||
instr.operand = label_offset | ||
|
||
def assemble_all(asmcode, pc=0, fork=DEFAULT_FORK): | ||
def assemble_all(asmcode, pc=1, fork=DEFAULT_FORK): | ||
""" Assemble a sequence of textual representation of EVM instructions | ||
|
||
:param asmcode: assembly code for any number of instructions | ||
|
@@ -390,13 +411,54 @@ def assemble_all(asmcode, pc=0, fork=DEFAULT_FORK): | |
""" | ||
asmcode = asmcode.split('\n') | ||
asmcode = iter(asmcode) | ||
|
||
# we use a dictionary to record label locations: | ||
labels = {} | ||
# another dictionary to record which instruction | ||
# we need to fill in. | ||
fillins = {} | ||
# we have to traverse the generated instruction twice | ||
# so no use of generator here | ||
instrs = [] | ||
|
||
for line in asmcode: | ||
if not line.strip(): | ||
line = line.strip() | ||
|
||
# skip empty lines | ||
if not line: | ||
continue | ||
instr = assemble_one(line, pc=pc, fork=fork) | ||
yield instr | ||
|
||
# remove comments | ||
index = line.find("#") | ||
if index is not -1: | ||
line = line[:index] | ||
|
||
# skip directives: | ||
if line.find(".") is 0: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `line.startswith`? |
||
continue | ||
|
||
# handle labels | ||
if line.endswith(":"): | ||
# this is a label, record it with location (PC) | ||
labels[line[:-1]] = pc | ||
continue | ||
|
||
instr = assemble_one(line, pc=pc, fork=fork, fillins=fillins) | ||
instrs.append(instr) | ||
pc += instr.size | ||
|
||
# fixup instructions | ||
for label in labels: | ||
if label not in fillins.keys(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no need for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here you can check if the instruction has any operan referring to a label and fix them. Add an |
||
continue | ||
for instr in instrs: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Instead of iterating over all instructions for each label that qualifies, maybe rework all this to do a single iteration over the instructions and fix the ones that need it? The instructions with |
||
if instr._pc in fillins[label]: | ||
label_pc = labels[label] | ||
fixup_instr(instr, label_pc) | ||
|
||
# to keep it compatible with existing APIs | ||
for instr in instrs: | ||
yield instr | ||
|
||
def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK): | ||
""" Disassemble a single instruction from a bytecode | ||
|
@@ -443,7 +505,7 @@ def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK): | |
return instruction | ||
|
||
|
||
def disassemble_all(bytecode, pc=0, fork=DEFAULT_FORK): | ||
def disassemble_all(bytecode, pc=1, fork=DEFAULT_FORK): | ||
""" Disassemble all instructions in bytecode | ||
|
||
:param bytecode: an evm bytecode (binary) | ||
|
@@ -513,7 +575,7 @@ def disassemble(bytecode, pc=0, fork=DEFAULT_FORK): | |
return '\n'.join(map(str, disassemble_all(bytecode, pc=pc, fork=fork))) | ||
|
||
|
||
def assemble(asmcode, pc=0, fork=DEFAULT_FORK): | ||
def assemble(asmcode, pc=1, fork=DEFAULT_FORK): | ||
""" Assemble an EVM program | ||
|
||
:param asmcode: an evm assembler program | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
alternative instr.semantics == "PUSH"
Also, if you really need an `is_push`, make one like `is_branch`
(you can use opcodes instead of semantics if you want):
pyevmasm/pyevmasm/evmasm.py
Lines 305 to 308 in 0933d39