From 09d0864d492ada29b7e5adf35699b477c8c66531 Mon Sep 17 00:00:00 2001 From: Anuda Weerasinghe Date: Wed, 26 Apr 2023 21:13:28 -0400 Subject: [PATCH 1/7] adds asttokens and progress update --- .gitignore | 1 + enum_issue_progress.md | 82 ++++++++++++++++++++++++++++++++++++++++++ enum_scope.txt | 64 +++++++++++++++++++++++++++++++++ enum_test.py | 7 ++++ vulture/core.py | 5 +++ 5 files changed, 159 insertions(+) create mode 100644 enum_issue_progress.md create mode 100644 enum_scope.txt create mode 100644 enum_test.py diff --git a/.gitignore b/.gitignore index df191195..0ec272f6 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ vulture.egg-info/ .pytest_cache/ .tox/ .venv/ +.vscode/ diff --git a/enum_issue_progress.md b/enum_issue_progress.md new file mode 100644 index 00000000..184b4b96 --- /dev/null +++ b/enum_issue_progress.md @@ -0,0 +1,82 @@ +## Our progress thus far on [issue #304](https://github.com/jendrikseipp/vulture/issues/304) + +- We traced the code path by running the debugger on the source code. +- Identified that Vulture generates an AST of the provided source code in the `scan` function in `core.py` +- Each node in the AST is then visited by the `visit` function +- The `visit` function decides what helper to use based on the type of node +- For this issue, we specifically care about the `visit_Call` and `visit_ClassDef` functions because these are invoked when the `list()` function is called, and when a class is defined with the `Enum` subclass. + +Based on these findings, here is a high level approach of the solution we will implement: + +- Extend the ast with scoping information using the [asttokens](https://asttokens.readthedocs.io/en/latest/) package. +- In `visit_Call`, check if the `node.func.id=='list'` - are we calling the list function? +- If so, get the argument(`node.args[0].id`) and use the scoping information provided by asttokens to check if the argument is a class +- If so, use the scope info about the class to check if `Enum` is a superclass. +- If so, then add all the fields defined in the class to `defined_variables` + +Debug output is attached below. See `enum_scope.txt` for more details. + +``` +Module( + body=[ + ImportFrom( + module='enum', + names=[ + alias(name='Enum')], + level=0), + ClassDef( + name='E', + bases=[ + Name(id='Enum', ctx=Load())], + keywords=[], + body=[ + Assign( + targets=[ + Name(id='A', ctx=Store())], + value=Constant(value=1)), + Assign( + targets=[ + Name(id='B', ctx=Store())], + value=Constant(value=2))], + decorator_list=[])], + type_ignores=[]) +Module( + body=[ + Import( + names=[ + alias(name='enum')]), + ClassDef( + name='EnumWhitelist', + bases=[ + Attribute( + value=Name(id='enum', ctx=Load()), + attr='Enum', + ctx=Load())], + keywords=[], + body=[ + Assign( + targets=[ + Name(id='EnumWhitelist', ctx=Store())], + value=Constant(value=1))], + decorator_list=[]), + Expr( + value=Attribute( + value=Attribute( + value=Name(id='EnumWhitelist', ctx=Load()), + attr='EnumWhitelist', + ctx=Load()), + attr='_name_', + ctx=Load())), + Expr( + value=Attribute( + value=Attribute( + value=Name(id='EnumWhitelist', ctx=Load()), + attr='EnumWhitelist', + ctx=Load()), + attr='_value_', + ctx=Load()))], + type_ignores=[]) +enum_test.py:3: unused class 'E' (60% confidence) +enum_test.py:4: unused variable 'A' (60% confidence) +enum_test.py:5: unused variable 'B' (60% confidence) +``` diff --git a/enum_scope.txt b/enum_scope.txt new file mode 100644 index 00000000..e5f4900b --- /dev/null +++ b/enum_scope.txt @@ -0,0 +1,64 @@ +AST without scoping info: +Module( + body=[ + ImportFrom( + module='enum', + names=[ + alias(name='Enum')], + level=0), + ClassDef( + name='E', + bases=[ + Name(id='Enum', ctx=Load())], + keywords=[], + body=[ + Assign( + targets=[ + Name(id='A', ctx=Store())], + value=Constant(value=1)), + Assign( + targets=[ + Name(id='B', ctx=Store())], + value=Constant(value=2))], + decorator_list=[])], + type_ignores=[]) +AST without scoping info: +Module( + body=[ + Import( + names=[ + alias(name='enum')]), + ClassDef( + name='EnumWhitelist', + bases=[ + Attribute( + value=Name(id='enum', ctx=Load()), + attr='Enum', + ctx=Load())], + keywords=[], + body=[ + Assign( + targets=[ + Name(id='EnumWhitelist', ctx=Store())], + value=Constant(value=1))], + decorator_list=[]), + Expr( + value=Attribute( + value=Attribute( + value=Name(id='EnumWhitelist', ctx=Load()), + attr='EnumWhitelist', + ctx=Load()), + attr='_name_', + ctx=Load())), + Expr( + value=Attribute( + value=Attribute( + value=Name(id='EnumWhitelist', ctx=Load()), + attr='EnumWhitelist', + ctx=Load()), + attr='_value_', + ctx=Load()))], + type_ignores=[]) +enum_test.py:3: unused class 'E' (60% confidence) +enum_test.py:4: unused variable 'A' (60% confidence) +enum_test.py:5: unused variable 'B' (60% confidence) diff --git a/enum_test.py b/enum_test.py new file mode 100644 index 00000000..ad39e090 --- /dev/null +++ b/enum_test.py @@ -0,0 +1,7 @@ +from enum import Enum + +class E(Enum): + A = 1 + B = 2 + +# list(E) \ No newline at end of file diff --git a/vulture/core.py b/vulture/core.py index 16c71948..cb274b3b 100644 --- a/vulture/core.py +++ b/vulture/core.py @@ -5,6 +5,7 @@ import re import string import sys +from asttokens import asttokens from vulture import lines from vulture import noqa @@ -242,6 +243,10 @@ def handle_syntax_error(e): if sys.version_info >= (3, 8) # type_comments requires 3.8+ else ast.parse(code, filename=str(self.filename)) ) + tokens = asttokens.ASTTokens(code, parse=True) + print("AST without scoping info:") + print(ast.dump(node, indent=4)) + except SyntaxError as err: handle_syntax_error(err) except ValueError as err: From af7c890abbc130ffde469156e7a35a2b025c3799 Mon Sep 17 00:00:00 2001 From: addykan Date: Fri, 5 May 2023 17:00:42 -0400 Subject: [PATCH 2/7] Wrote initial enum detection code --- enum_test.py | 2 +- vulture/core.py | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/enum_test.py b/enum_test.py index ad39e090..92b831a3 100644 --- a/enum_test.py +++ b/enum_test.py @@ -4,4 +4,4 @@ class E(Enum): A = 1 B = 2 -# list(E) \ No newline at end of file +list(E) \ No newline at end of file diff --git a/vulture/core.py b/vulture/core.py index cb274b3b..8bb387bc 100644 --- a/vulture/core.py +++ b/vulture/core.py @@ -220,6 +220,8 @@ def get_list(typ): self.code = [] self.found_dead_code_or_error = False + self.enum_class_vars = dict() + def scan(self, code, filename=""): filename = Path(filename) self.code = code.splitlines() @@ -556,6 +558,12 @@ def visit_Call(self, node): ): self._handle_new_format_string(node.func.value.s) + if node.func.id=='list': + arg = node.args[0].id + if arg in self.enum_class_vars: + self.used_names.update(self.enum_class_vars[arg]) + + def _handle_new_format_string(self, s): def is_identifier(name): return bool(re.match(r"[a-zA-Z_][a-zA-Z0-9_]*", name)) @@ -599,6 +607,24 @@ def visit_ClassDef(self, node): self._define( self.defined_classes, node.name, node, ignore=_ignore_class ) + if self._subclassesEnum(node): + newKey = node.name + classVariables = [] + for stmt in node.body: + if isinstance(stmt, ast.Assign): + for target in stmt.targets: + classVariables.append(target.id) + self.enum_class_vars[newKey] = classVariables + + def _subclassesEnum(self, node): + for base in node.bases: + if isinstance(base, ast.Name): + if base.id.lower() == 'enum': + return True + elif isinstance(base, ast.Attribute): + if base.value.id.lower() == 'enum': + return True + return False def visit_FunctionDef(self, node): decorator_names = [ From d11e6b322c716ca65074faced1ce2fc8707cac74 Mon Sep 17 00:00:00 2001 From: addykan Date: Fri, 5 May 2023 17:18:13 -0400 Subject: [PATCH 3/7] Cleaned up debug output for unused enum bug --- vulture/core.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/vulture/core.py b/vulture/core.py index 8bb387bc..f5dc2920 100644 --- a/vulture/core.py +++ b/vulture/core.py @@ -246,9 +246,6 @@ def handle_syntax_error(e): else ast.parse(code, filename=str(self.filename)) ) tokens = asttokens.ASTTokens(code, parse=True) - print("AST without scoping info:") - print(ast.dump(node, indent=4)) - except SyntaxError as err: handle_syntax_error(err) except ValueError as err: @@ -558,12 +555,11 @@ def visit_Call(self, node): ): self._handle_new_format_string(node.func.value.s) - if node.func.id=='list': + if (isinstance(node.func, ast.Name) and node.func.id == 'list' and len(node.args) > 0 and isinstance(node.args[0], ast.Name)): arg = node.args[0].id if arg in self.enum_class_vars: self.used_names.update(self.enum_class_vars[arg]) - def _handle_new_format_string(self, s): def is_identifier(name): return bool(re.match(r"[a-zA-Z_][a-zA-Z0-9_]*", name)) @@ -607,7 +603,7 @@ def visit_ClassDef(self, node): self._define( self.defined_classes, node.name, node, ignore=_ignore_class ) - if self._subclassesEnum(node): + if self._subclassesEnum(node): newKey = node.name classVariables = [] for stmt in node.body: @@ -624,7 +620,7 @@ def _subclassesEnum(self, node): elif isinstance(base, ast.Attribute): if base.value.id.lower() == 'enum': return True - return False + return False def visit_FunctionDef(self, node): decorator_names = [ From b36da88906eb0f5c9d2846c194710ef60156f76a Mon Sep 17 00:00:00 2001 From: pm3512 Date: Fri, 5 May 2023 21:14:47 -0400 Subject: [PATCH 4/7] recursive enum check --- vulture/core.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/vulture/core.py b/vulture/core.py index f5dc2920..3e691ba3 100644 --- a/vulture/core.py +++ b/vulture/core.py @@ -5,7 +5,6 @@ import re import string import sys -from asttokens import asttokens from vulture import lines from vulture import noqa @@ -245,7 +244,7 @@ def handle_syntax_error(e): if sys.version_info >= (3, 8) # type_comments requires 3.8+ else ast.parse(code, filename=str(self.filename)) ) - tokens = asttokens.ASTTokens(code, parse=True) + print(ast.dump(node, indent=4)) except SyntaxError as err: handle_syntax_error(err) except ValueError as err: @@ -555,7 +554,12 @@ def visit_Call(self, node): ): self._handle_new_format_string(node.func.value.s) - if (isinstance(node.func, ast.Name) and node.func.id == 'list' and len(node.args) > 0 and isinstance(node.args[0], ast.Name)): + if ( + isinstance(node.func, ast.Name) + and node.func.id == "list" + and len(node.args) > 0 + and isinstance(node.args[0], ast.Name) + ): arg = node.args[0].id if arg in self.enum_class_vars: self.used_names.update(self.enum_class_vars[arg]) @@ -615,10 +619,13 @@ def visit_ClassDef(self, node): def _subclassesEnum(self, node): for base in node.bases: if isinstance(base, ast.Name): - if base.id.lower() == 'enum': + if ( + base.id.lower() == "enum" + or base.id in self.enum_class_vars + ): return True elif isinstance(base, ast.Attribute): - if base.value.id.lower() == 'enum': + if base.value.id.lower() == "enum": return True return False From d2cb7030bf3168c27166709de80e84bb55270386 Mon Sep 17 00:00:00 2001 From: pm3512 Date: Sat, 6 May 2023 18:41:22 -0400 Subject: [PATCH 5/7] added for loop handling --- vulture/core.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/vulture/core.py b/vulture/core.py index 3e691ba3..b5a74b9f 100644 --- a/vulture/core.py +++ b/vulture/core.py @@ -244,7 +244,6 @@ def handle_syntax_error(e): if sys.version_info >= (3, 8) # type_comments requires 3.8+ else ast.parse(code, filename=str(self.filename)) ) - print(ast.dump(node, indent=4)) except SyntaxError as err: handle_syntax_error(err) except ValueError as err: @@ -554,9 +553,11 @@ def visit_Call(self, node): ): self._handle_new_format_string(node.func.value.s) + # handle enum.Enum members + iter_functions = ["list", "tuple", "set"] if ( isinstance(node.func, ast.Name) - and node.func.id == "list" + and node.func.id in iter_functions and len(node.args) > 0 and isinstance(node.args[0], ast.Name) ): @@ -619,10 +620,7 @@ def visit_ClassDef(self, node): def _subclassesEnum(self, node): for base in node.bases: if isinstance(base, ast.Name): - if ( - base.id.lower() == "enum" - or base.id in self.enum_class_vars - ): + if base.id.lower() == "enum": return True elif isinstance(base, ast.Attribute): if base.value.id.lower() == "enum": @@ -695,6 +693,13 @@ def visit_Assign(self, node): def visit_While(self, node): self._handle_conditional_node(node, "while") + def visit_For(self, node): + if ( + isinstance(node.iter, ast.Name) + and node.iter.id in self.enum_class_vars + ): + self.used_names.update(self.enum_class_vars[node.iter.id]) + def visit_MatchClass(self, node): for kwd_attr in node.kwd_attrs: self.used_names.add(kwd_attr) From 755f84653891a6bb54f0e34b911f935b6bde3032 Mon Sep 17 00:00:00 2001 From: pm3512 Date: Sat, 6 May 2023 18:41:50 -0400 Subject: [PATCH 6/7] added tests --- tests/test_scavenging.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/test_scavenging.py b/tests/test_scavenging.py index 27154cd6..6a287547 100644 --- a/tests/test_scavenging.py +++ b/tests/test_scavenging.py @@ -899,3 +899,38 @@ class Color(Enum): check(v.unused_classes, []) check(v.unused_vars, ["BLUE"]) + + +def test_enum_list(v): + v.scan( + """\ +import enum +class E(enum.Enum): + A = 1 + B = 2 + +print(list(E)) +""" + ) + + check(v.defined_classes, ["E"]) + check(v.defined_vars, ["A", "B"]) + check(v.unused_vars, []) + + +def test_enum_for(v): + v.scan( + """\ +import enum +class E(enum.Enum): + A = 1 + B = 2 + +for e in E: + print(e) +""" + ) + + check(v.defined_classes, ["E"]) + check(v.defined_vars, ["A", "B", "e"]) + check(v.unused_vars, []) From 300435c2a3b84d10dea8ef4563263205fa499123 Mon Sep 17 00:00:00 2001 From: Anuda Weerasinghe Date: Sat, 6 May 2023 18:50:05 -0400 Subject: [PATCH 7/7] adds comments describing enum iteration changes --- enum_issue_progress.md | 82 ------------------------------------------ enum_scope.txt | 64 --------------------------------- enum_test.py | 7 ---- vulture/core.py | 7 +++- 4 files changed, 6 insertions(+), 154 deletions(-) delete mode 100644 enum_issue_progress.md delete mode 100644 enum_scope.txt delete mode 100644 enum_test.py diff --git a/enum_issue_progress.md b/enum_issue_progress.md deleted file mode 100644 index 184b4b96..00000000 --- a/enum_issue_progress.md +++ /dev/null @@ -1,82 +0,0 @@ -## Our progress thus far on [issue #304](https://github.com/jendrikseipp/vulture/issues/304) - -- We traced the code path by running the debugger on the source code. -- Identified that Vulture generates an AST of the provided source code in the `scan` function in `core.py` -- Each node in the AST is then visited by the `visit` function -- The `visit` function decides what helper to use based on the type of node -- For this issue, we specifically care about the `visit_Call` and `visit_ClassDef` functions because these are invoked when the `list()` function is called, and when a class is defined with the `Enum` subclass. - -Based on these findings, here is a high level approach of the solution we will implement: - -- Extend the ast with scoping information using the [asttokens](https://asttokens.readthedocs.io/en/latest/) package. -- In `visit_Call`, check if the `node.func.id=='list'` - are we calling the list function? -- If so, get the argument(`node.args[0].id`) and use the scoping information provided by asttokens to check if the argument is a class -- If so, use the scope info about the class to check if `Enum` is a superclass. -- If so, then add all the fields defined in the class to `defined_variables` - -Debug output is attached below. See `enum_scope.txt` for more details. - -``` -Module( - body=[ - ImportFrom( - module='enum', - names=[ - alias(name='Enum')], - level=0), - ClassDef( - name='E', - bases=[ - Name(id='Enum', ctx=Load())], - keywords=[], - body=[ - Assign( - targets=[ - Name(id='A', ctx=Store())], - value=Constant(value=1)), - Assign( - targets=[ - Name(id='B', ctx=Store())], - value=Constant(value=2))], - decorator_list=[])], - type_ignores=[]) -Module( - body=[ - Import( - names=[ - alias(name='enum')]), - ClassDef( - name='EnumWhitelist', - bases=[ - Attribute( - value=Name(id='enum', ctx=Load()), - attr='Enum', - ctx=Load())], - keywords=[], - body=[ - Assign( - targets=[ - Name(id='EnumWhitelist', ctx=Store())], - value=Constant(value=1))], - decorator_list=[]), - Expr( - value=Attribute( - value=Attribute( - value=Name(id='EnumWhitelist', ctx=Load()), - attr='EnumWhitelist', - ctx=Load()), - attr='_name_', - ctx=Load())), - Expr( - value=Attribute( - value=Attribute( - value=Name(id='EnumWhitelist', ctx=Load()), - attr='EnumWhitelist', - ctx=Load()), - attr='_value_', - ctx=Load()))], - type_ignores=[]) -enum_test.py:3: unused class 'E' (60% confidence) -enum_test.py:4: unused variable 'A' (60% confidence) -enum_test.py:5: unused variable 'B' (60% confidence) -``` diff --git a/enum_scope.txt b/enum_scope.txt deleted file mode 100644 index e5f4900b..00000000 --- a/enum_scope.txt +++ /dev/null @@ -1,64 +0,0 @@ -AST without scoping info: -Module( - body=[ - ImportFrom( - module='enum', - names=[ - alias(name='Enum')], - level=0), - ClassDef( - name='E', - bases=[ - Name(id='Enum', ctx=Load())], - keywords=[], - body=[ - Assign( - targets=[ - Name(id='A', ctx=Store())], - value=Constant(value=1)), - Assign( - targets=[ - Name(id='B', ctx=Store())], - value=Constant(value=2))], - decorator_list=[])], - type_ignores=[]) -AST without scoping info: -Module( - body=[ - Import( - names=[ - alias(name='enum')]), - ClassDef( - name='EnumWhitelist', - bases=[ - Attribute( - value=Name(id='enum', ctx=Load()), - attr='Enum', - ctx=Load())], - keywords=[], - body=[ - Assign( - targets=[ - Name(id='EnumWhitelist', ctx=Store())], - value=Constant(value=1))], - decorator_list=[]), - Expr( - value=Attribute( - value=Attribute( - value=Name(id='EnumWhitelist', ctx=Load()), - attr='EnumWhitelist', - ctx=Load()), - attr='_name_', - ctx=Load())), - Expr( - value=Attribute( - value=Attribute( - value=Name(id='EnumWhitelist', ctx=Load()), - attr='EnumWhitelist', - ctx=Load()), - attr='_value_', - ctx=Load()))], - type_ignores=[]) -enum_test.py:3: unused class 'E' (60% confidence) -enum_test.py:4: unused variable 'A' (60% confidence) -enum_test.py:5: unused variable 'B' (60% confidence) diff --git a/enum_test.py b/enum_test.py deleted file mode 100644 index 92b831a3..00000000 --- a/enum_test.py +++ /dev/null @@ -1,7 +0,0 @@ -from enum import Enum - -class E(Enum): - A = 1 - B = 2 - -list(E) \ No newline at end of file diff --git a/vulture/core.py b/vulture/core.py index b5a74b9f..145fa6c6 100644 --- a/vulture/core.py +++ b/vulture/core.py @@ -219,7 +219,7 @@ def get_list(typ): self.code = [] self.found_dead_code_or_error = False - self.enum_class_vars = dict() + self.enum_class_vars = dict() # stores variables defined in enum classes def scan(self, code, filename=""): filename = Path(filename) @@ -608,6 +608,7 @@ def visit_ClassDef(self, node): self._define( self.defined_classes, node.name, node, ignore=_ignore_class ) + # if subclasses enum add class variables to enum_class_vars if self._subclassesEnum(node): newKey = node.name classVariables = [] @@ -618,6 +619,9 @@ def visit_ClassDef(self, node): self.enum_class_vars[newKey] = classVariables def _subclassesEnum(self, node): + ''' + Checks if a class has Enum as a superclass + ''' for base in node.bases: if isinstance(base, ast.Name): if base.id.lower() == "enum": @@ -694,6 +698,7 @@ def visit_While(self, node): self._handle_conditional_node(node, "while") def visit_For(self, node): + # Handle iterating over Enum if ( isinstance(node.iter, ast.Name) and node.iter.id in self.enum_class_vars