Skip to content
This repository has been archived by the owner on Dec 15, 2022. It is now read-only.

Add support for PEP-515, underscores in numeric literals #334

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
27 changes: 15 additions & 12 deletions grammars/python.cson
Original file line number Diff line number Diff line change
Expand Up @@ -38,51 +38,54 @@
'include': '#language_variables'
}
{
'match': '\\b(?i:(0x\\h*)L)'
'match': '\\b(?i:(0x\\h(?:_?\\h)*)L)\\b'
'name': 'constant.numeric.integer.long.hexadecimal.python'
}
{
'match': '\\b(?i:(0x\\h*))'
'match': '\\b(?i:(0x\\h(?:_?\\h)*))\\b'
'name': 'constant.numeric.integer.hexadecimal.python'
}
{
'match': '\\b(?i:(0o?[0-7]+)L)'
'match': '\\b(?i:(0o?[0-7](?:_?[0-7])*)L)\\b'
'name': 'constant.numeric.integer.long.octal.python'
}
{
'match': '\\b(?i:(0o?[0-7]+))'
'match': '\\b(?i:(0o?[0-7](?:_?[0-7])*))\\b'
'name': 'constant.numeric.integer.octal.python'
}
{
'match': '\\b(?i:(0b[01]+)L)',
'match': '\\b(?i:(0b[01](?:_?[01])*)L)\\b'
'name': 'constant.numeric.integer.long.binary.python'
}
{
'match': '\\b(?i:(0b[01]+))',
'match': '\\b(?i:(0b[01](?:_?[01])*))\\b'
'name': 'constant.numeric.integer.binary.python'
}
{
'match': '\\b(?i:(((\\d+(\\.(?=[^a-zA-Z_])\\d*)?|(?<=[^0-9a-zA-Z_])\\.\\d+)(e[\\-\\+]?\\d+)?))J)'
# Use a custom word boundary for the second half as floats beginning with . won't be matched by \b
'match': '(?i:(((\\b\\d(?:_?\\d)*(\\.(\\d(?:_?\\d)*)?)?|(?<=[^0-9a-zA-Z_]|^)\\.\\d(?:_?\\d)*)(e[\\-\\+]?\\d(?:_?\\d)*)?))J)\\b'
'name': 'constant.numeric.complex.python'
}
{
'match': '\\b(?i:(\\d+\\.\\d*(e[\\-\\+]?\\d+)?))(?=[^a-zA-Z_])'
# Use a custom word boundary as floats ending with . won't be matched by \b
'match': '\\b(?i:(\\d(?:_?\\d)*\\.(\\d(?:_?\\d)*)?(e[\\-\\+]?\\d(?:_?\\d)*)?))(?=[^a-zA-Z_]|$)'
'name': 'constant.numeric.float.python'
}
{
'match': '(?<=[^0-9a-zA-Z_])(?i:(\\.\\d+(e[\\-\\+]?\\d+)?))'
# Use a custom word boundary as floats beginning with . won't be matched by \b
'match': '(?<=[^0-9a-zA-Z_]|^)(?i:(\\.\\d(?:_?\\d)*(e[\\-\\+]?\\d(?:_?\\d)*)?))\\b'
'name': 'constant.numeric.float.python'
}
{
'match': '\\b(?i:(\\d+e[\\-\\+]?\\d+))'
'match': '\\b(?i:(\\d(?:_?\\d)*e[\\-\\+]?\\d(?:_?\\d)*))\\b'
'name': 'constant.numeric.float.python'
}
{
'match': '\\b(?i:([1-9]+[0-9]*|0)L)'
'match': '\\b(?i:([1-9](?:_?\\d)*|0)L)\\b'
'name': 'constant.numeric.integer.long.decimal.python'
}
{
'match': '\\b([1-9]+[0-9]*|0)'
'match': '\\b(?:[1-9](?:_?\\d)*|0)\\b'
'name': 'constant.numeric.integer.decimal.python'
}
{
Expand Down
245 changes: 245 additions & 0 deletions spec/python-spec.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,251 @@ describe "Python grammar", ->

expect(tokens[0]).toEqual value: 'yield from', scopes: ['source.python', 'keyword.control.statement.python']

describe "numbers", ->
describe "integers", ->
# PEP 515 binary integers: a single underscore is allowed between digits
# (e.g. 0b0000_0100). Positive cases expect the binary-integer scope;
# negative cases expect tokens[0] to carry only 'source.python', i.e. no
# numeric rule matched the text.
describe "binary", ->
it "tokenizes binary numbers", ->
{tokens} = grammar.tokenizeLine '0b01'

expect(tokens[0]).toEqual value: '0b01', scopes: ['source.python', 'constant.numeric.integer.binary.python']

it "tokenizes binary numbers with separators", ->
{tokens} = grammar.tokenizeLine '0b0000_0100_1101'

expect(tokens[0]).toEqual value: '0b0000_0100_1101', scopes: ['source.python', 'constant.numeric.integer.binary.python']

# NOTE(review): the whole line staying unscoped relies on '_' being a word
# character, so neither half of the malformed literal can satisfy \b —
# confirm against the grammar's regex engine (Oniguruma).
it "does not tokenize binary numbers with more than one consecutive separator", ->
{tokens} = grammar.tokenizeLine '0b0000__0100_1101'

expect(tokens[0]).toEqual value: '0b0000__0100_1101', scopes: ['source.python']

it "does not tokenize binary numbers with a leading separator", ->
{tokens} = grammar.tokenizeLine '0b_0000_0100_1101'

expect(tokens[0]).toEqual value: '0b_0000_0100_1101', scopes: ['source.python']

it "does not tokenize binary numbers with a trailing separator", ->
{tokens} = grammar.tokenizeLine '0b0000_0100_1101_'

expect(tokens[0]).toEqual value: '0b0000_0100_1101_', scopes: ['source.python']

# '7' is not a binary digit, so the trailing \b added in this PR keeps
# the rule from matching just the '0b' prefix.
it "does not tokenize non-binary numbers", ->
{tokens} = grammar.tokenizeLine '0b7'

expect(tokens[0]).toEqual value: '0b7', scopes: ['source.python']

# PEP 515 octal integers: same separator rules (single '_' between digits
# only). Covers the 0o-prefixed octal rule; negative cases expect the
# literal to remain unscoped plain source.
describe "octal", ->
it "tokenizes octal numbers", ->
{tokens} = grammar.tokenizeLine '0o72'

expect(tokens[0]).toEqual value: '0o72', scopes: ['source.python', 'constant.numeric.integer.octal.python']

it "tokenizes octal numbers with separators", ->
{tokens} = grammar.tokenizeLine '0o236_777_644'

expect(tokens[0]).toEqual value: '0o236_777_644', scopes: ['source.python', 'constant.numeric.integer.octal.python']

it "does not tokenize octal numbers with more than one consecutive separator", ->
{tokens} = grammar.tokenizeLine '0o236__777_644'

expect(tokens[0]).toEqual value: '0o236__777_644', scopes: ['source.python']

it "does not tokenize octal numbers with a leading separator", ->
{tokens} = grammar.tokenizeLine '0o_236_777_644'

expect(tokens[0]).toEqual value: '0o_236_777_644', scopes: ['source.python']

it "does not tokenize octal numbers with a trailing separator", ->
{tokens} = grammar.tokenizeLine '0o236_777_644_'

expect(tokens[0]).toEqual value: '0o236_777_644_', scopes: ['source.python']

# '8' is out of range for octal; the trailing \b prevents a partial match.
it "does not tokenize non-octal numbers", ->
{tokens} = grammar.tokenizeLine '0o8'

expect(tokens[0]).toEqual value: '0o8', scopes: ['source.python']

# PEP 515 hexadecimal integers: single '_' between hex digits only.
# Covers the 0x rule (\h digit class); negative cases expect no numeric
# scope on the token.
describe "hexadecimal", ->
it "tokenizes hexadecimal numbers", ->
{tokens} = grammar.tokenizeLine '0xe3'

expect(tokens[0]).toEqual value: '0xe3', scopes: ['source.python', 'constant.numeric.integer.hexadecimal.python']

it "tokenizes hexadecimal numbers with separators", ->
{tokens} = grammar.tokenizeLine '0xdead_beef_1337'

expect(tokens[0]).toEqual value: '0xdead_beef_1337', scopes: ['source.python', 'constant.numeric.integer.hexadecimal.python']

it "does not tokenize hexadecimal numbers with more than one consecutive separator", ->
{tokens} = grammar.tokenizeLine '0xdead__beef_1337'

expect(tokens[0]).toEqual value: '0xdead__beef_1337', scopes: ['source.python']

it "does not tokenize hexadecimal numbers with a leading separator", ->
{tokens} = grammar.tokenizeLine '0x_dead_beef_1337'

expect(tokens[0]).toEqual value: '0x_dead_beef_1337', scopes: ['source.python']

it "does not tokenize hexadecimal numbers with a trailing separator", ->
{tokens} = grammar.tokenizeLine '0xdead_beef_1337_'

expect(tokens[0]).toEqual value: '0xdead_beef_1337_', scopes: ['source.python']

# 'g' is not a hex digit; the trailing \b stops the rule from matching '0x'.
it "does not tokenize non-hexadecimal numbers", ->
{tokens} = grammar.tokenizeLine '0xg'

expect(tokens[0]).toEqual value: '0xg', scopes: ['source.python']

# PEP 515 decimal integers: single '_' between digits; no leading or
# trailing separator. Covers the plain decimal-integer rule.
describe "decimal", ->
it "tokenizes decimal numbers", ->
{tokens} = grammar.tokenizeLine '92'

expect(tokens[0]).toEqual value: '92', scopes: ['source.python', 'constant.numeric.integer.decimal.python']

it "tokenizes decimal numbers with separators", ->
{tokens} = grammar.tokenizeLine '123_456_789'

expect(tokens[0]).toEqual value: '123_456_789', scopes: ['source.python', 'constant.numeric.integer.decimal.python']

it "does not tokenize decimal numbers with more than one consecutive separator", ->
{tokens} = grammar.tokenizeLine '123__456_789'

expect(tokens[0]).toEqual value: '123__456_789', scopes: ['source.python']

# A leading underscore makes the whole token an identifier, not a number.
it "does not tokenize decimal numbers with a leading separator", ->
{tokens} = grammar.tokenizeLine '_123_456_789'

expect(tokens[0]).toEqual value: '_123_456_789', scopes: ['source.python']

it "does not tokenize decimal numbers with a trailing separator", ->
{tokens} = grammar.tokenizeLine '123_456_789_'

expect(tokens[0]).toEqual value: '123_456_789_', scopes: ['source.python']

# '3f' is digit+letter; the trailing \b added in this PR rejects it.
it "does not tokenize non-decimal numbers", ->
{tokens} = grammar.tokenizeLine '3f'

expect(tokens[0]).toEqual value: '3f', scopes: ['source.python']

describe "floats", ->
# Decimal floats with PEP 515 separators. The grammar uses custom word
# boundaries here because '\b' cannot anchor a float that starts or ends
# with '.' (see the comments in grammars/python.cson). Malformed-separator
# cases assert the PARTIAL token the regex engine still matches after
# backtracking, not the whole line.
describe "decimal", ->
it "tokenizes decimal floats", ->
{tokens} = grammar.tokenizeLine '32.68'

expect(tokens[0]).toEqual value: '32.68', scopes: ['source.python', 'constant.numeric.float.python']

it "tokenizes decimal floats without a fractional portion", ->
{tokens} = grammar.tokenizeLine '32.'

expect(tokens[0]).toEqual value: '32.', scopes: ['source.python', 'constant.numeric.float.python']

it "tokenizes decimal floats without a whole portion", ->
{tokens} = grammar.tokenizeLine '.68'

expect(tokens[0]).toEqual value: '.68', scopes: ['source.python', 'constant.numeric.float.python']

it "tokenizes decimal floats with separators", ->
{tokens} = grammar.tokenizeLine '373_827.639_817'

expect(tokens[0]).toEqual value: '373_827.639_817', scopes: ['source.python', 'constant.numeric.float.python']

# Only the leading '373__827' run is left unscoped; the float rule never
# starts because '__' breaks the digit group before the '.'.
it "does not tokenize decimal floats with more than one consecutive separator", ->
{tokens} = grammar.tokenizeLine '373__827.639_817'

expect(tokens[0]).toEqual value: '373__827', scopes: ['source.python']

# '373_827' alone still matches as a decimal INTEGER; the '._639...' tail
# is not part of a valid float, so the first token downgrades to integer.
it "does not tokenize decimal floats with a leading separator", ->
{tokens} = grammar.tokenizeLine '373_827._639_817'

expect(tokens[0]).toEqual value: '373_827', scopes: ['source.python', 'constant.numeric.integer.decimal.python']

it "does not tokenize decimal floats with a trailing separator", ->
{tokens} = grammar.tokenizeLine '373_827_.639_817'

expect(tokens[0]).toEqual value: '373_827_', scopes: ['source.python']

# Exponential floats (NeM / Ne+M / Ne-M) with PEP 515 separators in both
# the mantissa and the exponent. Negative cases pin the partial token the
# grammar matches when the exponent's separators are malformed.
describe "exponential", ->
it "tokenizes exponential floats", ->
{tokens} = grammar.tokenizeLine '97e43'

expect(tokens[0]).toEqual value: '97e43', scopes: ['source.python', 'constant.numeric.float.python']

it "tokenizes exponential floats with an optional sign", ->
{tokens} = grammar.tokenizeLine '97e+43'

expect(tokens[0]).toEqual value: '97e+43', scopes: ['source.python', 'constant.numeric.float.python']

it "tokenizes exponential decimal floats", ->
{tokens} = grammar.tokenizeLine '97.22e-43'

expect(tokens[0]).toEqual value: '97.22e-43', scopes: ['source.python', 'constant.numeric.float.python']

it "tokenizes exponential floats with separators", ->
{tokens} = grammar.tokenizeLine '62_2e-2_839'

expect(tokens[0]).toEqual value: '62_2e-2_839', scopes: ['source.python', 'constant.numeric.float.python']

# '62_2e' is the unscoped leftover: with a broken exponent the float rule
# fails, and '62_2e' reads as digits followed by a letter.
it "does not tokenize exponential floats with more than one consecutive separator", ->
{tokens} = grammar.tokenizeLine '62_2e-2__839'

expect(tokens[0]).toEqual value: '62_2e', scopes: ['source.python']

it "does not tokenize exponential floats with a leading separator", ->
{tokens} = grammar.tokenizeLine '_62_2e-2_839'

expect(tokens[0]).toEqual value: '_62_2e', scopes: ['source.python']

it "does not tokenize exponential floats with a trailing separator", ->
{tokens} = grammar.tokenizeLine '62_2e-2_839_'

expect(tokens[0]).toEqual value: '62_2e', scopes: ['source.python']

# The exponent must be an integer: the match stops before the second '.'.
it "does not tokenize exponential floats with a fractional exponent", ->
{tokens} = grammar.tokenizeLine '97.22e-43.3'

expect(tokens[0]).toEqual value: '97.22e-43', scopes: ['source.python', 'constant.numeric.float.python']

# Complex literals: any int/float form suffixed with 'j' (case-insensitive
# in the grammar). Malformed-separator cases assert the fallback token the
# engine produces after backtracking — often a shorter FLOAT or INTEGER
# match rather than a fully unscoped run.
describe "complex", ->
it "tokenizes complex floats", ->
{tokens} = grammar.tokenizeLine '63j'

expect(tokens[0]).toEqual value: '63j', scopes: ['source.python', 'constant.numeric.complex.python']

it "tokenizes complex decimal floats", ->
{tokens} = grammar.tokenizeLine '63.72j'

expect(tokens[0]).toEqual value: '63.72j', scopes: ['source.python', 'constant.numeric.complex.python']

it "tokenizes complex exponential floats", ->
{tokens} = grammar.tokenizeLine '63e-72j'

expect(tokens[0]).toEqual value: '63e-72j', scopes: ['source.python', 'constant.numeric.complex.python']

# '63.' with no fractional digits is still a valid float base for 'j'.
it "tokenizes complex decimal exponential floats", ->
{tokens} = grammar.tokenizeLine '63.e-72j'

expect(tokens[0]).toEqual value: '63.e-72j', scopes: ['source.python', 'constant.numeric.complex.python']

it "tokenizes complex floats with separators", ->
{tokens} = grammar.tokenizeLine '62_2.83_7e-2_839j'

expect(tokens[0]).toEqual value: '62_2.83_7e-2_839j', scopes: ['source.python', 'constant.numeric.complex.python']

# With '__' in the exponent the complex rule fails; the float rule then
# backtracks to '62_2.8' (lookahead allows a following digit).
it "does not tokenize complex floats with more than one consecutive separator", ->
{tokens} = grammar.tokenizeLine '62_2.83_7e-2__839j'

expect(tokens[0]).toEqual value: '62_2.8', scopes: ['source.python', 'constant.numeric.float.python']

# A separator right after '.' kills the float; '62_2' survives as an integer.
it "does not tokenize complex floats with a leading separator", ->
{tokens} = grammar.tokenizeLine '62_2._83_7e-2_839j'

expect(tokens[0]).toEqual value: '62_2', scopes: ['source.python', 'constant.numeric.integer.decimal.python']

# The trailing '_' before 'j' truncates the match to the last valid digit.
it "does not tokenize complex floats with a trailing separator", ->
{tokens} = grammar.tokenizeLine '62_2.83_7e-2_839_j'

expect(tokens[0]).toEqual value: '62_2.83_7e-2_83', scopes: ['source.python', 'constant.numeric.float.python']

it "tokenizes multi-line strings", ->
tokens = grammar.tokenizeLines('"1\\\n2"')

Expand Down