Skip to content

Commit

Permalink
Automatically change Tor circuit once if IP is blocked
Browse files Browse the repository at this point in the history
Use the stem library to send a new-identity (NEWNYM) signal via the Tor
control port.

See #20
  • Loading branch information
user234683 committed Oct 25, 2020
1 parent bcaec7b commit 3a081a9
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 46 deletions.
7 changes: 7 additions & 0 deletions settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@
'category': 'network',
}),

('tor_control_port', {
'type': int,
'default': 9151,
'comment': '',
'category': 'network',
}),

('port_number', {
'type': int,
'default': 8080,
Expand Down
4 changes: 3 additions & 1 deletion youtube/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,10 @@ def error_page(e):
error_message = ('Error: Youtube blocked the request because the Tor'
' exit node is overutilized. Try getting a new exit node by'
' using the New Identity button in the Tor Browser.')
if exc_info()[1].error_message:
error_message += '\n\n' + exc_info()[1].error_message
if exc_info()[1].ip:
error_message += ' Exit node IP address: ' + exc_info()[1].ip
error_message += '\n\nExit node IP address: ' + exc_info()[1].ip
return flask.render_template('error.html', error_message=error_message, slim=slim), 502
return flask.render_template('error.html', traceback=traceback.format_exc(), slim=slim), 500

Expand Down
1 change: 1 addition & 0 deletions youtube/templates/shared.css
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ h1{
font-weight: normal;
}
#error-box, #error-message{
white-space: pre-wrap;
background-color: var(--interface-color);
width: 80%;
margin: auto;
Expand Down
162 changes: 117 additions & 45 deletions youtube/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
import gevent.queue
import gevent.lock
import collections
import stem
import stem.control
import traceback

# The trouble with the requests library: It ships its own certificate bundle via certifi
# instead of using the system certificate store, meaning self-signed certificates
Expand Down Expand Up @@ -54,32 +57,81 @@

connection_pool = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED')

old_tor_connection_pool = None
tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:' + str(settings.tor_port) + '/', cert_reqs = 'CERT_REQUIRED')
class TorManager:
def __init__(self):
self.old_tor_connection_pool = None
self.tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager(
'socks5://127.0.0.1:' + str(settings.tor_port) + '/',
cert_reqs = 'CERT_REQUIRED')
self.tor_pool_refresh_time = time.monotonic()

tor_pool_refresh_time = time.monotonic() # prevent problems due to clock changes
self.new_identity_lock = gevent.lock.BoundedSemaphore(1)
self.last_new_identity_time = time.monotonic() - 20

def get_pool(use_tor):
global old_tor_connection_pool
global tor_connection_pool
global tor_pool_refresh_time
def refresh_tor_connection_pool(self):
self.tor_connection_pool.clear()

if not use_tor:
return connection_pool
# Keep a reference for 5 min to avoid it getting garbage collected
# while sockets still in use
self.old_tor_connection_pool = self.tor_connection_pool

# Tor changes circuits after 10 minutes: https://tor.stackexchange.com/questions/262/for-how-long-does-a-circuit-stay-alive
current_time = time.monotonic()
if current_time - tor_pool_refresh_time > 300: # close pool after 5 minutes
tor_connection_pool.clear()
self.tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager(
'socks5://127.0.0.1:' + str(settings.tor_port) + '/',
cert_reqs = 'CERT_REQUIRED')
self.tor_pool_refresh_time = time.monotonic()

# Keep a reference for 5 min to avoid it getting garbage collected while sockets still in use
old_tor_connection_pool = tor_connection_pool
def get_tor_connection_pool(self):
# Tor changes circuits after 10 minutes:
# https://tor.stackexchange.com/questions/262/for-how-long-does-a-circuit-stay-alive
current_time = time.monotonic()

tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:' + str(settings.tor_port) + '/', cert_reqs = 'CERT_REQUIRED')
tor_pool_refresh_time = current_time
# close pool after 5 minutes
if current_time - self.tor_pool_refresh_time > 300:
self.refresh_tor_connection_pool()

return tor_connection_pool
return self.tor_connection_pool

def new_identity(self, time_failed_request_started):
    '''Ask Tor for a new identity through the control port.

    Calls are serialized by self.new_identity_lock, so greenlets whose
    requests were blocked at the same time queue up here instead of each
    sending their own NEWNYM signal.

    time_failed_request_started: timestamp taken when the failed request
        began. It is compared with self.last_new_identity_time, which is
        read from time.monotonic(), so it has to come from that same clock.
        NOTE(review): the fetch_url caller visible in this file passes a
        time.time() value; confirm and switch it to time.monotonic(),
        otherwise the stale-identity check below presumably never matches.

    Returns None when the caller may retry (a new identity was obtained
    now, or one was already obtained after the failed request started).
    Otherwise returns a human-readable error string.
    '''
    # Function-scope import so AuthenticationFailure is resolvable without
    # relying on stem.control importing stem.connection as a side effect.
    import stem.connection

    print('new_identity: new_identity called')
    # blocks if another greenlet currently has the lock
    with self.new_identity_lock:
        print('new_identity: New identity lock acquired')

        # This was caused by a request that failed within a previous,
        # stale identity
        if time_failed_request_started <= self.last_new_identity_time:
            print('new_identity: Cancelling; request was from stale identity')
            return None

        # Rate limit: at most one circuit change per 20 seconds
        delta = time.monotonic() - self.last_new_identity_time
        if delta < 20:
            print('new_identity: Retried already within last 20 seconds')
            return 'Retried with new circuit once (max) within last 20 seconds.'

        try:
            port = settings.tor_control_port
            with stem.control.Controller.from_port(port=port) as controller:
                controller.authenticate()
                print('new_identity: Getting new identity')
                controller.signal(stem.Signal.NEWNYM)
                print('new_identity: NEWNYM signal sent')
                self.last_new_identity_time = time.monotonic()
            # Drop pooled sockets so retries do not reuse the old circuit
            self.refresh_tor_connection_pool()
            return None
        except stem.SocketError:
            traceback.print_exc()
            return 'Failed to connect to Tor control port.'
        except stem.connection.AuthenticationFailure:
            # Report as an error string so the caller raises its 429
            # FetchError instead of an unrelated exception escaping.
            traceback.print_exc()
            return 'Failed to authenticate with Tor control port.'

tor_manager = TorManager()


def get_pool(use_tor):
    '''Pick the urllib3 pool for a request.

    With use_tor truthy, the pool comes from
    tor_manager.get_tor_connection_pool(); otherwise the module-level
    connection_pool is returned.
    '''
    if use_tor:
        return tor_manager.get_tor_connection_pool()
    return connection_pool


class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
Expand All @@ -103,11 +155,12 @@ def http_response(self, request, response):
https_response = http_response

class FetchError(Exception):
def __init__(self, code, reason='', ip=None):
def __init__(self, code, reason='', ip=None, error_message=None):
Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
self.code = code
self.reason = reason
self.ip = ip
self.error_message = error_message

def decode_content(content, encoding_header):
encodings = encoding_header.replace(' ', '').split(',')
Expand Down Expand Up @@ -184,32 +237,51 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
cookiejar_send=None, cookiejar_receive=None, use_tor=True,
debug_name=None):
start_time = time.time()

response, cleanup_func = fetch_url_response(
url, headers, timeout=timeout,
cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
use_tor=use_tor)
response_time = time.time()

content = response.read()
read_finish = time.time()

cleanup_func(response) # release_connection for urllib3
content = decode_content(
content,
response.getheader('Content-Encoding', default='identity'))

if (response.status == 429
and content.startswith(b'<!DOCTYPE')
and b'Our systems have detected unusual traffic' in content):
ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
content)
ip = ip.group(1).decode('ascii') if ip else None
raise FetchError('429', reason=response.reason, ip=ip)

elif response.status >= 400:
raise FetchError(str(response.status), reason=response.reason, ip=None)
while True:
start_time = time.time()

response, cleanup_func = fetch_url_response(
url, headers, timeout=timeout,
cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
use_tor=use_tor)
response_time = time.time()

content = response.read()

read_finish = time.time()

cleanup_func(response) # release_connection for urllib3
content = decode_content(
content,
response.getheader('Content-Encoding', default='identity'))

if (response.status == 429
and content.startswith(b'<!DOCTYPE')
and b'Our systems have detected unusual traffic' in content):
ip = re.search(
br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
content)
ip = ip.group(1).decode('ascii') if ip else None

# don't get new identity if we're not using Tor
if not use_tor:
raise FetchError('429', reason=response.reason, ip=ip)

print('Error: Youtube blocked the request because the Tor exit node is overutilized. Exit node IP address: %s' % ip)

# get new identity
error = tor_manager.new_identity(start_time)
if error:
raise FetchError(
'429', reason=response.reason, ip=ip,
error_message='Automatic circuit change: ' + error)
else:
continue # retry now that we have new identity

elif response.status >= 400:
raise FetchError(str(response.status), reason=response.reason,
ip=None)
break

if report_text:
print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
Expand Down

0 comments on commit 3a081a9

Please sign in to comment.