diff options
Diffstat (limited to 'proxy.py')
-rwxr-xr-x | proxy.py | 270 |
1 files changed, 0 insertions, 270 deletions
#!/usr/bin/python -t
"""Parallel-range HTTP proxy built on asyncore/asynchat (Python 2).

The server listens on port 8080.  A plain GET request (no query string)
whose entity is at least THRESHOLD bytes long -- as reported by a HEAD
probe -- is split into byte ranges that are fetched concurrently through
the upstream proxies listed in ENDPOINTS and re-assembled in order for the
client.  Every other request is "bypassed": fetched directly with httplib
and relayed to the client.
"""
# Recovered from the deleted revision b0f31e8 of proxy.py.

import os
import sys
import string
import time
import asynchat
import asyncore
import socket
import httplib
import urlparse
from heapq import heappush, heappop
try:
    import cStringIO as StringIO
except ImportError:
    import StringIO


# Upstream proxies, as (host, port) pairs; one fetcher line per entry.
ENDPOINTS = [
    ('10.2.2.11', 8888),
    ('10.3.1.2', 8888),
#    ('10.1.1.156', 8888),
]

kB = 1024
# minimum entity size to start a parallel fetch
THRESHOLD = 512 * kB
# first fetch-range blocksize
INIT_BLOCKSIZE = 512 * kB
# lower bound of the fetch-range blocksize optimization
MIN_BLOCKSIZE = 512 * kB
# time (seconds) each fetcher should spend on its range; the next blocksize
# is derived from the speed measured on the previous range
TIME_SLICE = 5
# start a new fetcher on an endpoint this many bytes before the
# old one finishes
FETCHER_JUMPSTART = 32 * kB

#################


def _request_target(url):
    """Return the request target (path;params?query) of a parsed *url*.

    An empty path is replaced by "/" so the request line stays well formed.
    """
    return urlparse.urlunparse(
        ("", "", url.path or "/", url.params, url.query, ""))


class Fetcher(asynchat.async_chat):
    """Fetch one inclusive byte range of *url* through one upstream proxy.

    Body data is handed to ``reader.handle_incoming_data`` as it arrives;
    the response header block is handed to
    ``reader.handle_incoming_http_header``.
    """

    def __init__(self, reader, proxy, url, headers, range):
        """Connect to *proxy* and prepare to fetch *range* of *url*.

        reader  -- object receiving the header and body callbacks
        proxy   -- (host, port) of the upstream proxy
        url     -- parsed URL (result of urlparse.urlparse)
        headers -- dict of the client's request headers
        range   -- (first_byte, last_byte), both inclusive
        """
        self.reader = reader
        self.proxy = proxy
        self.url = url
        self.headers = headers
        self.range = range

        self.pos = self.range[0]   # absolute offset of the next body byte
        self.start_time = 0
        self.stop_time = 0
        self.http_status = ""
        self.http_header = ""
        self.state = 0  # 0=status, 1=header, 2=body

        asynchat.async_chat.__init__(self)
        self.set_terminator("\r\n")
        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        self.connect(self.proxy)

    def handle_connect(self):
        """Send the ranged GET request once the proxy connection is up."""
        print("%s Start" % (self,))
        self.send("GET http://%s:%s%s HTTP/1.0\r\n" % (
            self.url.hostname, self.url.port or 80,
            _request_target(self.url)))
        # Forward the client's request headers, except its own Range header.
        # (The original tested ``k not in ("range")``, i.e. a substring test
        # against the string "range", which also dropped keys like "a".)
        for key in self.headers.keys():
            if key != "range":
                self.send("%s: %s\r\n" % (key, self.headers[key]))
        self.send("Range: bytes=%s-%s\r\n" % (self.range[0], self.range[1]))
        self.send("\r\n")
        self.start_time = time.time()

    def time(self):
        """Return the seconds spent on this range (so far, if unfinished)."""
        if self.stop_time == 0:
            return time.time() - self.start_time
        return self.stop_time - self.start_time

    def speed(self):
        """Return the average throughput in bytes per second.

        Returns 0.0 when no time has elapsed yet instead of dividing by zero.
        """
        elapsed = self.time()
        if elapsed <= 0:
            return 0.0
        return (self.pos - self.range[0]) / elapsed

    def collect_incoming_data(self, data):
        """Route received bytes according to the current parsing state."""
        if self.state == 2:  # body
            # self.pos is still the offset of *data* while the reader runs.
            self.reader.handle_incoming_data(self, data)
            self.pos += len(data)
            # The range is inclusive, so it is complete only once pos has
            # moved past range[1]; ">=" would drop the final byte.
            if self.pos > self.range[1]:
                self.stop_time = time.time()
                print("%s finished" % (self,))
                # make sure the next fetcher will be started
                self.reader.handle_incoming_data(self)
                self.close()
        elif self.state == 1:  # header
            self.http_header += data
        else:  # status
            self.http_status += data

    def found_terminator(self):
        """Advance the state machine: status line -> headers -> body."""
        if self.state == 0:  # got status-line
            self.state = 1
            self.set_terminator("\r\n\r\n")
        elif self.state == 1:  # got headers
            self.state = 2
            self.set_terminator(None)
            self.reader.handle_incoming_http_header(self.http_header)

    def __str__(self):
        return "<Fetcher proxy=%s url=%s range=%s>" % (
            self.proxy, urlparse.urlunparse(self.url), self.range)


class MultipleProxyReader(object):
    """Download one entity through all ENDPOINTS and stream it in order.

    Received chunks are kept in a heap keyed by their absolute offset and
    pushed to the client channel as soon as the chunk at ``write_pos`` is
    available.
    """

    def __init__(self, channel, url, header, content_length):
        """Start one initial fetcher per endpoint.

        channel        -- client channel (needs push, close_when_done,
                          is_closed)
        url            -- parsed URL of the entity
        header         -- dict of the client's request headers
        content_length -- total entity size in bytes
        """
        self.channel = channel
        self.url = url
        self.header = header
        self.content_length = content_length

        self.header_sent = False
        self.fetch_pos = 0    # first byte not yet assigned to a fetcher
        self.write_pos = 0    # first byte not yet pushed to the client
        self.buffer = ""
        self.blocks = list()  # heap of (offset, data)
        self.fetchers = list()

        for proxy in ENDPOINTS:
            if self.fetch_pos >= self.content_length:
                # Everything is already assigned; do not request an empty
                # (start > end) range.
                break
            self.fetchers.append(Fetcher(
                self, proxy, self.url, self.header,
                self.next_range(INIT_BLOCKSIZE)))

    def handle_incoming_data(self, fetcher, data=None):
        """Accept a body chunk from *fetcher*, or its completion signal.

        With *data*, the chunk is stored under ``fetcher.pos`` (its absolute
        offset).  Without *data*, the fetcher is removed from the active
        list.  In both cases a follow-up fetcher may be started on the same
        endpoint and all in-order data is flushed to the client.
        """
        if not data:
            self.fetchers = [f for f in self.fetchers if f != fetcher]
        else:
            heappush(self.blocks, (fetcher.pos, data))

        same_endpoint = [f for f in self.fetchers
                         if f.proxy == fetcher.proxy]
        # fetch_pos is an exclusive bound, so bytes remain to be assigned
        # exactly while fetch_pos < content_length.
        if (fetcher.range[1] - fetcher.pos < FETCHER_JUMPSTART
                and self.fetch_pos < self.content_length
                and not self.channel.is_closed
                and len(same_endpoint) < 2):
            # Start a new fetcher on this line shortly before the current
            # one finishes its job.
            blocksize = max(int(TIME_SLICE * fetcher.speed()), MIN_BLOCKSIZE)
            fetch_range = self.next_range(blocksize)
            print("Start new Fetcher, bs=%s range=%s"
                  % (blocksize, fetch_range))
            self.fetchers.append(Fetcher(
                self, fetcher.proxy, self.url, self.header, fetch_range))

        while self.send_next_data():
            pass

    def next_range(self, suggested_blocksize):
        """Reserve and return the next inclusive (first, last) byte range.

        The range is capped at the end of the entity.
        """
        start = self.fetch_pos
        self.fetch_pos = min(self.fetch_pos + suggested_blocksize,
                             self.content_length)
        return (start, self.fetch_pos - 1)

    def handle_incoming_http_header(self, header):
        """Send the response head to the client, once.

        The headers of the first upstream response are reused, except for
        Content-Range and Content-Length, which describe a single range.
        """
        if self.header_sent:
            return
        self.header_sent = True

        self.channel.push("HTTP/1.0 200 OK\r\n")
        # Sends header from first response
        headers = httplib.HTTPMessage(StringIO.StringIO(header))
        for key in headers.dict.keys():
            if key not in ("content-range", "content-length"):
                self.channel.push("%s: %s\r\n" % (key, headers.dict[key]))
        # The terminating CRLF was missing, which fused this header with
        # the X-Proxy header that follows.
        self.channel.push("Content-Length: %s\r\n" % self.content_length)
        self.channel.push("X-Proxy: Magicproxy (superpower activated)\r\n")
        self.channel.push("\r\n")

    def send_next_data(self):
        """Push the next in-order chunk; return True if one was sent."""
        if self.channel.is_closed:
            print("%s request side closed the connection" % (self,))
            self.channel.close_when_done()
            # XXX terminate all running fetchers
            return False

        # blocks is a heap, so blocks[0] is the chunk with the lowest offset.
        if self.blocks and self.blocks[0][0] == self.write_pos:
            item = heappop(self.blocks)
            self.channel.push(item[1])
            self.write_pos += len(item[1])
            return True

        # write_pos counts bytes already pushed; the job is done only when
        # it has reached content_length.
        if self.write_pos >= self.content_length:
            print("%s job done %s blocks left" % (self, len(self.blocks)))
            # XXX prevent further calls to send_next_data
            self.channel.close_when_done()

        return False

    def __str__(self):
        return "<MultipleProxyReader url=%s content_length=%s>" % (
            urlparse.urlunparse(self.url), self.content_length)


class HTTPChannel(asynchat.async_chat):
    """Client connection: reads one request head and hands it to the server."""

    def __init__(self, server, sock, addr):
        self.server = server

        self.data = StringIO.StringIO()
        self.is_closed = False
        self.request = None

        asynchat.async_chat.__init__(self, sock)
        self.set_terminator("\r\n\r\n")

    def handle_close(self):
        """Mark the channel closed and release the socket.

        Without close() the dead socket stays in the asyncore map and is
        reported readable on every poll.
        """
        self.is_closed = True
        self.close()

    def collect_incoming_data(self, data):
        """Buffer request bytes; give up on heads larger than 16 KiB."""
        self.data.write(data)
        if self.data.tell() > 16384:
            self.close_when_done()

    def found_terminator(self):
        """Parse the buffered request head and dispatch it to the server."""
        if not self.request:
            # parse http header
            self.data.seek(0)
            self.request = self.data.readline().split(None, 2)
            if len(self.request) != 3:
                # badly formed request; just shut down
                self.close_when_done()
            else:
                headers = httplib.HTTPMessage(self.data).dict
                self.server.handle_request(
                    self, self.request[0], self.request[1], headers)
        else:
            pass  # ignore body data, for now


class HTTPProxyServer(asyncore.dispatcher):
    """Listening socket that accepts clients and routes their requests."""

    def __init__(self):
        self.port = 8080

        asyncore.dispatcher.__init__(self)
        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        self.set_reuse_addr()
        self.bind(("", self.port))
        self.listen(5)

    def shutdown(self):
        # TODO: shut the proxy down cleanly here
        #self.channel.close_when_done()
        sys.exit()

    def handle_accept(self):
        """Wrap a newly accepted connection in an HTTPChannel."""
        pair = self.accept()
        if pair is None:
            # accept() returns None when the connection is already gone.
            return
        conn, addr = pair
        HTTPChannel(self, conn, addr)

    def handle_request(self, channel, method, path, headers):
        """Serve one request, in parallel ranges when it is large enough.

        Non-GET requests, GETs with a query string, and entities without a
        usable Content-Length or smaller than THRESHOLD are bypassed.

        NOTE: the HEAD probe below is a blocking httplib call made from
        inside the asyncore loop.
        """
        url = urlparse.urlparse(path)
        if method != "GET" or url.query != "":
            # do not handle non-GET or GET with Query (?foo=bla) requests
            return self._bypass_request(channel, method, url, headers)

        # check for content-length header with a HEAD request
        conn = httplib.HTTPConnection(url.hostname, url.port or 80)
        try:
            conn.request("HEAD", _request_target(url))
            resp = conn.getresponse()
            raw_length = resp.getheader("content-length")
        finally:
            conn.close()

        try:
            content_length = int(raw_length)
        except (TypeError, ValueError):
            # header missing (None) or not a number
            content_length = None

        if content_length is None:
            # no content length given, bypass this request
            print("missing content-length, bypass")
            self._bypass_request(channel, "GET", url, headers)
        elif content_length < THRESHOLD:
            self._bypass_request(channel, "GET", url, headers)
        else:
            MultipleProxyReader(channel, url, headers, content_length)

    def _bypass_request(self, channel, method, url, headers):
        """Fetch *url* directly and relay the complete response to *channel*.

        The upstream status line is passed on, and the query string is
        included in the upstream request.
        """
        print("_bypass request: %s %s" % (method, urlparse.urlunparse(url)))
        # XXX this should not act as a proxy itself but pass the data
        # through 1:1.
        # Furthermore it should be ensured that requests to host X1 are
        # always routed via proxy Y1,
        # e.g. proxy = proxies[ stuff(hostname) % len(proxies) ]
        conn = httplib.HTTPConnection(url.hostname, url.port or 80)
        try:
            conn.request(method, _request_target(url), body="",
                         headers=headers)
            resp = conn.getresponse()
            channel.push("HTTP/1.0 %s %s\r\n" % (resp.status, resp.reason))
            channel.push("X-Proxy: Magicproxy (superpower disabled)\r\n")
            for name, value in resp.getheaders():
                # resp.read() returns the de-chunked body, so a forwarded
                # Transfer-Encoding header would no longer match it.
                if name.lower() == "transfer-encoding":
                    continue
                channel.push("%s: %s\r\n" % (name, value))
            channel.push("\r\n")
            channel.push(resp.read())
        finally:
            conn.close()
        channel.close_when_done()


if __name__ == "__main__":
    proxy = HTTPProxyServer()
    asyncore.loop()