diff options
author | yvesf <yvesf-git@xapek.org> | 2010-04-05 22:11:37 +0200 |
---|---|---|
committer | yvesf <yvesf-git@xapek.org> | 2010-04-05 22:11:37 +0200 |
commit | d1914490462869fff47ba2294c32a5e6959e5832 (patch) | |
tree | 23e83707243a2aa9e0eb354ec9cdd15a8eb302db | |
parent | 75041878bd789d2b14d3ccebcb0eaeca2c3a6aca (diff) | |
parent | efe938d11ed46b8c328e9707ddfa61a3f5da3a4c (diff) | |
download | magicproxy-d1914490462869fff47ba2294c32a5e6959e5832.tar.gz magicproxy-d1914490462869fff47ba2294c32a5e6959e5832.zip |
Merge branch 'next'
-rw-r--r-- | .gitignore | 1 | ||||
-rwxr-xr-x | bin/magicproxy | 4 | ||||
-rw-r--r-- | deb_dist/python-magicproxy_0.1-1_all.deb | bin | 0 -> 6274 bytes | |||
-rw-r--r-- | dist/magicproxy-0.1-py2.5.egg | bin | 0 -> 12185 bytes | |||
-rwxr-xr-x | magicproxy/__init__.py | 304 | ||||
-rw-r--r-- | magicproxy/gui.py (renamed from gui.py) | 5 | ||||
-rwxr-xr-x | proxy.py | 270 | ||||
-rw-r--r-- | setup.py | 24 |
8 files changed, 336 insertions, 272 deletions
#!/usr/bin/python
# Thin command-line launcher for the magicproxy package: run the proxy and
# propagate main()'s return value as the process exit status.
# NOTE(review): magicproxy.main() falls off its end (returns None), so this
# always exits with status 0 -- confirm that is intended.
import magicproxy
import sys
sys.exit(magicproxy.main())
# magicproxy/__init__.py -- a "splitting" HTTP proxy built on asyncore/asynchat.
# GET responses whose Content-Length exceeds a configurable threshold are
# downloaded as parallel byte ranges through several upstream HTTP proxies
# ("endpoints") and reassembled in order for the client.
# NOTE(review): Python 2 only (print statements; md5, httplib, urlparse,
# cStringIO and asynchat/asyncore modules).

import os, sys, string, time, md5, random
import asynchat, asyncore, socket, httplib, urlparse
from heapq import heappush, heappop
import cStringIO as StringIO

kB = 1024  # size unit for the configuration constants below

class DefaultConfiguration:
    # Default settings. main() hands this class object itself (not an
    # instance) to HTTPProxyServer, so every value is read as a class
    # attribute.

    """bind to that"""
    listen=("",8080)  # (host, port) to listen on; "" = all interfaces

    """available http-proxies"""
    endpoints=[
        ('10.2.2.11', 8888),
        #('10.3.1.2',8888)
    ]

    """minimum entity size to start parallelize fetch"""
    threshold = 512*kB

    """initial size/range for a fetcher-job"""
    initial_blocksize=512*kB

    """minimal size/range for a fetcher-job"""
    minimal_blocksize=512*kB

    """(sec) #time each fetcher spent on his range,
    calculated using speed measured while using initial_blocksize"""
    time_slice=5

    """start a new fetcher on a endpoint X-bytes before the old one finished"""
    fetcher_jumpstart=32*kB

#################

class Fetcher(asynchat.async_chat):
    """Fetch one byte range of a URL through one upstream HTTP proxy.

    The special range (-1,-1) means "whole entity": no Range header is sent.
    Body chunks and the parsed-off response header are forwarded to `reader`,
    which is either a MagicHTTPProxyClient or an HTTPChannel (both implement
    handle_incoming_data / handle_incoming_http_header).
    """

    def __init__(self, reader, proxy, url, headers, fetch_range):
        self.reader = reader     # consumer of the fetched data
        self.proxy = proxy       # (host, port) of the upstream HTTP proxy
        self.url = url           # urlparse result of the origin URL
        self.headers = headers   # dict of the client's original request headers
        self.range = fetch_range # (first_byte, last_byte), both inclusive

        # absolute offset of the next body byte; 0 for a whole-entity fetch
        self.pos = (self.range[0] != -1) and self.range[0] or 0
        self.start_time = 0
        self.stop_time = 0
        self.http_status = ""                   # raw status line, accumulated
        self.http_header = StringIO.StringIO()  # raw header block, accumulated
        self.state = 0 #0=status, 1=header, 2=body
        asynchat.async_chat.__init__(self)
        self.set_terminator("\r\n")  # first terminator: end of status line
        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        self.connect(self.proxy)

    def __str__(self):
        # NOTE(review): missing closing ">" in the repr string.
        return "<Fetcher proxy=%s host=%s path=%s range=%s" % (self.proxy, self.url.hostname, self.url.path, self.range)

    def handle_connect (self):
        # Connection to the upstream proxy stands: send the complete
        # proxy-style GET request in one push.
        print self, "connected"
        buf = "GET http://%s:%s%s HTTP/1.0\r\n" % ( self.url.hostname, self.url.port or 80, self.url.path )
        # NOTE(review): ("range") is a plain string, not a 1-tuple, so this is
        # a substring test -- it also drops any header whose name is a
        # substring of "range" (e.g. "age"). Probably meant ("range",).
        for key in filter(lambda k: k not in ("range"), self.headers.keys()): #send origin request headers
            buf += "%s: %s\r\n" % (key, self.headers[key])
        if self.range != (-1,-1):
            buf += "Range: bytes=%s-%s\r\n" % (self.range[0], self.range[1])
        buf += "\r\n"
        self.push(buf)
        self.start_time = time.time()

    def time(self):
        """Seconds spent fetching so far (total time once finished)."""
        if self.stop_time == 0:
            return time.time() - self.start_time
        else:
            return self.stop_time - self.start_time

    def speed(self):
        """Average download speed in bytes per second."""
        return (self.pos - self.range[0]) / self.time()

    def collect_incoming_data(self, data):
        if self.state==2: #body
            length = len(data)
            if self.range != (-1,-1) and self.pos + length >= self.range[1] + 1:
                #if this request is the first one (whithout Range: ...) then the server
                # dont send us our expected range, we must cut it at some point (here)
                bytes_remaining = self.range[1] - ( self.pos )
                data=data[:bytes_remaining+1]
                print self,"cut: pos=%s length=%s => %s" % (self.pos, length, len(data))
                length = len(data)
                if length == 0:
                    # nothing of our range left in this chunk: report
                    # completion (no-data call) and shut the socket down
                    self.reader.handle_incoming_data(self)
                    self.close()
                    return
            # forward the chunk; a False return means the reader is gone
            if not self.reader.handle_incoming_data(self, data, length):
                self.close()
                return
            self.pos += length
            if self.range != (-1,-1) and self.pos >= self.range[1]:
                # range complete: record timing, notify the reader, close
                self.stop_time = time.time()
                print self, "finished with %s kB/s" % (self.speed() / 1024)
                self.reader.handle_incoming_data(self)
                self.close()
        elif self.state ==1: #header
            self.http_header.write( data )
        else: #status
            self.http_status += data

    def found_terminator(self):
        if self.state == 0: #got status-line
            status = self.http_status.split(" ")
            if len(status) > 1:
                try:
                    self.http_status_code = int(status[1])
                except:
                    # unparsable status code
                    # NOTE(review): 520 is non-standard; Bad Gateway is 502.
                    self.http_status_code = 520 #Bad Gateway
            else:
                self.http_status_code = 520 #Bad Gateway
            self.state = 1
            self.set_terminator("\r\n\r\n") #end of header
        elif self.state == 1: #got headers
            self.state = 2
            self.set_terminator(None)  # remainder of the stream is body data
            self.reader.handle_incoming_http_header(self, self.http_status_code, self.http_header)

class MagicHTTPProxyClient(object):
    """Coordinates the parallel ranged download for one client request.

    Starts with a single whole-entity Fetcher through a random endpoint;
    once the first response header reveals a Content-Length at or above the
    configured threshold, the download is split into ranges across all
    endpoints. Chunks arrive out of order and are reassembled through a
    min-heap keyed by absolute byte offset.
    """

    def __init__(self, channel, url, header):
        self.channel = channel                  # client-side HTTPChannel
        self.config = self.channel.server.config
        self.url = url
        self.header = header                    # client request headers

        self.content_length = -1  # unknown until the first response header
        self.header_sent = False  # response header forwarded to client yet?

        self.fetch_pos = 0        # next offset to hand out to a fetcher
        self.write_pos = 0        # next offset owed to the client
        self.buffer = ""          # NOTE(review): appears unused
        self.blocks = list()      # min-heap of (offset, data, length)
        self.fetchers = list()    # currently active Fetcher instances

        # begin with one whole-entity request through a random endpoint
        proxy = self.config.endpoints[ random.randint(0, len(self.config.endpoints)-1) ]
        self.fetchers.append( Fetcher(self, proxy, self.url, self.header, (-1,-1)) )

    def __str__(self):
        return "<MagicHTTPProxyClient host=%s path=%s content_length=%s>" % (self.url.hostname, self.url.path, self.content_length)

    def handle_incoming_data(self, fetcher, data=None, length=0):
        """Called by a Fetcher with a body chunk, or with no data when the
        fetcher has finished its range. Returns False when the client
        connection is gone, telling the fetcher to abort."""
        if not data:
            #fetcher is done, remove from list
            self.fetchers = filter(lambda f: f != fetcher, self.fetchers)
            print "Remove: %s" % fetcher
        else:
            assert fetcher.pos < fetcher.range[1] or fetcher.range == (-1,-1)
            heappush(self.blocks, (fetcher.pos, data, length))

        if not self.channel.connected:
            print self, "request side closed the connection"
            return False

        # Start a follow-up range on the same endpoint shortly before this
        # fetcher finishes, keeping at most two fetchers per endpoint busy.
        if fetcher.range != (-1,-1) \
            and fetcher.range[1] - (fetcher.pos+length) < self.config.fetcher_jumpstart \
            and self.fetch_pos + 1 < self.content_length \
            and len( filter(lambda f: f.proxy == fetcher.proxy, self.fetchers) ) < 2:
            #Start a new fetcher if this fetcher is X-Bytes before finished his job
            # block size = time_slice seconds of work at the measured speed,
            # but never below the configured minimum
            blocksize = max(int(self.config.time_slice * fetcher.speed()), self.config.minimal_blocksize)
            fetch_range = self.next_range(blocksize)
            self.fetchers.append( Fetcher(self, fetcher.proxy, self.url, self.header, fetch_range) )

        #if len(self.blocks)>0:
        #print self,"fetch_pos=%s write_pos=%s get=%s with length=%s pending=%s" % (self.fetch_pos, self.write_pos, min(self.blocks)[0],min(self.blocks)[2], len(self.blocks))

        # Drain every heap entry that continues seamlessly at write_pos and
        # forward the contiguous run to the client in a single push.
        buf = ""
        while len(self.blocks)>0 and min(self.blocks)[0] == self.write_pos:
            item = heappop(self.blocks)
            buf += item[1]
            self.write_pos += item[2]

        if buf != "":
            self.channel.push(buf)

        if self.write_pos + 1 >= self.content_length:
            print self, "job done %s blocks left" % len(self.blocks)
            self.channel.close_when_done()
        return True

    def next_range(self, suggested_blocksize):
        """Reserve and return the next (start, end) byte range, clamped to
        the entity size. Both bounds are inclusive."""
        assert self.content_length != -1
        start = self.fetch_pos
        self.fetch_pos = min(self.fetch_pos + suggested_blocksize, self.content_length)
        return (start, self.fetch_pos-1)

    def handle_incoming_http_header(self, fetcher, status_code, header):
        """Called once per fetcher with the upstream response header."""
        if not self.channel.connected:
            return
        if self.header_sent:
            # header already relayed; only sanity-check follow-up responses
            if status_code < 200 or status_code >= 300:
                print self, "Error: got error code %s in %s. Giving up" % (status_code, fetcher)
                self.channel.close()
        else:
            self.header_sent = True

            # Sends header from first response
            header.seek(0)
            headers = httplib.HTTPMessage(header)

            content_length = filter(lambda i: i == "content-length", headers.dict.keys())
            #if there are content-length headers decide if entity size is
            #bigger then threshold, if true then start n proxies (n=#endpoints)
            if len(content_length) == 1:
                content_length = int(headers.dict["content-length"])
                if content_length >= self.config.threshold:
                    # big enough: retarget the running whole-entity fetcher
                    # to the first range and spread further ranges over the
                    # remaining endpoints
                    self.content_length = content_length
                    fetcher.range = self.next_range(self.config.initial_blocksize)
                    for proxy in filter(lambda p: fetcher.proxy != p, self.config.endpoints):
                        if self.fetch_pos == self.content_length -1:
                            break
                        self.fetchers.append(Fetcher( self, proxy, self.url, self.header, self.next_range(self.config.initial_blocksize)))

            else:
                content_length = None

            # Relay the upstream header minus range/length bookkeeping.
            # NOTE(review): the reason phrase is always "OK" regardless of
            # the actual status code.
            buf = "HTTP/1.1 %s OK\r\n" % (status_code)
            buf += "".join(map(lambda key: "%s: %s\r\n" % (key, headers.dict[key]),
                        filter(lambda k: k not in ("content-range", "content-length"),
                            headers.dict.keys())))
            if content_length:
                buf += "Content-Length: %s\r\n" % content_length
                buf += "Content-Range: bytes 0-%s/%s\r\n" % (content_length-1, content_length)
            buf += "X-Proxy: Magicproxy; using proxies %s\r\n" % ", ".join(map(lambda host: "%s:%s"%host, self.config.endpoints))
            buf += "\r\n"
            self.channel.push(buf)

class HTTPChannel(asynchat.async_chat):
    """One accepted client connection: parses the request header, then hands
    the request to a MagicHTTPProxyClient (parallel path) or relays it
    through a single Fetcher (bypass path)."""

    def __init__(self, server, sock, addr):
        self.server = server
        self.data = StringIO.StringIO()  # buffers the raw request header

        asynchat.async_chat.__init__(self, sock)
        self.set_terminator("\r\n\r\n")  # fires once the header is complete

    def handle_close(self):
        self.connected = False
        self.close()

    def collect_incoming_data(self, data):
        self.data.write(data)
        if self.data.tell() > 16384:
            # oversized request header: refuse to buffer more
            self.close_when_done()

    def found_terminator(self):
        # parse http header
        self.data.seek(0)
        self.request = string.split(self.data.readline(), None, 2)
        if len(self.request) != 3:
            # badly formed request; just shut down
            self.close_when_done()
        else:
            self.set_terminator(None)
            headers = httplib.HTTPMessage(self.data).dict
            self.handle_request(self.request[0], self.request[1], headers)

    def handle_request(self, method, path, headers):
        url = urlparse.urlparse(path)
        if method != "GET" or url.query != "":
            #do not handle non-GET or GET with Query (?foo=bla) requests
            # hash the hostname so requests for one host always travel
            # through the same upstream proxy
            proxy = self.server.config.endpoints[ int( md5.md5(url.hostname).hexdigest(),16 ) % len(self.server.config.endpoints) ]
            print Fetcher(self, proxy, url, headers, (-1,-1))
        else:
            MagicHTTPProxyClient(self, url, headers)

    def handle_incoming_http_header(self,fetcher, status_code, header):
        # reader-interface counterpart used by the single-fetcher bypass path
        # NOTE(review): same always-"OK" reason phrase as above.
        header.seek(0)
        headers = httplib.HTTPMessage(header)
        buf = "HTTP/1.1 %s OK\r\n" % status_code
        buf += "\r\n".join(map(lambda hdr: "%s: %s" % (hdr,headers.dict[hdr]), headers.dict.keys()))
        buf += "\r\n\r\n"
        self.push(buf)


    def handle_incoming_data(self, fetcher, data=None, length=0):
        # bypass path: relay body chunks to the client unmodified; always
        # report "keep going" to the fetcher
        if data:
            self.push(data)
        return True

class HTTPProxyServer(asyncore.dispatcher):
    """Listening socket; accepts clients and wraps each in an HTTPChannel."""

    def __init__(self,config):
        self.config = config
        asyncore.dispatcher.__init__(self)
        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        self.set_reuse_addr()
        self.bind(self.config.listen)
        self.listen(5)

    def __str__(self):
        return "<HTTPProxyServer listen=%s:%s>" % self.config.listen

    def handle_accept(self):
        conn, addr = self.accept()
        HTTPChannel(self, conn, addr)

def main():
    """Run the proxy with DefaultConfiguration until the loop terminates."""
    proxy = HTTPProxyServer(DefaultConfiguration)
    print proxy
    asyncore.loop()
self.create_socket(socket.AF_INET, socket.SOCK_STREAM) - self.connect(self.proxy) - - def handle_connect (self): - print self, "Start" - self.send("GET http://%s:%s%s HTTP/1.0\r\n" % ( self.url.hostname, self.url.port or 80, self.url.path )) - for key in filter(lambda k: k not in ("range"), self.headers.keys()): #send origin request headers - self.send("%s: %s\r\n" % (key, self.headers[key])) - self.send("Range: bytes=%s-%s\r\n" % (self.range[0], self.range[1])) - self.send("\r\n") - self.start_time = time.time() - - def time(self): - if self.stop_time == 0: - return time.time() - self.start_time - else: - return self.stop_time - self.start_time - - def speed(self): - return (self.pos - self.range[0]) / self.time() - - def collect_incoming_data(self, data): - if self.state==2: #body - self.reader.handle_incoming_data(self, data) - self.pos += len(data) - if self.pos >= self.range[1]: - self.stop_time = time.time() - print self, "finished" - #make sure the next fetcher will be started - self.reader.handle_incoming_data(self) - self.close() - elif self.state ==1: #header - self.http_header += data - else: #status - self.http_status += data - - def found_terminator(self): - if self.state == 0: #got status-line - self.state = 1 - self.set_terminator("\r\n\r\n") - elif self.state == 1: #got headers - self.state = 2 - self.set_terminator(None) - self.reader.handle_incoming_http_header(self.http_header) - - def __str__(self): - return "<Fetcher proxy=%s url=%s range=%s" % (self.proxy, urlparse.urlunparse(self.url), self.range) - -class MultipleProxyReader(object): - def __init__(self, channel, url, header, content_length): - self.channel = channel - self.url = url - self.header = header - self.content_length = content_length - - self.header_sent = False - self.fetch_pos = 0 - self.write_pos = 0 - self.buffer = "" - self.blocks = list() - self.fetchers = list() - - for proxy in ENDPOINTS: - self.fetchers.append( Fetcher(self, proxy, self.url, self.header, 
self.next_range(INIT_BLOCKSIZE)) ) - - def handle_incoming_data(self, fetcher, data=None): - if not data: - self.fetchers = filter(lambda f: f != fetcher, self.fetchers) - else: - heappush(self.blocks, (fetcher.pos, data)) - - if fetcher.range[1] - fetcher.pos < FETCHER_JUMPSTART \ - and self.fetch_pos + 1 < self.content_length and not self.channel.is_closed \ - and len( filter( (lambda f: f.proxy == fetcher.proxy), self.fetchers) ) < 2: - #Start a new fetcher on this line if this fetchers is X-Bytes before finishing his jobs - blocksize = max(int(TIME_SLICE * fetcher.speed()), MIN_BLOCKSIZE) - fetch_range = self.next_range(blocksize) - print "Start new Fetcher, bs=%s range=%s" % (blocksize,fetch_range) - self.fetchers.append( Fetcher(self, fetcher.proxy, self.url, self.header, fetch_range) ) - - while self.send_next_data(): - pass - - def next_range(self, suggested_blocksize): - start = self.fetch_pos - self.fetch_pos = min(self.fetch_pos + suggested_blocksize, self.content_length) - return (start, self.fetch_pos-1) - - def handle_incoming_http_header(self, header): - if self.header_sent: - pass - else: - self.header_sent = True - - self.channel.push("HTTP/1.0 200 OK\r\n") - # Sends header from first response - headers = httplib.HTTPMessage(StringIO.StringIO(header)) - for key in filter(lambda k: k not in ("content-range", "content-length"), headers.dict.keys()): - self.channel.push("%s: %s\r\n" % (key, headers.dict[key])) - self.channel.push("Content-Length: %s" % self.content_length) - self.channel.push("X-Proxy: Magicproxy (superpower activated)\r\n") - self.channel.push("\r\n") - - def send_next_data(self): - if self.channel.is_closed: - print self, "request side closed the connection" - self.channel.close_when_done() - #XXX terminate all running fetcher - return False - - #print self, "expect data at %s in" % self.write_pos, self.blocks.keys() - if len(self.blocks)>0 and min(self.blocks)[0] == self.write_pos: - item = heappop(self.blocks) - 
self.channel.push(item[1]) - self.write_pos += len(item[1]) - return True - - if self.write_pos + 1 >= self.content_length: - print self, "job done %s blocks left" % len(self.blocks) - #XXX prevent next calls to send_next_data - self.channel.close_when_done() - - return False - - def __str__(self): - return "<MultipleProxyReader url=%s content_length=%s>" % (urlparse.urlunparse(self.url), self.content_length) - -class HTTPChannel(asynchat.async_chat): - def __init__(self, server, sock, addr): - self.server = server - - self.data = StringIO.StringIO() - self.is_closed = False - self.request = None - - asynchat.async_chat.__init__(self, sock) - self.set_terminator("\r\n\r\n") - - def handle_close(self): - self.is_closed = True - - def collect_incoming_data(self, data): - self.data.write(data) - if self.data.tell() > 16384: - self.close_when_done() - - def found_terminator(self): - if not self.request: - # parse http header - self.data.seek(0) - self.request = string.split(self.data.readline(), None, 2) - if len(self.request) != 3: - # badly formed request; just shut down - self.close_when_done() - else: - headers = httplib.HTTPMessage(self.data).dict - self.server.handle_request(self, self.request[0], self.request[1], headers) - else: - pass # ignore body data, for now - -class HTTPProxyServer(asyncore.dispatcher): - def __init__(self): - self.port = 8080 - - asyncore.dispatcher.__init__(self) - self.create_socket(socket.AF_INET, socket.SOCK_STREAM) - self.set_reuse_addr() - self.bind(("", 8080)) - self.listen(5) - - def shutdown(self): - #TODO Hier Proxy sauber beenden - #self.channel.close_when_done() - sys.exit() - - def handle_accept(self): - conn, addr = self.accept() - HTTPChannel(self, conn, addr) - - def handle_request(self, channel, method, path, headers): - url = urlparse.urlparse(path) - if method != "GET" or url.query != "": - #do not handle non-GET or GET with Query (?foo=bla) requests - return self._bypass_request(channel, method, url, headers) - - 
#!/usr/bin/env python
# Packaging script (setuptools): installs the "magicproxy" package plus the
# bin/magicproxy launcher script.

from setuptools import setup, find_packages

setup(
    name='magicproxy',
    version='0.1',
    description='poor mans http multiplexer',
    author='xapek.org',
    author_email='magicproxy@xapek.org',
    url='http://xapek.org',
    packages=find_packages(),
    scripts=["bin/magicproxy"],
#    package_data={'doc':['*.txt'], 'xml':['*.xml']},
#    include_package_data = True,
    # NOTE(review): "wich" -> "which" typo in the long_description text;
    # left untouched here because it is runtime package metadata.
    long_description="""magic proxy split your http requests in smaller chunks
and distribute them over several http-proxies wich can be connected via different
dial-up connection""",
    classifiers=[
        "Programming Language :: Python",
        "Topic :: Software Development :: Libraries :: Python Modules",
        "Intended Audience :: Developers",]
    )