From 4fb7d2e1c0e9e80038148163650f47e69270e80a Mon Sep 17 00:00:00 2001 From: Yves Fischer Date: Tue, 24 Nov 2015 23:24:31 +0100 Subject: Cleanup and solve trailing '/' problem with redirects * Format README as markdown. Mention dependencies * Fix 'timeout' * Run autopep8 on all python sources * Fix unittests * For HEAD requests explicitly allow to follow redirects. This solves the problem if you request: https://server/dir and then the server wants to redirect you to https://server/dir/ This should solve #1 --- README.md | 29 +++++++++++++++++++++ README.txt | 30 ---------------------- fuse-httpfs | 11 ++++---- httpfs/__init__.py | 59 ++++++++++++++++++++++++++++++++----------- test/__init__.py | 2 +- test/test_httpfs.py | 42 ++++++++++++++++++++---------- test/testwww/dir/subdir/empty | 0 7 files changed, 108 insertions(+), 65 deletions(-) create mode 100644 README.md delete mode 100644 README.txt create mode 100644 test/testwww/dir/subdir/empty diff --git a/README.md b/README.md new file mode 100644 index 0000000..703c58e --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +# fuse-httpfs + +python-requests based fuse read-only filesystem + +## Usage + +python3 dependencies: + +* requests +* fusepy + +### Mount + +* Create a directory to be used as a mountpoint. +* run with --help + +### Using + +* access the mountpoint +* open directory for schema (http/https) +* open a (possibly non-existent) directory with the desired hostname + + +Remote machines configured in ~/.netrc will appear automatically. python-requests will pick up the authentication info from .netrc + + +## Run the tests + + python3.4 -m unittest test diff --git a/README.txt b/README.txt deleted file mode 100644 index bba5554..0000000 --- a/README.txt +++ /dev/null @@ -1,30 +0,0 @@ -=========== -fuse-httpfs -=========== - -python-requests based fuse read-only filesystem - -Usage ------ - -### Setup - -Create a directory to be used as a mountpoint. 
- -### Starting - -run with --help - -### Using - -* access the mountpoint -* open directory for schema (http/https) -* open an (maybe non-existing) directoring with the desired hostname - - -Remote machines configure in ~/.netrc will appear automatically. python-requests will pick-up the authentication infos from .netrc - - -## Run the tests - - python3.4 -m unittest test diff --git a/fuse-httpfs b/fuse-httpfs index 56662a8..e6a8c24 100755 --- a/fuse-httpfs +++ b/fuse-httpfs @@ -5,7 +5,8 @@ import logging import argparse from httpfs import Config, Httpfs -FORMAT = "%(threadName)s %(asctime)-15s %(levelname)s:%(name)s %(message)s" +FORMAT = "%(threadName)s %(asctime)-15s %(levelname)s:%(name)s " + \ + "%(filename)s:%(lineno)s %(message)s" logging.basicConfig(level=logging.INFO, format=FORMAT) p = argparse.ArgumentParser( @@ -18,10 +19,8 @@ p.add_argument("--no_foreground", action="store_true", default=False, p.add_argument("--debug", action="store_true", help="Enable fuse debug") p.add_argument("--nothreads", action="store_true", help="Disable fuse threads") -p.add_argument("--connect_timeout", type=int, - default=Config.timeout[0], help="HTTP connect timeout") -p.add_argument("--read_timeout", type=int, - default=Config.timeout[1], help="HTTP read timeout") +p.add_argument("--timeout", type=float, default=None, + help="HTTP connect and read timeout") p.add_argument("--ssl", choices=["default", "system", "none"], help="SSL Verification", default="default") p.add_argument("--system-ca", default="/etc/ssl/certs/ca-certificates.crt", @@ -29,7 +28,7 @@ p.add_argument("--system-ca", default="/etc/ssl/certs/ca-certificates.crt", args = vars(p.parse_args(sys.argv[1:])) -Config.timeout = (args.pop("connect_timeout"), args.pop("read_timeout")) +Config.timeout = args.pop("timeout") Config.mountpoint = args.pop("mountpoint") Config.verify = args.pop("ssl") Config.system_ca = args.pop("system_ca") diff --git a/httpfs/__init__.py b/httpfs/__init__.py index de4449c..b69c26e 
100644 --- a/httpfs/__init__.py +++ b/httpfs/__init__.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 import os import sys import time @@ -17,12 +16,13 @@ import requests class Config(object): mountpoint = None - timeout = (5, 25) # connect_timeout, read_timeout + timeout = None verify = None system_ca = None class Path: + def __init__(self, parent, name): self.parent = parent self.name = name @@ -48,17 +48,26 @@ class Path: class File(Path): + def __init__(self, parent, name): super().__init__(parent, name) self.lastModified = None self.size = None + self.mode = 0o444 def init(self): url = self.buildUrl() logging.info("File url={} name={}".format(url, self.name)) - r = self.getSession().head(url, timeout=Config.timeout) + r = self.getSession().head(url, timeout=Config.timeout, + allow_redirects=True) r.close() - if r.status_code != 200: + if r.status_code >= 400 and r.status_code <= 499: + self.size = 0 + self.mode = 0o000 + self.initialized = True + self.lastModified = 0 + return + elif r.status_code != 200: error = "Status code != 200 for {}: {}".format(url, r.status_code) raise Exception(error) self.size = int(r.headers['content-length']) @@ -71,7 +80,7 @@ class File(Path): if not self.initialized: self.init() url = self.buildUrl() - bytesRange = '{}-{}'.format(offset, min(self.size, offset+size-1)) + bytesRange = '{}-{}'.format(offset, min(self.size, offset + size - 1)) headers = {'range': 'bytes=' + bytesRange} logging.info("File.get url={} range={}".format(url, bytesRange)) r = self.getSession().get(url, headers=headers, timeout=Config.timeout) @@ -91,20 +100,30 @@ class File(Path): if not self.initialized: self.init() t = self.lastModified - return dict(st_mode=(S_IFREG | 0o444), st_nlink=1, st_size=self.size, - st_ctime=t, st_mtime=t, st_atime=t) + return dict(st_mode=(S_IFREG | self.mode), st_nlink=1, + st_size=self.size, + st_ctime=t, st_mtime=t, st_atime=t, + st_uid=os.getuid(), st_gid=os.getgid()) class Directory(Path): + def __init__(self, parent, name): 
super().__init__(parent, name) self.entries = {} + self.mode = 0o555 def init(self): url = self.buildUrl() + "/" logging.info("Directory url={} name={}".format(url, self.name)) r = self.getSession().get(url, stream=True, timeout=Config.timeout) - if r.status_code != 200: + if r.status_code >= 400 and r.status_code <= 499: + self.mode = 0o000 + logging.info("Directory is 4xx {}".format(url)) + r.close() + self.initialized = True + return + elif r.status_code != 200: raise Exception("Status code not 200 for {}: {}".format( url, r.status_code)) @@ -128,14 +147,17 @@ class Directory(Path): nentries = 1 if self.initialized: nentries += len(self.entries) - return dict(st_mode=(S_IFDIR | 0o555), st_nlink=nentries, - st_ctime=t, st_mtime=t, st_atime=t) + return dict(st_mode=(S_IFDIR | self.mode), st_nlink=nentries, + st_ctime=t, st_mtime=t, st_atime=t, + st_uid=os.getuid(), st_gid=os.getgid()) class Server(Directory): + def __init__(self, parent, name): super().__init__(parent, name) self.session = requests.Session() + self.session.allow_redirects = True if Config.verify == "default": pass elif Config.verify == "system": @@ -154,6 +176,7 @@ class Server(Directory): class Schema(Directory): + def __init__(self, parent, name): super().__init__(parent, name) self.initialized = True @@ -163,6 +186,7 @@ class Schema(Directory): class Root(Directory): + def __init__(self): super().__init__(None, "") self.initialized = True @@ -172,6 +196,7 @@ class Root(Directory): class RelativeLinkCollector(HTMLParser): + def __init__(self, parent): super().__init__(self, convert_charrefs=True) self.parent = parent @@ -195,6 +220,7 @@ class RelativeLinkCollector(HTMLParser): class Httpfs(fuse.LoggingMixIn, fuse.Operations): """A read only http/https/ftp filesystem using python-requests.""" + def __init__(self): self.root = Root() @@ -235,6 +261,7 @@ class Httpfs(fuse.LoggingMixIn, fuse.Operations): schema, *p = path[1:].split("/") if schema not in self.root.entries: + logging.debug("schema %s 
not in root.entries", schema) return None prevEntry = self.root.entries[schema] if p == []: @@ -262,14 +289,16 @@ class Httpfs(fuse.LoggingMixIn, fuse.Operations): # the server don't return it, then just create it # assuming its an directory, if a HEAD is successful d = Directory.fromPath(prevEntry, lastElement) - r = d.getSession().head(d.buildUrl(), - timeout=Config.timeout) + url = d.buildUrl() + r = d.getSession().head(url, timeout=Config.timeout, + allow_redirects=True) if r.status_code == 200: - logging.info("Create directory for path which was not " + - "discovered by Index of: {}".format(path)) + logging.info("Create directory for path: {} " + + "at: {}".format(path, url)) prevEntry.entries[lastElement] = d else: - logging.info("Path not found: {}".format(path)) + logging.info("Path not found ({}): {} for {}".format( + r.status_code, path, url)) return None return prevEntry.entries[lastElement] diff --git a/test/__init__.py b/test/__init__.py index 9e0b053..caeb5e5 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -3,4 +3,4 @@ import logging from test.test_httpfs import * -logging.basicConfig(level=logging.ERROR) +logging.basicConfig(level=logging.INFO) diff --git a/test/test_httpfs.py b/test/test_httpfs.py index bb5435c..b6c6b65 100644 --- a/test/test_httpfs.py +++ b/test/test_httpfs.py @@ -6,19 +6,22 @@ from stat import S_IFDIR, S_IFREG from http.server import HTTPServer from http.server import SimpleHTTPRequestHandler -from httpfs import Httpfs +import httpfs -def translate_path(p): - return "test/testwww/" + p +def get_fs_path(p): + path = os.path.join(os.path.dirname(__file__), + "testwww", os.path.relpath(p)) + logging.info("get_fs_path: translated path %s -> %s", p, path) + return path class Handler(SimpleHTTPRequestHandler): + def translate_path(self, path): - p = super().translate_path(path) - pNew = translate_path(os.path.relpath(p)) - logging.info("translated %s to %s", p, pNew) - return pNew + path_new = 
get_fs_path(super().translate_path(path)) + logging.info("translated %s to %s", path, path_new) + return path_new def handle_one_request(self, *a): SimpleHTTPRequestHandler.handle_one_request(self, *a) @@ -26,13 +29,16 @@ class Handler(SimpleHTTPRequestHandler): class TestBase(TestCase): + def setUp(self): self.server = HTTPServer(('', 0), Handler) self.server.requests = [] self.server_thread = threading.Thread(target=self.server.serve_forever) self.server_thread.daemon = True self.server_thread.start() - self.httpfs = Httpfs() + + httpfs.Config.verify = "default" + self.httpfs = httpfs.Httpfs() self.port = self.server.socket.getsockname()[1] def tearDown(self): @@ -43,11 +49,11 @@ class TestBase(TestCase): return "/http/localhost:{}".format(self.port) def stat(self, path): - logging.info("Translate path %s -> %s", path, translate_path(path)) - return os.stat(translate_path(path)) + return os.stat(get_fs_path(path)) class TestZwei(TestBase): + def test_root(self): r = self.httpfs.readdir(self.basePath(), None) self.assertEqual(len(self.server.requests), 2) # made 2 requests @@ -65,9 +71,19 @@ class TestZwei(TestBase): def test_dir(self): r = self.httpfs.readdir(self.basePath() + "/", None) self.assertEqual(len(r), 4) + r = self.httpfs.readdir(self.basePath() + "/dir", None) - self.assertEqual(len(r), 4) + self.assertEqual(len(r), 5) + + def test_subdir(self): + r = self.httpfs.readdir(self.basePath() + "/dir/subdir", None) + self.assertEqual(len(r), 3) + + def test_dir_ending_slash(self): + r = self.httpfs.readdir(self.basePath() + "/dir/", None) + self.assertEqual(len(r), 5) def test_read(self): - r = self.httpfs.read(self.basePath() + "/dir/bla1", 1000, 0, None) - self.assertEqual(r.strip(), b"bla1") + expected = open(get_fs_path("dir/bla1"), "rb").read() + result = self.httpfs.read(self.basePath() + "/dir/bla1", 1000, 0, None) + self.assertEqual(expected, result) diff --git a/test/testwww/dir/subdir/empty b/test/testwww/dir/subdir/empty new file mode 100644 
index 0000000..e69de29 -- cgit v1.2.1