summary refs log tree commit diff
diff options
context:
space:
mode:
author Yves Fischer <yvesf-git@xapek.org> 2015-11-24 23:24:31 +0100
committer Yves Fischer <yvesf-git@xapek.org> 2015-11-24 23:24:31 +0100
commit 4fb7d2e1c0e9e80038148163650f47e69270e80a (patch)
tree 8848b94169945884985b421cc0a5aab133885ab3
parent e420859e28e950e2761829612816145b6e1232e9 (diff)
download fuse-httpfs-4fb7d2e1c0e9e80038148163650f47e69270e80a.tar.gz
download fuse-httpfs-4fb7d2e1c0e9e80038148163650f47e69270e80a.zip
Cleanup and solve trailing '/' problem with redirects
* Format README as Markdown and mention dependencies
* Fix 'timeout'
* Run autopep8 on all Python sources
* Fix unit tests
* For HEAD requests, explicitly allow following redirects. This solves the problem where you request https://server/dir and the server then wants to redirect you to https://server/dir/

This should solve #1
-rw-r--r-- README.md (renamed from README.txt) 17
-rwxr-xr-x fuse-httpfs 11
-rw-r--r-- httpfs/__init__.py 59
-rw-r--r-- test/__init__.py 2
-rw-r--r-- test/test_httpfs.py 42
-rw-r--r-- test/testwww/dir/subdir/empty 0
6 files changed, 87 insertions, 44 deletions
diff --git a/README.txt b/README.md
index bba5554..703c58e 100644
--- a/README.txt
+++ b/README.md
@@ -1,19 +1,18 @@
-===========
-fuse-httpfs
-===========
+# fuse-httpfs
python-requests based fuse read-only filesystem
-Usage
------
+## Usage
-### Setup
+python3 dependencies:
-Create a directory to be used as a mountpoint.
+* requests
+* fusepy
-### Starting
+### Mount
-run with --help
+* Create a directory to be used as a mountpoint.
+* run with --help
### Using
diff --git a/fuse-httpfs b/fuse-httpfs
index 56662a8..e6a8c24 100755
--- a/fuse-httpfs
+++ b/fuse-httpfs
@@ -5,7 +5,8 @@ import logging
import argparse
from httpfs import Config, Httpfs
-FORMAT = "%(threadName)s %(asctime)-15s %(levelname)s:%(name)s %(message)s"
+FORMAT = "%(threadName)s %(asctime)-15s %(levelname)s:%(name)s " + \
+ "%(filename)s:%(lineno)s %(message)s"
logging.basicConfig(level=logging.INFO, format=FORMAT)
p = argparse.ArgumentParser(
@@ -18,10 +19,8 @@ p.add_argument("--no_foreground", action="store_true", default=False,
p.add_argument("--debug", action="store_true", help="Enable fuse debug")
p.add_argument("--nothreads", action="store_true",
help="Disable fuse threads")
-p.add_argument("--connect_timeout", type=int,
- default=Config.timeout[0], help="HTTP connect timeout")
-p.add_argument("--read_timeout", type=int,
- default=Config.timeout[1], help="HTTP read timeout")
+p.add_argument("--timeout", type=float, default=None,
+ help="HTTP connect and read timeout")
p.add_argument("--ssl", choices=["default", "system", "none"],
help="SSL Verification", default="default")
p.add_argument("--system-ca", default="/etc/ssl/certs/ca-certificates.crt",
@@ -29,7 +28,7 @@ p.add_argument("--system-ca", default="/etc/ssl/certs/ca-certificates.crt",
args = vars(p.parse_args(sys.argv[1:]))
-Config.timeout = (args.pop("connect_timeout"), args.pop("read_timeout"))
+Config.timeout = args.pop("timeout")
Config.mountpoint = args.pop("mountpoint")
Config.verify = args.pop("ssl")
Config.system_ca = args.pop("system_ca")
diff --git a/httpfs/__init__.py b/httpfs/__init__.py
index de4449c..b69c26e 100644
--- a/httpfs/__init__.py
+++ b/httpfs/__init__.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
import os
import sys
import time
@@ -17,12 +16,13 @@ import requests
class Config(object):
mountpoint = None
- timeout = (5, 25) # connect_timeout, read_timeout
+ timeout = None
verify = None
system_ca = None
class Path:
+
def __init__(self, parent, name):
self.parent = parent
self.name = name
@@ -48,17 +48,26 @@ class Path:
class File(Path):
+
def __init__(self, parent, name):
super().__init__(parent, name)
self.lastModified = None
self.size = None
+ self.mode = 0o444
def init(self):
url = self.buildUrl()
logging.info("File url={} name={}".format(url, self.name))
- r = self.getSession().head(url, timeout=Config.timeout)
+ r = self.getSession().head(url, timeout=Config.timeout,
+ allow_redirects=True)
r.close()
- if r.status_code != 200:
+ if r.status_code >= 400 and r.status_code <= 499:
+ self.size = 0
+ self.mode = 0o000
+ self.initialized = True
+ self.lastModified = 0
+ return
+ elif r.status_code != 200:
error = "Status code != 200 for {}: {}".format(url, r.status_code)
raise Exception(error)
self.size = int(r.headers['content-length'])
@@ -71,7 +80,7 @@ class File(Path):
if not self.initialized:
self.init()
url = self.buildUrl()
- bytesRange = '{}-{}'.format(offset, min(self.size, offset+size-1))
+ bytesRange = '{}-{}'.format(offset, min(self.size, offset + size - 1))
headers = {'range': 'bytes=' + bytesRange}
logging.info("File.get url={} range={}".format(url, bytesRange))
r = self.getSession().get(url, headers=headers, timeout=Config.timeout)
@@ -91,20 +100,30 @@ class File(Path):
if not self.initialized:
self.init()
t = self.lastModified
- return dict(st_mode=(S_IFREG | 0o444), st_nlink=1, st_size=self.size,
- st_ctime=t, st_mtime=t, st_atime=t)
+ return dict(st_mode=(S_IFREG | self.mode), st_nlink=1,
+ st_size=self.size,
+ st_ctime=t, st_mtime=t, st_atime=t,
+ st_uid=os.getuid(), st_gid=os.getgid())
class Directory(Path):
+
def __init__(self, parent, name):
super().__init__(parent, name)
self.entries = {}
+ self.mode = 0o555
def init(self):
url = self.buildUrl() + "/"
logging.info("Directory url={} name={}".format(url, self.name))
r = self.getSession().get(url, stream=True, timeout=Config.timeout)
- if r.status_code != 200:
+ if r.status_code >= 400 and r.status_code <= 499:
+ self.mode = 0o000
+ logging.info("Directory is 4xx {}".format(url))
+ r.close()
+ self.initialized = True
+ return
+ elif r.status_code != 200:
raise Exception("Status code not 200 for {}: {}".format(
url, r.status_code))
@@ -128,14 +147,17 @@ class Directory(Path):
nentries = 1
if self.initialized:
nentries += len(self.entries)
- return dict(st_mode=(S_IFDIR | 0o555), st_nlink=nentries,
- st_ctime=t, st_mtime=t, st_atime=t)
+ return dict(st_mode=(S_IFDIR | self.mode), st_nlink=nentries,
+ st_ctime=t, st_mtime=t, st_atime=t,
+ st_uid=os.getuid(), st_gid=os.getgid())
class Server(Directory):
+
def __init__(self, parent, name):
super().__init__(parent, name)
self.session = requests.Session()
+ self.session.allow_redirects = True
if Config.verify == "default":
pass
elif Config.verify == "system":
@@ -154,6 +176,7 @@ class Server(Directory):
class Schema(Directory):
+
def __init__(self, parent, name):
super().__init__(parent, name)
self.initialized = True
@@ -163,6 +186,7 @@ class Schema(Directory):
class Root(Directory):
+
def __init__(self):
super().__init__(None, "")
self.initialized = True
@@ -172,6 +196,7 @@ class Root(Directory):
class RelativeLinkCollector(HTMLParser):
+
def __init__(self, parent):
super().__init__(self, convert_charrefs=True)
self.parent = parent
@@ -195,6 +220,7 @@ class RelativeLinkCollector(HTMLParser):
class Httpfs(fuse.LoggingMixIn, fuse.Operations):
"""A read only http/https/ftp filesystem using python-requests."""
+
def __init__(self):
self.root = Root()
@@ -235,6 +261,7 @@ class Httpfs(fuse.LoggingMixIn, fuse.Operations):
schema, *p = path[1:].split("/")
if schema not in self.root.entries:
+ logging.debug("schema %s not in root.entries", schema)
return None
prevEntry = self.root.entries[schema]
if p == []:
@@ -262,14 +289,16 @@ class Httpfs(fuse.LoggingMixIn, fuse.Operations):
# the server don't return it, then just create it
# assuming its an directory, if a HEAD is successful
d = Directory.fromPath(prevEntry, lastElement)
- r = d.getSession().head(d.buildUrl(),
- timeout=Config.timeout)
+ url = d.buildUrl()
+ r = d.getSession().head(url, timeout=Config.timeout,
+ allow_redirects=True)
if r.status_code == 200:
- logging.info("Create directory for path which was not " +
- "discovered by Index of: {}".format(path))
+ logging.info("Create directory for path: {} " +
+ "at: {}".format(path, url))
prevEntry.entries[lastElement] = d
else:
- logging.info("Path not found: {}".format(path))
+ logging.info("Path not found ({}): {} for {}".format(
+ r.status_code, path, url))
return None
return prevEntry.entries[lastElement]
diff --git a/test/__init__.py b/test/__init__.py
index 9e0b053..caeb5e5 100644
--- a/test/__init__.py
+++ b/test/__init__.py
@@ -3,4 +3,4 @@ import logging
from test.test_httpfs import *
-logging.basicConfig(level=logging.ERROR)
+logging.basicConfig(level=logging.INFO)
diff --git a/test/test_httpfs.py b/test/test_httpfs.py
index bb5435c..b6c6b65 100644
--- a/test/test_httpfs.py
+++ b/test/test_httpfs.py
@@ -6,19 +6,22 @@ from stat import S_IFDIR, S_IFREG
from http.server import HTTPServer
from http.server import SimpleHTTPRequestHandler
-from httpfs import Httpfs
+import httpfs
-def translate_path(p):
- return "test/testwww/" + p
+def get_fs_path(p):
+ path = os.path.join(os.path.dirname(__file__),
+ "testwww", os.path.relpath(p))
+ logging.info("get_fs_path: translated path %s -> %s", p, path)
+ return path
class Handler(SimpleHTTPRequestHandler):
+
def translate_path(self, path):
- p = super().translate_path(path)
- pNew = translate_path(os.path.relpath(p))
- logging.info("translated %s to %s", p, pNew)
- return pNew
+ path_new = get_fs_path(super().translate_path(path))
+ logging.info("translated %s to %s", path, path_new)
+ return path_new
def handle_one_request(self, *a):
SimpleHTTPRequestHandler.handle_one_request(self, *a)
@@ -26,13 +29,16 @@ class Handler(SimpleHTTPRequestHandler):
class TestBase(TestCase):
+
def setUp(self):
self.server = HTTPServer(('', 0), Handler)
self.server.requests = []
self.server_thread = threading.Thread(target=self.server.serve_forever)
self.server_thread.daemon = True
self.server_thread.start()
- self.httpfs = Httpfs()
+
+ httpfs.Config.verify = "default"
+ self.httpfs = httpfs.Httpfs()
self.port = self.server.socket.getsockname()[1]
def tearDown(self):
@@ -43,11 +49,11 @@ class TestBase(TestCase):
return "/http/localhost:{}".format(self.port)
def stat(self, path):
- logging.info("Translate path %s -> %s", path, translate_path(path))
- return os.stat(translate_path(path))
+ return os.stat(get_fs_path(path))
class TestZwei(TestBase):
+
def test_root(self):
r = self.httpfs.readdir(self.basePath(), None)
self.assertEqual(len(self.server.requests), 2) # made 2 requests
@@ -65,9 +71,19 @@ class TestZwei(TestBase):
def test_dir(self):
r = self.httpfs.readdir(self.basePath() + "/", None)
self.assertEqual(len(r), 4)
+
r = self.httpfs.readdir(self.basePath() + "/dir", None)
- self.assertEqual(len(r), 4)
+ self.assertEqual(len(r), 5)
+
+ def test_subdir(self):
+ r = self.httpfs.readdir(self.basePath() + "/dir/subdir", None)
+ self.assertEqual(len(r), 3)
+
+ def test_dir_ending_slash(self):
+ r = self.httpfs.readdir(self.basePath() + "/dir/", None)
+ self.assertEqual(len(r), 5)
def test_read(self):
- r = self.httpfs.read(self.basePath() + "/dir/bla1", 1000, 0, None)
- self.assertEqual(r.strip(), b"bla1")
+ expected = open(get_fs_path("dir/bla1"), "rb").read()
+ result = self.httpfs.read(self.basePath() + "/dir/bla1", 1000, 0, None)
+ self.assertEqual(expected, result)
diff --git a/test/testwww/dir/subdir/empty b/test/testwww/dir/subdir/empty
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/testwww/dir/subdir/empty