First working version

This commit is contained in:
Alexis Métaireau 2016-01-10 00:34:47 +01:00
commit 1e92a0f469
6 changed files with 193 additions and 0 deletions

27
README.rst Normal file
View File

@ -0,0 +1,27 @@
Create ZIM files out of HTTP websites
#####################################
Given any website, get a ZIM file out of it!
How to use it?
##############
Install it using pip
::
$ pip install zimit
Then run it however you want, for instance with pserve::
$ pserve zimit.ini
In a separate process, you also need to run the worker::
$ rqworker
Then, the only thing you need is an HTTP client to ask the service to convert
a website. Send a POST request to http://localhost:6543/website with at least
the ``url``, ``title`` and ``email`` fields in the request body.
The worker will then download the website and build a ZIM file out of it.

BIN
favicon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.1 KiB

32
setup.py Normal file
View File

@ -0,0 +1,32 @@
import os
from setuptools import setup, find_packages
here = os.path.abspath(os.path.dirname(__file__))

# The README doubles as the long description of the package.
with open(os.path.join(here, 'README.rst')) as f:
    README = f.read()

setup(name='zimit',
      # Version must be a string: a float cannot express versions such as
      # "0.10" or "0.1.1".
      version='0.1',
      description='zimit',
      long_description=README,
      classifiers=[
          "Programming Language :: Python",
          "Framework :: Pylons",
          "Topic :: Internet :: WWW/HTTP",
          "Topic :: Internet :: WWW/HTTP :: WSGI :: Application"
      ],
      keywords="web services",
      author='',
      author_email='',
      url='',
      packages=find_packages(),
      include_package_data=True,
      zip_safe=False,
      # pyramid and redis are imported directly by zimit/__init__.py, so they
      # are declared explicitly instead of relying on cornice and rq pulling
      # them in.
      install_requires=['cornice', 'waitress', 'rq', 'colander',
                        'pyramid', 'redis'],
      # Registers zimit.main as the PasteDeploy app factory used by
      # "use = egg:zimit" in zimit.ini.
      entry_points="""\
      [paste.app_factory]
      main=zimit:main
      """,
      paster_plugins=['pyramid'])

48
zimit.ini Normal file
View File

@ -0,0 +1,48 @@
# Application section: "egg:zimit" resolves to the paste.app_factory entry
# point declared in setup.py (zimit:main).
[app:main]
use = egg:zimit
# Pyramid debugging switches.
pyramid.reload_templates = true
pyramid.debug_authorization = false
pyramid.debug_notfound = false
pyramid.debug_routematch = false
pyramid.debug_templates = true
pyramid.default_locale_name = en
# Locations of the external binaries.
# NOTE(review): zimit/views.py defines its own hard-coded copies of these two
# paths and does not appear to read these settings - confirm.
# NOTE(review): the zimwriterfs path below is specific to one machine.
zimit.zimwriterfs_bin = /home/alexis/dev/openzim/zimwriterfs/zimwriterfs
zimit.httrack_bin = /usr/bin/httrack
# WSGI server: waitress bound to 0.0.0.0 on port 6543.
[server:main]
use = egg:waitress#main
host = 0.0.0.0
port = 6543
# Begin logging configuration
[loggers]
keys = root, gplayproxy
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = INFO
handlers = console
# NOTE(review): "gplayproxy" does not match any package shown in this project
# (the package is "zimit"); it looks like a leftover name - confirm.
[logger_gplayproxy]
level = DEBUG
handlers =
qualname = gplayproxy
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(asctime)s %(levelname)-5.5s [%(name)s][%(threadName)s] %(message)s
# End logging configuration

22
zimit/__init__.py Normal file
View File

@ -0,0 +1,22 @@
import subprocess
import shlex
from pyramid.config import Configurator
from pyramid.events import NewRequest
from redis import Redis
from rq import Queue
def main(global_config, **settings):
    """Build the zimit WSGI application.

    A job queue backed by a default ``Redis()`` connection is stored on the
    registry and exposed on every request as ``request.queue``.

    :param global_config: global configuration passed by the app loader
        (not used here).
    :param settings: application settings handed to the Configurator.
    :returns: the WSGI application.
    """
    configurator = Configurator(settings=settings)
    configurator.registry.queue = Queue(connection=Redis())

    def _expose_queue(new_request_event):
        # Give views access to the shared queue through the request object.
        new_request_event.request.queue = configurator.registry.queue

    configurator.add_subscriber(_expose_queue, NewRequest)
    configurator.include("cornice")
    configurator.scan("zimit.views")
    return configurator.make_wsgi_app()

64
zimit/views.py Normal file
View File

@ -0,0 +1,64 @@
import tempfile
import subprocess
import shlex
import shutil
from cornice import Service
from colander import MappingSchema, SchemaNode, String, drop
# Path of the zimwriterfs executable run by create_zim().
# NOTE(review): machine-specific path; zimit.ini declares a matching
# zimit.zimwriterfs_bin setting that this module does not read.
zimwriterfs_bin = "/home/alexis/dev/openzim/zimwriterfs/zimwriterfs"
# Path of the httrack executable run by download_website().
httrack_bin = "/usr/bin/httrack"
# Value used as ``default`` for the "author" node of WebSiteSchema.
default_author = "Alexis Metaireau"
def spawn(cmd):
    """Start *cmd* as a child process and return without waiting for it.

    :param cmd: either a command-line string, which is split with
        ``shlex.split``, or an already split list/tuple of arguments, which
        is passed to the process untouched.
    :returns: the ``subprocess.Popen`` object of the started process.
    """
    if isinstance(cmd, (list, tuple)):
        args = list(cmd)
    else:
        args = shlex.split(cmd)
    # Function-call form of print works on both Python 2 and Python 3.
    print(cmd)
    return subprocess.Popen(args)
def zim_it(config):
    """Mirror the website described by *config* and pack it into a ZIM file.

    :param config: mapping with at least a ``url`` key, plus the fields
        create_zim() reads.
    """
    location = download_website(config['url'])
    try:
        create_zim(location, config)
    finally:
        # Directories made by tempfile.mkdtemp() are never removed
        # automatically; without this every job leaves a full mirror behind.
        shutil.rmtree(location, ignore_errors=True)
def download_website(url, favicon='/home/alexis/dev/zimit/favicon.ico'):
    """Mirror *url* into a fresh temporary directory using httrack.

    :param url: address of the website to mirror.
    :param favicon: path of the icon file copied into the mirror directory
        (create_zim() passes ``favicon.ico`` to zimwriterfs as the icon).
    :returns: path of the temporary directory holding the mirror. The caller
        is responsible for deleting it.
    """
    path = tempfile.mkdtemp("website")
    # Pass the arguments as a list: *url* comes from the HTTP request, and
    # formatting it into a string that is split again would let whitespace
    # in it inject extra httrack arguments.
    # NOTE(review): a url starting with "-" would still be read as an option
    # by httrack - consider validating it in the schema.
    args = [httrack_bin, "--path", path, url]
    print(" ".join(args))
    subprocess.Popen(args).wait()
    shutil.copy(favicon, path)
    return path
def create_zim(location, config):
    """Pack the mirrored website at *location* into a ZIM file.

    Runs zimwriterfs and waits for it to finish.

    :param location: directory containing the downloaded website.
    :param config: mapping providing the ``welcome``, ``language``,
        ``title``, ``description`` and ``author`` values. It is not modified.
    :raises KeyError: if one of those keys is missing.
    """
    # Work on a copy so the caller's mapping is left untouched.
    options = dict(config)
    options.update({
        'bin': zimwriterfs_bin,
        'location': location,
        'output': 'test.zim',
        'icon': 'favicon.ico',
        'publisher': 'Alexis Metaireau',
    })
    # Spawn zimwriterfs with the correct options. The arguments are passed as
    # a list: the values come from the HTTP request, and a quote character in
    # one of them would otherwise break out of the quoting and inject
    # arbitrary arguments.
    args = [
        options['bin'],
        '-w', options['welcome'],
        '-l', options['language'],
        '-t', options['title'],
        '-d', options['description'],
        '-f', options['icon'],
        '-c', options['author'],
        '-p', options['publisher'],
        options['location'],
        options['output'],
    ]
    print(" ".join(args))
    subprocess.Popen(args).wait()
def _body_string(**options):
    """Build a string node located in the request body."""
    return SchemaNode(String(), location="body", type='str', **options)


class WebSiteSchema(MappingSchema):
    """Fields accepted by the ``/website`` service.

    ``url``, ``title`` and ``email`` are declared without a default; the
    remaining nodes each declare one.
    """
    url = _body_string()
    title = _body_string()
    email = _body_string()
    description = _body_string(default="-")
    author = _body_string(default=default_author)
    welcome = _body_string(default="index.html")
    language = _body_string(default="en")
webpage = Service(name='website', path='/website')


@webpage.post(schema=WebSiteSchema)
def crawl_new_website(request):
    """Queue a zim_it job for the validated request data.

    The job is enqueued with a timeout of 1800 and the response status is
    set to 201.

    :returns: ``{'success': True}``.
    """
    job_options = request.validated
    request.queue.enqueue(zim_it, job_options, timeout=1800)
    response = request.response
    response.status_code = 201
    return {'success': True}