Add a status API. Fix #5

This commit is contained in:
Alexis Métaireau 2016-06-20 18:46:30 +02:00
parent 728a90a7dd
commit ddb0eb69e3
No known key found for this signature in database
GPG Key ID: 1EDF5A7A485D4A11
6 changed files with 88 additions and 23 deletions

View File

@ -33,6 +33,12 @@ Optional parameters
- **description**: The description that will be embedded in the Zim file - **description**: The description that will be embedded in the Zim file
- **author**: The author of the content - **author**: The author of the content
Return values
-------------
- **job_id**: The job id is returned in JSON format. It can be used to know the
status of the process.
Status codes Status codes
------------ ------------
@ -41,6 +47,24 @@ Status codes
it contains information about what is missing. it contains information about what is missing.
- `201 Created` will be returned if the process started. - `201 Created` will be returned if the process started.
GET /status/{jobid}
===================
Retrieves the status of a job and displays the associated logs.
Return values
-------------
- **status**: The status of the job; it is one of 'queued', 'finished',
'failed', 'started' and 'deferred'.
- **log**: The logs of the job.
Status codes
------------
- `404 Not Found` will be returned in case the requested job does not exist.
- `200 OK` will be returned in any other case.
Okay, so how do I install it on my server? Okay, so how do I install it on my server?
########################################## ##########################################
@ -79,7 +103,7 @@ Installing the dependencies
:: ::
sudo apt-get install httrack libzim-dev libmagic-dev liblzma-dev libz-dev build-essential libtool redis-server automake pkg-config sudo apt-get install httrack libzim-dev libmagic-dev liblzma-dev libz-dev build-essential libtool libgumbo-dev redis-server automake pkg-config
Installing zimwriterfs Installing zimwriterfs
====================== ======================

View File

@ -5,8 +5,6 @@ from pyramid.static import static_view
from redis import Redis from redis import Redis
from rq import Queue from rq import Queue
from zimit import creator
def main(global_config, **settings): def main(global_config, **settings):
config = Configurator(settings=settings) config = Configurator(settings=settings)
@ -14,8 +12,6 @@ def main(global_config, **settings):
def attach_objects_to_request(event): def attach_objects_to_request(event):
event.request.queue = config.registry.queue event.request.queue = config.registry.queue
settings = event.request.registry.settings
event.request.zim_creator = creator.load_from_settings(settings)
config.add_subscriber(attach_objects_to_request, NewRequest) config.add_subscriber(attach_objects_to_request, NewRequest)

View File

@ -1,3 +1,4 @@
import os
import os.path import os.path
import shutil import shutil
import tempfile import tempfile
@ -21,17 +22,21 @@ class ZimCreator(object):
""" """
def __init__(self, zimwriterfs_bin, output_location,
             author=DEFAULT_AUTHOR, httrack_bin=HTTRACK_BIN,
             logging=None):
    """Store the creator configuration and check the configured paths.

    :param zimwriterfs_bin:
        Location of the zimwriterfs binary.
    :param output_location:
        Location where the output is written.
    :param author:
        Author to use; defaults to ``DEFAULT_AUTHOR``.
    :param httrack_bin:
        Location of the HTTrack binary; defaults to ``HTTRACK_BIN``.
    :param logging:
        Value handed to ``utils.spawn`` as its log file argument each
        time a command is spawned; ``None`` by default.
    """
    self.zimwriterfs_bin = zimwriterfs_bin
    self.httrack_bin = httrack_bin
    self.output_location = output_location
    self.author = author
    self.logging = logging

    # Both binaries and the output location are checked up front.
    required_paths = (
        self.zimwriterfs_bin,
        self.httrack_bin,
        self.output_location,
    )
    utils.ensure_paths_exists(*required_paths)
def _spawn(self, cmd):
    """Run ``cmd`` through ``utils.spawn``, passing ``self.logging`` along.

    Returns whatever ``utils.spawn`` returns.
    """
    process = utils.spawn(cmd, self.logging)
    return process
def download_website(self, url, destination_path): def download_website(self, url, destination_path):
"""Downloads the website using HTTrack and wait for the results to """Downloads the website using HTTrack and wait for the results to
@ -44,8 +49,7 @@ class ZimCreator(object):
The absolute location of a folder where the files will be written. The absolute location of a folder where the files will be written.
""" """
options = (self.httrack_bin, destination_path, url) options = (self.httrack_bin, destination_path, url)
p = utils.spawn("%s --path %s %s" % options) self._spawn("%s --path %s %s" % options)
p.wait() # Wait until the content is available (this is synchronous)
def prepare_website_folder(self, url, input_location): def prepare_website_folder(self, url, input_location):
"""Prepare the website files to make them ready to be embedded in a zim """Prepare the website files to make them ready to be embedded in a zim
@ -86,8 +90,7 @@ class ZimCreator(object):
' -d "{description}" -f {icon} -c "{author}"' ' -d "{description}" -f {icon} -c "{author}"'
' -p "{publisher}" {location} {output}' ' -p "{publisher}" {location} {output}'
).format(**zim_options) ).format(**zim_options)
p = utils.spawn(options) self._spawn(options)
p.wait()
return output_name return output_name
def create_zim_from_website(self, url, zim_options): def create_zim_from_website(self, url, zim_options):
@ -113,7 +116,7 @@ class ZimCreator(object):
return zim_file return zim_file
def load_from_settings(settings): def load_from_settings(settings, logging=None):
"""Load the ZimCreator object from the given pyramid settings, converting """Load the ZimCreator object from the given pyramid settings, converting
them to actual parameters. them to actual parameters.
@ -129,5 +132,6 @@ def load_from_settings(settings):
zimwriterfs_bin=settings['zimit.zimwriterfs_bin'], zimwriterfs_bin=settings['zimit.zimwriterfs_bin'],
httrack_bin=settings.get('zimit.httrack_bin'), httrack_bin=settings.get('zimit.httrack_bin'),
output_location=settings.get('zimit.output_location'), output_location=settings.get('zimit.output_location'),
author=settings.get('zimit.default_author') author=settings.get('zimit.default_author'),
logging=logging
) )

View File

@ -3,9 +3,17 @@ import shlex
import subprocess import subprocess
def spawn(cmd, logfile=None):
    """Run a command, wait for it to finish and return the process.

    :param cmd:
        The command line to run, as a single string. It is tokenized with
        ``shlex.split`` and executed without going through a shell.
    :param logfile:
        Optional path of a file receiving the standard output of the
        command. Output is appended, so several commands spawned one after
        the other can share the same log file.
    :returns:
        The ``subprocess.Popen`` object, after the command has exited.
    """
    if logfile is None:
        process = subprocess.Popen(shlex.split(cmd))
    else:
        # The command is wrapped in "stdbuf -o0" (unbuffered stdout),
        # presumably so the log can be read while the command still runs.
        unbuffered_cmd = shlex.split("stdbuf -o0 %s" % cmd)
        # Append instead of truncating: opening with "w" would wipe what a
        # previously spawned command already wrote to the same log file.
        with open(logfile, "a") as f:
            process = subprocess.Popen(unbuffered_cmd, stdout=f)

    process.wait()
    return process
def ensure_paths_exists(*paths): def ensure_paths_exists(*paths):

View File

@ -1,11 +1,14 @@
import os
from cornice import Service from cornice import Service
from colander import MappingSchema, SchemaNode, String from colander import MappingSchema, SchemaNode, String
from pyramid.httpexceptions import HTTPTemporaryRedirect from pyramid.httpexceptions import HTTPTemporaryRedirect, HTTPNotFound
from zimit.worker import create_zim from zimit.worker import create_zim
website = Service(name='website', path='/website') website = Service(name='website', path='/website')
home = Service(name='home', path='/') home = Service(name='home', path='/')
status = Service(name='status', path='/status/{id}')
@home.get() @home.get()
@ -29,11 +32,32 @@ class WebSiteSchema(MappingSchema):
@website.post(schema=WebSiteSchema)
def crawl_new_website(request):
    """Queue the creation of a zim file and return the id of the job.

    ``create_zim`` is enqueued on ``request.queue`` with the registry
    settings and the validated request data, using a timeout of 1800.
    The response status code is set to 201.
    """
    settings = request.registry.settings
    payload = request.validated

    job = request.queue.enqueue(create_zim, settings, payload, timeout=1800)

    request.response.status_code = 201
    return {'job_id': job.id}
@status.get()
def display_status(request):
    """Return the status of a job together with the content of its log.

    The job id is taken from the ``id`` segment of the matched URL and
    looked up on ``request.queue``. The log is read from
    ``<zimit.logdir>/<job id>.log`` (``/tmp`` when the setting is absent).

    :returns:
        A dict with the keys ``status`` and ``log``; ``log`` is ``None``
        when no log file exists for the job.
    :raises HTTPNotFound:
        When the queue does not know a job with the requested id.
    """
    job = request.queue.fetch_job(request.matchdict["id"])
    if job is None:
        raise HTTPNotFound()

    log_dir = request.registry.settings.get('zimit.logdir', '/tmp')
    log_file = os.path.join(log_dir, "%s.log" % job.id)

    log_content = None
    if os.path.exists(log_file):
        with open(log_file) as f:
            log_content = f.read()

    return {
        # get_status() is the supported accessor; the ``status`` property
        # of rq jobs is deprecated.
        "status": job.get_status(),
        "log": log_content
    }

View File

@ -1,11 +1,20 @@
from mailer import send_zim_url import os
import urlparse import urlparse
from rq import get_current_job
def create_zim(settings, zimCreator, options): from zimit.mailer import send_zim_url
from zimit.creator import load_from_settings
def create_zim(settings, options):
    """Create the zim file for a website, then hand its URL to the mailer.

    The log path is ``<zimit.logdir>/<current job id>.log`` (``/tmp`` when
    the setting is absent). It is passed to ``load_from_settings`` when the
    creator is built.

    :param settings:
        The settings mapping; ``zimit.logdir`` and ``zimit.output_url`` are
        read from it.
    :param options:
        The options of the request; ``url`` and ``email`` are read from it
        and the whole mapping is forwarded to the creator.
    """
    current_job = get_current_job()
    log_path = os.path.join(
        settings.get('zimit.logdir', '/tmp'),
        "%s.log" % current_job.id)

    creator = load_from_settings(settings, log_path)
    zim_name = creator.create_zim_from_website(options['url'], options)

    zim_url = urlparse.urljoin(settings.get('zimit.output_url'), zim_name)
    send_zim_url(settings, options['email'], zim_url)