mirror of
https://github.com/kiwix/kiwix-tools.git
synced 2025-09-20 18:39:16 -04:00
+ new trunk zimlib
This commit is contained in:
parent
189d97b220
commit
3d44961ead
@ -80,7 +80,7 @@ static int accessHandlerCallback(void *cls,
|
||||
/* Load the article from the ZIM file */
|
||||
cout << "Loading '" << title << "' in namespace '" << ns << "'" << endl;
|
||||
try {
|
||||
std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns[0], zim::QUnicodeString(title));
|
||||
std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns[0], title);
|
||||
|
||||
/* Test if the article was found */
|
||||
if (resultPair.first == true) {
|
||||
|
@ -23,7 +23,6 @@
|
||||
#include <string>
|
||||
#include <zim/zim.h>
|
||||
#include <zim/dirent.h>
|
||||
#include <zim/qunicode.h>
|
||||
#include <zim/file.h>
|
||||
#include <limits>
|
||||
#include <iosfwd>
|
||||
@ -50,11 +49,13 @@ namespace zim
|
||||
|
||||
std::string getParameter() const { return getDirent().getParameter(); }
|
||||
|
||||
QUnicodeString getTitle() const { return getDirent().getTitle(); }
|
||||
std::string getTitle() const { return getDirent().getTitle(); }
|
||||
std::string getUrl() const { return getDirent().getUrl(); }
|
||||
std::string getLongUrl() const { return getDirent().getLongUrl(); }
|
||||
|
||||
MimeType getLibraryMimeType() const { return getDirent().getMimeType(); }
|
||||
uint16_t getLibraryMimeType() const { return getDirent().getMimeType(); }
|
||||
const std::string&
|
||||
getMimeType() const;
|
||||
getMimeType() const { return file.getMimeType(getLibraryMimeType()); }
|
||||
|
||||
bool isRedirect() const { return getDirent().isRedirect(); }
|
||||
|
||||
@ -67,8 +68,8 @@ namespace zim
|
||||
|
||||
bool operator< (const Article& a) const
|
||||
{ return getNamespace() < a.getNamespace()
|
||||
|| getNamespace() == a.getNamespace()
|
||||
&& getTitle() < a.getTitle(); }
|
||||
|| (getNamespace() == a.getNamespace()
|
||||
&& getTitle() < a.getTitle()); }
|
||||
|
||||
Cluster getCluster() const
|
||||
{ return file.getCluster(getDirent().getClusterNumber()); }
|
||||
@ -87,8 +88,6 @@ namespace zim
|
||||
File& getFile() { return file; }
|
||||
size_type getIndex() const { return idx; }
|
||||
|
||||
QUnicodeString getUrl() const { return getDirent().getUrl(); }
|
||||
|
||||
bool good() const { return idx != std::numeric_limits<size_type>::max(); }
|
||||
};
|
||||
|
||||
|
@ -42,9 +42,9 @@ namespace zim
|
||||
{ }
|
||||
|
||||
Blob(ClusterImpl* cluster, const char* data, unsigned size)
|
||||
: _cluster(cluster),
|
||||
_data(data),
|
||||
_size(size)
|
||||
: _data(data),
|
||||
_size(size),
|
||||
_cluster(cluster)
|
||||
{ }
|
||||
|
||||
const char* data() const { return _data; }
|
||||
|
@ -22,14 +22,16 @@
|
||||
|
||||
#include <string>
|
||||
#include <zim/zim.h>
|
||||
#include <zim/qunicode.h>
|
||||
#include <limits>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class Dirent
|
||||
{
|
||||
bool redirect;
|
||||
MimeType mimeType;
|
||||
uint16_t mimeType;
|
||||
|
||||
size_type version;
|
||||
|
||||
size_type clusterNumber; // only used when redirect is false
|
||||
size_type blobNumber; // only used when redirect is false
|
||||
@ -37,14 +39,18 @@ namespace zim
|
||||
size_type redirectIndex; // only used when redirect is true
|
||||
|
||||
char ns;
|
||||
QUnicodeString title;
|
||||
std::string title;
|
||||
std::string url;
|
||||
std::string parameter;
|
||||
|
||||
public:
|
||||
Dirent() {}
|
||||
|
||||
bool isRedirect() const { return redirect; }
|
||||
MimeType getMimeType() const { return mimeType; }
|
||||
uint16_t getMimeType() const { return mimeType; }
|
||||
|
||||
size_type getVersion() const { return version; }
|
||||
void setVersion(size_type v) { version = v; }
|
||||
|
||||
size_type getClusterNumber() const { return isRedirect() ? 0 : clusterNumber; }
|
||||
size_type getBlobNumber() const { return isRedirect() ? 0 : blobNumber; }
|
||||
@ -54,26 +60,28 @@ namespace zim
|
||||
size_type getRedirectIndex() const { return isRedirect() ? redirectIndex : 0; }
|
||||
|
||||
char getNamespace() const { return ns; }
|
||||
const QUnicodeString& getTitle() const { return title; }
|
||||
const std::string& getTitle() const { return title.empty() ? url : title; }
|
||||
const std::string& getUrl() const { return url; }
|
||||
std::string getLongUrl() const;
|
||||
const std::string& getParameter() const { return parameter; }
|
||||
|
||||
uint16_t getExtraLen() const
|
||||
{
|
||||
uint16_t s = title.getValue().size();
|
||||
if (!parameter.empty())
|
||||
s += (parameter.size() + 1);
|
||||
return s;
|
||||
}
|
||||
|
||||
unsigned getDirentSize() const
|
||||
{
|
||||
return (isRedirect() ? 10 : 14) + getExtraLen();
|
||||
unsigned ret = (isRedirect() ? 12 : 16) + url.size() + parameter.size() + 2;
|
||||
if (title != url)
|
||||
ret += title.size();
|
||||
return ret;
|
||||
}
|
||||
|
||||
void setTitle(char ns_, const QUnicodeString& title_)
|
||||
void setTitle(const std::string& title_)
|
||||
{
|
||||
title = title_;
|
||||
}
|
||||
|
||||
void setUrl(char ns_, const std::string& url_)
|
||||
{
|
||||
ns = ns_;
|
||||
title = title_;
|
||||
url = url_;
|
||||
}
|
||||
|
||||
void setParameter(const std::string& parameter_)
|
||||
@ -85,12 +93,12 @@ namespace zim
|
||||
{
|
||||
redirect = true;
|
||||
redirectIndex = idx;
|
||||
mimeType = zimMimeNone;
|
||||
mimeType = std::numeric_limits<uint16_t>::max();
|
||||
clusterNumber = 0;
|
||||
blobNumber = 0;
|
||||
}
|
||||
|
||||
void setArticle(MimeType mimeType_, size_type clusterNumber_, size_type blobNumber_)
|
||||
void setArticle(uint16_t mimeType_, size_type clusterNumber_, size_type blobNumber_)
|
||||
{
|
||||
redirect = false;
|
||||
mimeType = mimeType_;
|
||||
@ -98,7 +106,6 @@ namespace zim
|
||||
blobNumber = blobNumber_;
|
||||
}
|
||||
|
||||
QUnicodeString getUrl() const;
|
||||
};
|
||||
|
||||
std::ostream& operator<< (std::ostream& out, const Dirent& fh);
|
||||
|
@ -45,11 +45,14 @@ namespace zim
|
||||
const std::string& getFilename() const { return impl->getFilename(); }
|
||||
const Fileheader& getFileheader() const { return impl->getFileheader(); }
|
||||
|
||||
Dirent getDirent(size_type idx);
|
||||
Dirent getDirent(size_type idx) { return impl->getDirent(idx); }
|
||||
Dirent getDirentByTitle(size_type idx) { return impl->getDirentByTitle(idx); }
|
||||
size_type getCountArticles() const { return impl->getCountArticles(); }
|
||||
|
||||
Article getArticle(size_type idx) const;
|
||||
Article getArticle(char ns, const QUnicodeString& title, bool collate = false);
|
||||
Article getArticle(char ns, const std::string& url);
|
||||
Article getArticleByTitle(size_type idx);
|
||||
Article getArticleByTitle(char ns, const std::string& title);
|
||||
|
||||
Cluster getCluster(size_type idx) const { return impl->getCluster(idx); }
|
||||
size_type getCountClusters() const { return impl->getCountClusters(); }
|
||||
@ -72,12 +75,17 @@ namespace zim
|
||||
class const_iterator;
|
||||
|
||||
const_iterator begin();
|
||||
const_iterator beginByTitle();
|
||||
const_iterator end();
|
||||
std::pair<bool, const_iterator> findx(char ns, const QUnicodeString& title, bool collate = false);
|
||||
const_iterator find(char ns, const QUnicodeString& title, bool collate = false);
|
||||
std::pair<bool, const_iterator> findxByTitle(char ns, const std::string& title);
|
||||
std::pair<bool, const_iterator> findx(char ns, const std::string& url);
|
||||
const_iterator findByTitle(char ns, const std::string& title);
|
||||
const_iterator find(char ns, const std::string& url);
|
||||
|
||||
bool good() const { return impl.getPointer() != 0; }
|
||||
time_t getMTime() const { return impl->getMTime(); }
|
||||
|
||||
const std::string& getMimeType(uint16_t idx) const { return impl->getMimeType(idx); }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -38,7 +38,9 @@ namespace zim
|
||||
private:
|
||||
Uuid uuid;
|
||||
size_type articleCount;
|
||||
offset_type indexPtrPos;
|
||||
offset_type titleIdxPos;
|
||||
offset_type urlPtrPos;
|
||||
offset_type mimeListPos;
|
||||
size_type blobCount;
|
||||
offset_type blobPtrPos;
|
||||
size_type mainPage;
|
||||
@ -47,7 +49,8 @@ namespace zim
|
||||
public:
|
||||
Fileheader()
|
||||
: articleCount(0),
|
||||
indexPtrPos(0),
|
||||
titleIdxPos(0),
|
||||
urlPtrPos(0),
|
||||
blobCount(0),
|
||||
blobPtrPos(0),
|
||||
mainPage(std::numeric_limits<size_type>::max()),
|
||||
@ -60,22 +63,28 @@ namespace zim
|
||||
size_type getArticleCount() const { return articleCount; }
|
||||
void setArticleCount(size_type s) { articleCount = s; }
|
||||
|
||||
offset_type getIndexPtrPos() const { return indexPtrPos; }
|
||||
void setIndexPtrPos(offset_type p) { indexPtrPos = p; }
|
||||
offset_type getTitleIdxPos() const { return titleIdxPos; }
|
||||
void setTitleIdxPos(offset_type p) { titleIdxPos = p; }
|
||||
|
||||
size_type getClusterCount() const { return blobCount; }
|
||||
void setClusterCount(size_type s) { blobCount = s; }
|
||||
offset_type getUrlPtrPos() const { return urlPtrPos; }
|
||||
void setUrlPtrPos(offset_type p) { urlPtrPos = p; }
|
||||
|
||||
offset_type getMimeListPos() const { return mimeListPos; }
|
||||
void setMimeListPos(offset_type p) { mimeListPos = p; }
|
||||
|
||||
size_type getClusterCount() const { return blobCount; }
|
||||
void setClusterCount(size_type s) { blobCount = s; }
|
||||
|
||||
offset_type getClusterPtrPos() const { return blobPtrPos; }
|
||||
void setClusterPtrPos(offset_type p) { blobPtrPos = p; }
|
||||
|
||||
bool hasMainPage() const { return mainPage != std::numeric_limits<size_type>::max(); }
|
||||
size_type getMainPage() const { return mainPage; }
|
||||
void setMainPage(size_type s) { mainPage = s; }
|
||||
bool hasMainPage() const { return mainPage != std::numeric_limits<size_type>::max(); }
|
||||
size_type getMainPage() const { return mainPage; }
|
||||
void setMainPage(size_type s) { mainPage = s; }
|
||||
|
||||
bool hasLayoutPage() const { return layoutPage != std::numeric_limits<size_type>::max(); }
|
||||
size_type getLayoutPage() const { return layoutPage; }
|
||||
void setLayoutPage(size_type s) { layoutPage = s; }
|
||||
bool hasLayoutPage() const { return layoutPage != std::numeric_limits<size_type>::max(); }
|
||||
size_type getLayoutPage() const { return layoutPage; }
|
||||
void setLayoutPage(size_type s) { layoutPage = s; }
|
||||
};
|
||||
|
||||
std::ostream& operator<< (std::ostream& out, const Fileheader& fh);
|
||||
|
@ -26,7 +26,6 @@
|
||||
#include <map>
|
||||
#include <zim/refcounted.h>
|
||||
#include <zim/zim.h>
|
||||
#include <zim/qunicode.h>
|
||||
#include <zim/fileheader.h>
|
||||
#include <zim/cache.h>
|
||||
#include <zim/dirent.h>
|
||||
@ -40,10 +39,6 @@ namespace zim
|
||||
Fileheader header;
|
||||
std::string filename;
|
||||
|
||||
typedef std::vector<offset_type> OffsetsType;
|
||||
OffsetsType indexOffsets;
|
||||
OffsetsType clusterOffsets;
|
||||
|
||||
Cache<size_type, Dirent> direntCache;
|
||||
Cache<offset_type, Cluster> clusterCache;
|
||||
typedef std::map<char, size_type> NamespaceCache;
|
||||
@ -53,6 +48,11 @@ namespace zim
|
||||
std::string namespaces;
|
||||
time_t mtime;
|
||||
|
||||
typedef std::vector<std::string> MimeTypes;
|
||||
MimeTypes mimeTypes;
|
||||
|
||||
offset_type getOffset(offset_type ptrOffset, size_type idx);
|
||||
|
||||
public:
|
||||
explicit FileImpl(const char* fname);
|
||||
|
||||
@ -62,11 +62,13 @@ namespace zim
|
||||
const Fileheader& getFileheader() const { return header; }
|
||||
|
||||
Dirent getDirent(size_type idx);
|
||||
size_type getCountArticles() const { return indexOffsets.size(); }
|
||||
Dirent getDirentByTitle(size_type idx);
|
||||
size_type getIndexByTitle(size_type idx);
|
||||
size_type getCountArticles() const { return header.getArticleCount(); }
|
||||
|
||||
Cluster getCluster(size_type idx);
|
||||
size_type getCountClusters() const { return clusterOffsets.size(); }
|
||||
offset_type getClusterOffset(size_type idx) const { return clusterOffsets[idx]; }
|
||||
size_type getCountClusters() const { return header.getClusterCount(); }
|
||||
offset_type getClusterOffset(size_type idx) { return getOffset(header.getClusterPtrPos(), idx); }
|
||||
|
||||
size_type getNamespaceBeginOffset(char ch);
|
||||
size_type getNamespaceEndOffset(char ch);
|
||||
@ -76,6 +78,7 @@ namespace zim
|
||||
std::string getNamespaces();
|
||||
bool hasNamespace(char ch);
|
||||
|
||||
const std::string& getMimeType(uint16_t idx) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -27,30 +27,40 @@ namespace zim
|
||||
{
|
||||
class File::const_iterator : public std::iterator<std::bidirectional_iterator_tag, Article>
|
||||
{
|
||||
public:
|
||||
enum Mode {
|
||||
UrlIterator,
|
||||
ArticleIterator
|
||||
};
|
||||
|
||||
private:
|
||||
File* file;
|
||||
size_type idx;
|
||||
mutable Article article;
|
||||
Mode mode;
|
||||
|
||||
bool is_end() const { return file == 0 || idx >= file->getCountArticles(); }
|
||||
|
||||
public:
|
||||
explicit const_iterator(File* file_ = 0, size_type idx_ = 0)
|
||||
explicit const_iterator(File* file_ = 0, size_type idx_ = 0, Mode mode_ = UrlIterator)
|
||||
: file(file_),
|
||||
idx(idx_)
|
||||
idx(idx_),
|
||||
mode(mode_)
|
||||
{ }
|
||||
|
||||
size_type getIndex() const { return idx; }
|
||||
const File& getFile() const { return *file; }
|
||||
|
||||
bool operator== (const const_iterator& it) const
|
||||
{ return is_end() && it.is_end()
|
||||
|| file == it.file && idx == it.idx; }
|
||||
{ return (is_end() && it.is_end())
|
||||
|| (file == it.file && idx == it.idx); }
|
||||
bool operator!= (const const_iterator& it) const
|
||||
{ return !operator==(it); }
|
||||
|
||||
const_iterator& operator++()
|
||||
{
|
||||
++idx;
|
||||
article = Article();
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -64,6 +74,7 @@ namespace zim
|
||||
const_iterator& operator--()
|
||||
{
|
||||
--idx;
|
||||
article = Article();
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -74,17 +85,17 @@ namespace zim
|
||||
return *this;
|
||||
}
|
||||
|
||||
Article operator*() const
|
||||
const Article& operator*() const
|
||||
{
|
||||
if (article.getIndex() != idx)
|
||||
article = file->getArticle(idx);
|
||||
if (!article.good())
|
||||
article = mode == UrlIterator ? file->getArticle(idx)
|
||||
: file->getArticleByTitle(idx);
|
||||
return article;
|
||||
}
|
||||
|
||||
pointer operator->() const
|
||||
{
|
||||
if (article.getIndex() != idx)
|
||||
article = file->getArticle(idx);
|
||||
operator*();
|
||||
return &article;
|
||||
}
|
||||
|
||||
|
94
src/zimlib/include/zim/lzmastream.h
Normal file
94
src/zimlib/include/zim/lzmastream.h
Normal file
@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_LZMASTREAM_H
|
||||
#define ZIM_LZMASTREAM_H
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <lzma.h>
|
||||
#include <vector>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class LzmaError : public std::runtime_error
|
||||
{
|
||||
lzma_ret ret;
|
||||
|
||||
public:
|
||||
LzmaError(lzma_ret ret_, const std::string& msg)
|
||||
: std::runtime_error(msg),
|
||||
ret(ret_)
|
||||
{ }
|
||||
|
||||
lzma_ret getRetcode() const { return ret; }
|
||||
};
|
||||
|
||||
class LzmaStreamBuf : public std::streambuf
|
||||
{
|
||||
lzma_stream stream;
|
||||
std::vector<char_type> obuffer;
|
||||
std::streambuf* sink;
|
||||
|
||||
public:
|
||||
LzmaStreamBuf(std::streambuf* sink_,
|
||||
uint32_t preset = 3 | LZMA_PRESET_EXTREME,
|
||||
lzma_check check = LZMA_CHECK_CRC32 /* LZMA_CHECK_NONE */,
|
||||
unsigned bufsize = 8192);
|
||||
~LzmaStreamBuf();
|
||||
|
||||
/// see std::streambuf
|
||||
int_type overflow(int_type c);
|
||||
/// see std::streambuf
|
||||
int_type underflow();
|
||||
/// see std::streambuf
|
||||
int sync();
|
||||
/// end stream
|
||||
int end();
|
||||
|
||||
void setSink(std::streambuf* sink_) { sink = sink_; }
|
||||
};
|
||||
|
||||
class LzmaStream : public std::ostream
|
||||
{
|
||||
LzmaStreamBuf streambuf;
|
||||
|
||||
public:
|
||||
explicit LzmaStream(std::streambuf* sink,
|
||||
uint32_t preset = 3 | LZMA_PRESET_EXTREME,
|
||||
lzma_check check = LZMA_CHECK_CRC32 /* LZMA_CHECK_NONE */,
|
||||
unsigned bufsize = 8192)
|
||||
: std::ostream(0),
|
||||
streambuf(sink, preset, check, bufsize)
|
||||
{ init(&streambuf); }
|
||||
explicit LzmaStream(std::ostream& sink,
|
||||
uint32_t preset = 3 | LZMA_PRESET_EXTREME,
|
||||
lzma_check check = LZMA_CHECK_CRC32 /* LZMA_CHECK_NONE */,
|
||||
unsigned bufsize = 8192)
|
||||
: std::ostream(0),
|
||||
streambuf(sink.rdbuf(), preset, check, bufsize)
|
||||
{ init(&streambuf); }
|
||||
|
||||
void end();
|
||||
void setSink(std::streambuf* sink) { streambuf.setSink(sink); }
|
||||
void setSink(std::ostream& sink) { streambuf.setSink(sink.rdbuf()); }
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ZIM_LZMASTREAM_H
|
@ -93,8 +93,8 @@ namespace zim
|
||||
{ }
|
||||
|
||||
void search(Results& results, const std::string& expr);
|
||||
void find(Results& results, char ns, const QUnicodeString& praefix, unsigned limit = searchLimit);
|
||||
void find(Results& results, char ns, const QUnicodeString& begin, const QUnicodeString& end, unsigned limit = searchLimit);
|
||||
void find(Results& results, char ns, const std::string& praefix, unsigned limit = searchLimit);
|
||||
void find(Results& results, char ns, const std::string& begin, const std::string& end, unsigned limit = searchLimit);
|
||||
|
||||
static double getWeightOcc() { return weightOcc; }
|
||||
static double getWeightOccOff() { return weightOccOff; }
|
||||
|
@ -31,25 +31,6 @@
|
||||
|
||||
namespace zim
|
||||
{
|
||||
template <typename objectType>
|
||||
class InternalRefCounted
|
||||
{
|
||||
protected:
|
||||
bool unlink(objectType* object)
|
||||
{
|
||||
if (object)
|
||||
object->release();
|
||||
return false;
|
||||
}
|
||||
|
||||
void link(const InternalRefCounted& ptr, objectType* object)
|
||||
{
|
||||
if (object)
|
||||
object->addRef();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <typename objectType>
|
||||
class SmartPtr
|
||||
{
|
||||
|
@ -32,7 +32,7 @@ namespace zim
|
||||
public:
|
||||
virtual void onData(const std::string& data) = 0;
|
||||
virtual void onToken(const std::string& token) = 0;
|
||||
virtual void onLink(char ns, const std::string& title) = 0;
|
||||
virtual void onLink(char ns, const std::string& url) = 0;
|
||||
};
|
||||
|
||||
private:
|
||||
|
91
src/zimlib/include/zim/unlzmastream.h
Normal file
91
src/zimlib/include/zim/unlzmastream.h
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef ZIM_UNLZMASTREAM_H
|
||||
#define ZIM_UNLZMASTREAM_H
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <lzma.h>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class UnlzmaError : public std::runtime_error
|
||||
{
|
||||
lzma_ret ret;
|
||||
|
||||
public:
|
||||
UnlzmaError(lzma_ret ret_, const std::string& msg)
|
||||
: std::runtime_error(msg),
|
||||
ret(ret_)
|
||||
{ }
|
||||
|
||||
lzma_ret getRetcode() const { return ret; }
|
||||
};
|
||||
|
||||
class UnlzmaStreamBuf : public std::streambuf
|
||||
{
|
||||
lzma_stream stream;
|
||||
char_type* iobuffer;
|
||||
unsigned bufsize;
|
||||
std::streambuf* sinksource;
|
||||
|
||||
char_type* ibuffer() { return iobuffer; }
|
||||
std::streamsize ibuffer_size() { return bufsize >> 1; }
|
||||
char_type* obuffer() { return iobuffer + ibuffer_size(); }
|
||||
std::streamsize obuffer_size() { return bufsize >> 1; }
|
||||
|
||||
public:
|
||||
explicit UnlzmaStreamBuf(std::streambuf* sinksource_, unsigned bufsize = 8192);
|
||||
~UnlzmaStreamBuf();
|
||||
|
||||
/// see std::streambuf
|
||||
int_type overflow(int_type c);
|
||||
/// see std::streambuf
|
||||
int_type underflow();
|
||||
/// see std::streambuf
|
||||
int sync();
|
||||
|
||||
void setSinksource(std::streambuf* sinksource_) { sinksource = sinksource_; }
|
||||
};
|
||||
|
||||
class UnlzmaStream : public std::iostream
|
||||
{
|
||||
UnlzmaStreamBuf streambuf;
|
||||
|
||||
public:
|
||||
explicit UnlzmaStream(std::streambuf* sinksource, unsigned bufsize = 8192)
|
||||
: std::iostream(0),
|
||||
streambuf(sinksource, bufsize)
|
||||
{ init(&streambuf); }
|
||||
explicit UnlzmaStream(std::ios& sinksource, unsigned bufsize = 8192)
|
||||
: std::iostream(0),
|
||||
streambuf(sinksource.rdbuf(), bufsize)
|
||||
{ init(&streambuf); }
|
||||
|
||||
void setSinksource(std::streambuf* sinksource) { streambuf.setSinksource(sinksource); }
|
||||
void setSinksource(std::ios& sinksource) { streambuf.setSinksource(sinksource.rdbuf()); }
|
||||
void setSink(std::ostream& sink) { streambuf.setSinksource(sink.rdbuf()); }
|
||||
void setSource(std::istream& source) { streambuf.setSinksource(source.rdbuf()); }
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ZIM_UNLZMASTREAM_H
|
||||
|
@ -42,23 +42,7 @@ namespace zim
|
||||
zimcompLzma
|
||||
};
|
||||
|
||||
enum MimeType
|
||||
{
|
||||
zimMimeNone = -1,
|
||||
zimMimeTextHtml,
|
||||
zimMimeTextPlain,
|
||||
zimMimeImageJpeg,
|
||||
zimMimeImagePng,
|
||||
zimMimeImageTiff,
|
||||
zimMimeTextCss,
|
||||
zimMimeImageGif,
|
||||
zimMimeIndex,
|
||||
zimMimeApplicationJavaScript,
|
||||
zimMimeImageIcon,
|
||||
zimMimeTextXml,
|
||||
zimMimeTextHtmlTemplate
|
||||
};
|
||||
|
||||
static const char MimeHtmlTemplate[] = "text/x-zim-htmltemplate";
|
||||
}
|
||||
|
||||
#endif // ZIM_ZIM_H
|
||||
|
@ -24,32 +24,74 @@
|
||||
#include <iostream>
|
||||
#include <zim/zim.h>
|
||||
|
||||
/*
|
||||
ZInt implements an int compressor and decompressor. The algorithm compresses
|
||||
small values into fewer bytes.
|
||||
|
||||
The idea is to add information about used bytes in the first byte. The number
|
||||
of additional bytes used is specified by the number of set bits counted from
|
||||
the most significant bit. So the numbers 0-127 are encoded as is, since they
|
||||
fit into the 7 low order bits and the high order bit specifies, that no
|
||||
additional bytes are used. The numbers starting from 128 up to 16383 need more
|
||||
than 7 bits, so we need to set the highest order bit to 1 and the next bit to
|
||||
0, leaving 6 bits of actual data, which is used as the low order bits of the
|
||||
number.
|
||||
|
||||
Since the numbers 0-127 are already encoded in one byte, 128 is
|
||||
subtracted from the actual number, so a 2 byte zero is actually a 128.
|
||||
|
||||
The same logic continues on the 3rd, 4th, ... byte. Up to 7 additional bytes
|
||||
are used, so the first byte must contain at least one 0.
|
||||
|
||||
binary range
|
||||
------------------------------- --------------------------------------------------
|
||||
0xxx xxxx 0 - 127
|
||||
10xx xxxx xxxx xxxx 128 - (2^14+128-1 = 16511)
|
||||
110x xxxx xxxx xxxx xxxx xxxx 16512 - (2^21+16512-1 = 2113663)
|
||||
1110 xxxx xxxx xxxx xxxx xxxx xxxx xxxx
|
||||
2113664 - (2^28+2113664-1 = 270549119)
|
||||
...
|
||||
|
||||
*/
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class IZIntStream
|
||||
class ZIntStream
|
||||
{
|
||||
std::istream& stream;
|
||||
std::istream* _istream;
|
||||
std::ostream* _ostream;
|
||||
|
||||
public:
|
||||
explicit IZIntStream(std::istream& stream_)
|
||||
: stream(stream_)
|
||||
{ }
|
||||
/// prepare ZIntStream for compression or decompression
|
||||
explicit ZIntStream(std::iostream& iostream)
|
||||
: _istream(&iostream),
|
||||
_ostream(&iostream)
|
||||
{ }
|
||||
|
||||
IZIntStream& get(size_type &value);
|
||||
operator void*() const { return stream; }
|
||||
};
|
||||
/// prepare ZIntStream for decompression
|
||||
explicit ZIntStream(std::istream& istream)
|
||||
: _istream(&istream),
|
||||
_ostream(0)
|
||||
{ }
|
||||
|
||||
class OZIntStream
|
||||
{
|
||||
std::ostream& stream;
|
||||
/// prepare ZIntStream for compression
|
||||
explicit ZIntStream(std::ostream& ostream)
|
||||
: _istream(0),
|
||||
_ostream(&ostream)
|
||||
{ }
|
||||
|
||||
public:
|
||||
explicit OZIntStream(std::ostream& stream_)
|
||||
: stream(stream_)
|
||||
{ }
|
||||
/// decompresses one value from input stream and returns it
|
||||
size_type get();
|
||||
|
||||
OZIntStream& put(size_type value);
|
||||
operator void*() const { return stream; }
|
||||
ZIntStream& get(size_type &value)
|
||||
{ value = get(); return *this; }
|
||||
|
||||
/// compresses one value to output stream
|
||||
ZIntStream& put(size_type value);
|
||||
|
||||
operator bool() const
|
||||
{ return (_istream == 0 || *_istream)
|
||||
&& (_ostream == 0 || *_ostream); }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -28,50 +28,6 @@ log_define("zim.article")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
const std::string& Article::getMimeType() const
|
||||
{
|
||||
static const std::string textHtml = "text/html; charset=UTF-8";
|
||||
static const std::string textPlain = "text/plain";
|
||||
static const std::string textXml = "application/xml";
|
||||
static const std::string imageJpeg = "image/jpeg";
|
||||
static const std::string imagePng = "image/png";
|
||||
static const std::string imageTiff = "image/tiff";
|
||||
static const std::string textCss = "text/css";
|
||||
static const std::string imageGif = "image/gif";
|
||||
static const std::string index = "text/plain";
|
||||
static const std::string applicationJavaScript = "application/x-javascript";
|
||||
static const std::string imageIcon = "image/x-icon";
|
||||
|
||||
switch (getLibraryMimeType())
|
||||
{
|
||||
case zimMimeTextHtml:
|
||||
case zimMimeTextHtmlTemplate:
|
||||
return textHtml;
|
||||
case zimMimeTextPlain:
|
||||
return textPlain;
|
||||
case zimMimeImageJpeg:
|
||||
return imageJpeg;
|
||||
case zimMimeImagePng:
|
||||
return imagePng;
|
||||
case zimMimeImageTiff:
|
||||
return imageTiff;
|
||||
case zimMimeTextCss:
|
||||
return textCss;
|
||||
case zimMimeImageGif:
|
||||
return imageGif;
|
||||
case zimMimeIndex:
|
||||
return index;
|
||||
case zimMimeApplicationJavaScript:
|
||||
return applicationJavaScript;
|
||||
case zimMimeImageIcon:
|
||||
return imageIcon;
|
||||
case zimMimeTextXml:
|
||||
return textXml;
|
||||
}
|
||||
|
||||
return textHtml;
|
||||
}
|
||||
|
||||
size_type Article::getArticleSize() const
|
||||
{
|
||||
Dirent dirent = getDirent();
|
||||
@ -108,9 +64,9 @@ namespace zim
|
||||
log_trace("onToken(\"" << token << "\")");
|
||||
|
||||
if (token == "title")
|
||||
out << article.getTitle().toUtf8();
|
||||
out << article.getTitle();
|
||||
else if (token == "url")
|
||||
out << article.getUrl().toUtf8();
|
||||
out << article.getUrl();
|
||||
else if (token == "namespace")
|
||||
out << article.getNamespace();
|
||||
else if (token == "content")
|
||||
@ -126,11 +82,11 @@ namespace zim
|
||||
}
|
||||
}
|
||||
|
||||
void Ev::onLink(char ns, const std::string& title)
|
||||
void Ev::onLink(char ns, const std::string& url)
|
||||
{
|
||||
if (maxRecurse <= 0)
|
||||
throw std::runtime_error("maximum recursive limit is reached");
|
||||
article.getFile().getArticle(ns, QUnicodeString::fromUtf8(title)).getPage(out, false, maxRecurse - 1);
|
||||
article.getFile().getArticle(ns, url).getPage(out, false, maxRecurse - 1);
|
||||
}
|
||||
|
||||
}
|
||||
@ -146,7 +102,7 @@ namespace zim
|
||||
{
|
||||
log_trace("Article::getPage(" << layout << ", " << maxRecurse << ')');
|
||||
|
||||
if (getLibraryMimeType() == zimMimeTextHtml || getLibraryMimeType() == zimMimeTextHtmlTemplate)
|
||||
if (getMimeType().compare(0, 9, "text/html") == 0 || getMimeType() == MimeHtmlTemplate)
|
||||
{
|
||||
if (layout && file.getFileheader().hasLayoutPage())
|
||||
{
|
||||
@ -162,7 +118,7 @@ namespace zim
|
||||
|
||||
return;
|
||||
}
|
||||
else if (getLibraryMimeType() == zimMimeTextHtmlTemplate)
|
||||
else if (getMimeType() == MimeHtmlTemplate)
|
||||
{
|
||||
Blob data = getData();
|
||||
|
||||
|
@ -43,7 +43,7 @@ namespace zim
|
||||
|
||||
for (File::const_iterator it = articleFile.begin(); it != articleFile.end(); ++it)
|
||||
{
|
||||
std::string title = it->getTitle().toUtf8();
|
||||
std::string title = it->getTitle();
|
||||
if (title.find(expr) != std::string::npos)
|
||||
ret.push_back(*it);
|
||||
}
|
||||
|
@ -66,8 +66,6 @@ namespace zim
|
||||
|
||||
Bunzip2StreamBuf::int_type Bunzip2StreamBuf::overflow(int_type c)
|
||||
{
|
||||
log_debug("Bunzip2StreamBuf::overflow");
|
||||
|
||||
if (pptr())
|
||||
{
|
||||
// initialize input-stream for
|
||||
@ -81,10 +79,8 @@ namespace zim
|
||||
stream.next_out = ibuffer();
|
||||
stream.avail_out = ibuffer_size();
|
||||
|
||||
log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
|
||||
ret = ::BZ2_bzDecompress(&stream);
|
||||
checkError(ret, stream);
|
||||
log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " ret=" << ret);
|
||||
|
||||
// copy ibuffer to sinksource
|
||||
std::streamsize count = ibuffer_size() - stream.avail_out;
|
||||
@ -118,14 +114,12 @@ namespace zim
|
||||
{
|
||||
// there is data already available
|
||||
// read compressed data from source into ibuffer
|
||||
log_debug("in_avail=" << sinksource->in_avail());
|
||||
stream.avail_in = sinksource->sgetn(ibuffer(), mymin(sinksource->in_avail(), ibuffer_size()));
|
||||
}
|
||||
else
|
||||
{
|
||||
// no data available
|
||||
stream.avail_in = sinksource->sgetn(ibuffer(), ibuffer_size());
|
||||
log_debug(stream.avail_in << " bytes read from source");
|
||||
if (stream.avail_in == 0)
|
||||
return traits_type::eof();
|
||||
}
|
||||
@ -137,9 +131,7 @@ namespace zim
|
||||
|
||||
// at least one character received from source - pass to decompressor
|
||||
|
||||
log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
|
||||
int ret = ::BZ2_bzDecompress(&stream);
|
||||
log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " ret=" << ret);
|
||||
|
||||
checkError(ret, stream);
|
||||
|
||||
|
@ -56,14 +56,11 @@ namespace zim
|
||||
|
||||
Bzip2StreamBuf::~Bzip2StreamBuf()
|
||||
{
|
||||
log_debug("bzCompressEnd");
|
||||
::BZ2_bzCompressEnd(&stream);
|
||||
}
|
||||
|
||||
Bzip2StreamBuf::int_type Bzip2StreamBuf::overflow(int_type c)
|
||||
{
|
||||
log_debug("Bzip2StreamBuf::overflow");
|
||||
|
||||
// initialize input-stream
|
||||
stream.next_in = &obuffer[0];
|
||||
stream.avail_in = pptr() - &obuffer[0];
|
||||
@ -74,9 +71,7 @@ namespace zim
|
||||
stream.avail_out = sizeof(zbuffer);
|
||||
|
||||
// deflate
|
||||
log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " BZ_RUN");
|
||||
int ret = checkError(::BZ2_bzCompress(&stream, BZ_RUN), stream);
|
||||
log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " ret=" << ret << " total_out_lo32=" << stream.total_out_lo32);
|
||||
checkError(::BZ2_bzCompress(&stream, BZ_RUN), stream);
|
||||
|
||||
// copy zbuffer to sink / consume deflated data
|
||||
std::streamsize count = sizeof(zbuffer) - stream.avail_out;
|
||||
@ -106,8 +101,6 @@ namespace zim
|
||||
|
||||
int Bzip2StreamBuf::sync()
|
||||
{
|
||||
log_debug("Bzip2StreamBuf::sync");
|
||||
|
||||
// initialize input-stream for
|
||||
stream.next_in = &obuffer[0];
|
||||
stream.avail_in = pptr() - pbase();
|
||||
@ -119,9 +112,7 @@ namespace zim
|
||||
stream.next_out = zbuffer;
|
||||
stream.avail_out = sizeof(zbuffer);
|
||||
|
||||
log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " BZ_FLUSH");
|
||||
ret = checkError(::BZ2_bzCompress(&stream, BZ_FLUSH), stream);
|
||||
log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " ret=" << ret << " total_out_lo32=" << stream.total_out_lo32);
|
||||
|
||||
// copy zbuffer to sink
|
||||
std::streamsize count = sizeof(zbuffer) - stream.avail_out;
|
||||
@ -141,8 +132,6 @@ namespace zim
|
||||
|
||||
int Bzip2StreamBuf::end()
|
||||
{
|
||||
log_debug("Bzip2StreamBuf::end");
|
||||
|
||||
char zbuffer[8192];
|
||||
// initialize input-stream for
|
||||
stream.next_in = &obuffer[0];
|
||||
@ -154,9 +143,7 @@ namespace zim
|
||||
stream.next_out = zbuffer;
|
||||
stream.avail_out = sizeof(zbuffer);
|
||||
|
||||
log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " BZ_FINISH");
|
||||
ret = checkError(::BZ2_bzCompress(&stream, BZ_FINISH), stream);
|
||||
log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " ret=" << ret << " total_out_lo32=" << stream.total_out_lo32);
|
||||
|
||||
// copy zbuffer to sink
|
||||
std::streamsize count = sizeof(zbuffer) - stream.avail_out;
|
||||
|
@ -19,16 +19,33 @@
|
||||
|
||||
#include <zim/cluster.h>
|
||||
#include <zim/blob.h>
|
||||
#include <zim/endian.h>
|
||||
#include <stdlib.h>
|
||||
#include <sstream>
|
||||
|
||||
#include "log.h"
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#ifdef ENABLE_ZLIB
|
||||
#include <zim/deflatestream.h>
|
||||
#include <zim/inflatestream.h>
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_BZIP2
|
||||
#include <zim/bzip2stream.h>
|
||||
#include <zim/bunzip2stream.h>
|
||||
#include <zim/endian.h>
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_LZMA
|
||||
#include <zim/lzmastream.h>
|
||||
#include <zim/unlzmastream.h>
|
||||
#endif
|
||||
|
||||
log_define("zim.cluster")
|
||||
|
||||
#define log_debug1(e)
|
||||
|
||||
namespace zim
|
||||
{
|
||||
Cluster::Cluster()
|
||||
@ -50,7 +67,7 @@ namespace zim
|
||||
|
||||
void ClusterImpl::read(std::istream& in)
|
||||
{
|
||||
log_debug("read");
|
||||
log_debug1("read");
|
||||
|
||||
// read first offset, which specifies, how many offsets we need to read
|
||||
size_type offset;
|
||||
@ -63,7 +80,7 @@ namespace zim
|
||||
size_type n = offset / 4;
|
||||
size_type a = offset;
|
||||
|
||||
log_debug("first offset is " << offset << " n=" << n << " a=" << a);
|
||||
log_debug1("first offset is " << offset << " n=" << n << " a=" << a);
|
||||
|
||||
// read offsets
|
||||
offsets.clear();
|
||||
@ -75,11 +92,11 @@ namespace zim
|
||||
in.read(reinterpret_cast<char*>(&offset), sizeof(offset));
|
||||
if (in.fail())
|
||||
{
|
||||
log_debug("fail at " << n);
|
||||
log_debug1("fail at " << n);
|
||||
return;
|
||||
}
|
||||
offset = fromLittleEndian(&offset);
|
||||
log_debug("offset=" << offset << '(' << offset-a << ')');
|
||||
log_debug1("offset=" << offset << '(' << offset-a << ')');
|
||||
offsets.push_back(offset - a);
|
||||
}
|
||||
|
||||
@ -88,7 +105,7 @@ namespace zim
|
||||
{
|
||||
n = offsets.back() - offsets.front();
|
||||
data.resize(n);
|
||||
log_debug("read " << n << " bytes of data");
|
||||
log_debug1("read " << n << " bytes of data");
|
||||
in.read(&(data[0]), n);
|
||||
}
|
||||
}
|
||||
@ -109,12 +126,9 @@ namespace zim
|
||||
|
||||
void ClusterImpl::addBlob(const Blob& blob)
|
||||
{
|
||||
log_debug("addBlob(ptr, " << blob.size() << ')');
|
||||
log_debug1("addBlob(ptr, " << blob.size() << ')');
|
||||
data.insert(data.end(), blob.data(), blob.end());
|
||||
offsets.push_back(data.size());
|
||||
|
||||
for (unsigned n = 0; n < offsets.size(); ++n)
|
||||
log_debug("offset[" << n << "]=" << offsets[n]);
|
||||
}
|
||||
|
||||
Blob ClusterImpl::getBlob(size_type n) const
|
||||
@ -141,6 +155,8 @@ namespace zim
|
||||
|
||||
std::istream& operator>> (std::istream& in, ClusterImpl& clusterImpl)
|
||||
{
|
||||
log_trace("read cluster");
|
||||
|
||||
char c;
|
||||
in.get(c);
|
||||
clusterImpl.setCompression(static_cast<CompressionType>(c));
|
||||
@ -154,22 +170,42 @@ namespace zim
|
||||
|
||||
case zimcompZip:
|
||||
{
|
||||
#ifdef ENABLE_ZLIB
|
||||
log_debug("uncompress data (zlib)");
|
||||
zim::InflateStream is(in);
|
||||
is.exceptions(std::ios::failbit | std::ios::badbit);
|
||||
clusterImpl.read(is);
|
||||
#else
|
||||
throw std::runtime_error("zlib not enabled in this library");
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
case zimcompBzip2:
|
||||
{
|
||||
#ifdef ENABLE_BZIP2
|
||||
log_debug("uncompress data (bzip2)");
|
||||
zim::Bunzip2Stream is(in);
|
||||
is.exceptions(std::ios::failbit | std::ios::badbit);
|
||||
clusterImpl.read(is);
|
||||
#else
|
||||
throw std::runtime_error("bzip2 not enabled in this library");
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
case zimcompLzma:
|
||||
throw std::runtime_error("lzma decompression is not implemented");
|
||||
{
|
||||
#ifdef ENABLE_LZMA
|
||||
log_debug("uncompress data (lzma)");
|
||||
zim::UnlzmaStream is(in);
|
||||
is.exceptions(std::ios::failbit | std::ios::badbit);
|
||||
clusterImpl.read(is);
|
||||
#else
|
||||
throw std::runtime_error("lzma not enabled in this library");
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
log_error("invalid compression flag " << c);
|
||||
@ -187,6 +223,8 @@ namespace zim
|
||||
|
||||
std::ostream& operator<< (std::ostream& out, const ClusterImpl& clusterImpl)
|
||||
{
|
||||
log_trace("write cluster");
|
||||
|
||||
out.put(static_cast<char>(clusterImpl.getCompression()));
|
||||
|
||||
switch(clusterImpl.getCompression())
|
||||
@ -198,24 +236,65 @@ namespace zim
|
||||
|
||||
case zimcompZip:
|
||||
{
|
||||
#ifdef ENABLE_ZLIB
|
||||
log_debug("compress data (zlib)");
|
||||
zim::DeflateStream os(out);
|
||||
os.exceptions(std::ios::failbit | std::ios::badbit);
|
||||
clusterImpl.write(os);
|
||||
os.flush();
|
||||
#else
|
||||
throw std::runtime_error("zlib not enabled in this library");
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
case zimcompBzip2:
|
||||
{
|
||||
#ifdef ENABLE_BZIP2
|
||||
log_debug("compress data (bzip2)");
|
||||
zim::Bzip2Stream os(out);
|
||||
os.exceptions(std::ios::failbit | std::ios::badbit);
|
||||
clusterImpl.write(os);
|
||||
os.end();
|
||||
#else
|
||||
throw std::runtime_error("bzip2 not enabled in this library");
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
case zimcompLzma:
|
||||
throw std::runtime_error("lzma compression is not implemented");
|
||||
{
|
||||
#ifdef ENABLE_LZMA
|
||||
uint32_t lzmaPreset = 3 | LZMA_PRESET_EXTREME;
|
||||
/**
|
||||
* read lzma preset from environment
|
||||
* ZIM_LZMA_LEVEL is a number followed optionally by a
|
||||
* suffix 'e'. The number gives the preset and the suffix tells,
|
||||
* if LZMA_PRESET_EXTREME should be set.
|
||||
* e.g.:
|
||||
* ZIM_LZMA_LEVEL=9 => 9
|
||||
* ZIM_LZMA_LEVEL=3e => 3 + extreme
|
||||
*/
|
||||
const char* e = ::getenv("ZIM_LZMA_LEVEL");
|
||||
if (e)
|
||||
{
|
||||
char flag = '\0';
|
||||
std::istringstream s(e);
|
||||
s >> lzmaPreset >> flag;
|
||||
if (flag == 'e')
|
||||
lzmaPreset |= LZMA_PRESET_EXTREME;
|
||||
}
|
||||
|
||||
log_debug("compress data (lzma, " << std::hex << lzmaPreset << ")");
|
||||
zim::LzmaStream os(out, lzmaPreset);
|
||||
os.exceptions(std::ios::failbit | std::ios::badbit);
|
||||
clusterImpl.write(os);
|
||||
os.end();
|
||||
#else
|
||||
throw std::runtime_error("lzma not enabled in this library");
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
std::ostringstream msg;
|
||||
|
99
src/zimlib/src/config.h
Normal file
99
src/zimlib/src/config.h
Normal file
@ -0,0 +1,99 @@
|
||||
/* src/zimlib/src/config.h. Generated from config.h.in by configure. */
|
||||
/* src/zimlib/src/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* set zim cluster cache size to number of cached chunks */
|
||||
#define CLUSTER_CACHE_SIZE 16
|
||||
|
||||
/* set zim dirent cache size to number of cached chunks */
|
||||
#define DIRENT_CACHE_SIZE 51200
|
||||
|
||||
/* defined if bzip2 compression is enabled */
|
||||
#define ENABLE_BZIP2 1
|
||||
|
||||
/* defined if lzma compression is enabled */
|
||||
#define ENABLE_LZMA 1
|
||||
|
||||
/* defined if zlib compression is enabled */
|
||||
#define ENABLE_ZLIB 1
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#define HAVE_DLFCN_H 1
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#define HAVE_INTTYPES_H 1
|
||||
|
||||
/* Define to 1 if you have the `bz2' library (-lbz2). */
|
||||
#define HAVE_LIBBZ2 1
|
||||
|
||||
/* Define to 1 if you have the `lzma' library (-llzma). */
|
||||
#define HAVE_LIBLZMA 1
|
||||
|
||||
/* Define to 1 if you have the `microhttpd' library (-lmicrohttpd). */
|
||||
#define HAVE_LIBMICROHTTPD 1
|
||||
|
||||
/* Define to 1 if you have the `unac' library (-lunac). */
|
||||
#define HAVE_LIBUNAC 1
|
||||
|
||||
/* Define to 1 if you have the `z' library (-lz). */
|
||||
#define HAVE_LIBZ 1
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#define HAVE_MEMORY_H 1
|
||||
|
||||
/* Define to 1 if you have the `stat64' function. */
|
||||
#define HAVE_STAT64 1
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#define HAVE_STDINT_H 1
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#define HAVE_STDLIB_H 1
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#define HAVE_STRINGS_H 1
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#define HAVE_STRING_H 1
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#define HAVE_SYS_STAT_H 1
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#define HAVE_SYS_TYPES_H 1
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#define HAVE_UNISTD_H 1
|
||||
|
||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||
*/
|
||||
#define LT_OBJDIR ".libs/"
|
||||
|
||||
/* set lzma uncompress memory size to number of MB */
|
||||
#define LZMA_MEMORY_SIZE 128
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "kiwix"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT ""
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "kiwix"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "kiwix 0.9"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "kiwix"
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "0.9"
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#define STDC_HEADERS 1
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "0.9"
|
@ -6,6 +6,15 @@
|
||||
/* set zim dirent cache size to number of cached chunks */
|
||||
#undef DIRENT_CACHE_SIZE
|
||||
|
||||
/* defined if bzip2 compression is enabled */
|
||||
#undef ENABLE_BZIP2
|
||||
|
||||
/* defined if lzma compression is enabled */
|
||||
#undef ENABLE_LZMA
|
||||
|
||||
/* defined if zlib compression is enabled */
|
||||
#undef ENABLE_ZLIB
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#undef HAVE_DLFCN_H
|
||||
|
||||
@ -15,6 +24,9 @@
|
||||
/* Define to 1 if you have the `bz2' library (-lbz2). */
|
||||
#undef HAVE_LIBBZ2
|
||||
|
||||
/* Define to 1 if you have the `lzma' library (-llzma). */
|
||||
#undef HAVE_LIBLZMA
|
||||
|
||||
/* Define to 1 if you have the `microhttpd' library (-lmicrohttpd). */
|
||||
#undef HAVE_LIBMICROHTTPD
|
||||
|
||||
@ -55,6 +67,9 @@
|
||||
*/
|
||||
#undef LT_OBJDIR
|
||||
|
||||
/* set lzma uncompress memory size to number of MB */
|
||||
#undef LZMA_MEMORY_SIZE
|
||||
|
||||
/* Name of package */
|
||||
#undef PACKAGE
|
||||
|
||||
|
@ -70,8 +70,6 @@ namespace zim
|
||||
|
||||
DeflateStreamBuf::int_type DeflateStreamBuf::overflow(int_type c)
|
||||
{
|
||||
log_debug("DeflateStreamBuf::overflow");
|
||||
|
||||
// initialize input-stream
|
||||
stream.next_in = reinterpret_cast<Bytef*>(&obuffer[0]);
|
||||
stream.avail_in = pptr() - &obuffer[0];
|
||||
@ -82,9 +80,7 @@ namespace zim
|
||||
stream.avail_out = sizeof(zbuffer);
|
||||
|
||||
// deflate
|
||||
log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
|
||||
checkError(::deflate(&stream, Z_NO_FLUSH), stream);
|
||||
log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
|
||||
|
||||
// copy zbuffer to sink / consume deflated data
|
||||
std::streamsize count = sizeof(zbuffer) - stream.avail_out;
|
||||
@ -114,8 +110,6 @@ namespace zim
|
||||
|
||||
int DeflateStreamBuf::sync()
|
||||
{
|
||||
log_debug("DeflateStreamBuf::sync");
|
||||
|
||||
// initialize input-stream for
|
||||
stream.next_in = reinterpret_cast<Bytef*>(&obuffer[0]);
|
||||
stream.avail_in = pptr() - pbase();
|
||||
@ -126,9 +120,7 @@ namespace zim
|
||||
stream.next_out = (Bytef*)zbuffer;
|
||||
stream.avail_out = sizeof(zbuffer);
|
||||
|
||||
log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
|
||||
checkError(::deflate(&stream, Z_SYNC_FLUSH), stream);
|
||||
log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
|
||||
|
||||
// copy zbuffer to sink
|
||||
std::streamsize count = sizeof(zbuffer) - stream.avail_out;
|
||||
@ -157,9 +149,7 @@ namespace zim
|
||||
stream.next_out = (Bytef*)zbuffer;
|
||||
stream.avail_out = sizeof(zbuffer);
|
||||
|
||||
log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
|
||||
int ret = checkError(::deflate(&stream, Z_FINISH), stream);
|
||||
log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
|
||||
|
||||
// copy zbuffer to sink
|
||||
std::streamsize count = sizeof(zbuffer) - stream.avail_out;
|
||||
|
@ -35,33 +35,35 @@ namespace zim
|
||||
{
|
||||
union
|
||||
{
|
||||
char d[12];
|
||||
char d[16];
|
||||
long a;
|
||||
} header;
|
||||
header.d[0] = static_cast<char>(dirent.isRedirect());
|
||||
header.d[1] = static_cast<char>(dirent.getMimeType());
|
||||
header.d[2] = '\0';
|
||||
toLittleEndian(dirent.getMimeType(), header.d);
|
||||
header.d[2] = static_cast<char>(dirent.getParameter().size());
|
||||
header.d[3] = dirent.getNamespace();
|
||||
|
||||
log_debug("title=" << dirent.getTitle() << " title.size()=" << dirent.getTitle().getValue().size() << " extralen=" << dirent.getExtraLen());
|
||||
log_debug("title=" << dirent.getTitle() << " title.size()=" << dirent.getTitle().size());
|
||||
|
||||
toLittleEndian(dirent.getVersion(), header.d + 4);
|
||||
|
||||
if (dirent.isRedirect())
|
||||
{
|
||||
toLittleEndian(dirent.getRedirectIndex(), header.d + 4);
|
||||
toLittleEndian(dirent.getExtraLen(), header.d + 8);
|
||||
out.write(header.d, 10);
|
||||
toLittleEndian(dirent.getRedirectIndex(), header.d + 8);
|
||||
out.write(header.d, 12);
|
||||
}
|
||||
else
|
||||
{
|
||||
toLittleEndian(dirent.getClusterNumber(), header.d + 4);
|
||||
toLittleEndian(dirent.getBlobNumber(), header.d + 8);
|
||||
toLittleEndian(dirent.getExtraLen(), header.d + 12);
|
||||
out.write(header.d, 14);
|
||||
toLittleEndian(dirent.getClusterNumber(), header.d + 8);
|
||||
toLittleEndian(dirent.getBlobNumber(), header.d + 12);
|
||||
out.write(header.d, 16);
|
||||
}
|
||||
|
||||
out << dirent.getTitle().getValue();
|
||||
if (!dirent.getParameter().empty())
|
||||
out << '\0' << dirent.getParameter();
|
||||
out << dirent.getUrl() << '\0';
|
||||
|
||||
std::string t = dirent.getTitle();
|
||||
if (t != dirent.getUrl())
|
||||
out << t;
|
||||
out << '\0' << dirent.getParameter();
|
||||
|
||||
return out;
|
||||
}
|
||||
@ -71,34 +73,34 @@ namespace zim
|
||||
union
|
||||
{
|
||||
long a;
|
||||
char d[14];
|
||||
char d[16];
|
||||
} header;
|
||||
|
||||
in.read(header.d, 10);
|
||||
in.read(header.d, 12);
|
||||
if (in.fail())
|
||||
{
|
||||
log_warn("error reading dirent header");
|
||||
return in;
|
||||
}
|
||||
|
||||
if (in.gcount() != 10)
|
||||
if (in.gcount() != 12)
|
||||
{
|
||||
log_warn("error reading dirent header (2)");
|
||||
in.setstate(std::ios::failbit);
|
||||
return in;
|
||||
}
|
||||
|
||||
bool redirect = header.d[0];
|
||||
uint16_t mimeType = fromLittleEndian(reinterpret_cast<const uint16_t*>(header.d));
|
||||
bool redirect = (mimeType == std::numeric_limits<uint16_t>::max());
|
||||
char ns = header.d[3];
|
||||
size_type extraLen;
|
||||
size_type version = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 4));
|
||||
dirent.setVersion(version);
|
||||
|
||||
if (redirect)
|
||||
{
|
||||
log_debug("read redirect entry");
|
||||
size_type redirectIndex = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 8));
|
||||
|
||||
size_type redirectIndex = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 4));
|
||||
extraLen = fromLittleEndian(reinterpret_cast<const uint16_t*>(header.d + 8));
|
||||
|
||||
log_debug("redirectIndex=" << redirectIndex << " extraLen=" << extraLen);
|
||||
log_debug("redirectIndex=" << redirectIndex);
|
||||
|
||||
dirent.setRedirect(redirectIndex);
|
||||
}
|
||||
@ -106,7 +108,7 @@ namespace zim
|
||||
{
|
||||
log_debug("read article entry");
|
||||
|
||||
in.read(header.d + 10, 4);
|
||||
in.read(header.d + 12, 4);
|
||||
if (in.fail())
|
||||
{
|
||||
log_warn("error reading article dirent header");
|
||||
@ -116,56 +118,48 @@ namespace zim
|
||||
if (in.gcount() != 4)
|
||||
{
|
||||
log_warn("error reading article dirent header (2)");
|
||||
return in;
|
||||
in.setstate(std::ios::failbit);
|
||||
return in;
|
||||
}
|
||||
|
||||
MimeType mimeType = static_cast<MimeType>(header.d[1]);
|
||||
size_type clusterNumber = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 4));
|
||||
size_type blobNumber = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 8));
|
||||
extraLen = fromLittleEndian(reinterpret_cast<const uint16_t*>(header.d + 12));
|
||||
size_type clusterNumber = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 8));
|
||||
size_type blobNumber = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 12));
|
||||
|
||||
log_debug("mimeType=" << mimeType << " clusterNumber=" << clusterNumber << " blobNumber=" << blobNumber << " extraLen=" << extraLen);
|
||||
log_debug("mimeType=" << mimeType << " clusterNumber=" << clusterNumber << " blobNumber=" << blobNumber);
|
||||
|
||||
dirent.setArticle(mimeType, clusterNumber, blobNumber);
|
||||
}
|
||||
|
||||
char ch;
|
||||
std::string url;
|
||||
std::string title;
|
||||
std::string parameter;
|
||||
|
||||
log_debug("read title and parameters; extraLen=" << extraLen);
|
||||
log_debug("read url, title and parameters");
|
||||
|
||||
title.reserve(extraLen);
|
||||
while (extraLen && in.get(ch) && ch != '\0')
|
||||
{
|
||||
while (in.get(ch) && ch != '\0')
|
||||
url += ch;
|
||||
|
||||
while (in.get(ch) && ch != '\0')
|
||||
title += ch;
|
||||
--extraLen;
|
||||
}
|
||||
|
||||
if (in && extraLen)
|
||||
{
|
||||
--extraLen;
|
||||
parameter.reserve(extraLen);
|
||||
while (extraLen-- && in.get(ch))
|
||||
parameter += ch;
|
||||
}
|
||||
uint8_t extraLen = static_cast<uint8_t>(header.d[2]);
|
||||
while (extraLen-- > 0 && in.get(ch))
|
||||
parameter += ch;
|
||||
|
||||
dirent.setTitle(ns, QUnicodeString(title));
|
||||
dirent.setUrl(ns, url);
|
||||
dirent.setTitle(title);
|
||||
dirent.setParameter(parameter);
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
QUnicodeString Dirent::getUrl() const
|
||||
std::string Dirent::getLongUrl() const
|
||||
{
|
||||
log_trace("Dirent::getUrl()");
|
||||
log_trace("Dirent::getLongUrl()");
|
||||
log_debug("namespace=" << getNamespace() << " title=" << getTitle());
|
||||
|
||||
log_debug("namespace=" << getNamespace());
|
||||
log_debug("title=" << getTitle());
|
||||
|
||||
return QUnicodeString(std::string(1, getNamespace()) + '/' + getTitle().getValue());
|
||||
return std::string(1, getNamespace()) + '/' + getUrl();
|
||||
}
|
||||
|
||||
}
|
||||
|
58
src/zimlib/src/envvalue.cpp
Normal file
58
src/zimlib/src/envvalue.cpp
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <sstream>
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace zim
{
  /**
   * Read an unsigned number from an environment variable.
   *
   * @param env  name of the environment variable to look up
   * @param def  fallback value
   * @return the number parsed from the start of the variable's value, or
   *         def when the variable is not set or its value does not start
   *         with a number that fits into an unsigned
   */
  unsigned envValue(const char* env, unsigned def)
  {
    const char* v = ::getenv(env);
    if (v)
    {
      // Extract into a scratch variable: since C++11 a failed extraction
      // stores 0 into its target, which would silently discard the default.
      unsigned parsed = 0;
      std::istringstream s(v);
      if (s >> parsed)
        def = parsed;
    }
    return def;
  }

  /**
   * Read a memory size from an environment variable.
   *
   * The value is an unsigned number optionally followed by a unit:
   * k/K (*1024), m/M (*1024^2) or g/G (*1024^3). Any other trailing
   * character is ignored and the number is taken as is.
   *
   * @param env  name of the environment variable to look up
   * @param def  fallback value
   * @return the scaled size, clamped to the largest unsigned value when the
   *         scaled result does not fit; def when the variable is not set or
   *         its value does not start with a number that fits into an unsigned
   */
  unsigned envMemSize(const char* env, unsigned def)
  {
    const char* v = ::getenv(env);
    if (v)
    {
      unsigned value = 0;
      std::istringstream s(v);
      if (s >> value)
      {
        // The unit is optional; treat "nothing follows" as "no unit".
        char unit = '\0';
        if (!(s >> unit))
          unit = '\0';

        unsigned factor = 1;
        switch (unit)
        {
          case 'k':
          case 'K': factor = 1024; break;
          case 'm':
          case 'M': factor = 1024 * 1024; break;
          case 'g':
          case 'G': factor = 1024 * 1024 * 1024; break;
        }

        // Saturate instead of wrapping around: with a 32 bit unsigned
        // "4G" would otherwise silently become 0.
        const unsigned maxValue = ~0u;
        def = value > maxValue / factor ? maxValue : value * factor;
      }
    }
    return def;
  }
}
|
||||
|
29
src/zimlib/src/envvalue.h
Normal file
29
src/zimlib/src/envvalue.h
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_ENVVALUE_H
|
||||
#define ZIM_ENVVALUE_H
|
||||
|
||||
namespace zim
|
||||
{
|
||||
// Looks up the environment variable `env` and, when it is set, parses its
// value as an unsigned number. Returns `def` when the variable is not set.
unsigned envValue(const char* env, unsigned def);
|
||||
// Like envValue, but the number may be followed by a unit character:
// k/K multiplies by 1024, m/M by 1024*1024 and g/G by 1024*1024*1024.
// Returns `def` when the variable is not set.
unsigned envMemSize(const char* env, unsigned def);
|
||||
}
|
||||
|
||||
#endif // ZIM_ENVVALUE_H
|
@ -26,22 +26,27 @@ log_define("zim.file")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
Dirent File::getDirent(size_type idx)
|
||||
{
|
||||
log_trace("File::getDirent(" << idx << ')');
|
||||
|
||||
return impl->getDirent(idx);
|
||||
}
|
||||
|
||||
// Creates the Article object for position idx of this file.
Article File::getArticle(size_type idx) const
{
  Article article(*this, idx);
  return article;
}
|
||||
|
||||
Article File::getArticle(char ns, const QUnicodeString& title, bool collate)
|
||||
Article File::getArticle(char ns, const std::string& url)
|
||||
{
|
||||
log_trace("File::getArticle('" << ns << "', \"" << title << "\", " << collate << ')');
|
||||
std::pair<bool, const_iterator> r = findx(ns, title, collate);
|
||||
log_trace("File::getArticle('" << ns << "', \"" << url << ')');
|
||||
std::pair<bool, const_iterator> r = findx(ns, url);
|
||||
return r.first ? *r.second : Article();
|
||||
}
|
||||
|
||||
// Creates the Article object for position idx, where idx is first mapped
// through the implementation's getIndexByTitle().
Article File::getArticleByTitle(size_type idx)
{
  const size_type articleIdx = impl->getIndexByTitle(idx);
  return Article(*this, articleIdx);
}
|
||||
|
||||
// Looks an article up via findxByTitle(); yields a default constructed
// Article when the lookup reports no match.
Article File::getArticleByTitle(char ns, const std::string& title)
{
  log_trace("File::getArticleByTitle('" << ns << "', \"" << title << ')');

  std::pair<bool, const_iterator> lookup = findxByTitle(ns, title);
  if (!lookup.first)
    return Article();

  return *lookup.second;
}
|
||||
|
||||
@ -54,12 +59,15 @@ namespace zim
|
||||
// Iterator positioned at index 0 of this file.
File::const_iterator File::begin()
{
  const_iterator first(this, 0);
  return first;
}
|
||||
|
||||
// Iterator positioned at index 0, created in ArticleIterator mode.
File::const_iterator File::beginByTitle()
{
  const_iterator first(this, 0, const_iterator::ArticleIterator);
  return first;
}
|
||||
|
||||
// Iterator positioned at getCountArticles(), i.e. one past the last index.
File::const_iterator File::end()
{
  const_iterator last(this, getCountArticles());
  return last;
}
|
||||
|
||||
std::pair<bool, File::const_iterator> File::findx(char ns, const QUnicodeString& title, bool collate)
|
||||
std::pair<bool, File::const_iterator> File::findx(char ns, const std::string& url)
|
||||
{
|
||||
log_debug("find article " << ns << " \"" << title << "\", " << collate << " in file \"" << getFilename() << '"');
|
||||
log_debug("find article by url " << ns << " \"" << url << "\", in file \"" << getFilename() << '"');
|
||||
|
||||
size_type l = getNamespaceBeginOffset(ns);
|
||||
size_type u = getNamespaceEndOffset(ns);
|
||||
@ -79,8 +87,8 @@ namespace zim
|
||||
|
||||
int c = ns < d.getNamespace() ? -1
|
||||
: ns > d.getNamespace() ? 1
|
||||
: (collate ? title.compareCollate(QUnicodeString(d.getTitle()))
|
||||
: title.compare(QUnicodeString(d.getTitle())));
|
||||
: url.compare(d.getUrl());
|
||||
|
||||
if (c < 0)
|
||||
u = p;
|
||||
else if (c > 0)
|
||||
@ -93,20 +101,70 @@ namespace zim
|
||||
}
|
||||
|
||||
Dirent d = getDirent(l);
|
||||
int c = collate ? title.compareCollate(QUnicodeString(d.getTitle()))
|
||||
: title.compare(QUnicodeString(d.getTitle()));
|
||||
int c = url.compare(d.getUrl());
|
||||
|
||||
if (c == 0)
|
||||
{
|
||||
log_debug("article found after " << itcount << " iterations in file \"" << getFilename() << "\" at index " << l);
|
||||
return std::pair<bool, const_iterator>(true, const_iterator(this, l));
|
||||
}
|
||||
|
||||
log_debug("article not found after " << itcount << " iterations (\"" << d.getTitle() << "\" does not match)");
|
||||
log_debug("article not found after " << itcount << " iterations (\"" << d.getUrl() << "\" does not match)");
|
||||
return std::pair<bool, const_iterator>(false, const_iterator(this, u));
|
||||
}
|
||||
|
||||
File::const_iterator File::find(char ns, const QUnicodeString& title, bool collate)
|
||||
std::pair<bool, File::const_iterator> File::findxByTitle(char ns, const std::string& title)
|
||||
{
|
||||
return findx(ns, title, collate).second;
|
||||
log_debug("find article by title " << ns << " \"" << title << "\", in file \"" << getFilename() << '"');
|
||||
|
||||
size_type l = getNamespaceBeginOffset(ns);
|
||||
size_type u = getNamespaceEndOffset(ns);
|
||||
|
||||
if (l == u)
|
||||
{
|
||||
log_debug("namespace " << ns << " not found");
|
||||
return std::pair<bool, const_iterator>(false, end());
|
||||
}
|
||||
|
||||
unsigned itcount = 0;
|
||||
while (u - l > 1)
|
||||
{
|
||||
++itcount;
|
||||
size_type p = l + (u - l) / 2;
|
||||
Dirent d = getDirentByTitle(p);
|
||||
|
||||
int c = ns < d.getNamespace() ? -1
|
||||
: ns > d.getNamespace() ? 1
|
||||
: title.compare(d.getTitle());
|
||||
|
||||
if (c < 0)
|
||||
u = p;
|
||||
else if (c > 0)
|
||||
l = p;
|
||||
else
|
||||
{
|
||||
log_debug("article found after " << itcount << " iterations in file \"" << getFilename() << "\" at index " << p);
|
||||
return std::pair<bool, const_iterator>(true, const_iterator(this, p, const_iterator::ArticleIterator));
|
||||
}
|
||||
}
|
||||
|
||||
Dirent d = getDirentByTitle(l);
|
||||
int c = title.compare(d.getTitle());
|
||||
|
||||
if (c == 0)
|
||||
{
|
||||
log_debug("article found after " << itcount << " iterations in file \"" << getFilename() << "\" at index " << l);
|
||||
return std::pair<bool, const_iterator>(true, const_iterator(this, l, const_iterator::ArticleIterator));
|
||||
}
|
||||
|
||||
log_debug("article not found after " << itcount << " iterations (\"" << d.getTitle() << "\" does not match)");
|
||||
return std::pair<bool, const_iterator>(false, const_iterator(this, u, const_iterator::ArticleIterator));
|
||||
}
|
||||
|
||||
// Same lookup as findx(), but hands back only the iterator and drops the
// found/not-found flag.
File::const_iterator File::find(char ns, const std::string& url)
{
  std::pair<bool, const_iterator> lookup = findx(ns, url);
  return lookup.second;
}
|
||||
|
||||
// Same lookup as findxByTitle(), but hands back only the iterator and drops
// the found/not-found flag.
File::const_iterator File::findByTitle(char ns, const std::string& title)
{
  std::pair<bool, const_iterator> lookup = findxByTitle(ns, title);
  return lookup.second;
}
|
||||
|
||||
}
|
||||
|
@ -27,34 +27,36 @@ log_define("zim.file.header")
|
||||
namespace zim
|
||||
{
|
||||
const size_type Fileheader::zimMagic = 0x044d495a; // ="ZIM^d"
|
||||
const size_type Fileheader::zimVersion = 4;
|
||||
const size_type Fileheader::size = 56;
|
||||
const size_type Fileheader::zimVersion = 5;
|
||||
const size_type Fileheader::size = 72;
|
||||
|
||||
std::ostream& operator<< (std::ostream& out, const Fileheader& fh)
|
||||
{
|
||||
char header[56];
|
||||
char header[Fileheader::size];
|
||||
toLittleEndian(Fileheader::zimMagic, header);
|
||||
toLittleEndian(Fileheader::zimVersion, header + 4);
|
||||
std::copy(fh.getUuid().data, fh.getUuid().data + sizeof(Uuid), header + 8);
|
||||
toLittleEndian(fh.getArticleCount(), header + 24);
|
||||
toLittleEndian(fh.getIndexPtrPos(), header + 28);
|
||||
toLittleEndian(fh.getClusterCount(), header + 36);
|
||||
toLittleEndian(fh.getClusterPtrPos(), header + 40);
|
||||
toLittleEndian(fh.getMainPage(), header + 48);
|
||||
toLittleEndian(fh.getLayoutPage(), header + 52);
|
||||
toLittleEndian(fh.getClusterCount(), header + 28);
|
||||
toLittleEndian(fh.getUrlPtrPos(), header + 32);
|
||||
toLittleEndian(fh.getTitleIdxPos(), header + 40);
|
||||
toLittleEndian(fh.getClusterPtrPos(), header + 48);
|
||||
toLittleEndian(fh.getMimeListPos(), header + 56);
|
||||
toLittleEndian(fh.getMainPage(), header + 64);
|
||||
toLittleEndian(fh.getLayoutPage(), header + 68);
|
||||
|
||||
out.write(header, 56);
|
||||
out.write(header, Fileheader::size);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
std::istream& operator>> (std::istream& in, Fileheader& fh)
|
||||
{
|
||||
char header[56];
|
||||
in.read(header, 56);
|
||||
char header[Fileheader::size];
|
||||
in.read(header, Fileheader::size);
|
||||
if (in.fail())
|
||||
return in;
|
||||
if (in.gcount() != 56)
|
||||
if (static_cast<size_type>(in.gcount()) != Fileheader::size)
|
||||
{
|
||||
in.setstate(std::ios::failbit);
|
||||
return in;
|
||||
@ -69,8 +71,8 @@ namespace zim
|
||||
return in;
|
||||
}
|
||||
|
||||
size_type version = fromLittleEndian(reinterpret_cast<const size_type*>(header + 4));
|
||||
if (version != Fileheader::zimVersion)
|
||||
uint16_t version = fromLittleEndian(reinterpret_cast<const uint16_t*>(header + 4));
|
||||
if (version != static_cast<size_type>(Fileheader::zimVersion))
|
||||
{
|
||||
log_error("invalid zimfile version " << version << " found - "
|
||||
<< Fileheader::zimVersion << " expected");
|
||||
@ -81,17 +83,21 @@ namespace zim
|
||||
Uuid uuid;
|
||||
std::copy(header + 8, header + 24, uuid.data);
|
||||
size_type articleCount = fromLittleEndian(reinterpret_cast<const size_type*>(header + 24));
|
||||
offset_type indexPtrPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 28));
|
||||
size_type blobCount = fromLittleEndian(reinterpret_cast<const size_type*>(header + 36));
|
||||
offset_type blobPtrPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 40));
|
||||
size_type mainPage = fromLittleEndian(reinterpret_cast<const size_type*>(header + 48));
|
||||
size_type layoutPage = fromLittleEndian(reinterpret_cast<const size_type*>(header + 52));
|
||||
size_type clusterCount = fromLittleEndian(reinterpret_cast<const size_type*>(header + 28));
|
||||
offset_type urlPtrPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 32));
|
||||
offset_type titleIdxPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 40));
|
||||
offset_type clusterPtrPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 48));
|
||||
offset_type mimeListPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 56));
|
||||
size_type mainPage = fromLittleEndian(reinterpret_cast<const size_type*>(header + 64));
|
||||
size_type layoutPage = fromLittleEndian(reinterpret_cast<const size_type*>(header + 68));
|
||||
|
||||
fh.setUuid(uuid);
|
||||
fh.setArticleCount(articleCount);
|
||||
fh.setIndexPtrPos(indexPtrPos);
|
||||
fh.setClusterCount(blobCount);
|
||||
fh.setClusterPtrPos(blobPtrPos);
|
||||
fh.setClusterCount(clusterCount);
|
||||
fh.setUrlPtrPos(urlPtrPos);
|
||||
fh.setTitleIdxPos(titleIdxPos);
|
||||
fh.setClusterPtrPos(clusterPtrPos);
|
||||
fh.setMimeListPos(mimeListPos);
|
||||
fh.setMainPage(mainPage);
|
||||
fh.setLayoutPage(layoutPage);
|
||||
|
||||
|
@ -24,11 +24,11 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <sstream>
|
||||
#include <errno.h>
|
||||
#include "config.h"
|
||||
#include "log.h"
|
||||
#include "envvalue.h"
|
||||
|
||||
#ifdef WITH_CXXTOOLS
|
||||
# include <cxxtools/systemerror.h>
|
||||
@ -38,20 +38,6 @@ log_define("zim.file.impl")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
namespace
{
  // Reads an unsigned setting from the environment.
  //
  // env: name of the environment variable to look up.
  // def: value returned when the variable is unset or does not start
  //      with a parsable number.
  //
  // The value is parsed into a temporary so that a failed extraction can
  // never clobber the default (since C++11 a failed operator>> stores 0
  // into its target).
  unsigned envValue(const char* env, unsigned def)
  {
    const char* v = ::getenv(env);
    if (v)
    {
      std::istringstream s(v);
      unsigned parsed = 0;
      if (s >> parsed)
        def = parsed;
    }
    return def;
  }
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// FileImpl
|
||||
//
|
||||
@ -60,6 +46,8 @@ namespace zim
|
||||
direntCache(envValue("ZIM_DIRENTCACHE", DIRENT_CACHE_SIZE)),
|
||||
clusterCache(envValue("ZIM_CLUSTERCACHE", CLUSTER_CACHE_SIZE))
|
||||
{
|
||||
log_trace("read file \"" << fname << '"');
|
||||
|
||||
if (!zimFile)
|
||||
throw ZimFileFormatError(std::string("can't open zim-file \"") + fname + '"');
|
||||
|
||||
@ -89,55 +77,41 @@ namespace zim
|
||||
if (zimFile.fail())
|
||||
throw ZimFileFormatError("error reading zim-file header");
|
||||
|
||||
// read index offsets
|
||||
{
|
||||
size_type indexOffsetsSize = header.getArticleCount() * sizeof(OffsetsType::value_type);
|
||||
log_debug("read " << indexOffsetsSize << " bytes indexptr");
|
||||
zimFile.seekg(header.getIndexPtrPos());
|
||||
indexOffsets.resize(header.getArticleCount());
|
||||
zimFile.read(reinterpret_cast<char*>(&indexOffsets[0]), indexOffsetsSize);
|
||||
}
|
||||
|
||||
if (isBigEndian())
|
||||
{
|
||||
for (OffsetsType::iterator it = indexOffsets.begin(); it != indexOffsets.end(); ++it)
|
||||
*it = fromLittleEndian(&*it);
|
||||
}
|
||||
|
||||
// read cluster offsets
|
||||
{
|
||||
size_type clusterOffsetsSize = header.getClusterCount() * sizeof(OffsetsType::value_type);
|
||||
log_debug("read " << clusterOffsetsSize << " bytes clusterptr");
|
||||
zimFile.seekg(header.getClusterPtrPos());
|
||||
clusterOffsets.resize(header.getClusterCount());
|
||||
zimFile.read(reinterpret_cast<char*>(&clusterOffsets[0]), clusterOffsetsSize);
|
||||
}
|
||||
|
||||
if (isBigEndian())
|
||||
{
|
||||
for (OffsetsType::iterator it = clusterOffsets.begin(); it != clusterOffsets.end(); ++it)
|
||||
*it = fromLittleEndian(&*it);
|
||||
}
|
||||
|
||||
if (clusterOffsets.empty())
|
||||
if (getCountClusters() == 0)
|
||||
log_warn("no clusters found");
|
||||
else
|
||||
{
|
||||
offset_type lastOffset = clusterOffsets.back();
|
||||
offset_type lastOffset = getClusterOffset(getCountClusters() - 1);
|
||||
log_debug("last offset=" << lastOffset << " file size=" << st.st_size);
|
||||
if (lastOffset > st.st_size)
|
||||
if (lastOffset > static_cast<offset_type>(st.st_size))
|
||||
{
|
||||
log_fatal("last offset (" << lastOffset << ") larger than file size (" << st.st_size << ')');
|
||||
throw ZimFileFormatError("last cluster offset larger than file size; file corrupt");
|
||||
}
|
||||
}
|
||||
|
||||
// read mime types
|
||||
zimFile.seekg(header.getMimeListPos());
|
||||
std::string mimeType;
|
||||
while (true)
|
||||
{
|
||||
std::getline(zimFile, mimeType, '\0');
|
||||
|
||||
if (zimFile.fail())
|
||||
throw ZimFileFormatError("error reading mime type list");
|
||||
|
||||
if (mimeType.empty())
|
||||
break;
|
||||
|
||||
mimeTypes.push_back(mimeType);;
|
||||
}
|
||||
}
|
||||
|
||||
Dirent FileImpl::getDirent(size_type idx)
|
||||
{
|
||||
log_trace("FileImpl::getDirent(" << idx << ')');
|
||||
|
||||
if (idx >= indexOffsets.size())
|
||||
if (idx >= getCountArticles())
|
||||
throw ZimFileFormatError("article index out of range");
|
||||
|
||||
if (!zimFile)
|
||||
@ -155,7 +129,9 @@ namespace zim
|
||||
|
||||
log_debug("dirent " << idx << " not found in cache; hits " << direntCache.getHits() << " misses " << direntCache.getMisses() << " ratio " << direntCache.hitRatio() * 100 << "% fillfactor " << direntCache.fillfactor());
|
||||
|
||||
zimFile.seekg(indexOffsets[idx]);
|
||||
offset_type indexOffset = getOffset(header.getUrlPtrPos(), idx);
|
||||
|
||||
zimFile.seekg(indexOffset);
|
||||
if (!zimFile)
|
||||
{
|
||||
log_warn("failed to seek to directory entry");
|
||||
@ -171,18 +147,43 @@ namespace zim
|
||||
throw ZimFileFormatError("failed to read directory entry");
|
||||
}
|
||||
|
||||
log_debug("dirent read from " << indexOffsets[idx]);
|
||||
log_debug("dirent read from " << indexOffset);
|
||||
direntCache.put(idx, dirent);
|
||||
|
||||
return dirent;
|
||||
}
|
||||
|
||||
// Returns the directory entry at position idx of the title ordering.
// The title position is first translated to an article index with
// getIndexByTitle() and the entry is then fetched with getDirent().
// Throws ZimFileFormatError when idx is not below getCountArticles().
Dirent FileImpl::getDirentByTitle(size_type idx)
{
  const bool inRange = idx < getCountArticles();
  if (!inRange)
    throw ZimFileFormatError("article index out of range");

  const size_type articleIdx = getIndexByTitle(idx);
  return getDirent(articleIdx);
}
|
||||
|
||||
// Reads entry idx of the title index stored in the file and returns it.
// Entries are sizeof(size_type) bytes each, starting at the position given
// by the header; the value is converted from little endian on big endian
// hosts.
// Throws ZimFileFormatError when idx is not below getCountArticles() or
// when the stream is in a failed state after the read.
size_type FileImpl::getIndexByTitle(size_type idx)
{
  if (!(idx < getCountArticles()))
    throw ZimFileFormatError("article index out of range");

  zimFile.seekg(header.getTitleIdxPos() + sizeof(size_type) * idx);

  size_type articleIdx;
  char* const rawBytes = reinterpret_cast<char*>(&articleIdx);
  zimFile.read(rawBytes, sizeof(size_type));

  if (!zimFile)
    throw ZimFileFormatError("error reading title index");

  if (isBigEndian())
  {
    articleIdx = fromLittleEndian(&articleIdx);
  }

  return articleIdx;
}
|
||||
|
||||
Cluster FileImpl::getCluster(size_type idx)
|
||||
{
|
||||
log_trace("getCluster(" << idx << ')');
|
||||
|
||||
if (idx >= clusterOffsets.size())
|
||||
throw ZimFileFormatError("article index out of range");
|
||||
if (idx >= getCountClusters())
|
||||
throw ZimFileFormatError("cluster index out of range");
|
||||
|
||||
Cluster cluster = clusterCache.get(idx);
|
||||
if (cluster)
|
||||
@ -191,8 +192,9 @@ namespace zim
|
||||
return cluster;
|
||||
}
|
||||
|
||||
log_debug("read cluster " << idx << " from offset " << clusterOffsets[idx]);
|
||||
zimFile.seekg(clusterOffsets[idx]);
|
||||
offset_type clusterOffset = getClusterOffset(idx);
|
||||
log_debug("read cluster " << idx << " from offset " << clusterOffset);
|
||||
zimFile.seekg(clusterOffset);
|
||||
zimFile >> cluster;
|
||||
|
||||
if (zimFile.fail())
|
||||
@ -209,6 +211,21 @@ namespace zim
|
||||
return cluster;
|
||||
}
|
||||
|
||||
// Reads entry idx of an offset table that starts at file position
// ptrOffset and returns it. Entries are sizeof(offset_type) bytes each;
// the value is converted from little endian on big endian hosts.
// Throws ZimFileFormatError when the stream is in a failed state after
// the read.
offset_type FileImpl::getOffset(offset_type ptrOffset, size_type idx)
{
  zimFile.seekg(ptrOffset + sizeof(offset_type) * idx);

  offset_type value;
  char* const rawBytes = reinterpret_cast<char*>(&value);
  zimFile.read(rawBytes, sizeof(offset_type));

  if (!zimFile)
    throw ZimFileFormatError("error reading offset");

  if (isBigEndian())
  {
    value = fromLittleEndian(&value);
  }

  return value;
}
|
||||
|
||||
size_type FileImpl::getNamespaceBeginOffset(char ch)
|
||||
{
|
||||
log_trace("getNamespaceBeginOffset(" << ch << ')');
|
||||
@ -282,4 +299,16 @@ namespace zim
|
||||
return namespaces;
|
||||
}
|
||||
|
||||
const std::string& FileImpl::getMimeType(uint16_t idx) const
|
||||
{
|
||||
if (idx > mimeTypes.size())
|
||||
{
|
||||
std::ostringstream msg;
|
||||
msg << "unknown mime type code " << idx;
|
||||
throw std::runtime_error(msg.str());
|
||||
}
|
||||
|
||||
return mimeTypes[idx];
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ namespace zim
|
||||
void IndexArticle::readEntriesZ()
|
||||
{
|
||||
std::istringstream s(getParameter());
|
||||
zim::IZIntStream extra(s);
|
||||
zim::ZIntStream extra(s);
|
||||
|
||||
unsigned flagfield; // field with one bit (bits 0-3) for each cateogry
|
||||
extra.get(flagfield);
|
||||
@ -84,7 +84,7 @@ namespace zim
|
||||
log_debug("read data from offset " << offset << " len " << len);
|
||||
zim::Blob b = getData();
|
||||
ptrstream data(const_cast<char*>(b.data() + offset), const_cast<char*>(b.data() + offset + len));
|
||||
IZIntStream zdata(data);
|
||||
ZIntStream zdata(data);
|
||||
|
||||
unsigned index;
|
||||
unsigned indexOffset = 0;
|
||||
|
@ -67,8 +67,6 @@ namespace zim
|
||||
|
||||
InflateStreamBuf::int_type InflateStreamBuf::overflow(int_type c)
|
||||
{
|
||||
log_debug("InflateStreamBuf::overflow");
|
||||
|
||||
if (pptr())
|
||||
{
|
||||
// initialize input-stream for
|
||||
@ -82,10 +80,8 @@ namespace zim
|
||||
stream.next_out = (Bytef*)ibuffer();
|
||||
stream.avail_out = ibuffer_size();
|
||||
|
||||
log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
|
||||
ret = ::inflate(&stream, Z_SYNC_FLUSH);
|
||||
checkError(ret, stream);
|
||||
log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " ret=" << ret);
|
||||
|
||||
// copy zbuffer to sinksource
|
||||
std::streamsize count = ibuffer_size() - stream.avail_out;
|
||||
@ -119,14 +115,12 @@ namespace zim
|
||||
{
|
||||
// there is data already available
|
||||
// read compressed data from source into ibuffer
|
||||
log_debug("in_avail=" << sinksource->in_avail());
|
||||
stream.avail_in = sinksource->sgetn(ibuffer(), std::min(sinksource->in_avail(), ibuffer_size()));
|
||||
}
|
||||
else
|
||||
{
|
||||
// no data available
|
||||
stream.avail_in = sinksource->sgetn(ibuffer(), ibuffer_size());
|
||||
log_debug(stream.avail_in << " bytes read from source");
|
||||
if (stream.avail_in == 0)
|
||||
return traits_type::eof();
|
||||
}
|
||||
@ -138,9 +132,7 @@ namespace zim
|
||||
|
||||
// at least one character received from source - pass to decompressor
|
||||
|
||||
log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
|
||||
int ret = ::inflate(&stream, Z_SYNC_FLUSH);
|
||||
log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " ret=" << ret);
|
||||
|
||||
checkError(ret, stream);
|
||||
|
||||
|
181
src/zimlib/src/lzmastream.cpp
Normal file
181
src/zimlib/src/lzmastream.cpp
Normal file
@ -0,0 +1,181 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/lzmastream.h>
|
||||
#include "log.h"
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
|
||||
log_define("zim.lzma.compress")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
namespace
|
||||
{
|
||||
lzma_ret checkError(lzma_ret ret)
|
||||
{
|
||||
if (ret != LZMA_OK && ret != LZMA_STREAM_END)
|
||||
{
|
||||
std::ostringstream msg;
|
||||
msg << "lzma-error " << ret;
|
||||
switch (ret)
|
||||
{
|
||||
case LZMA_OK: msg << ": LZMA_OK"; break;
|
||||
case LZMA_STREAM_END: msg << ": LZMA_STREAM_END"; break;
|
||||
case LZMA_NO_CHECK: msg << ": LZMA_NO_CHECK"; break;
|
||||
case LZMA_UNSUPPORTED_CHECK: msg << ": LZMA_UNSUPPORTED_CHECK"; break;
|
||||
case LZMA_GET_CHECK: msg << ": LZMA_GET_CHECK"; break;
|
||||
case LZMA_MEM_ERROR: msg << ": LZMA_MEM_ERROR"; break;
|
||||
case LZMA_MEMLIMIT_ERROR: msg << ": LZMA_MEMLIMIT_ERROR"; break;
|
||||
case LZMA_FORMAT_ERROR: msg << ": LZMA_FORMAT_ERROR"; break;
|
||||
case LZMA_OPTIONS_ERROR: msg << ": LZMA_OPTIONS_ERROR"; break;
|
||||
case LZMA_DATA_ERROR: msg << ": LZMA_DATA_ERROR"; break;
|
||||
case LZMA_BUF_ERROR: msg << ": LZMA_BUF_ERROR"; break;
|
||||
case LZMA_PROG_ERROR: msg << ": LZMA_PROG_ERROR"; break;
|
||||
}
|
||||
log_error(msg.str());
|
||||
throw LzmaError(ret, msg.str());
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
// Sets up an lzma encoder that writes its compressed output to sink_.
// preset and check are handed to lzma_easy_encoder(); bufsize_ is the
// size of the buffer collecting uncompressed data before it is encoded.
// Throws (via checkError) when the encoder cannot be initialized.
LzmaStreamBuf::LzmaStreamBuf(std::streambuf* sink_, uint32_t preset, lzma_check check, unsigned bufsize_)
  : obuffer(bufsize_),
    sink(sink_)
{
  // the lzma stream state has to start out zeroed
  std::memset(reinterpret_cast<void*>(&stream), 0, sizeof(stream));

  const lzma_ret initResult = ::lzma_easy_encoder(&stream, preset, check);
  checkError(initResult);

  // the whole obuffer is available as put area
  setp(&obuffer[0], &obuffer[0] + obuffer.size());
}
|
||||
|
||||
// Releases the encoder state held in the lzma stream.
LzmaStreamBuf::~LzmaStreamBuf()
{
  lzma_stream* const encoder = &stream;
  ::lzma_end(encoder);
}
|
||||
|
||||
// Called when the put area is full: runs the bytes collected in obuffer
// through the encoder, forwards the compressed output to the sink and
// makes room in obuffer again. If c is not eof it is stored afterwards.
// Returns eof when the sink accepted fewer bytes than offered, 0 otherwise.
LzmaStreamBuf::int_type LzmaStreamBuf::overflow(int_type c)
{
  // everything from the start of obuffer up to the put pointer is input
  stream.next_in = reinterpret_cast<const uint8_t*>(&obuffer[0]);
  stream.avail_in = pptr() - &obuffer[0];

  // scratch space receiving the compressed bytes of this call
  char compressed[8192];
  stream.next_out = reinterpret_cast<uint8_t*>(compressed);
  stream.avail_out = sizeof(compressed);

  checkError(::lzma_code(&stream, LZMA_RUN));

  // hand whatever the encoder produced to the sink
  const std::streamsize produced = sizeof(compressed) - stream.avail_out;
  if (produced > 0)
  {
    const std::streamsize written = sink->sputn(compressed, produced);
    if (written < produced)
      return traits_type::eof();
  }

  // input the encoder did not take is kept at the front of obuffer
  if (stream.avail_in > 0)
    std::memmove(&obuffer[0], stream.next_in, stream.avail_in);

  // the put area continues right behind the kept bytes
  setp(&obuffer[0] + stream.avail_in, &obuffer[0] + obuffer.size());

  if (c != traits_type::eof())
    sputc(traits_type::to_char_type(c));

  return 0;
}
|
||||
|
||||
// This buffer has nothing to read from: every read attempt reports eof.
LzmaStreamBuf::int_type LzmaStreamBuf::underflow()
{
  const int_type nothingToRead = traits_type::eof();
  return nothingToRead;
}
|
||||
|
||||
int LzmaStreamBuf::sync()
|
||||
{
|
||||
// initialize input-stream for
|
||||
stream.next_in = reinterpret_cast<const uint8_t*>(&obuffer[0]);
|
||||
stream.avail_in = pptr() - pbase();
|
||||
char zbuffer[8192];
|
||||
while (stream.avail_in > 0)
|
||||
{
|
||||
// initialize zbuffer
|
||||
stream.next_out = (uint8_t*)zbuffer;
|
||||
stream.avail_out = sizeof(zbuffer);
|
||||
|
||||
checkError(::lzma_code(&stream, LZMA_FINISH));
|
||||
|
||||
// copy zbuffer to sink
|
||||
std::streamsize count = sizeof(zbuffer) - stream.avail_out;
|
||||
if (count > 0)
|
||||
{
|
||||
std::streamsize n = sink->sputn(zbuffer, count);
|
||||
if (n < count)
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
// reset outbuffer
|
||||
setp(&obuffer[0], &obuffer[0] + obuffer.size());
|
||||
return 0;
|
||||
}
|
||||
|
||||
int LzmaStreamBuf::end()
|
||||
{
|
||||
char zbuffer[8192];
|
||||
// initialize input-stream for
|
||||
stream.next_in = reinterpret_cast<const uint8_t*>(&obuffer[0]);
|
||||
stream.avail_in = pptr() - pbase();
|
||||
lzma_ret ret;
|
||||
do
|
||||
{
|
||||
// initialize zbuffer
|
||||
stream.next_out = (uint8_t*)zbuffer;
|
||||
stream.avail_out = sizeof(zbuffer);
|
||||
|
||||
ret = checkError(::lzma_code(&stream, LZMA_FINISH));
|
||||
|
||||
// copy zbuffer to sink
|
||||
std::streamsize count = sizeof(zbuffer) - stream.avail_out;
|
||||
if (count > 0)
|
||||
{
|
||||
std::streamsize n = sink->sputn(zbuffer, count);
|
||||
if (n < count)
|
||||
return -1;
|
||||
}
|
||||
} while (ret != LZMA_STREAM_END);
|
||||
|
||||
// reset outbuffer
|
||||
setp(&obuffer[0], &obuffer[0] + obuffer.size());
|
||||
return 0;
|
||||
}
|
||||
|
||||
void LzmaStream::end()
|
||||
{
|
||||
if (streambuf.end() != 0)
|
||||
setstate(failbit);
|
||||
}
|
||||
|
||||
}
|
@ -39,8 +39,8 @@ namespace zim
|
||||
bool operator() (const SearchResult& s1, const SearchResult& s2) const
|
||||
{
|
||||
return s1.getPriority() > s2.getPriority()
|
||||
|| s1.getPriority() == s2.getPriority()
|
||||
&& s1.getArticle().getTitle() > s2.getArticle().getTitle();
|
||||
|| (s1.getPriority() == s2.getPriority()
|
||||
&& s1.getArticle().getTitle() > s2.getArticle().getTitle());
|
||||
}
|
||||
};
|
||||
}
|
||||
@ -68,7 +68,7 @@ namespace zim
|
||||
+ Search::getWeightOccOff()
|
||||
+ Search::getWeightPlus() * itw->second.addweight;
|
||||
|
||||
std::string title = article.getTitle().toUtf8();
|
||||
std::string title = article.getTitle();
|
||||
for (std::string::iterator it = title.begin(); it != title.end(); ++it)
|
||||
*it = std::tolower(*it);
|
||||
|
||||
@ -165,8 +165,7 @@ namespace zim
|
||||
|
||||
log_debug("search for token \"" << token << '"');
|
||||
|
||||
QUnicodeString qtoken = QUnicodeString::fromUtf8(token);
|
||||
IndexArticle indexarticle = indexfile.getArticle('X', qtoken, true);
|
||||
IndexArticle indexarticle = indexfile.getArticleByTitle('X', token);
|
||||
|
||||
if (indexarticle.getTotalCount() > 0)
|
||||
{
|
||||
@ -190,7 +189,7 @@ namespace zim
|
||||
{
|
||||
log_debug("no entries found - try searching for titles");
|
||||
Results results;
|
||||
find(results, 'A', qtoken);
|
||||
find(results, 'A', token);
|
||||
for (Results::const_iterator it = results.begin(); it != results.end(); ++it)
|
||||
{
|
||||
uint32_t articleIdx = it->getArticle().getIndex();
|
||||
@ -224,13 +223,13 @@ namespace zim
|
||||
std::sort(results.begin(), results.end(), PriorityGt());
|
||||
}
|
||||
|
||||
void Search::find(Results& results, char ns, const QUnicodeString& praefix, unsigned limit)
|
||||
void Search::find(Results& results, char ns, const std::string& praefix, unsigned limit)
|
||||
{
|
||||
log_debug("find results in namespace " << ns << " for praefix \"" << praefix << '"');
|
||||
for (File::const_iterator pos = articlefile.find(ns, praefix, true);
|
||||
for (File::const_iterator pos = articlefile.findByTitle(ns, praefix);
|
||||
pos != articlefile.end() && results.size() < limit; ++pos)
|
||||
{
|
||||
if (ns != pos->getNamespace() || pos->getTitle().compareCollate(0, praefix.size(), praefix) > 0)
|
||||
if (ns != pos->getNamespace() || pos->getTitle().compare(0, praefix.size(), praefix) > 0)
|
||||
{
|
||||
log_debug("article " << pos->getNamespace() << ", \"" << pos->getTitle() << "\" does not match " << ns << ", \"" << praefix << '"');
|
||||
break;
|
||||
@ -240,17 +239,17 @@ namespace zim
|
||||
log_debug(results.size() << " articles in result");
|
||||
}
|
||||
|
||||
void Search::find(Results& results, char ns, const QUnicodeString& begin,
|
||||
const QUnicodeString& end, unsigned limit)
|
||||
void Search::find(Results& results, char ns, const std::string& begin,
|
||||
const std::string& end, unsigned limit)
|
||||
{
|
||||
log_debug("find results in namespace " << ns << " for praefix \"" << begin << '"');
|
||||
for (File::const_iterator pos = articlefile.find(ns, begin, true);
|
||||
for (File::const_iterator pos = articlefile.findByTitle(ns, begin);
|
||||
pos != articlefile.end() && results.size() < limit; ++pos)
|
||||
{
|
||||
log_debug("check " << pos->getNamespace() << '/' << pos->getTitle());
|
||||
if (pos->getNamespace() != ns || pos->getTitle().compareCollate(0, end.size(), end) > 0)
|
||||
if (pos->getNamespace() != ns || pos->getTitle().compare(end) > 0)
|
||||
{
|
||||
log_debug("article \"" << pos->getUrl() << "\" does not match");
|
||||
log_debug("article " << pos->getNamespace() << ", \"" << pos->getTitle() << "\" does not match");
|
||||
break;
|
||||
}
|
||||
results.push_back(SearchResult(*pos));
|
||||
|
163
src/zimlib/src/unlzmastream.cpp
Normal file
163
src/zimlib/src/unlzmastream.cpp
Normal file
@ -0,0 +1,163 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "zim/unlzmastream.h"
|
||||
#include "log.h"
|
||||
#include "config.h"
|
||||
#include <sstream>
|
||||
#include <cstring>
|
||||
#include "envvalue.h"
|
||||
|
||||
log_define("zim.lzma.uncompress")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
namespace
|
||||
{
|
||||
lzma_ret checkError(lzma_ret ret)
|
||||
{
|
||||
if (ret != LZMA_OK && ret != LZMA_STREAM_END)
|
||||
{
|
||||
std::ostringstream msg;
|
||||
msg << "inflate-error " << ret;
|
||||
switch (ret)
|
||||
{
|
||||
case LZMA_OK: msg << ": LZMA_OK"; break;
|
||||
case LZMA_STREAM_END: msg << ": LZMA_STREAM_END"; break;
|
||||
case LZMA_NO_CHECK: msg << ": LZMA_NO_CHECK"; break;
|
||||
case LZMA_UNSUPPORTED_CHECK: msg << ": LZMA_UNSUPPORTED_CHECK"; break;
|
||||
case LZMA_GET_CHECK: msg << ": LZMA_GET_CHECK"; break;
|
||||
case LZMA_MEM_ERROR: msg << ": LZMA_MEM_ERROR"; break;
|
||||
case LZMA_MEMLIMIT_ERROR: msg << ": LZMA_MEMLIMIT_ERROR"; break;
|
||||
case LZMA_FORMAT_ERROR: msg << ": LZMA_FORMAT_ERROR"; break;
|
||||
case LZMA_OPTIONS_ERROR: msg << ": LZMA_OPTIONS_ERROR"; break;
|
||||
case LZMA_DATA_ERROR: msg << ": LZMA_DATA_ERROR"; break;
|
||||
case LZMA_BUF_ERROR: msg << ": LZMA_BUF_ERROR"; break;
|
||||
case LZMA_PROG_ERROR: msg << ": LZMA_PROG_ERROR"; break;
|
||||
}
|
||||
log_error(msg);
|
||||
throw UnlzmaError(ret, msg.str());
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Sets up an lzma decoder working on sinksource_.
//
// sinksource_: stream buffer compressed data is read from (and
//              decompressed data is written to in overflow()).
// bufsize_:    number of char_type elements allocated for iobuffer.
//
// The decoder memory limit is taken from envMemSize() with the variable
// ZIM_LZMA_MEMORY_SIZE and a default of LZMA_MEMORY_SIZE * 1024 * 1024.
// Throws (via checkError) when the decoder cannot be initialized.
UnlzmaStreamBuf::UnlzmaStreamBuf(std::streambuf* sinksource_, unsigned bufsize_)
  : iobuffer(new char_type[bufsize_]),
    bufsize(bufsize_),
    sinksource(sinksource_)
{
  std::memset(reinterpret_cast<void*>(&stream), 0, sizeof(stream));

  try
  {
    unsigned memsize = envMemSize("ZIM_LZMA_MEMORY_SIZE", LZMA_MEMORY_SIZE * 1024 * 1024);
    checkError(
      ::lzma_stream_decoder(&stream, memsize, 0));
  }
  catch (...)
  {
    // the destructor does not run for a partially constructed object, so
    // the buffer has to be released here before passing the error on
    delete[] iobuffer;
    throw;
  }
}
|
||||
|
||||
// Releases the decoder state first and the i/o buffer afterwards.
UnlzmaStreamBuf::~UnlzmaStreamBuf()
{
  lzma_stream* const decoder = &stream;
  ::lzma_end(decoder);

  delete[] iobuffer;
}
|
||||
|
||||
// Called when the put area is full (or not yet set up): decodes the bytes
// collected in obuffer, writes the decoded data to sinksource and resets
// the put area. If c is not eof it is stored afterwards.
// Returns eof when sinksource accepted fewer bytes than offered, 0
// otherwise.
UnlzmaStreamBuf::int_type UnlzmaStreamBuf::overflow(int_type c)
{
  if (pptr())
  {
    // the bytes between pbase() and pptr() are the decoder input
    stream.next_in = reinterpret_cast<const uint8_t*>(obuffer());
    stream.avail_in = pptr() - pbase();

    for (;;)
    {
      // decoded data goes into ibuffer
      stream.next_out = reinterpret_cast<uint8_t*>(ibuffer());
      stream.avail_out = ibuffer_size();

      const lzma_ret status = ::lzma_code(&stream, LZMA_RUN);
      checkError(status);

      // pass the decoded bytes on to sinksource
      const std::streamsize decoded = ibuffer_size() - stream.avail_out;
      const std::streamsize written = sinksource->sputn(reinterpret_cast<char*>(ibuffer()), decoded);
      if (written < decoded)
        return traits_type::eof();

      // stop at the end of the stream or when the input is used up
      if (status == LZMA_STREAM_END || !(stream.avail_in > 0))
        break;
    }
  }

  // the whole obuffer is available as put area again
  setp(obuffer(), obuffer() + obuffer_size());

  if (c != traits_type::eof())
    sputc(traits_type::to_char_type(c));

  return 0;
}
|
||||
|
||||
// Refills the get area: reads compressed bytes from sinksource into
// ibuffer whenever the decoder has no input left and decodes them into
// obuffer until at least one byte is available.
// Returns eof when sinksource delivers no more data, otherwise the result
// of sgetc().
UnlzmaStreamBuf::int_type UnlzmaStreamBuf::underflow()
{
  // decoded data is written to obuffer
  stream.next_out = reinterpret_cast<uint8_t*>(obuffer());
  stream.avail_out = obuffer_size();

  for (;;)
  {
    if (stream.avail_in == 0)
    {
      // the decoder needs fresh input
      if (sinksource->in_avail() > 0)
      {
        // take what the source reports as available, limited to the
        // size of ibuffer
        stream.avail_in = sinksource->sgetn(ibuffer(), std::min(sinksource->in_avail(), ibuffer_size()));
      }
      else
      {
        // nothing reported as available - ask for a full buffer
        stream.avail_in = sinksource->sgetn(ibuffer(), ibuffer_size());
        if (stream.avail_in == 0)
          return traits_type::eof();
      }

      stream.next_in = reinterpret_cast<const uint8_t*>(ibuffer());
    }

    checkError(::lzma_code(&stream, LZMA_RUN));

    // the get area covers what has been decoded into obuffer so far
    setg(obuffer(), obuffer(), obuffer() + obuffer_size() - stream.avail_out);

    if (gptr() != egptr())
      break;
  }

  return sgetc();
}
|
||||
|
||||
int UnlzmaStreamBuf::sync()
|
||||
{
|
||||
if (pptr() && overflow(traits_type::eof()) == traits_type::eof())
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
}
|
@ -18,86 +18,85 @@
|
||||
*/
|
||||
|
||||
#include <zim/zintstream.h>
|
||||
#include <stdint.h>
|
||||
#include "log.h"
|
||||
|
||||
log_define("zim.zintstream")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
IZIntStream& IZIntStream::get(unsigned &value)
|
||||
size_type ZIntStream::get()
|
||||
{
|
||||
char ch;
|
||||
if (!stream.get(ch))
|
||||
if (!_istream->get(ch))
|
||||
return *this;
|
||||
|
||||
unsigned ret = static_cast<unsigned>(static_cast<unsigned char>(ch));
|
||||
unsigned numb = ret & 0x3;
|
||||
ret >>= 2;
|
||||
unsigned s = 6;
|
||||
while (numb && stream.get(ch))
|
||||
if (ch == '\xff')
|
||||
{
|
||||
ret += static_cast<unsigned>(
|
||||
static_cast<unsigned char>(ch)) + 1 << s;
|
||||
s += 8;
|
||||
--numb;
|
||||
log_error("invalid bytestream in int decompressor");
|
||||
_istream->setstate(std::ios::failbit);
|
||||
}
|
||||
|
||||
size_type uuvalue = static_cast<size_type>(static_cast<unsigned char>(ch));
|
||||
uint64_t ubound = 0x80;
|
||||
size_type add = 0;
|
||||
unsigned short s = 7;
|
||||
unsigned short N = 0;
|
||||
size_type mask = 0x7F;
|
||||
while (ch & 0x80)
|
||||
{
|
||||
++N;
|
||||
ch <<= 1;
|
||||
--s;
|
||||
add += ubound;
|
||||
ubound <<= 7;
|
||||
mask >>= 1;
|
||||
}
|
||||
|
||||
if (numb)
|
||||
uuvalue &= mask;
|
||||
|
||||
while (N-- && _istream->get(ch))
|
||||
{
|
||||
log_error("incomplete bytestream");
|
||||
stream.setstate(std::ios::failbit);
|
||||
uuvalue |= static_cast<size_type>(static_cast<unsigned char>(ch)) << s;
|
||||
s += 8;
|
||||
}
|
||||
|
||||
if (_istream)
|
||||
{
|
||||
uuvalue += add;
|
||||
}
|
||||
else
|
||||
value = ret;
|
||||
{
|
||||
log_error("incomplete bytestream in int decompressor");
|
||||
_istream->setstate(std::ios::failbit);
|
||||
}
|
||||
|
||||
return *this;
|
||||
return uuvalue;
|
||||
}
|
||||
|
||||
OZIntStream& OZIntStream::put(size_type value)
|
||||
ZIntStream& ZIntStream::put(size_type value)
|
||||
{
|
||||
char data[4];
|
||||
unsigned count;
|
||||
if (value < 64)
|
||||
size_type nmask = 0;
|
||||
size_type mask = 0x7F;
|
||||
uint64_t ubound = 0x80;
|
||||
unsigned short N = 0;
|
||||
|
||||
while (value >= ubound)
|
||||
{
|
||||
count = 1;
|
||||
data[0] = (value << 2);
|
||||
log_debug(value << " => " << std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[0])));
|
||||
}
|
||||
else if (value < 16384 + 64)
|
||||
{
|
||||
value -= 64;
|
||||
count = 2;
|
||||
data[0] = value << 2 | 1;
|
||||
data[1] = value >> 6;
|
||||
log_debug(value << " => " << std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[0]))
|
||||
<< std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[1])));
|
||||
}
|
||||
else if (value < 4194304 + 16384 + 64)
|
||||
{
|
||||
value -= 16384 + 64;
|
||||
count = 3;
|
||||
data[0] = value << 2 | 2;
|
||||
data[1] = value >> 6;
|
||||
data[2] = value >> 14;
|
||||
log_debug(value << " => " << std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[0]))
|
||||
<< std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[1]))
|
||||
<< std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[2])));
|
||||
}
|
||||
else
|
||||
{
|
||||
value -= 4194304 + 16384 + 64;
|
||||
count = 4;
|
||||
data[0] = value << 2 | 3;
|
||||
data[1] = value >> 6;
|
||||
data[2] = value >> 14;
|
||||
data[3] = value >> 22;
|
||||
log_debug(value << " => " << std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[0]))
|
||||
<< std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[1]))
|
||||
<< std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[2]))
|
||||
<< std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[4])));
|
||||
value -= ubound;
|
||||
ubound <<= 7;
|
||||
nmask = (nmask >> 1) | 0x80;
|
||||
mask = mask >> 1;
|
||||
++N;
|
||||
}
|
||||
|
||||
stream.write(reinterpret_cast<char*>(&data[0]), count);
|
||||
_ostream->put(static_cast<char>(nmask | (value & mask)));
|
||||
value >>= 7 - N;
|
||||
while (N--)
|
||||
{
|
||||
_ostream->put(static_cast<char>(value & 0xFF));
|
||||
value >>= 8;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user