+ new trunk zimlib

2025-09-20 18:39:16 -04:00 · 2010-01-08 23:03:06 +00:00 · 2010-01-08 23:03:06 +00:00 · 3d44961ead
commit 3d44961ead
parent 189d97b220
35 changed files with 1299 additions and 444 deletions
--- a/src/server/kiwix-serve.cpp
+++ b/src/server/kiwix-serve.cpp
@ -80,7 +80,7 @@ static int accessHandlerCallback(void *cls,
  /* Load the article from the ZIM file */
  cout << "Loading '" << title << "' in namespace '" << ns << "'" << endl;
  try {
-    std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns[0], zim::QUnicodeString(title));
+    std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns[0], title);

    /* Test if the article was found */
    if (resultPair.first == true) {
--- a/src/zimlib/include/zim/article.h
+++ b/src/zimlib/include/zim/article.h
@ -23,7 +23,6 @@
 #include <string>
 #include <zim/zim.h>
 #include <zim/dirent.h>
-#include <zim/qunicode.h>
 #include <zim/file.h>
 #include <limits>
 #include <iosfwd>
@ -50,11 +49,13 @@ namespace zim

      std::string getParameter() const        { return getDirent().getParameter(); }

-      QUnicodeString getTitle() const         { return getDirent().getTitle(); }
+      std::string getTitle() const            { return getDirent().getTitle(); }
+      std::string getUrl() const              { return getDirent().getUrl(); }
+      std::string getLongUrl() const          { return getDirent().getLongUrl(); }

-      MimeType    getLibraryMimeType() const  { return getDirent().getMimeType(); }
+      uint16_t    getLibraryMimeType() const  { return getDirent().getMimeType(); }
      const std::string&
-                  getMimeType() const;
+                  getMimeType() const         { return file.getMimeType(getLibraryMimeType()); }

      bool        isRedirect() const          { return getDirent().isRedirect(); }

@ -67,8 +68,8 @@ namespace zim

      bool operator< (const Article& a) const
        { return getNamespace() < a.getNamespace()
-              || getNamespace() == a.getNamespace()
-               && getTitle() < a.getTitle(); }
+              || (getNamespace() == a.getNamespace()
+               && getTitle() < a.getTitle()); }

      Cluster getCluster() const
        { return file.getCluster(getDirent().getClusterNumber()); }
@ -87,8 +88,6 @@ namespace zim
      File& getFile()                { return file; }
      size_type   getIndex() const   { return idx; }

-      QUnicodeString getUrl() const  { return getDirent().getUrl(); }
-
      bool good() const   { return idx != std::numeric_limits<size_type>::max(); }
  };

--- a/src/zimlib/include/zim/blob.h
+++ b/src/zimlib/include/zim/blob.h
@ -42,9 +42,9 @@ namespace zim
          { }

      Blob(ClusterImpl* cluster, const char* data, unsigned size)
-        : _cluster(cluster),
-          _data(data),
-          _size(size)
+        : _data(data),
+          _size(size),
+          _cluster(cluster)
          { }

      const char* data() const  { return _data; }
--- a/src/zimlib/include/zim/dirent.h
+++ b/src/zimlib/include/zim/dirent.h
@ -22,14 +22,16 @@

 #include <string>
 #include <zim/zim.h>
-#include <zim/qunicode.h>
+#include <limits>

 namespace zim
 {
  class Dirent
  {
      bool redirect;
-      MimeType mimeType;
+      uint16_t mimeType;
+
+      size_type version;

      size_type clusterNumber;  // only used when redirect is false
      size_type blobNumber;    // only used when redirect is false
@ -37,14 +39,18 @@ namespace zim
      size_type redirectIndex;  // only used when redirect is true

      char ns;
-      QUnicodeString title;
+      std::string title;
+      std::string url;
      std::string parameter;

    public:
      Dirent()  {}

      bool isRedirect() const                 { return redirect; }
-      MimeType getMimeType() const            { return mimeType; }
+      uint16_t getMimeType() const            { return mimeType; }
+
+      size_type getVersion() const            { return version; }
+      void setVersion(size_type v)            { version = v; }

      size_type getClusterNumber() const      { return isRedirect() ? 0 : clusterNumber; }
      size_type getBlobNumber() const         { return isRedirect() ? 0 : blobNumber; }
@ -54,26 +60,28 @@ namespace zim
      size_type getRedirectIndex() const      { return isRedirect() ? redirectIndex : 0; }

      char getNamespace() const               { return ns; }
-      const QUnicodeString& getTitle() const  { return title; }
+      const std::string& getTitle() const     { return title.empty() ? url : title; }
+      const std::string& getUrl() const       { return url; }
+      std::string getLongUrl() const;
      const std::string& getParameter() const { return parameter; }

-      uint16_t getExtraLen() const
-      {
-        uint16_t s = title.getValue().size();
-        if (!parameter.empty())
-          s += (parameter.size() + 1);
-        return s;
-      }
-
      unsigned getDirentSize() const
      {
-        return (isRedirect() ? 10 : 14) + getExtraLen();
+        unsigned ret = (isRedirect() ? 12 : 16) + url.size() + parameter.size() + 2;
+        if (title != url)
+          ret += title.size();
+        return ret;
      }

-      void setTitle(char ns_, const QUnicodeString& title_)
+      void setTitle(const std::string& title_)
+      {
+        title = title_;
+      }
+
+      void setUrl(char ns_, const std::string& url_)
      {
        ns = ns_;
-        title = title_;
+        url = url_;
      }

      void setParameter(const std::string& parameter_)
@ -85,12 +93,12 @@ namespace zim
      {
        redirect = true;
        redirectIndex = idx;
-        mimeType = zimMimeNone;
+        mimeType = std::numeric_limits<uint16_t>::max();
        clusterNumber = 0;
        blobNumber = 0;
      }

-      void setArticle(MimeType mimeType_, size_type clusterNumber_, size_type blobNumber_)
+      void setArticle(uint16_t mimeType_, size_type clusterNumber_, size_type blobNumber_)
      {
        redirect = false;
        mimeType = mimeType_;
@ -98,7 +106,6 @@ namespace zim
        blobNumber = blobNumber_;
      }

-      QUnicodeString getUrl() const;
  };

  std::ostream& operator<< (std::ostream& out, const Dirent& fh);
--- a/src/zimlib/include/zim/file.h
+++ b/src/zimlib/include/zim/file.h
@ -45,11 +45,14 @@ namespace zim
      const std::string& getFilename() const   { return impl->getFilename(); }
      const Fileheader& getFileheader() const  { return impl->getFileheader(); }

-      Dirent getDirent(size_type idx);
+      Dirent getDirent(size_type idx)          { return impl->getDirent(idx); }
+      Dirent getDirentByTitle(size_type idx)   { return impl->getDirentByTitle(idx); }
      size_type getCountArticles() const       { return impl->getCountArticles(); }

      Article getArticle(size_type idx) const;
-      Article getArticle(char ns, const QUnicodeString& title, bool collate = false);
+      Article getArticle(char ns, const std::string& url);
+      Article getArticleByTitle(size_type idx);
+      Article getArticleByTitle(char ns, const std::string& title);

      Cluster getCluster(size_type idx) const  { return impl->getCluster(idx); }
      size_type getCountClusters() const       { return impl->getCountClusters(); }
@ -72,12 +75,17 @@ namespace zim
      class const_iterator;

      const_iterator begin();
+      const_iterator beginByTitle();
      const_iterator end();
-      std::pair<bool, const_iterator> findx(char ns, const QUnicodeString& title, bool collate = false);
-      const_iterator find(char ns, const QUnicodeString& title, bool collate = false);
+      std::pair<bool, const_iterator> findxByTitle(char ns, const std::string& title);
+      std::pair<bool, const_iterator> findx(char ns, const std::string& url);
+      const_iterator findByTitle(char ns, const std::string& title);
+      const_iterator find(char ns, const std::string& url);

      bool good() const    { return impl.getPointer() != 0; }
      time_t getMTime() const   { return impl->getMTime(); }
+
+      const std::string& getMimeType(uint16_t idx) const   { return impl->getMimeType(idx); }
  };

 }
--- a/src/zimlib/include/zim/fileheader.h
+++ b/src/zimlib/include/zim/fileheader.h
@ -38,7 +38,9 @@ namespace zim
    private:
      Uuid uuid;
      size_type articleCount;
-      offset_type indexPtrPos;
+      offset_type titleIdxPos;
+      offset_type urlPtrPos;
+      offset_type mimeListPos;
      size_type blobCount;
      offset_type blobPtrPos;
      size_type mainPage;
@ -47,7 +49,8 @@ namespace zim
    public:
      Fileheader()
        : articleCount(0),
-          indexPtrPos(0),
+          titleIdxPos(0),
+          urlPtrPos(0),
          blobCount(0),
          blobPtrPos(0),
          mainPage(std::numeric_limits<size_type>::max()),
@ -60,22 +63,28 @@ namespace zim
      size_type getArticleCount() const            { return articleCount; }
      void      setArticleCount(size_type s)       { articleCount = s; }

-      offset_type getIndexPtrPos() const           { return indexPtrPos; }
-      void        setIndexPtrPos(offset_type p)    { indexPtrPos = p; }
+      offset_type getTitleIdxPos() const           { return titleIdxPos; }
+      void        setTitleIdxPos(offset_type p)    { titleIdxPos = p; }

-      size_type getClusterCount() const            { return blobCount; }
-      void      setClusterCount(size_type s)       { blobCount = s; }
+      offset_type getUrlPtrPos() const             { return urlPtrPos; }
+      void        setUrlPtrPos(offset_type p)      { urlPtrPos = p; }
+
+      offset_type getMimeListPos() const           { return mimeListPos; }
+      void        setMimeListPos(offset_type p)    { mimeListPos = p; }
+
+      size_type   getClusterCount() const          { return blobCount; }
+      void        setClusterCount(size_type s)     { blobCount = s; }

      offset_type getClusterPtrPos() const         { return blobPtrPos; }
      void        setClusterPtrPos(offset_type p)  { blobPtrPos = p; }

-      bool      hasMainPage() const                { return mainPage != std::numeric_limits<size_type>::max(); }
-      size_type getMainPage() const                { return mainPage; }
-      void      setMainPage(size_type s)           { mainPage = s; }
+      bool        hasMainPage() const              { return mainPage != std::numeric_limits<size_type>::max(); }
+      size_type   getMainPage() const              { return mainPage; }
+      void        setMainPage(size_type s)         { mainPage = s; }

-      bool      hasLayoutPage() const              { return layoutPage != std::numeric_limits<size_type>::max(); }
-      size_type getLayoutPage() const              { return layoutPage; }
-      void      setLayoutPage(size_type s)         { layoutPage = s; }
+      bool        hasLayoutPage() const            { return layoutPage != std::numeric_limits<size_type>::max(); }
+      size_type   getLayoutPage() const            { return layoutPage; }
+      void        setLayoutPage(size_type s)       { layoutPage = s; }
  };

  std::ostream& operator<< (std::ostream& out, const Fileheader& fh);
--- a/src/zimlib/include/zim/fileimpl.h
+++ b/src/zimlib/include/zim/fileimpl.h
@ -26,7 +26,6 @@
 #include <map>
 #include <zim/refcounted.h>
 #include <zim/zim.h>
-#include <zim/qunicode.h>
 #include <zim/fileheader.h>
 #include <zim/cache.h>
 #include <zim/dirent.h>
@ -40,10 +39,6 @@ namespace zim
      Fileheader header;
      std::string filename;

-      typedef std::vector<offset_type> OffsetsType;
-      OffsetsType indexOffsets;
-      OffsetsType clusterOffsets;
-
      Cache<size_type, Dirent> direntCache;
      Cache<offset_type, Cluster> clusterCache;
      typedef std::map<char, size_type> NamespaceCache;
@ -53,6 +48,11 @@ namespace zim
      std::string namespaces;
      time_t mtime;

+      typedef std::vector<std::string> MimeTypes;
+      MimeTypes mimeTypes;
+
+      offset_type getOffset(offset_type ptrOffset, size_type idx);
+
    public:
      explicit FileImpl(const char* fname);

@ -62,11 +62,13 @@ namespace zim
      const Fileheader& getFileheader() const  { return header; }

      Dirent getDirent(size_type idx);
-      size_type getCountArticles() const       { return indexOffsets.size(); }
+      Dirent getDirentByTitle(size_type idx);
+      size_type getIndexByTitle(size_type idx);
+      size_type getCountArticles() const       { return header.getArticleCount(); }

      Cluster getCluster(size_type idx);
-      size_type getCountClusters() const       { return clusterOffsets.size(); }
-      offset_type getClusterOffset(size_type idx) const    { return clusterOffsets[idx]; }
+      size_type getCountClusters() const       { return header.getClusterCount(); }
+      offset_type getClusterOffset(size_type idx)   { return getOffset(header.getClusterPtrPos(), idx); }

      size_type getNamespaceBeginOffset(char ch);
      size_type getNamespaceEndOffset(char ch);
@ -76,6 +78,7 @@ namespace zim
      std::string getNamespaces();
      bool hasNamespace(char ch);

+      const std::string& getMimeType(uint16_t idx) const;
  };

 }
--- a/src/zimlib/include/zim/fileiterator.h
+++ b/src/zimlib/include/zim/fileiterator.h
@ -27,30 +27,40 @@ namespace zim
 {
  class File::const_iterator : public std::iterator<std::bidirectional_iterator_tag, Article>
  {
+    public:
+      enum Mode {
+        UrlIterator,
+        ArticleIterator
+      };
+
+    private:
      File* file;
      size_type idx;
      mutable Article article;
+      Mode mode;

      bool is_end() const  { return file == 0 || idx >= file->getCountArticles(); }

    public:
-      explicit const_iterator(File* file_ = 0, size_type idx_ = 0)
+      explicit const_iterator(File* file_ = 0, size_type idx_ = 0, Mode mode_ = UrlIterator)
        : file(file_),
-          idx(idx_)
+          idx(idx_),
+          mode(mode_)
      { }

      size_type getIndex() const   { return idx; }
      const File& getFile() const  { return *file; }

      bool operator== (const const_iterator& it) const
-        { return is_end() && it.is_end()
-              || file == it.file && idx == it.idx; }
+        { return (is_end() && it.is_end())
+              || (file == it.file && idx == it.idx); }
      bool operator!= (const const_iterator& it) const
        { return !operator==(it); }

      const_iterator& operator++()
      {
        ++idx;
+        article = Article();
        return *this;
      }

@ -64,6 +74,7 @@ namespace zim
      const_iterator& operator--()
      {
        --idx;
+        article = Article();
        return *this;
      }

@ -74,17 +85,17 @@ namespace zim
        return *this;
      }

-      Article operator*() const
+      const Article& operator*() const
      {
-        if (article.getIndex() != idx)
-          article = file->getArticle(idx);
+        if (!article.good())
+          article = mode == UrlIterator ? file->getArticle(idx)
+                                        : file->getArticleByTitle(idx);
        return article;
      }

      pointer operator->() const
      {
-        if (article.getIndex() != idx)
-          article = file->getArticle(idx);
+        operator*();
        return &article;
      }

--- a/src/zimlib/include/zim/lzmastream.h
+++ b/src/zimlib/include/zim/lzmastream.h
@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_LZMASTREAM_H
+#define ZIM_LZMASTREAM_H
+
+#include <iostream>
+#include <stdexcept>
+#include <lzma.h>
+#include <vector>
+
+namespace zim
+{
+  class LzmaError : public std::runtime_error
+  {
+      lzma_ret ret;
+
+    public:
+      LzmaError(lzma_ret ret_, const std::string& msg)
+        : std::runtime_error(msg),
+          ret(ret_)
+          { }
+
+      lzma_ret getRetcode() const  { return ret; }
+  };
+
+  class LzmaStreamBuf : public std::streambuf
+  {
+      lzma_stream stream;
+      std::vector<char_type> obuffer;
+      std::streambuf* sink;
+
+    public:
+      LzmaStreamBuf(std::streambuf* sink_,
+        uint32_t preset = 3 | LZMA_PRESET_EXTREME,
+        lzma_check check = LZMA_CHECK_CRC32   /* LZMA_CHECK_NONE */,
+        unsigned bufsize = 8192);
+      ~LzmaStreamBuf();
+
+      /// see std::streambuf
+      int_type overflow(int_type c);
+      /// see std::streambuf
+      int_type underflow();
+      /// see std::streambuf
+      int sync();
+      /// end stream
+      int end();
+
+      void setSink(std::streambuf* sink_)   { sink = sink_; }
+  };
+
+  class LzmaStream : public std::ostream
+  {
+      LzmaStreamBuf streambuf;
+
+    public:
+      explicit LzmaStream(std::streambuf* sink,
+        uint32_t preset = 3 | LZMA_PRESET_EXTREME,
+        lzma_check check = LZMA_CHECK_CRC32   /* LZMA_CHECK_NONE */,
+        unsigned bufsize = 8192)
+        : std::ostream(0),
+          streambuf(sink, preset, check, bufsize)
+        { init(&streambuf); }
+      explicit LzmaStream(std::ostream& sink,
+        uint32_t preset = 3 | LZMA_PRESET_EXTREME,
+        lzma_check check = LZMA_CHECK_CRC32   /* LZMA_CHECK_NONE */,
+        unsigned bufsize = 8192)
+        : std::ostream(0),
+          streambuf(sink.rdbuf(), preset, check, bufsize)
+        { init(&streambuf); }
+
+      void end();
+      void setSink(std::streambuf* sink)   { streambuf.setSink(sink); }
+      void setSink(std::ostream& sink)     { streambuf.setSink(sink.rdbuf()); }
+  };
+}
+
+#endif // ZIM_LZMASTREAM_H
--- a/src/zimlib/include/zim/search.h
+++ b/src/zimlib/include/zim/search.h
@ -93,8 +93,8 @@ namespace zim
          { }

      void search(Results& results, const std::string& expr);
-      void find(Results& results, char ns, const QUnicodeString& praefix, unsigned limit = searchLimit);
-      void find(Results& results, char ns, const QUnicodeString& begin, const QUnicodeString& end, unsigned limit = searchLimit);
+      void find(Results& results, char ns, const std::string& praefix, unsigned limit = searchLimit);
+      void find(Results& results, char ns, const std::string& begin, const std::string& end, unsigned limit = searchLimit);

      static double getWeightOcc()                 { return weightOcc; }
      static double getWeightOccOff()              { return weightOccOff; }
--- a/src/zimlib/include/zim/smartptr.h
+++ b/src/zimlib/include/zim/smartptr.h
@ -31,25 +31,6 @@

 namespace zim
 {
-  template <typename objectType>
-  class InternalRefCounted
-  {
-    protected:
-      bool unlink(objectType* object)
-      {
-        if (object)
-          object->release();
-        return false;
-      }
-
-      void link(const InternalRefCounted& ptr, objectType* object)
-      {
-        if (object)
-          object->addRef();
-      }
-
-  };
-
  template <typename objectType>
  class SmartPtr
  {
--- a/src/zimlib/include/zim/template.h
+++ b/src/zimlib/include/zim/template.h
@ -32,7 +32,7 @@ namespace zim
        public:
          virtual void onData(const std::string& data) = 0;
          virtual void onToken(const std::string& token) = 0;
-          virtual void onLink(char ns, const std::string& title) = 0;
+          virtual void onLink(char ns, const std::string& url) = 0;
      };

    private:
--- a/src/zimlib/include/zim/unlzmastream.h
+++ b/src/zimlib/include/zim/unlzmastream.h
@ -0,0 +1,91 @@
+/* 
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+
+#ifndef ZIM_UNLZMASTREAM_H
+#define ZIM_UNLZMASTREAM_H
+
+#include <iostream>
+#include <stdexcept>
+#include <lzma.h>
+
+namespace zim
+{
+  class UnlzmaError : public std::runtime_error
+  {
+      lzma_ret ret;
+
+    public:
+      UnlzmaError(lzma_ret ret_, const std::string& msg)
+        : std::runtime_error(msg),
+          ret(ret_)
+          { }
+
+      lzma_ret getRetcode() const  { return ret; }
+  };
+
+  class UnlzmaStreamBuf : public std::streambuf
+  {
+      lzma_stream stream;
+      char_type* iobuffer;
+      unsigned bufsize;
+      std::streambuf* sinksource;
+
+      char_type* ibuffer()            { return iobuffer; }
+      std::streamsize ibuffer_size()  { return bufsize >> 1; }
+      char_type* obuffer()            { return iobuffer + ibuffer_size(); }
+      std::streamsize obuffer_size()  { return bufsize >> 1; }
+
+    public:
+      explicit UnlzmaStreamBuf(std::streambuf* sinksource_, unsigned bufsize = 8192);
+      ~UnlzmaStreamBuf();
+
+      /// see std::streambuf
+      int_type overflow(int_type c);
+      /// see std::streambuf
+      int_type underflow();
+      /// see std::streambuf
+      int sync();
+
+      void setSinksource(std::streambuf* sinksource_)   { sinksource = sinksource_; }
+  };
+
+  class UnlzmaStream : public std::iostream
+  {
+      UnlzmaStreamBuf streambuf;
+
+    public:
+      explicit UnlzmaStream(std::streambuf* sinksource, unsigned bufsize = 8192)
+        : std::iostream(0),
+          streambuf(sinksource, bufsize)
+        { init(&streambuf); }
+      explicit UnlzmaStream(std::ios& sinksource, unsigned bufsize = 8192)
+        : std::iostream(0),
+          streambuf(sinksource.rdbuf(), bufsize)
+        { init(&streambuf); }
+
+      void setSinksource(std::streambuf* sinksource)   { streambuf.setSinksource(sinksource); }
+      void setSinksource(std::ios& sinksource)         { streambuf.setSinksource(sinksource.rdbuf()); }
+      void setSink(std::ostream& sink)                 { streambuf.setSinksource(sink.rdbuf()); }
+      void setSource(std::istream& source)             { streambuf.setSinksource(source.rdbuf()); }
+  };
+}
+
+#endif // ZIM_UNLZMASTREAM_H
+
--- a/src/zimlib/include/zim/zim.h
+++ b/src/zimlib/include/zim/zim.h
@ -42,23 +42,7 @@ namespace zim
    zimcompLzma
  };

-  enum MimeType
-  {
-    zimMimeNone = -1,
-    zimMimeTextHtml,
-    zimMimeTextPlain,
-    zimMimeImageJpeg,
-    zimMimeImagePng,
-    zimMimeImageTiff,
-    zimMimeTextCss,
-    zimMimeImageGif,
-    zimMimeIndex,
-    zimMimeApplicationJavaScript,
-    zimMimeImageIcon,
-    zimMimeTextXml,
-    zimMimeTextHtmlTemplate
-  };
-
+  static const char MimeHtmlTemplate[] = "text/x-zim-htmltemplate";
 }

 #endif // ZIM_ZIM_H
--- a/src/zimlib/include/zim/zintstream.h
+++ b/src/zimlib/include/zim/zintstream.h
@ -24,32 +24,74 @@
 #include <iostream>
 #include <zim/zim.h>

+/*
+  ZInt implements an integer compressor and decompressor. The algorithm
+  compresses small values into fewer bytes.
+  
+  The idea is to store the number of bytes used in the first byte. The number
+  of additional bytes is given by the number of consecutive set bits counted
+  from the most significant bit. So the numbers 0-127 are encoded as is, since
+  they fit into the 7 low order bits and the cleared high order bit specifies
+  that no additional bytes are used. The numbers from 128 up to 16383 need more
+  than 7 bits, so we need to set the highest order bit to 1 and the next bit to
+  0, leaving 6 bits of actual data, which are used as the low order bits of the
+  number.
+
+  Since the numbers 0-127 are already encoded in one byte, 128 is subtracted
+  from the actual number, so a 2 byte zero is actually a 128.
+
+  The same logic continues on the 3rd, 4th, ... byte. Up to 7 additional bytes
+  are used, so the first byte must contain at least one 0.
+
+  binary                          range
+  ------------------------------- --------------------------------------------------
+  0xxx xxxx                       0 - 127
+  10xx xxxx xxxx xxxx             128 - (2^14+128-1 = 16511)
+  110x xxxx xxxx xxxx xxxx xxxx   16512 - (2^21+16512-1 = 2113663)
+  1110 xxxx xxxx xxxx xxxx xxxx xxxx xxxx
+                                  2113664 - (2^28+2113664-1 = 270549119)
+  ...
+
+*/
+
 namespace zim
 {
-  class IZIntStream
+  class ZIntStream
  {
-      std::istream& stream;
+      std::istream* _istream;
+      std::ostream* _ostream;

    public:
-      explicit IZIntStream(std::istream& stream_)
-        : stream(stream_)
-        { }
+      /// prepare ZIntStream for compression or decompression
+      explicit ZIntStream(std::iostream& iostream)
+        : _istream(&iostream),
+          _ostream(&iostream)
+          { }

-      IZIntStream& get(size_type &value);
-      operator void*() const  { return stream; }
-  };
+      /// prepare ZIntStream for decompression
+      explicit ZIntStream(std::istream& istream)
+        : _istream(&istream),
+          _ostream(0)
+          { }

-  class OZIntStream
-  {
-      std::ostream& stream;
+      /// prepare ZIntStream for compression
+      explicit ZIntStream(std::ostream& ostream)
+        : _istream(0),
+          _ostream(&ostream)
+          { }

-    public:
-      explicit OZIntStream(std::ostream& stream_)
-        : stream(stream_)
-        { }
+      /// decompresses one value from input stream and returns it
+      size_type get();

-      OZIntStream& put(size_type value);
-      operator void*() const  { return stream; }
+      ZIntStream& get(size_type &value)
+        { value = get(); return *this; }
+
+      /// compresses one value to output stream
+      ZIntStream& put(size_type value);
+
+      operator bool() const
+        { return (_istream == 0 || *_istream)
+              && (_ostream == 0 || *_ostream); }
  };

 }
--- a/src/zimlib/src/article.cpp
+++ b/src/zimlib/src/article.cpp
@ -28,50 +28,6 @@ log_define("zim.article")

 namespace zim
 {
-  const std::string& Article::getMimeType() const
-  {
-    static const std::string textHtml = "text/html; charset=UTF-8";
-    static const std::string textPlain = "text/plain";
-    static const std::string textXml = "application/xml";
-    static const std::string imageJpeg = "image/jpeg";
-    static const std::string imagePng = "image/png";
-    static const std::string imageTiff = "image/tiff";
-    static const std::string textCss = "text/css";
-    static const std::string imageGif = "image/gif";
-    static const std::string index = "text/plain";
-    static const std::string applicationJavaScript = "application/x-javascript";
-    static const std::string imageIcon = "image/x-icon";
-
-    switch (getLibraryMimeType())
-    {
-      case zimMimeTextHtml:
-      case zimMimeTextHtmlTemplate:
-        return textHtml;
-      case zimMimeTextPlain:
-        return textPlain;
-      case zimMimeImageJpeg:
-        return imageJpeg;
-      case zimMimeImagePng:
-        return imagePng;
-      case zimMimeImageTiff:
-        return imageTiff;
-      case zimMimeTextCss:
-        return textCss;
-      case zimMimeImageGif:
-        return imageGif;
-      case zimMimeIndex:
-        return index;
-      case zimMimeApplicationJavaScript:
-        return applicationJavaScript;
-      case zimMimeImageIcon:
-        return imageIcon;
-      case zimMimeTextXml:
-        return textXml;
-    }
-
-    return textHtml;
-  }
-
  size_type Article::getArticleSize() const
  {
    Dirent dirent = getDirent();
@ -108,9 +64,9 @@ namespace zim
      log_trace("onToken(\"" << token << "\")");

      if (token == "title")
-        out << article.getTitle().toUtf8();
+        out << article.getTitle();
      else if (token == "url")
-        out << article.getUrl().toUtf8();
+        out << article.getUrl();
      else if (token == "namespace")
        out << article.getNamespace();
      else if (token == "content")
@ -126,11 +82,11 @@ namespace zim
      }
    }

-    void Ev::onLink(char ns, const std::string& title)
+    void Ev::onLink(char ns, const std::string& url)
    {
      if (maxRecurse <= 0)
        throw std::runtime_error("maximum recursive limit is reached");
-      article.getFile().getArticle(ns, QUnicodeString::fromUtf8(title)).getPage(out, false, maxRecurse - 1);
+      article.getFile().getArticle(ns, url).getPage(out, false, maxRecurse - 1);
    }

  }
@ -146,7 +102,7 @@ namespace zim
  {
    log_trace("Article::getPage(" << layout << ", " << maxRecurse << ')');

-    if (getLibraryMimeType() == zimMimeTextHtml || getLibraryMimeType() == zimMimeTextHtmlTemplate)
+    if (getMimeType().compare(0, 9, "text/html") == 0 || getMimeType() == MimeHtmlTemplate)
    {
      if (layout && file.getFileheader().hasLayoutPage())
      {
@ -162,7 +118,7 @@ namespace zim

        return;
      }
-      else if (getLibraryMimeType() == zimMimeTextHtmlTemplate)
+      else if (getMimeType() == MimeHtmlTemplate)
      {
        Blob data = getData();

--- a/src/zimlib/src/articlesearch.cpp
+++ b/src/zimlib/src/articlesearch.cpp
@ -43,7 +43,7 @@ namespace zim

    for (File::const_iterator it = articleFile.begin(); it != articleFile.end(); ++it)
    {
-      std::string title = it->getTitle().toUtf8();
+      std::string title = it->getTitle();
      if (title.find(expr) != std::string::npos)
        ret.push_back(*it);
    }
--- a/src/zimlib/src/bunzip2stream.cpp
+++ b/src/zimlib/src/bunzip2stream.cpp
@ -66,8 +66,6 @@ namespace zim

  Bunzip2StreamBuf::int_type Bunzip2StreamBuf::overflow(int_type c)
  {
-    log_debug("Bunzip2StreamBuf::overflow");
-
    if (pptr())
    {
      // initialize input-stream for
@ -81,10 +79,8 @@ namespace zim
        stream.next_out = ibuffer();
        stream.avail_out = ibuffer_size();

-        log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
        ret = ::BZ2_bzDecompress(&stream);
        checkError(ret, stream);
-        log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " ret=" << ret);

        // copy ibuffer to sinksource
        std::streamsize count = ibuffer_size() - stream.avail_out;
@ -118,14 +114,12 @@ namespace zim
        {
          // there is data already available
          // read compressed data from source into ibuffer
-          log_debug("in_avail=" << sinksource->in_avail());
          stream.avail_in = sinksource->sgetn(ibuffer(), mymin(sinksource->in_avail(), ibuffer_size()));
        }
        else
        {
          // no data available
          stream.avail_in = sinksource->sgetn(ibuffer(), ibuffer_size());
-          log_debug(stream.avail_in << " bytes read from source");
          if (stream.avail_in == 0)
            return traits_type::eof();
        }
@ -137,9 +131,7 @@ namespace zim

      // at least one character received from source - pass to decompressor

-      log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
      int ret = ::BZ2_bzDecompress(&stream);
-      log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " ret=" << ret);

      checkError(ret, stream);

--- a/src/zimlib/src/bzip2stream.cpp
+++ b/src/zimlib/src/bzip2stream.cpp
@ -56,14 +56,11 @@ namespace zim

  Bzip2StreamBuf::~Bzip2StreamBuf()
  {
-    log_debug("bzCompressEnd");
    ::BZ2_bzCompressEnd(&stream);
  }

  Bzip2StreamBuf::int_type Bzip2StreamBuf::overflow(int_type c)
  {
-    log_debug("Bzip2StreamBuf::overflow");
-
    // initialize input-stream
    stream.next_in = &obuffer[0];
    stream.avail_in = pptr() - &obuffer[0];
@ -74,9 +71,7 @@ namespace zim
    stream.avail_out = sizeof(zbuffer);

    // deflate
-    log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " BZ_RUN");
-    int ret = checkError(::BZ2_bzCompress(&stream, BZ_RUN), stream);
-    log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " ret=" << ret << " total_out_lo32=" << stream.total_out_lo32);
+    checkError(::BZ2_bzCompress(&stream, BZ_RUN), stream);

    // copy zbuffer to sink / consume deflated data
    std::streamsize count = sizeof(zbuffer) - stream.avail_out;
@ -106,8 +101,6 @@ namespace zim

  int Bzip2StreamBuf::sync()
  {
-    log_debug("Bzip2StreamBuf::sync");
-
    // initialize input-stream for
    stream.next_in = &obuffer[0];
    stream.avail_in = pptr() - pbase();
@ -119,9 +112,7 @@ namespace zim
      stream.next_out = zbuffer;
      stream.avail_out = sizeof(zbuffer);

-      log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " BZ_FLUSH");
      ret = checkError(::BZ2_bzCompress(&stream, BZ_FLUSH), stream);
-      log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " ret=" << ret << " total_out_lo32=" << stream.total_out_lo32);

      // copy zbuffer to sink
      std::streamsize count = sizeof(zbuffer) - stream.avail_out;
@ -141,8 +132,6 @@ namespace zim

  int Bzip2StreamBuf::end()
  {
-    log_debug("Bzip2StreamBuf::end");
-
    char zbuffer[8192];
    // initialize input-stream for
    stream.next_in = &obuffer[0];
@ -154,9 +143,7 @@ namespace zim
      stream.next_out = zbuffer;
      stream.avail_out = sizeof(zbuffer);

-      log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " BZ_FINISH");
      ret = checkError(::BZ2_bzCompress(&stream, BZ_FINISH), stream);
-      log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " ret=" << ret << " total_out_lo32=" << stream.total_out_lo32);

      // copy zbuffer to sink
      std::streamsize count = sizeof(zbuffer) - stream.avail_out;
--- a/src/zimlib/src/cluster.cpp
+++ b/src/zimlib/src/cluster.cpp
@ -19,16 +19,33 @@

 #include <zim/cluster.h>
 #include <zim/blob.h>
+#include <zim/endian.h>
+#include <stdlib.h>
 #include <sstream>
+
 #include "log.h"
+
+#include "config.h"
+
+#ifdef ENABLE_ZLIB
 #include <zim/deflatestream.h>
 #include <zim/inflatestream.h>
+#endif
+
+#ifdef ENABLE_BZIP2
 #include <zim/bzip2stream.h>
 #include <zim/bunzip2stream.h>
-#include <zim/endian.h>
+#endif
+
+#ifdef ENABLE_LZMA
+#include <zim/lzmastream.h>
+#include <zim/unlzmastream.h>
+#endif

 log_define("zim.cluster")

+#define log_debug1(e)
+
 namespace zim
 {
  Cluster::Cluster()
@ -50,7 +67,7 @@ namespace zim

  void ClusterImpl::read(std::istream& in)
  {
-    log_debug("read");
+    log_debug1("read");

    // read first offset, which specifies, how many offsets we need to read
    size_type offset;
@ -63,7 +80,7 @@ namespace zim
    size_type n = offset / 4;
    size_type a = offset;

-    log_debug("first offset is " << offset << " n=" << n << " a=" << a);
+    log_debug1("first offset is " << offset << " n=" << n << " a=" << a);

    // read offsets
    offsets.clear();
@ -75,11 +92,11 @@ namespace zim
      in.read(reinterpret_cast<char*>(&offset), sizeof(offset));
      if (in.fail())
      {
-        log_debug("fail at " << n);
+        log_debug1("fail at " << n);
        return;
      }
      offset = fromLittleEndian(&offset);
-      log_debug("offset=" << offset << '(' << offset-a << ')');
+      log_debug1("offset=" << offset << '(' << offset-a << ')');
      offsets.push_back(offset - a);
    }

@ -88,7 +105,7 @@ namespace zim
    {
      n = offsets.back() - offsets.front();
      data.resize(n);
-      log_debug("read " << n << " bytes of data");
+      log_debug1("read " << n << " bytes of data");
      in.read(&(data[0]), n);
    }
  }
@ -109,12 +126,9 @@ namespace zim

  void ClusterImpl::addBlob(const Blob& blob)
  {
-    log_debug("addBlob(ptr, " << blob.size() << ')');
+    log_debug1("addBlob(ptr, " << blob.size() << ')');
    data.insert(data.end(), blob.data(), blob.end());
    offsets.push_back(data.size());
-
-    for (unsigned n = 0; n < offsets.size(); ++n)
-      log_debug("offset[" << n << "]=" << offsets[n]);
  }

  Blob ClusterImpl::getBlob(size_type n) const
@ -141,6 +155,8 @@ namespace zim

  std::istream& operator>> (std::istream& in, ClusterImpl& clusterImpl)
  {
+    log_trace("read cluster");
+
    char c;
    in.get(c);
    clusterImpl.setCompression(static_cast<CompressionType>(c));
@ -154,22 +170,42 @@ namespace zim

      case zimcompZip:
        {
+#ifdef ENABLE_ZLIB
          log_debug("uncompress data (zlib)");
          zim::InflateStream is(in);
+          is.exceptions(std::ios::failbit | std::ios::badbit);
          clusterImpl.read(is);
+#else
+          throw std::runtime_error("zlib not enabled in this library");
+#endif
          break;
        }

      case zimcompBzip2:
        {
+#ifdef ENABLE_BZIP2
          log_debug("uncompress data (bzip2)");
          zim::Bunzip2Stream is(in);
+          is.exceptions(std::ios::failbit | std::ios::badbit);
          clusterImpl.read(is);
+#else
+          throw std::runtime_error("bzip2 not enabled in this library");
+#endif
          break;
        }

      case zimcompLzma:
-        throw std::runtime_error("lzma decompression is not implemented");
+        {
+#ifdef ENABLE_LZMA
+          log_debug("uncompress data (lzma)");
+          zim::UnlzmaStream is(in);
+          is.exceptions(std::ios::failbit | std::ios::badbit);
+          clusterImpl.read(is);
+#else
+          throw std::runtime_error("lzma not enabled in this library");
+#endif
+          break;
+        }

      default:
        log_error("invalid compression flag " << c);
@ -187,6 +223,8 @@ namespace zim

  std::ostream& operator<< (std::ostream& out, const ClusterImpl& clusterImpl)
  {
+    log_trace("write cluster");
+
    out.put(static_cast<char>(clusterImpl.getCompression()));

    switch(clusterImpl.getCompression())
@ -198,24 +236,65 @@ namespace zim

      case zimcompZip:
        {
+#ifdef ENABLE_ZLIB
          log_debug("compress data (zlib)");
          zim::DeflateStream os(out);
+          os.exceptions(std::ios::failbit | std::ios::badbit);
          clusterImpl.write(os);
          os.flush();
+#else
+          throw std::runtime_error("zlib not enabled in this library");
+#endif
          break;
        }

      case zimcompBzip2:
        {
+#ifdef ENABLE_BZIP2
          log_debug("compress data (bzip2)");
          zim::Bzip2Stream os(out);
+          os.exceptions(std::ios::failbit | std::ios::badbit);
          clusterImpl.write(os);
          os.end();
+#else
+          throw std::runtime_error("bzip2 not enabled in this library");
+#endif
          break;
        }

      case zimcompLzma:
-        throw std::runtime_error("lzma compression is not implemented");
+        {
+#ifdef ENABLE_LZMA
+          uint32_t lzmaPreset = 3 | LZMA_PRESET_EXTREME;
+          /**
+           * read lzma preset from environment
+           * ZIM_LZMA_LEVEL is a number followed optionally by a
+           * suffix 'e'. The number gives the preset and the suffix tells,
+           * if LZMA_PRESET_EXTREME should be set.
+           * e.g.:
+           *   ZIM_LZMA_LEVEL=9   => 9
+           *   ZIM_LZMA_LEVEL=3e  => 3 + extreme
+           */
+          const char* e = ::getenv("ZIM_LZMA_LEVEL");
+          if (e)
+          {
+            char flag = '\0';
+            std::istringstream s(e);
+            s >> lzmaPreset >> flag;
+            if (flag == 'e')
+              lzmaPreset |= LZMA_PRESET_EXTREME;
+          }
+
+          log_debug("compress data (lzma, " << std::hex << lzmaPreset << ")");
+          zim::LzmaStream os(out, lzmaPreset);
+          os.exceptions(std::ios::failbit | std::ios::badbit);
+          clusterImpl.write(os);
+          os.end();
+#else
+          throw std::runtime_error("lzma not enabled in this library");
+#endif
+          break;
+        }

      default:
        std::ostringstream msg;
--- a/src/zimlib/src/config.h
+++ b/src/zimlib/src/config.h
@ -0,0 +1,99 @@
+/* src/zimlib/src/config.h.  Generated from config.h.in by configure.  */
+/* src/zimlib/src/config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* set zim cluster cache size to number of cached chunks */
+#define CLUSTER_CACHE_SIZE 16
+
+/* set zim dirent cache size to number of cached chunks */
+#define DIRENT_CACHE_SIZE 51200
+
+/* defined if bzip2 compression is enabled */
+#define ENABLE_BZIP2 1
+
+/* defined if lzma compression is enabled */
+#define ENABLE_LZMA 1
+
+/* defined if zlib compression is enabled */
+#define ENABLE_ZLIB 1
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the `bz2' library (-lbz2). */
+#define HAVE_LIBBZ2 1
+
+/* Define to 1 if you have the `lzma' library (-llzma). */
+#define HAVE_LIBLZMA 1
+
+/* Define to 1 if you have the `microhttpd' library (-lmicrohttpd). */
+#define HAVE_LIBMICROHTTPD 1
+
+/* Define to 1 if you have the `unac' library (-lunac). */
+#define HAVE_LIBUNAC 1
+
+/* Define to 1 if you have the `z' library (-lz). */
+#define HAVE_LIBZ 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the `stat64' function. */
+#define HAVE_STAT64 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#define LT_OBJDIR ".libs/"
+
+/* set lzma uncompress memory size to number of MB */
+#define LZMA_MEMORY_SIZE 128
+
+/* Name of package */
+#define PACKAGE "kiwix"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT ""
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "kiwix"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "kiwix 0.9"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "kiwix"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "0.9"
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Version number of package */
+#define VERSION "0.9"
--- a/src/zimlib/src/config.h.in
+++ b/src/zimlib/src/config.h.in
@ -6,6 +6,15 @@
 /* set zim dirent cache size to number of cached chunks */
 #undef DIRENT_CACHE_SIZE

+/* defined if bzip2 compression is enabled */
+#undef ENABLE_BZIP2
+
+/* defined if lzma compression is enabled */
+#undef ENABLE_LZMA
+
+/* defined if zlib compression is enabled */
+#undef ENABLE_ZLIB
+
 /* Define to 1 if you have the <dlfcn.h> header file. */
 #undef HAVE_DLFCN_H

@ -15,6 +24,9 @@
 /* Define to 1 if you have the `bz2' library (-lbz2). */
 #undef HAVE_LIBBZ2

+/* Define to 1 if you have the `lzma' library (-llzma). */
+#undef HAVE_LIBLZMA
+
 /* Define to 1 if you have the `microhttpd' library (-lmicrohttpd). */
 #undef HAVE_LIBMICROHTTPD

@ -55,6 +67,9 @@
   */
 #undef LT_OBJDIR

+/* set lzma uncompress memory size to number of MB */
+#undef LZMA_MEMORY_SIZE
+
 /* Name of package */
 #undef PACKAGE

--- a/src/zimlib/src/deflatestream.cpp
+++ b/src/zimlib/src/deflatestream.cpp
@ -70,8 +70,6 @@ namespace zim

  DeflateStreamBuf::int_type DeflateStreamBuf::overflow(int_type c)
  {
-    log_debug("DeflateStreamBuf::overflow");
-
    // initialize input-stream
    stream.next_in = reinterpret_cast<Bytef*>(&obuffer[0]);
    stream.avail_in = pptr() - &obuffer[0];
@ -82,9 +80,7 @@ namespace zim
    stream.avail_out = sizeof(zbuffer);

    // deflate
-    log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
    checkError(::deflate(&stream, Z_NO_FLUSH), stream);
-    log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);

    // copy zbuffer to sink / consume deflated data
    std::streamsize count = sizeof(zbuffer) - stream.avail_out;
@ -114,8 +110,6 @@ namespace zim

  int DeflateStreamBuf::sync()
  {
-    log_debug("DeflateStreamBuf::sync");
-
    // initialize input-stream for
    stream.next_in = reinterpret_cast<Bytef*>(&obuffer[0]);
    stream.avail_in = pptr() - pbase();
@ -126,9 +120,7 @@ namespace zim
      stream.next_out = (Bytef*)zbuffer;
      stream.avail_out = sizeof(zbuffer);

-      log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
      checkError(::deflate(&stream, Z_SYNC_FLUSH), stream);
-      log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);

      // copy zbuffer to sink
      std::streamsize count = sizeof(zbuffer) - stream.avail_out;
@ -157,9 +149,7 @@ namespace zim
      stream.next_out = (Bytef*)zbuffer;
      stream.avail_out = sizeof(zbuffer);

-      log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
      int ret = checkError(::deflate(&stream, Z_FINISH), stream);
-      log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);

      // copy zbuffer to sink
      std::streamsize count = sizeof(zbuffer) - stream.avail_out;
--- a/src/zimlib/src/dirent.cpp
+++ b/src/zimlib/src/dirent.cpp
@ -35,33 +35,35 @@ namespace zim
  {
    union
    {
-      char d[12];
+      char d[16];
      long a;
    } header;
-    header.d[0] = static_cast<char>(dirent.isRedirect());
-    header.d[1] = static_cast<char>(dirent.getMimeType());
-    header.d[2] = '\0';
+    toLittleEndian(dirent.getMimeType(), header.d);
+    header.d[2] = static_cast<char>(dirent.getParameter().size());
    header.d[3] = dirent.getNamespace();

-    log_debug("title=" << dirent.getTitle() << " title.size()=" << dirent.getTitle().getValue().size() << " extralen=" << dirent.getExtraLen());
+    log_debug("title=" << dirent.getTitle() << " title.size()=" << dirent.getTitle().size());
+
+    toLittleEndian(dirent.getVersion(), header.d + 4);

    if (dirent.isRedirect())
    {
-      toLittleEndian(dirent.getRedirectIndex(), header.d + 4);
-      toLittleEndian(dirent.getExtraLen(), header.d + 8);
-      out.write(header.d, 10);
+      toLittleEndian(dirent.getRedirectIndex(), header.d + 8);
+      out.write(header.d, 12);
    }
    else
    {
-      toLittleEndian(dirent.getClusterNumber(), header.d + 4);
-      toLittleEndian(dirent.getBlobNumber(), header.d + 8);
-      toLittleEndian(dirent.getExtraLen(), header.d + 12);
-      out.write(header.d, 14);
+      toLittleEndian(dirent.getClusterNumber(), header.d + 8);
+      toLittleEndian(dirent.getBlobNumber(), header.d + 12);
+      out.write(header.d, 16);
    }

-    out << dirent.getTitle().getValue();
-    if (!dirent.getParameter().empty())
-      out << '\0' << dirent.getParameter();
+    out << dirent.getUrl() << '\0';
+
+    std::string t = dirent.getTitle();
+    if (t != dirent.getUrl())
+      out << t;
+    out << '\0' << dirent.getParameter();

    return out;
  }
@ -71,34 +73,34 @@ namespace zim
    union
    {
      long a;
-      char d[14];
+      char d[16];
    } header;

-    in.read(header.d, 10);
+    in.read(header.d, 12);
    if (in.fail())
    {
      log_warn("error reading dirent header");
      return in;
    }

-    if (in.gcount() != 10)
+    if (in.gcount() != 12)
    {
      log_warn("error reading dirent header (2)");
      in.setstate(std::ios::failbit);
      return in;
    }

-    bool redirect = header.d[0];
+    uint16_t mimeType = fromLittleEndian(reinterpret_cast<const uint16_t*>(header.d));
+    bool redirect = (mimeType == std::numeric_limits<uint16_t>::max());
    char ns = header.d[3];
-    size_type extraLen;
+    size_type version = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 4));
+    dirent.setVersion(version);
+
    if (redirect)
    {
-      log_debug("read redirect entry");
+      size_type redirectIndex = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 8));

-      size_type redirectIndex = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 4));
-      extraLen = fromLittleEndian(reinterpret_cast<const uint16_t*>(header.d + 8));
-
-      log_debug("redirectIndex=" << redirectIndex << " extraLen=" << extraLen);
+      log_debug("redirectIndex=" << redirectIndex);

      dirent.setRedirect(redirectIndex);
    }
@ -106,7 +108,7 @@ namespace zim
    {
      log_debug("read article entry");

-      in.read(header.d + 10, 4);
+      in.read(header.d + 12, 4);
      if (in.fail())
      {
        log_warn("error reading article dirent header");
@ -116,56 +118,48 @@ namespace zim
      if (in.gcount() != 4)
      {
        log_warn("error reading article dirent header (2)");
-        return in;
        in.setstate(std::ios::failbit);
        return in;
      }

-      MimeType mimeType = static_cast<MimeType>(header.d[1]);
-      size_type clusterNumber = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 4));
-      size_type blobNumber = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 8));
-      extraLen = fromLittleEndian(reinterpret_cast<const uint16_t*>(header.d + 12));
+      size_type clusterNumber = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 8));
+      size_type blobNumber = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 12));

-      log_debug("mimeType=" << mimeType << " clusterNumber=" << clusterNumber << " blobNumber=" << blobNumber << " extraLen=" << extraLen);
+      log_debug("mimeType=" << mimeType << " clusterNumber=" << clusterNumber << " blobNumber=" << blobNumber);

      dirent.setArticle(mimeType, clusterNumber, blobNumber);
    }
    
    char ch;
+    std::string url;
    std::string title;
    std::string parameter;

-    log_debug("read title and parameters; extraLen=" << extraLen);
+    log_debug("read url, title and parameters");

-    title.reserve(extraLen);
-    while (extraLen && in.get(ch) && ch != '\0')
-    {
+    while (in.get(ch) && ch != '\0')
+      url += ch;
+
+    while (in.get(ch) && ch != '\0')
      title += ch;
-      --extraLen;
-    }

-    if (in && extraLen)
-    {
-      --extraLen;
-      parameter.reserve(extraLen);
-      while (extraLen-- && in.get(ch))
-        parameter += ch;
-    }
+    uint8_t extraLen = static_cast<uint8_t>(header.d[2]);
+    while (extraLen-- > 0 && in.get(ch))
+      parameter += ch;

-    dirent.setTitle(ns, QUnicodeString(title));
+    dirent.setUrl(ns, url);
+    dirent.setTitle(title);
    dirent.setParameter(parameter);

    return in;
  }

-  QUnicodeString Dirent::getUrl() const
+  std::string Dirent::getLongUrl() const
  {
-    log_trace("Dirent::getUrl()");
+    log_trace("Dirent::getLongUrl()");
+    log_debug("namespace=" << getNamespace() << " title=" << getTitle());

-    log_debug("namespace=" << getNamespace());
-    log_debug("title=" << getTitle());
-
-    return QUnicodeString(std::string(1, getNamespace()) + '/' + getTitle().getValue());
+    return std::string(1, getNamespace()) + '/' + getUrl();
  }

 }
--- a/src/zimlib/src/envvalue.cpp
+++ b/src/zimlib/src/envvalue.cpp
@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <sstream>
+#include <stdlib.h>
+
+namespace zim
+{
+  /**
+   * Reads an unsigned number from the environment variable named by env.
+   *
+   * Returns def when the variable is not set or when its value does not
+   * start with a number that can be extracted as unsigned.
+   */
+  unsigned envValue(const char* env, unsigned def)
+  {
+    const char* v = ::getenv(env);
+    if (!v)
+      return def;
+
+    // Extract into a temporary instead of def: a failed extraction may
+    // overwrite its target (it stores 0 since C++11), which would silently
+    // replace the caller's default.
+    unsigned value = 0;
+    std::istringstream s(v);
+    if (s >> value)
+      return value;
+
+    return def;
+  }
+
+  /**
+   * Reads a memory size from the environment variable named by env.
+   *
+   * The value is an unsigned number optionally followed by a unit:
+   * k/K (*1024), m/M (*1024*1024) or g/G (*1024*1024*1024). Any other
+   * trailing character is ignored and the number is returned unscaled.
+   *
+   * Returns def when the variable is not set or does not start with a
+   * number. A result too large for unsigned is clamped to the largest
+   * unsigned value instead of wrapping around.
+   */
+  unsigned envMemSize(const char* env, unsigned def)
+  {
+    const char* v = ::getenv(env);
+    if (!v)
+      return def;
+
+    // Extract into a temporary instead of def: a failed extraction may
+    // overwrite its target (it stores 0 since C++11), which would silently
+    // replace the caller's default.
+    unsigned value = 0;
+    std::istringstream s(v);
+    if (!(s >> value))
+      return def;
+
+    // The unit is optional; unit keeps '\0' when nothing follows the number.
+    char unit = '\0';
+    s >> unit;
+
+    unsigned factor = 1;
+    switch (unit)
+    {
+      case 'k':
+      case 'K': factor = 1024u; break;
+      case 'm':
+      case 'M': factor = 1024u * 1024u; break;
+      case 'g':
+      case 'G': factor = 1024u * 1024u * 1024u; break;
+    }
+
+    // Saturate rather than wrap: e.g. "4G" must not turn into 0 when
+    // unsigned is 32 bits wide.
+    const unsigned maxValue = ~0u;
+    if (value > maxValue / factor)
+      return maxValue;
+
+    return value * factor;
+  }
+}
+
--- a/src/zimlib/src/envvalue.h
+++ b/src/zimlib/src/envvalue.h
@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_ENVVALUE_H
+#define ZIM_ENVVALUE_H
+
+namespace zim
+{
+  // Reads the environment variable named by env as an unsigned number.
+  // Returns def when the variable is not set.
+  unsigned envValue(const char* env, unsigned def);
+  // Reads the environment variable named by env as an unsigned number that
+  // may be followed by a unit: k/K (*1024), m/M (*1024*1024) or
+  // g/G (*1024*1024*1024). Returns def when the variable is not set.
+  unsigned envMemSize(const char* env, unsigned def);
+}
+
+#endif // ZIM_ENVVALUE_H
--- a/src/zimlib/src/file.cpp
+++ b/src/zimlib/src/file.cpp
@ -26,22 +26,27 @@ log_define("zim.file")

 namespace zim
 {
-  Dirent File::getDirent(size_type idx)
-  {
-    log_trace("File::getDirent(" << idx << ')');
-
-    return impl->getDirent(idx);
-  }
-
  Article File::getArticle(size_type idx) const
  {
    return Article(*this, idx);
  }

-  Article File::getArticle(char ns, const QUnicodeString& title, bool collate)
+  Article File::getArticle(char ns, const std::string& url)
  {
-    log_trace("File::getArticle('" << ns << "', \"" << title << "\", " << collate << ')');
-    std::pair<bool, const_iterator> r = findx(ns, title, collate);
+    log_trace("File::getArticle('" << ns << "', \"" << url << ')');
+    std::pair<bool, const_iterator> r = findx(ns, url);
+    return r.first ? *r.second : Article();
+  }
+
+  Article File::getArticleByTitle(size_type idx)
+  {
+    return Article(*this, impl->getIndexByTitle(idx));
+  }
+
+  Article File::getArticleByTitle(char ns, const std::string& title)
+  {
+    log_trace("File::getArticleByTitle('" << ns << "', \"" << title << ')');
+    std::pair<bool, const_iterator> r = findxByTitle(ns, title);
    return r.first ? *r.second : Article();
  }

@ -54,12 +59,15 @@ namespace zim
  File::const_iterator File::begin()
  { return const_iterator(this, 0); }

+  File::const_iterator File::beginByTitle()
+  { return const_iterator(this, 0, const_iterator::ArticleIterator); }
+
  File::const_iterator File::end()
  { return const_iterator(this, getCountArticles()); }

-  std::pair<bool, File::const_iterator> File::findx(char ns, const QUnicodeString& title, bool collate)
+  std::pair<bool, File::const_iterator> File::findx(char ns, const std::string& url)
  {
-    log_debug("find article " << ns << " \"" << title << "\", " << collate << " in file \"" << getFilename() << '"');
+    log_debug("find article by url " << ns << " \"" << url << "\",  in file \"" << getFilename() << '"');

    size_type l = getNamespaceBeginOffset(ns);
    size_type u = getNamespaceEndOffset(ns);
@ -79,8 +87,8 @@ namespace zim

      int c = ns < d.getNamespace() ? -1
            : ns > d.getNamespace() ? 1
-            : (collate ? title.compareCollate(QUnicodeString(d.getTitle()))
-                       : title.compare(QUnicodeString(d.getTitle())));
+            : url.compare(d.getUrl());
+
      if (c < 0)
        u = p;
      else if (c > 0)
@ -93,20 +101,70 @@ namespace zim
    }

    Dirent d = getDirent(l);
-    int c = collate ? title.compareCollate(QUnicodeString(d.getTitle()))
-                    : title.compare(QUnicodeString(d.getTitle()));
+    int c = url.compare(d.getUrl());
+
    if (c == 0)
    {
      log_debug("article found after " << itcount << " iterations in file \"" << getFilename() << "\" at index " << l);
      return std::pair<bool, const_iterator>(true, const_iterator(this, l));
    }

-    log_debug("article not found after " << itcount << " iterations (\"" << d.getTitle() << "\" does not match)");
+    log_debug("article not found after " << itcount << " iterations (\"" << d.getUrl() << "\" does not match)");
    return std::pair<bool, const_iterator>(false, const_iterator(this, u));
  }

-  File::const_iterator File::find(char ns, const QUnicodeString& title, bool collate)
+  std::pair<bool, File::const_iterator> File::findxByTitle(char ns, const std::string& title)
  {
-    return findx(ns, title, collate).second;
+    log_debug("find article by title " << ns << " \"" << title << "\", in file \"" << getFilename() << '"');
+
+    size_type l = getNamespaceBeginOffset(ns);
+    size_type u = getNamespaceEndOffset(ns);
+
+    if (l == u)
+    {
+      log_debug("namespace " << ns << " not found");
+      return std::pair<bool, const_iterator>(false, end());
+    }
+
+    unsigned itcount = 0;
+    while (u - l > 1)
+    {
+      ++itcount;
+      size_type p = l + (u - l) / 2;
+      Dirent d = getDirentByTitle(p);
+
+      int c = ns < d.getNamespace() ? -1
+            : ns > d.getNamespace() ? 1
+            : title.compare(d.getTitle());
+
+      if (c < 0)
+        u = p;
+      else if (c > 0)
+        l = p;
+      else
+      {
+        log_debug("article found after " << itcount << " iterations in file \"" << getFilename() << "\" at index " << p);
+        return std::pair<bool, const_iterator>(true, const_iterator(this, p, const_iterator::ArticleIterator));
+      }
+    }
+
+    Dirent d = getDirentByTitle(l);
+    int c = title.compare(d.getTitle());
+
+    if (c == 0)
+    {
+      log_debug("article found after " << itcount << " iterations in file \"" << getFilename() << "\" at index " << l);
+      return std::pair<bool, const_iterator>(true, const_iterator(this, l, const_iterator::ArticleIterator));
+    }
+
+    log_debug("article not found after " << itcount << " iterations (\"" << d.getTitle() << "\" does not match)");
+    return std::pair<bool, const_iterator>(false, const_iterator(this, u, const_iterator::ArticleIterator));
  }
+
+  File::const_iterator File::find(char ns, const std::string& url)
+  { return findx(ns, url).second; }
+
+  File::const_iterator File::findByTitle(char ns, const std::string& title)
+  { return findxByTitle(ns, title).second; }
+
 }
--- a/src/zimlib/src/fileheader.cpp
+++ b/src/zimlib/src/fileheader.cpp
@ -27,34 +27,36 @@ log_define("zim.file.header")
 namespace zim
 {
  const size_type Fileheader::zimMagic = 0x044d495a; // ="ZIM^d"
-  const size_type Fileheader::zimVersion = 4;
-  const size_type Fileheader::size = 56;
+  const size_type Fileheader::zimVersion = 5;
+  const size_type Fileheader::size = 72;

  std::ostream& operator<< (std::ostream& out, const Fileheader& fh)
  {
-    char header[56];
+    char header[Fileheader::size];
    toLittleEndian(Fileheader::zimMagic, header);
    toLittleEndian(Fileheader::zimVersion, header + 4);
    std::copy(fh.getUuid().data, fh.getUuid().data + sizeof(Uuid), header + 8);
    toLittleEndian(fh.getArticleCount(), header + 24);
-    toLittleEndian(fh.getIndexPtrPos(), header + 28);
-    toLittleEndian(fh.getClusterCount(), header + 36);
-    toLittleEndian(fh.getClusterPtrPos(), header + 40);
-    toLittleEndian(fh.getMainPage(), header + 48);
-    toLittleEndian(fh.getLayoutPage(), header + 52);
+    toLittleEndian(fh.getClusterCount(), header + 28);
+    toLittleEndian(fh.getUrlPtrPos(), header + 32);
+    toLittleEndian(fh.getTitleIdxPos(), header + 40);
+    toLittleEndian(fh.getClusterPtrPos(), header + 48);
+    toLittleEndian(fh.getMimeListPos(), header + 56);
+    toLittleEndian(fh.getMainPage(), header + 64);
+    toLittleEndian(fh.getLayoutPage(), header + 68);

-    out.write(header, 56);
+    out.write(header, Fileheader::size);

    return out;
  }

  std::istream& operator>> (std::istream& in, Fileheader& fh)
  {
-    char header[56];
-    in.read(header, 56);
+    char header[Fileheader::size];
+    in.read(header, Fileheader::size);
    if (in.fail())
      return in;
-    if (in.gcount() != 56)
+    if (static_cast<size_type>(in.gcount()) != Fileheader::size)
    {
      in.setstate(std::ios::failbit);
      return in;
@ -69,8 +71,8 @@ namespace zim
      return in;
    }

-    size_type version = fromLittleEndian(reinterpret_cast<const size_type*>(header + 4));
-    if (version != Fileheader::zimVersion)
+    uint16_t version = fromLittleEndian(reinterpret_cast<const uint16_t*>(header + 4));
+    if (version != static_cast<size_type>(Fileheader::zimVersion))
    {
      log_error("invalid zimfile version " << version << " found - "
          << Fileheader::zimVersion << " expected");
@ -81,17 +83,21 @@ namespace zim
    Uuid uuid;
    std::copy(header + 8, header + 24, uuid.data);
    size_type articleCount = fromLittleEndian(reinterpret_cast<const size_type*>(header + 24));
-    offset_type indexPtrPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 28));
-    size_type blobCount = fromLittleEndian(reinterpret_cast<const size_type*>(header + 36));
-    offset_type blobPtrPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 40));
-    size_type mainPage = fromLittleEndian(reinterpret_cast<const size_type*>(header + 48));
-    size_type layoutPage = fromLittleEndian(reinterpret_cast<const size_type*>(header + 52));
+    size_type clusterCount = fromLittleEndian(reinterpret_cast<const size_type*>(header + 28));
+    offset_type urlPtrPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 32));
+    offset_type titleIdxPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 40));
+    offset_type clusterPtrPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 48));
+    offset_type mimeListPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 56));
+    size_type mainPage = fromLittleEndian(reinterpret_cast<const size_type*>(header + 64));
+    size_type layoutPage = fromLittleEndian(reinterpret_cast<const size_type*>(header + 68));

    fh.setUuid(uuid);
    fh.setArticleCount(articleCount);
-    fh.setIndexPtrPos(indexPtrPos);
-    fh.setClusterCount(blobCount);
-    fh.setClusterPtrPos(blobPtrPos);
+    fh.setClusterCount(clusterCount);
+    fh.setUrlPtrPos(urlPtrPos);
+    fh.setTitleIdxPos(titleIdxPos);
+    fh.setClusterPtrPos(clusterPtrPos);
+    fh.setMimeListPos(mimeListPos);
    fh.setMainPage(mainPage);
    fh.setLayoutPage(layoutPage);

--- a/src/zimlib/src/fileimpl.cpp
+++ b/src/zimlib/src/fileimpl.cpp
@ -24,11 +24,11 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
-#include <stdlib.h>
 #include <sstream>
 #include <errno.h>
 #include "config.h"
 #include "log.h"
+#include "envvalue.h"

 #ifdef WITH_CXXTOOLS
 #  include <cxxtools/systemerror.h>
@ -38,20 +38,6 @@ log_define("zim.file.impl")

 namespace zim
 {
-  namespace
-  {
-    unsigned envValue(const char* env, unsigned def)
-    {
-      const char* v = ::getenv(env);
-      if (v)
-      {
-        std::istringstream s(v);
-        s >> def;
-      }
-      return def;
-    }
-  }
-
  //////////////////////////////////////////////////////////////////////
  // FileImpl
  //
@ -60,6 +46,8 @@ namespace zim
      direntCache(envValue("ZIM_DIRENTCACHE", DIRENT_CACHE_SIZE)),
      clusterCache(envValue("ZIM_CLUSTERCACHE", CLUSTER_CACHE_SIZE))
  {
+    log_trace("read file \"" << fname << '"');
+
    if (!zimFile)
      throw ZimFileFormatError(std::string("can't open zim-file \"") + fname + '"');

@ -89,55 +77,41 @@ namespace zim
    if (zimFile.fail())
      throw ZimFileFormatError("error reading zim-file header");

-    // read index offsets
-    {
-      size_type indexOffsetsSize = header.getArticleCount() * sizeof(OffsetsType::value_type);
-      log_debug("read " << indexOffsetsSize << " bytes indexptr");
-      zimFile.seekg(header.getIndexPtrPos());
-      indexOffsets.resize(header.getArticleCount());
-      zimFile.read(reinterpret_cast<char*>(&indexOffsets[0]), indexOffsetsSize);
-    }
-
-    if (isBigEndian())
-    {
-      for (OffsetsType::iterator it = indexOffsets.begin(); it != indexOffsets.end(); ++it)
-        *it = fromLittleEndian(&*it);
-    }
-
-    // read cluster offsets
-    {
-      size_type clusterOffsetsSize = header.getClusterCount() * sizeof(OffsetsType::value_type);
-      log_debug("read " << clusterOffsetsSize << " bytes clusterptr");
-      zimFile.seekg(header.getClusterPtrPos());
-      clusterOffsets.resize(header.getClusterCount());
-      zimFile.read(reinterpret_cast<char*>(&clusterOffsets[0]), clusterOffsetsSize);
-    }
-
-    if (isBigEndian())
-    {
-      for (OffsetsType::iterator it = clusterOffsets.begin(); it != clusterOffsets.end(); ++it)
-        *it = fromLittleEndian(&*it);
-    }
-
-    if (clusterOffsets.empty())
+    if (getCountClusters() == 0)
      log_warn("no clusters found");
    else
    {
-      offset_type lastOffset = clusterOffsets.back();
+      offset_type lastOffset = getClusterOffset(getCountClusters() - 1);
      log_debug("last offset=" << lastOffset << " file size=" << st.st_size);
-      if (lastOffset > st.st_size)
+      if (lastOffset > static_cast<offset_type>(st.st_size))
      {
        log_fatal("last offset (" << lastOffset << ") larger than file size (" << st.st_size << ')');
        throw ZimFileFormatError("last cluster offset larger than file size; file corrupt");
      }
    }
+
+    // read mime types
+    zimFile.seekg(header.getMimeListPos());
+    std::string mimeType;
+    while (true)
+    {
+      std::getline(zimFile, mimeType, '\0');
+
+      if (zimFile.fail())
+        throw ZimFileFormatError("error reading mime type list");
+
+      if (mimeType.empty())
+        break;
+
+      mimeTypes.push_back(mimeType);;
+    }
  }

  Dirent FileImpl::getDirent(size_type idx)
  {
    log_trace("FileImpl::getDirent(" << idx << ')');

-    if (idx >= indexOffsets.size())
+    if (idx >= getCountArticles())
      throw ZimFileFormatError("article index out of range");

    if (!zimFile)
@ -155,7 +129,9 @@ namespace zim

    log_debug("dirent " << idx << " not found in cache; hits " << direntCache.getHits() << " misses " << direntCache.getMisses() << " ratio " << direntCache.hitRatio() * 100 << "% fillfactor " << direntCache.fillfactor());

-    zimFile.seekg(indexOffsets[idx]);
+    offset_type indexOffset = getOffset(header.getUrlPtrPos(), idx);
+
+    zimFile.seekg(indexOffset);
    if (!zimFile)
    {
      log_warn("failed to seek to directory entry");
@ -171,18 +147,43 @@ namespace zim
      throw ZimFileFormatError("failed to read directory entry");
    }

-    log_debug("dirent read from " << indexOffsets[idx]);
+    log_debug("dirent read from " << indexOffset);
    direntCache.put(idx, dirent);

    return dirent;
  }

+  Dirent FileImpl::getDirentByTitle(size_type idx)  // returns the dirent referenced by entry idx of the title index
+  {
+    if (idx >= getCountArticles())  // valid positions are 0 .. getCountArticles()-1
+      throw ZimFileFormatError("article index out of range");
+    return getDirent(getIndexByTitle(idx));  // translate the title position to an article index, then load that dirent
+  }
+
+  size_type FileImpl::getIndexByTitle(size_type idx)  // reads entry idx of the title index from the file and returns it
+  {
+    if (idx >= getCountArticles())  // throws ZimFileFormatError for positions outside the article count
+      throw ZimFileFormatError("article index out of range");
+
+    zimFile.seekg(header.getTitleIdxPos() + sizeof(size_type) * idx);  // entries are sizeof(size_type) bytes each, starting at getTitleIdxPos()
+    size_type ret;
+    zimFile.read(reinterpret_cast<char*>(&ret), sizeof(size_type));  // raw bytes as stored in the file
+
+    if (!zimFile)  // covers a failed seek as well as a short read
+      throw ZimFileFormatError("error reading title index");
+
+    if (isBigEndian())  // the stored value is little endian; convert only on big-endian hosts
+      ret = fromLittleEndian(&ret);
+
+    return ret;
+  }
+
  Cluster FileImpl::getCluster(size_type idx)
  {
    log_trace("getCluster(" << idx << ')');

-    if (idx >= clusterOffsets.size())
-      throw ZimFileFormatError("article index out of range");
+    if (idx >= getCountClusters())
+      throw ZimFileFormatError("cluster index out of range");

    Cluster cluster = clusterCache.get(idx);
    if (cluster)
@ -191,8 +192,9 @@ namespace zim
      return cluster;
    }

-    log_debug("read cluster " << idx << " from offset " << clusterOffsets[idx]);
-    zimFile.seekg(clusterOffsets[idx]);
+    offset_type clusterOffset = getClusterOffset(idx);
+    log_debug("read cluster " << idx << " from offset " << clusterOffset);
+    zimFile.seekg(clusterOffset);
    zimFile >> cluster;

    if (zimFile.fail())
@ -209,6 +211,21 @@ namespace zim
    return cluster;
  }

+  offset_type FileImpl::getOffset(offset_type ptrOffset, size_type idx)  // reads entry idx of the offset table that starts at file position ptrOffset
+  {
+    zimFile.seekg(ptrOffset + sizeof(offset_type) * idx);  // entries are sizeof(offset_type) bytes each; idx is not range-checked here
+    offset_type offset;
+    zimFile.read(reinterpret_cast<char*>(&offset), sizeof(offset_type));  // raw bytes as stored in the file
+
+    if (!zimFile)  // covers a failed seek as well as a short read
+      throw ZimFileFormatError("error reading offset");
+
+    if (isBigEndian())  // the stored value is little endian; convert only on big-endian hosts
+      offset = fromLittleEndian(&offset);
+
+    return offset;
+  }
+
  size_type FileImpl::getNamespaceBeginOffset(char ch)
  {
    log_trace("getNamespaceBeginOffset(" << ch << ')');
@ -282,4 +299,16 @@ namespace zim
    return namespaces;
  }

+  const std::string& FileImpl::getMimeType(uint16_t idx) const  // returns the mime type string stored for numeric code idx
+  {
+    if (idx >= mimeTypes.size())  // valid codes are 0 .. size()-1; idx == size() must be rejected too
+    {
+      std::ostringstream msg;
+      msg << "unknown mime type code " << idx;
+      throw std::runtime_error(msg.str());  // unknown codes are reported as std::runtime_error
+    }
+
+    return mimeTypes[idx];  // reference into the mimeTypes member
+  }
+
 }
--- a/src/zimlib/src/indexarticle.cpp
+++ b/src/zimlib/src/indexarticle.cpp
@ -48,7 +48,7 @@ namespace zim
  void IndexArticle::readEntriesZ()
  {
    std::istringstream s(getParameter());
-    zim::IZIntStream extra(s);
+    zim::ZIntStream extra(s);

    unsigned flagfield;  // field with one bit (bits 0-3) for each cateogry
    extra.get(flagfield);
@ -84,7 +84,7 @@ namespace zim
        log_debug("read data from offset " << offset << " len " << len);
        zim::Blob b = getData();
        ptrstream data(const_cast<char*>(b.data() + offset), const_cast<char*>(b.data() + offset + len));
-        IZIntStream zdata(data);
+        ZIntStream zdata(data);

        unsigned index;
        unsigned indexOffset = 0;
--- a/src/zimlib/src/inflatestream.cpp
+++ b/src/zimlib/src/inflatestream.cpp
@ -67,8 +67,6 @@ namespace zim

  InflateStreamBuf::int_type InflateStreamBuf::overflow(int_type c)
  {
-    log_debug("InflateStreamBuf::overflow");
-
    if (pptr())
    {
      // initialize input-stream for
@ -82,10 +80,8 @@ namespace zim
        stream.next_out = (Bytef*)ibuffer();
        stream.avail_out = ibuffer_size();

-        log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
        ret = ::inflate(&stream, Z_SYNC_FLUSH);
        checkError(ret, stream);
-        log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " ret=" << ret);

        // copy zbuffer to sinksource
        std::streamsize count = ibuffer_size() - stream.avail_out;
@ -119,14 +115,12 @@ namespace zim
        {
          // there is data already available
          // read compressed data from source into ibuffer
-          log_debug("in_avail=" << sinksource->in_avail());
          stream.avail_in = sinksource->sgetn(ibuffer(), std::min(sinksource->in_avail(), ibuffer_size()));
        }
        else
        {
          // no data available
          stream.avail_in = sinksource->sgetn(ibuffer(), ibuffer_size());
-          log_debug(stream.avail_in << " bytes read from source");
          if (stream.avail_in == 0)
            return traits_type::eof();
        }
@ -138,9 +132,7 @@ namespace zim

      // at least one character received from source - pass to decompressor

-      log_debug("pre:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in);
      int ret = ::inflate(&stream, Z_SYNC_FLUSH);
-      log_debug("post:avail_out=" << stream.avail_out << " avail_in=" << stream.avail_in << " ret=" << ret);

      checkError(ret, stream);

--- a/src/zimlib/src/lzmastream.cpp
+++ b/src/zimlib/src/lzmastream.cpp
@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <zim/lzmastream.h>
+#include "log.h"
+#include <cstring>
+#include <sstream>
+
+log_define("zim.lzma.compress")
+
+namespace zim
+{
+  namespace
+  {
+    lzma_ret checkError(lzma_ret ret)  // passes LZMA_OK / LZMA_STREAM_END through; logs and throws LzmaError for any other code
+    {
+      if (ret != LZMA_OK && ret != LZMA_STREAM_END)
+      {
+        std::ostringstream msg;
+        msg << "lzma-error " << ret;  // numeric code first; the symbolic name is appended below
+        switch (ret)
+        {
+            case LZMA_OK: msg << ": LZMA_OK"; break;  // cannot be hit here: excluded by the enclosing if
+            case LZMA_STREAM_END: msg << ": LZMA_STREAM_END"; break;  // cannot be hit here: excluded by the enclosing if
+            case LZMA_NO_CHECK: msg << ": LZMA_NO_CHECK"; break;
+            case LZMA_UNSUPPORTED_CHECK: msg << ": LZMA_UNSUPPORTED_CHECK"; break;
+            case LZMA_GET_CHECK: msg << ": LZMA_GET_CHECK"; break;
+            case LZMA_MEM_ERROR: msg << ": LZMA_MEM_ERROR"; break;
+            case LZMA_MEMLIMIT_ERROR: msg << ": LZMA_MEMLIMIT_ERROR"; break;
+            case LZMA_FORMAT_ERROR: msg << ": LZMA_FORMAT_ERROR"; break;
+            case LZMA_OPTIONS_ERROR: msg << ": LZMA_OPTIONS_ERROR"; break;
+            case LZMA_DATA_ERROR: msg << ": LZMA_DATA_ERROR"; break;
+            case LZMA_BUF_ERROR: msg << ": LZMA_BUF_ERROR"; break;
+            case LZMA_PROG_ERROR: msg << ": LZMA_PROG_ERROR"; break;
+        }
+        log_error(msg.str());
+        throw LzmaError(ret, msg.str());  // the exception carries both the code and the formatted text
+      }
+      return ret;  // returned so callers can tell LZMA_OK from LZMA_STREAM_END
+    }
+  }
+
+  LzmaStreamBuf::LzmaStreamBuf(std::streambuf* sink_, uint32_t preset, lzma_check check, unsigned bufsize_)  // sets up an encoder whose compressed output goes to sink_
+    : obuffer(bufsize_),
+      sink(sink_)
+  {
+    std::memset(reinterpret_cast<void*>(&stream), 0, sizeof(stream));  // zero the stream state before handing it to liblzma
+
+    checkError(  // throws LzmaError if the encoder cannot be initialized
+      ::lzma_easy_encoder(&stream, preset, check));
+
+    setp(&obuffer[0], &obuffer[0] + obuffer.size());  // the whole of obuffer becomes the put area
+  }
+
+  LzmaStreamBuf::~LzmaStreamBuf()  // hands the encoder state back to liblzma; pending data is not flushed here
+  {
+    ::lzma_end(&stream);
+  }
+
+  LzmaStreamBuf::int_type LzmaStreamBuf::overflow(int_type c)  // compresses the buffered bytes, forwards the result to sink, then stores c
+  {
+    // hand everything from the start of obuffer up to pptr() to the encoder
+    stream.next_in = reinterpret_cast<const uint8_t*>(&obuffer[0]);
+    stream.avail_in = pptr() - &obuffer[0];  // measured from the buffer start, so bytes kept by a previous call are included
+
+    // zbuffer receives the compressed bytes of this single lzma_code call
+    char zbuffer[8192];
+    stream.next_out = reinterpret_cast<uint8_t*>(zbuffer);
+    stream.avail_out = sizeof(zbuffer);
+
+    // compress; LZMA_RUN does not finish the stream
+    checkError(::lzma_code(&stream, LZMA_RUN));
+
+    // write whatever the encoder produced to the sink
+    std::streamsize count = sizeof(zbuffer) - stream.avail_out;
+    if (count > 0)
+    {
+      std::streamsize n = sink->sputn(zbuffer, count);
+      if (n < count)  // a short write to the sink is reported as failure
+        return traits_type::eof();
+    }
+
+    // keep input the encoder did not consume at the start of obuffer
+    if (stream.avail_in > 0)
+      memmove(&obuffer[0], stream.next_in, stream.avail_in);
+
+    // the put area restarts right behind the kept bytes
+    setp(&obuffer[0] + stream.avail_in, &obuffer[0] + obuffer.size());
+    if (c != traits_type::eof())
+      sputc(traits_type::to_char_type(c));  // store the character that triggered the overflow
+
+    return 0;  // any value other than eof() signals success
+  }
+
+  LzmaStreamBuf::int_type LzmaStreamBuf::underflow()  // this buffer offers nothing to read
+  {
+    return traits_type::eof();  // always end-of-file
+  }
+
+  int LzmaStreamBuf::sync()  // pushes the buffered bytes through the encoder into sink; returns 0 on success, -1 on a short write
+  {
+    // hand the current put area to the encoder
+    stream.next_in = reinterpret_cast<const uint8_t*>(&obuffer[0]);
+    stream.avail_in = pptr() - pbase();
+    char zbuffer[8192];
+    while (stream.avail_in > 0)  // nothing is encoded or written when the put area is empty
+    {
+      // start each round with an empty output buffer
+      stream.next_out = (uint8_t*)zbuffer;
+      stream.avail_out = sizeof(zbuffer);
+
+      checkError(::lzma_code(&stream, LZMA_FINISH));  // NOTE(review): same action as end(); confirm that finishing the stream on sync is intended
+
+      // write whatever the encoder produced to the sink
+      std::streamsize count = sizeof(zbuffer) - stream.avail_out;
+      if (count > 0)
+      {
+        std::streamsize n = sink->sputn(zbuffer, count);
+        if (n < count)  // a short write to the sink is reported as failure
+          return -1;
+      }
+    };
+
+    // the whole of obuffer is available again
+    setp(&obuffer[0], &obuffer[0] + obuffer.size());
+    return 0;
+  }
+
+  int LzmaStreamBuf::end()  // finishes the compressed stream; returns 0 on success, -1 on a short write to sink
+  {
+    char zbuffer[8192];
+    // hand the current put area to the encoder
+    stream.next_in = reinterpret_cast<const uint8_t*>(&obuffer[0]);
+    stream.avail_in = pptr() - pbase();
+    lzma_ret ret;
+    do  // runs at least once, so the stream is finished even with an empty put area
+    {
+      // start each round with an empty output buffer
+      stream.next_out = (uint8_t*)zbuffer;
+      stream.avail_out = sizeof(zbuffer);
+
+      ret = checkError(::lzma_code(&stream, LZMA_FINISH));  // checkError throws LzmaError on anything but OK / STREAM_END
+
+      // write whatever the encoder produced to the sink
+      std::streamsize count = sizeof(zbuffer) - stream.avail_out;
+      if (count > 0)
+      {
+        std::streamsize n = sink->sputn(zbuffer, count);
+        if (n < count)  // a short write to the sink is reported as failure
+          return -1;
+      }
+    } while (ret != LZMA_STREAM_END);  // repeat until the encoder reports the end of the stream
+
+    // the whole of obuffer is available again
+    setp(&obuffer[0], &obuffer[0] + obuffer.size());
+    return 0;
+  }
+
+  void LzmaStream::end()  // finishes the compressed stream via the stream buffer
+  {
+    if (streambuf.end() != 0)  // a non-zero result from the buffer means the sink write failed
+      setstate(failbit);
+  }
+
+}
--- a/src/zimlib/src/search.cpp
+++ b/src/zimlib/src/search.cpp
@ -39,8 +39,8 @@ namespace zim
        bool operator() (const SearchResult& s1, const SearchResult& s2) const
        {
          return s1.getPriority() > s2.getPriority()
-              || s1.getPriority() == s2.getPriority()
-               && s1.getArticle().getTitle() > s2.getArticle().getTitle();
+              || (s1.getPriority() == s2.getPriority()
+               && s1.getArticle().getTitle() > s2.getArticle().getTitle());
        }
    };
  }
@ -68,7 +68,7 @@ namespace zim
                        + Search::getWeightOccOff()
                        + Search::getWeightPlus() * itw->second.addweight;

-        std::string title = article.getTitle().toUtf8();
+        std::string title = article.getTitle();
        for (std::string::iterator it = title.begin(); it != title.end(); ++it)
          *it = std::tolower(*it);

@ -165,8 +165,7 @@ namespace zim

      log_debug("search for token \"" << token << '"');

-      QUnicodeString qtoken = QUnicodeString::fromUtf8(token);
-      IndexArticle indexarticle = indexfile.getArticle('X', qtoken, true);
+      IndexArticle indexarticle = indexfile.getArticleByTitle('X', token);

      if (indexarticle.getTotalCount() > 0)
      {
@ -190,7 +189,7 @@ namespace zim
      {
        log_debug("no entries found - try searching for titles");
        Results results;
-        find(results, 'A', qtoken);
+        find(results, 'A', token);
        for (Results::const_iterator it = results.begin(); it != results.end(); ++it)
        {
          uint32_t articleIdx = it->getArticle().getIndex();
@ -224,13 +223,13 @@ namespace zim
    std::sort(results.begin(), results.end(), PriorityGt());
  }

-  void Search::find(Results& results, char ns, const QUnicodeString& praefix, unsigned limit)
+  void Search::find(Results& results, char ns, const std::string& praefix, unsigned limit)
  {
    log_debug("find results in namespace " << ns << " for praefix \"" << praefix << '"');
-    for (File::const_iterator pos = articlefile.find(ns, praefix, true);
+    for (File::const_iterator pos = articlefile.findByTitle(ns, praefix);
         pos != articlefile.end() && results.size() < limit; ++pos)
    {
-      if (ns != pos->getNamespace() || pos->getTitle().compareCollate(0, praefix.size(), praefix) > 0)
+      if (ns != pos->getNamespace() || pos->getTitle().compare(0, praefix.size(), praefix) > 0)
      {
        log_debug("article " << pos->getNamespace() << ", \"" << pos->getTitle() << "\" does not match " << ns << ", \"" << praefix << '"');
        break;
@ -240,17 +239,17 @@ namespace zim
    log_debug(results.size() << " articles in result");
  }

-  void Search::find(Results& results, char ns, const QUnicodeString& begin,
-    const QUnicodeString& end, unsigned limit)
+  void Search::find(Results& results, char ns, const std::string& begin,
+    const std::string& end, unsigned limit)
  {
    log_debug("find results in namespace " << ns << " for praefix \"" << begin << '"');
-    for (File::const_iterator pos = articlefile.find(ns, begin, true);
+    for (File::const_iterator pos = articlefile.findByTitle(ns, begin);
         pos != articlefile.end() && results.size() < limit; ++pos)
    {
      log_debug("check " << pos->getNamespace() << '/' << pos->getTitle());
-      if (pos->getNamespace() != ns || pos->getTitle().compareCollate(0, end.size(), end) > 0)
+      if (pos->getNamespace() != ns || pos->getTitle().compare(end) > 0)
      {
-        log_debug("article \"" << pos->getUrl() << "\" does not match");
+        log_debug("article " << pos->getNamespace() << ", \"" << pos->getTitle() << "\" does not match");
        break;
      }
      results.push_back(SearchResult(*pos));
--- a/src/zimlib/src/unlzmastream.cpp
+++ b/src/zimlib/src/unlzmastream.cpp
@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+
+#include "zim/unlzmastream.h"
+#include "log.h"
+#include "config.h"
+#include <sstream>
+#include <cstring>
+#include "envvalue.h"
+
+log_define("zim.lzma.uncompress")
+
+namespace zim
+{
+  namespace
+  {
+    lzma_ret checkError(lzma_ret ret)  // passes LZMA_OK / LZMA_STREAM_END through; logs and throws UnlzmaError for any other code
+    {
+      if (ret != LZMA_OK && ret != LZMA_STREAM_END)
+      {
+        std::ostringstream msg;
+        msg << "inflate-error " << ret;  // numeric code first; the symbolic name is appended below
+        switch (ret)
+        {
+            case LZMA_OK: msg << ": LZMA_OK"; break;  // cannot be hit here: excluded by the enclosing if
+            case LZMA_STREAM_END: msg << ": LZMA_STREAM_END"; break;  // cannot be hit here: excluded by the enclosing if
+            case LZMA_NO_CHECK: msg << ": LZMA_NO_CHECK"; break;
+            case LZMA_UNSUPPORTED_CHECK: msg << ": LZMA_UNSUPPORTED_CHECK"; break;
+            case LZMA_GET_CHECK: msg << ": LZMA_GET_CHECK"; break;
+            case LZMA_MEM_ERROR: msg << ": LZMA_MEM_ERROR"; break;
+            case LZMA_MEMLIMIT_ERROR: msg << ": LZMA_MEMLIMIT_ERROR"; break;
+            case LZMA_FORMAT_ERROR: msg << ": LZMA_FORMAT_ERROR"; break;
+            case LZMA_OPTIONS_ERROR: msg << ": LZMA_OPTIONS_ERROR"; break;
+            case LZMA_DATA_ERROR: msg << ": LZMA_DATA_ERROR"; break;
+            case LZMA_BUF_ERROR: msg << ": LZMA_BUF_ERROR"; break;
+            case LZMA_PROG_ERROR: msg << ": LZMA_PROG_ERROR"; break;
+        }
+        log_error(msg.str());  // log the formatted text, not the ostringstream object itself
+        throw UnlzmaError(ret, msg.str());  // the exception carries both the code and the formatted text
+      }
+      return ret;  // returned so callers can tell LZMA_OK from LZMA_STREAM_END
+    }
+
+  }
+
+  UnlzmaStreamBuf::UnlzmaStreamBuf(std::streambuf* sinksource_, unsigned bufsize_)  // sets up a decoder working on sinksource_ with a bufsize_-element buffer
+    : iobuffer(new char_type[bufsize_]),
+      bufsize(bufsize_),
+      sinksource(sinksource_)
+  {
+    std::memset(reinterpret_cast<void*>(&stream), 0, sizeof(stream));  // zero the stream state before handing it to liblzma
+
+    unsigned memsize = envMemSize("ZIM_LZMA_MEMORY_SIZE", LZMA_MEMORY_SIZE * 1024 * 1024);  // decoder memory limit; the default is LZMA_MEMORY_SIZE MiB
+    const lzma_ret ret = ::lzma_stream_decoder(&stream, memsize, 0);
+    if (ret != LZMA_OK && ret != LZMA_STREAM_END) { delete[] iobuffer; checkError(ret); }  // the destructor does not run when a constructor throws, so free iobuffer before checkError raises UnlzmaError
+  }
+
+  UnlzmaStreamBuf::~UnlzmaStreamBuf()  // hands the decoder state back to liblzma and frees the buffer
+  {
+    ::lzma_end(&stream);
+    delete[] iobuffer;  // matches the new[] in the constructor's initializer list
+  }
+
+  UnlzmaStreamBuf::int_type UnlzmaStreamBuf::overflow(int_type c)  // decodes the bytes written so far, forwards the result to sinksource, then stores c
+  {
+    if (pptr())  // no put area has been set up before the first call
+    {
+      // hand the current put area to the decoder
+      stream.next_in = reinterpret_cast<const uint8_t*>(obuffer());
+      stream.avail_in = pptr() - pbase();
+
+      lzma_ret ret;
+      do
+      {
+        // start each round with an empty ibuffer as decoder output
+        stream.next_out = reinterpret_cast<uint8_t*>(ibuffer());
+        stream.avail_out = ibuffer_size();
+
+        ret = ::lzma_code(&stream, LZMA_RUN);
+        checkError(ret);  // throws UnlzmaError on anything but OK / STREAM_END
+
+        // write the decoded bytes from ibuffer to sinksource
+        std::streamsize count = ibuffer_size() - stream.avail_out;
+        std::streamsize n = sinksource->sputn(reinterpret_cast<char*>(ibuffer()), count);
+        if (n < count)  // a short write to sinksource is reported as failure
+          return traits_type::eof();
+      } while (ret != LZMA_STREAM_END && stream.avail_in > 0);  // stop at the end of the stream or once all input is consumed
+    }
+
+    // the whole of obuffer is available again
+    setp(obuffer(), obuffer() + obuffer_size());
+    if (c != traits_type::eof())
+      sputc(traits_type::to_char_type(c));  // store the character that triggered the overflow
+
+    return 0;  // any value other than eof() signals success
+  }
+
+  UnlzmaStreamBuf::int_type UnlzmaStreamBuf::underflow()  // refills the get area with decoded data; returns eof() when sinksource is exhausted
+  {
+    // read from sinksource and decompress into obuffer
+
+    stream.next_out = reinterpret_cast<uint8_t*>(obuffer());
+    stream.avail_out = obuffer_size();
+
+    do
+    {
+      // refill ibuffer only when the decoder has consumed all earlier input
+      if (stream.avail_in == 0)
+      {
+        if (sinksource->in_avail() > 0)
+        {
+          // there is data already available
+          // read compressed data from source into ibuffer
+          stream.avail_in = sinksource->sgetn(ibuffer(), std::min(sinksource->in_avail(), ibuffer_size()));  // take no more than what is already available
+        }
+        else
+        {
+          // no data available
+          stream.avail_in = sinksource->sgetn(ibuffer(), ibuffer_size());  // ask the source for up to a full ibuffer
+          if (stream.avail_in == 0)  // the source delivered nothing: report end-of-file
+            return traits_type::eof();
+        }
+
+        stream.next_in = (const uint8_t*)ibuffer();
+      }
+
+      // we decompress it now into obuffer
+
+      // at least one character received from source - pass to decompressor
+
+      checkError(::lzma_code(&stream, LZMA_RUN));  // throws UnlzmaError on anything but OK / STREAM_END
+
+      setg(obuffer(), obuffer(), obuffer() + obuffer_size() - stream.avail_out);  // the get area covers exactly the bytes decoded so far
+
+    } while (gptr() == egptr());  // repeat until the decoder has produced at least one byte
+
+    return sgetc();  // current character of the new get area, not consumed
+  }
+
+  int UnlzmaStreamBuf::sync()  // flushes pending written bytes through overflow(); returns 0 on success, -1 on failure
+  {
+    if (pptr() && overflow(traits_type::eof()) == traits_type::eof())  // nothing to flush while no put area exists
+      return -1;
+    return 0;
+  }
+}
--- a/src/zimlib/src/zintstream.cpp
+++ b/src/zimlib/src/zintstream.cpp
@ -18,86 +18,85 @@
 */

 #include <zim/zintstream.h>
+#include <stdint.h>
 #include "log.h"

 log_define("zim.zintstream")

 namespace zim
 {
-  IZIntStream& IZIntStream::get(unsigned &value)
+  size_type ZIntStream::get()  // decodes one variable-length integer from *_istream and returns it
  {
    char ch;
-    if (!stream.get(ch))
+    if (!_istream->get(ch))  // nothing to decode when the first byte cannot be read
      return *this;

-    unsigned ret = static_cast<unsigned>(static_cast<unsigned char>(ch));
-    unsigned numb = ret & 0x3;
-    ret >>= 2;
-    unsigned s = 6;
-    while (numb && stream.get(ch))
+    if (ch == '\xff')  // a first byte with all eight bits set is rejected as invalid
    {
-      ret += static_cast<unsigned>(
-               static_cast<unsigned char>(ch)) + 1 << s;
-      s += 8;
-      --numb;
+      log_error("invalid bytestream in int decompressor");
+      _istream->setstate(std::ios::failbit);  // with failbit set the continuation reads below do not run
+    }
+
+    size_type uuvalue = static_cast<size_type>(static_cast<unsigned char>(ch));  // first byte as an unsigned value
+    uint64_t ubound = 0x80;  // grows by a factor of 2^7 per leading 1-bit
+    size_type add = 0;  // sum of the ubound values passed; added to the decoded value at the end
+    unsigned short s = 7;  // ends up as 7-N: bit position of the first continuation byte
+    unsigned short N = 0;  // number of leading 1-bits in the first byte = number of continuation bytes
+    size_type mask = 0x7F;  // ends up selecting the low 7-N bits of the first byte
+    while (ch & 0x80)  // consume the leading 1-bits of the first byte one at a time
+    {
+      ++N;
+      ch <<= 1;
+      --s;
+      add += ubound;
+      ubound <<= 7;
+      mask >>= 1;
    }

-    if (numb)
+    uuvalue &= mask;  // drop the length marker bits, keep the value bits of the first byte
+
+    while (N-- && _istream->get(ch))  // read the continuation bytes, least significant first
    {
-      log_error("incomplete bytestream");
-      stream.setstate(std::ios::failbit);
+      uuvalue |= static_cast<size_type>(static_cast<unsigned char>(ch)) << s;
+      s += 8;
+    }
+
+    if (*_istream)  // test the stream state; the pointer itself was already dereferenced above
+    {
+      uuvalue += add;
    }
    else
-      value = ret;
+    {
+      log_error("incomplete bytestream in int decompressor");
+      _istream->setstate(std::ios::failbit);
+    }

-    return *this;
+    return uuvalue;  // on failure the stream carries failbit and the value is not meaningful
  }

-  OZIntStream& OZIntStream::put(size_type value)
+  ZIntStream& ZIntStream::put(size_type value)
  {
-    char data[4];
-    unsigned count;
-    if (value < 64)
+    size_type nmask = 0;
+    size_type mask = 0x7F;
+    uint64_t ubound = 0x80;
+    unsigned short N = 0;
+
+    while (value >= ubound)
    {
-      count = 1;
-      data[0] = (value << 2);
-      log_debug(value << " => " << std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[0])));
-    }
-    else if (value < 16384 + 64)
-    {
-      value -= 64;
-      count = 2;
-      data[0] = value << 2 | 1;
-      data[1] = value >> 6;
-      log_debug(value << " => " << std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[0]))
-                                << std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[1])));
-    }
-    else if (value < 4194304 + 16384 + 64)
-    {
-      value -= 16384 + 64;
-      count = 3;
-      data[0] = value << 2 | 2;
-      data[1] = value >> 6;
-      data[2] = value >> 14;
-      log_debug(value << " => " << std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[0]))
-                                << std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[1]))
-                                << std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[2])));
-    }
-    else
-    {
-      value -= 4194304 + 16384 + 64;
-      count = 4;
-      data[0] = value << 2 | 3;
-      data[1] = value >> 6;
-      data[2] = value >> 14;
-      data[3] = value >> 22;
-      log_debug(value << " => " << std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[0]))
-                                << std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[1]))
-                                << std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[2]))
-                                << std::hex << static_cast<unsigned>(static_cast<unsigned char>(data[4])));
+      value -= ubound;
+      ubound <<= 7;
+      nmask = (nmask >> 1) | 0x80;
+      mask = mask >> 1;
+      ++N;
    }

-    stream.write(reinterpret_cast<char*>(&data[0]), count);
+    _ostream->put(static_cast<char>(nmask | (value & mask)));
+    value >>= 7 - N;
+    while (N--)
+    {
+      _ostream->put(static_cast<char>(value & 0xFF));
+      value >>= 8;
+    }

    return *this;
  }