diff --git a/src/zimlib/include/zim/fstream.h b/src/zimlib/include/zim/fstream.h index cb4d514..be9f161 100644 --- a/src/zimlib/include/zim/fstream.h +++ b/src/zimlib/include/zim/fstream.h @@ -23,43 +23,75 @@ #include #include #include +#include +#include +#include namespace zim { class streambuf : public std::streambuf { + struct FileInfo : public RefCounted + { + std::string fname; + zim::offset_type fsize; + + FileInfo() { } + FileInfo(const std::string& fname_, int fd); + }; + + struct OpenfileInfo : public RefCounted + { + std::string fname; + int fd; + + explicit OpenfileInfo(const std::string& fname); + ~OpenfileInfo(); + }; + + typedef SmartPtr FileInfoPtr; + typedef std::vector FilesType; + + typedef SmartPtr OpenfileInfoPtr; + typedef Cache OpenFilesCacheType; + std::vector buffer; - int fd; + + FilesType files; + OpenFilesCacheType openFilesCache; + OpenfileInfoPtr currentFile; + offset_type currentPos; std::streambuf::int_type overflow(std::streambuf::int_type ch); std::streambuf::int_type underflow(); int sync(); + void setCurrentFile(const std::string& fname, off_t off); + public: - typedef zim::offset_type offset_type; + streambuf(const std::string& fname, unsigned bufsize, unsigned openFilesCache); - streambuf(const char* fname, unsigned bufsize); - ~streambuf(); - - void seekg(offset_type off); + void seekg(zim::offset_type off); void setBufsize(unsigned s) { buffer.resize(s); } + zim::offset_type fsize() const; }; - class ifstream : public std::iostream + class ifstream : public std::istream { streambuf myStreambuf; public: - typedef streambuf::offset_type offset_type; + explicit ifstream(const std::string& fname, unsigned bufsize = 8192, unsigned openFilesCache = 5) + : std::istream(0), + myStreambuf(fname, bufsize, openFilesCache) + { + init(&myStreambuf); + } - explicit ifstream(const char* fname, unsigned bufsize = 8192) - : std::iostream(&myStreambuf), - myStreambuf(fname, bufsize) - { } - - void seekg(offset_type off) { myStreambuf.seekg(off); } + void seekg(zim::offset_type off) { myStreambuf.seekg(off); } void setBufsize(unsigned s) { myStreambuf.setBufsize(s); } + zim::offset_type fsize() const { return myStreambuf.fsize(); } }; } diff --git a/src/zimlib/src/config.h.in b/src/zimlib/src/config.h.in index e8608c2..b780474 100644 --- a/src/zimlib/src/config.h.in +++ b/src/zimlib/src/config.h.in @@ -24,6 +24,9 @@ /* Define to 1 if you have the `bz2' library (-lbz2). */ #undef HAVE_LIBBZ2 +/* Define to 1 if you have the `clucene' library (-lclucene). */ +#undef HAVE_LIBCLUCENE + /* Define to 1 if you have the `lzma' library (-llzma). */ #undef HAVE_LIBLZMA diff --git a/src/zimlib/src/fileimpl.cpp b/src/zimlib/src/fileimpl.cpp index 7ca5731..21794c8 100644 --- a/src/zimlib/src/fileimpl.cpp +++ b/src/zimlib/src/fileimpl.cpp @@ -50,28 +50,6 @@ namespace zim if (!zimFile) throw ZimFileFormatError(std::string("can't open zim-file \"") + fname + '"'); -#ifdef HAVE_STAT64 - struct stat64 st; - int ret = ::stat64(fname, &st); -#elif _WIN32 - struct __stat64 st; - int ret = ::_stat64(fname, &st); -#else - struct stat st; - int ret = ::stat(fname, &st); -#endif - if (ret != 0) -#ifdef WITH_CXXTOOLS - throw cxxtools::SystemError("stat"); -#else - { - std::ostringstream msg; - msg << "stat failed with errno " << errno << " : " << strerror(errno); - throw std::runtime_error(msg.str()); - } -#endif - mtime = st.st_mtime; - filename = fname; // read header @@ -84,10 +62,10 @@ namespace zim else { offset_type lastOffset = getClusterOffset(getCountClusters() - 1); - log_debug("last offset=" << lastOffset << " file size=" << st.st_size); - if (lastOffset > static_cast(st.st_size)) + log_debug("last offset=" << lastOffset << " file size=" << zimFile.fsize()); + if (lastOffset > static_cast(zimFile.fsize())) { - log_fatal("last offset (" << lastOffset << ") larger than file size (" << st.st_size << ')'); + log_fatal("last offset (" << lastOffset << ") larger than file size (" << zimFile.fsize() << ')'); throw ZimFileFormatError("last cluster offset larger than file size; file corrupt"); } } diff --git a/src/zimlib/src/fstream.cpp b/src/zimlib/src/fstream.cpp index 56cf186..4889a60 100644 --- a/src/zimlib/src/fstream.cpp +++ b/src/zimlib/src/fstream.cpp @@ -25,19 +25,71 @@ #include #include #include +#include #include #include -#ifdef _WIN32 -#include -int _fmode = _O_BINARY; -#define _LARGEFILE64_SOURCE +#ifndef O_LARGEFILE +#define O_LARGEFILE 0 +#endif + +#ifndef O_BINARY +#define O_BINARY 0 #endif log_define("zim.fstream") namespace zim { + class FileNotFound : public std::runtime_error + { + public: + FileNotFound() + : std::runtime_error("file not found") + { } + }; + +//////////////////////////////////////////////////////////// +// OpenfileInfo +// +streambuf::OpenfileInfo::OpenfileInfo(const std::string& fname_) + : fname(fname_), +#ifdef HAVE_OPEN64 + fd(::open64(fname.c_str(), O_RDONLY | O_LARGEFILE | O_BINARY)) +#else + fd(::open(fname.c_str(), O_RDONLY | O_LARGEFILE | O_BINARY)) +#endif +{ + if (fd < 0) + throw FileNotFound(); +} + +streambuf::OpenfileInfo::~OpenfileInfo() +{ + ::close(fd); +} + +//////////////////////////////////////////////////////////// +// FileInfo +// +streambuf::FileInfo::FileInfo(const std::string& fname_, int fd) + : fname(fname_) +{ +#ifdef HAVE_LSEEK64 + off64_t ret = ::lseek64(fd, 0, SEEK_END); +#else + off_t ret = ::lseek(fd, 0, SEEK_END); +#endif + if (ret < 0) + { + std::ostringstream msg; + msg << "error " << errno << " seeking to end in file " << fname << ": " << strerror(errno); + throw std::runtime_error(msg.str()); + } + + fsize = static_cast(ret); +} + std::streambuf::int_type streambuf::overflow(std::streambuf::int_type ch) { return traits_type::eof(); @@ -47,15 +99,34 @@ std::streambuf::int_type streambuf::underflow() { log_debug("underflow; bufsize=" << buffer.size()); - int n = ::read(fd, &buffer[0], buffer.size()); - if (n < 0) + int n; + do { - std::ostringstream msg; - msg << "error " << errno << " reading from file: " << strerror(errno); - throw std::runtime_error(msg.str()); - } - else if (n == 0) - return traits_type::eof(); + n = ::read(currentFile->fd, &buffer[0], buffer.size()); + if (n < 0) + { + std::ostringstream msg; + msg << "error " << errno << " reading from file: " << strerror(errno); + throw std::runtime_error(msg.str()); + } + else if (n == 0) + { + FilesType::iterator it; + for (it = files.begin(); it != files.end(); ++it) + { + if ((*it)->fname == currentFile->fname) + { + ++it; + break; + } + } + + if (it == files.end()) + return traits_type::eof(); + + setCurrentFile((*it)->fname, 0); + } + } while (n == 0); char* p = &buffer[0]; setg(p, p, p + n); @@ -67,46 +138,144 @@ int streambuf::sync() return traits_type::eof(); } -streambuf::streambuf(const char* fname, unsigned bufsize) +namespace +{ + void parseFilelist(const std::string& list, std::vector& out) + { + enum { + state_0, + state_t, + state_e + } state = state_0; + + for (std::string::const_iterator it = list.begin(); it != list.end(); ++it) + { + switch (state) + { + case state_0: + out.push_back(std::string(1, *it)); + state = state_t; + break; + + case state_t: + if (*it == ':') + out.push_back(std::string(1, *it)); + else if (*it == '\\') + state = state_e; + else + out.back() += *it; + break; + + case state_e: + out.back() += *it; + state = state_t; + break; + } + } + } +} + +streambuf::streambuf(const std::string& fname, unsigned bufsize, unsigned noOpenFiles) : buffer(bufsize), - #ifdef HAVE_OPEN64 - fd(::open64(fname, 0)) - #else - fd(::open(fname, 0)) - #endif + openFilesCache(noOpenFiles) { log_debug("streambuf for " << fname << " with " << bufsize << " bytes"); - if (fd < 0) + try { - std::ostringstream msg; - msg << "error " << errno << " opening file \"" << fname << "\": " << strerror(errno); - throw std::runtime_error(msg.str()); + currentFile = new OpenfileInfo(fname); + files.push_back(new FileInfo(fname, currentFile->fd)); + openFilesCache.put(fname, currentFile); + } + catch (const FileNotFound&) + { + int errnoSave = errno; + try + { + for (char ch0 = 'a'; ch0 <= 'z'; ++ch0) + { + std::string fname0 = fname + ch0; + for (char ch1 = 'a'; ch1 <= 'z'; ++ch1) + { + std::string fname1 = fname0 + ch1; + + currentFile = new OpenfileInfo(fname1); + files.push_back(new FileInfo(fname1, currentFile->fd)); + + openFilesCache.put(fname1, currentFile); + } + } + } + catch (const FileNotFound&) + { + if (files.empty()) + { + std::ostringstream msg; + msg << "error " << errnoSave << " opening file \"" << fname << "\": " << strerror(errnoSave); + throw std::runtime_error(msg.str()); + } + } + } + + setCurrentFile((*files.begin())->fname, 0); +} + +void streambuf::setCurrentFile(const std::string& fname, off_t off) +{ + std::pair f = openFilesCache.getx(fname); + if (f.first) + { + currentFile = f.second; + } + else + { + // file not found in cache + currentFile = new OpenfileInfo(fname); + openFilesCache.put(fname, currentFile); + } + + if (f.first || off != 0) // found in cache or seek requested + { +#ifdef HAVE_LSEEK64 + off64_t ret = ::lseek64(currentFile->fd, off, SEEK_SET); +#else + off_t ret = ::lseek(currentFile->fd, off, SEEK_SET); +#endif + if (ret < 0) + { + std::ostringstream msg; + msg << "error " << errno << " seeking to "<< off << " in file " << fname << ": " << strerror(errno); + throw std::runtime_error(msg.str()); + } } } -streambuf::~streambuf() -{ - ::close(fd); -} - -void streambuf::seekg(offset_type off) +void streambuf::seekg(zim::offset_type off) { setg(0, 0, 0); - #ifdef HAVE_LSEEK64 - off64_t ret = ::lseek64(fd, off, SEEK_SET); - #elif _WIN32 - offset_type ret = ::_lseeki64(fd, off, SEEK_SET); - #else - off_t ret = ::lseek(fd, off, SEEK_SET); - #endif + currentPos = off; - if (ret < 0) + zim::offset_type o = off; + FilesType::iterator it; + for (it = files.begin(); it != files.end() && (*it)->fsize < o; ++it) + o -= (*it)->fsize; + + if (it == files.end()) { std::ostringstream msg; - msg << "error " << errno << " seeking to "<< off << " in file: " << strerror(errno); + msg << "error seeking to "<< off; throw std::runtime_error(msg.str()); } + + setCurrentFile((*it)->fname, o); +} + +zim::offset_type streambuf::fsize() const +{ + zim::offset_type o = 0; + for (FilesType::const_iterator it = files.begin(); it != files.end(); ++it) + o += (*it)->fsize; + return o; } }