mirror of
https://github.com/kiwix/kiwix-android.git
synced 2025-08-03 10:46:53 -04:00
Finished glassify (and compiled for x86_64)
This commit is contained in:
parent
5ec55a299c
commit
f69d7064dd
@ -510,20 +510,6 @@ for arch in ARCHS:
|
||||
failed_on_step('The libxapian.a archive file has not been created '
|
||||
'and is not present.')
|
||||
|
||||
# recompile xapian for build system arch to compile glassify
|
||||
if COMPILE_GLASSIFY:
|
||||
change_env(ORIGINAL_ENVIRON)
|
||||
os.chdir(os.path.join(curdir, '..', 'src', 'dependencies', 'xapian-core-1.3.4'))
|
||||
syscall('make clean')
|
||||
syscall('./configure')
|
||||
syscall('make')
|
||||
os.chdir(curdir)
|
||||
syscall('g++ glassify.cc -o glassify.o -I../src/dependencies/xapian-core-1.3.4/include')
|
||||
syscall('ld glassify.o ../src/dependencies/xapian-core-1.3.4/.libs/libxapian-1.3.a -o glassify')
|
||||
|
||||
change_env(new_environ)
|
||||
change_env(OPTIMIZATION_ENV)
|
||||
|
||||
# create libzim.a
|
||||
os.chdir(curdir)
|
||||
platform_includes = ['%(platform)s/include/c++/%(gccver)s/'
|
||||
@ -670,10 +656,22 @@ for arch in ARCHS:
|
||||
'arch_full': arch_full,
|
||||
'arch_short': arch_short,
|
||||
'curdir': curdir})
|
||||
if COMPILE_GLASSIFY:
|
||||
os.chdir(curdir)
|
||||
syscall('g++ glassify.cc ../src/dependencies/xapian-core-1.3.4/.libs/libxapian-1.3.a -o glassify_%s -lz -luuid -lrt -I../src/dependencies/xapian-core-1.3.4/include' % arch_short)
|
||||
|
||||
os.chdir(curdir)
|
||||
change_env(ORIGINAL_ENVIRON)
|
||||
|
||||
# recompile xapian for build system arch to compile glassify
|
||||
if COMPILE_GLASSIFY:
|
||||
os.chdir(os.path.join(curdir, '..', 'src', 'dependencies', 'xapian-core-1.3.4'))
|
||||
syscall('make clean')
|
||||
syscall('./configure')
|
||||
syscall('make')
|
||||
os.chdir(curdir)
|
||||
syscall('g++ glassify.cc ../src/dependencies/xapian-core-1.3.4/.libs/libxapian-1.3.a -o glassify -lz -luuid -lrt -I../src/dependencies/xapian-core-1.3.4/include')
|
||||
|
||||
if LOCALES_TXT:
|
||||
|
||||
os.chdir(curdir)
|
||||
|
181
glassify.cc
181
glassify.cc
@ -1,27 +1,188 @@
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <xapian.h>
|
||||
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
|
||||
#include <cmath> // For log10().
|
||||
#include <cstdlib> // For exit().
|
||||
#include <cstring> // For strcmp() and strrchr().
|
||||
#include <string>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <ftw.h>
|
||||
#include <unistd.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
void compact(const char* in, const char* out) {
|
||||
#define PROG_NAME "glassify"
|
||||
#define PROG_DESC "Perform a document-by-document copy of one or more Xapian databases and make it a single file"
|
||||
|
||||
static void
|
||||
show_usage(int rc)
|
||||
{
|
||||
cout << "Usage: " PROG_NAME " SOURCE_DATABASE... DESTINATION_DATABASE\n\n"
|
||||
"Options:\n"
|
||||
" --no-renumber Preserve the numbering of document ids (useful if you have\n"
|
||||
" external references to them, or have set them to match\n"
|
||||
" unique ids from an external source). If multiple source\n"
|
||||
" databases are specified and the same docid occurs in more\n"
|
||||
" one, the last occurrence will be the one which ends up in\n"
|
||||
" the destination database.\n"
|
||||
" --help display this help and exit\n"
|
||||
" --version output version information and exit" << endl;
|
||||
exit(rc);
|
||||
}
|
||||
|
||||
void compact(const char* in, const char* out) try {
|
||||
Xapian::Database indb(in);
|
||||
int fd = open(out, O_CREAT|O_RDWR, 0666);
|
||||
if (fd != -1) {
|
||||
indb.compact(fd);
|
||||
close(fd);
|
||||
cout << "Done!" << endl;
|
||||
return;
|
||||
}
|
||||
cout << "Some error happened..." << endl;
|
||||
} catch (const Xapian::Error &e) {
|
||||
cout << e.get_description().c_str() << endl;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
if (argc != 3) {
|
||||
cout << "Wrong number of arguments!" << endl << "\t" << argv[0] << " [input folder] [output glassdb]" << endl;
|
||||
}
|
||||
compact(argv[1], argv[2]);
|
||||
return 0;
|
||||
/**
 * nftw() callback that deletes a single directory entry.
 *
 * Only the path is used; the stat buffer, type flag and FTW info supplied by
 * nftw() are ignored.
 *
 * @return 0 on success, otherwise the non-zero result of remove() (after
 *         printing the reason with perror()).
 */
int unlinker(const char *fpth, const struct stat *sb, int t, struct FTW *fb) {
    const int status = remove(fpth);
    if (status != 0) {
        perror(fpth);
    }
    return status;
}
|
||||
|
||||
int cleaner(const char *path) {
|
||||
return nftw(path, unlinker, 64, FTW_DEPTH | FTW_PHYS);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
try {
|
||||
bool renumber = true;
|
||||
if (argc > 1 && argv[1][0] == '-') {
|
||||
if (strcmp(argv[1], "--help") == 0) {
|
||||
cout << PROG_NAME " - " PROG_DESC "\n\n";
|
||||
show_usage(0);
|
||||
}
|
||||
if (strcmp(argv[1], "--version") == 0) {
|
||||
cout << PROG_NAME << endl;
|
||||
exit(0);
|
||||
}
|
||||
if (strcmp(argv[1], "--no-renumber") == 0) {
|
||||
renumber = false;
|
||||
argv[1] = argv[0];
|
||||
++argv;
|
||||
--argc;
|
||||
}
|
||||
}
|
||||
|
||||
// We expect two or more arguments: at least one source database path
|
||||
// followed by the destination database path.
|
||||
if (argc < 3) show_usage(1);
|
||||
|
||||
// Create the destination database, using DB_CREATE so that we don't
|
||||
// try to overwrite or update an existing database in case the user
|
||||
// got the command line argument order wrong.
|
||||
string dest_str = string(argv[argc - 1]);
|
||||
dest_str += ".tmp";
|
||||
const char *dest = dest_str.c_str();
|
||||
Xapian::WritableDatabase db_out(dest, Xapian::DB_CREATE|Xapian::DB_BACKEND_GLASS);
|
||||
|
||||
for (int i = 1; i < argc - 1; ++i) {
|
||||
char * src = argv[i];
|
||||
if (*src) {
|
||||
// Remove any trailing directory separator.
|
||||
char & ch = src[strlen(src) - 1];
|
||||
if (ch == '/' || ch == '\\') ch = '\0';
|
||||
}
|
||||
|
||||
// Open the source database.
|
||||
Xapian::Database db_in(src);
|
||||
|
||||
// Find the leaf-name of the database path for reporting progress.
|
||||
const char * leaf = strrchr(src, '/');
|
||||
#if defined __WIN32__ || defined __OS2__
|
||||
if (!leaf) leaf = strrchr(src, '\\');
|
||||
#endif
|
||||
if (leaf) ++leaf; else leaf = src;
|
||||
|
||||
// Iterate over all the documents in db_in, copying each to db_out.
|
||||
Xapian::doccount dbsize = db_in.get_doccount();
|
||||
if (dbsize == 0) {
|
||||
cout << leaf << ": empty!" << endl;
|
||||
} else {
|
||||
// Calculate how many decimal digits there are in dbsize.
|
||||
int width = static_cast<int>(log10(double(dbsize))) + 1;
|
||||
|
||||
Xapian::doccount c = 0;
|
||||
Xapian::PostingIterator it = db_in.postlist_begin(string());
|
||||
while (it != db_in.postlist_end(string())) {
|
||||
Xapian::docid did = *it;
|
||||
if (renumber) {
|
||||
db_out.add_document(db_in.get_document(did));
|
||||
} else {
|
||||
db_out.replace_document(did, db_in.get_document(did));
|
||||
}
|
||||
|
||||
// Update for the first 10, and then every 13th document
|
||||
// counting back from the end (this means that all the
|
||||
// digits "rotate" and the counter ends up on the exact
|
||||
// total.
|
||||
++c;
|
||||
if (c <= 10 || (dbsize - c) % 13 == 0) {
|
||||
cout << '\r' << leaf << ": ";
|
||||
cout << setw(width) << c << '/' << dbsize << flush;
|
||||
}
|
||||
|
||||
++it;
|
||||
}
|
||||
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
cout << "Copying spelling data..." << flush;
|
||||
Xapian::TermIterator spellword = db_in.spellings_begin();
|
||||
while (spellword != db_in.spellings_end()) {
|
||||
db_out.add_spelling(*spellword, spellword.get_termfreq());
|
||||
++spellword;
|
||||
}
|
||||
cout << " done." << endl;
|
||||
|
||||
cout << "Copying synonym data..." << flush;
|
||||
Xapian::TermIterator synkey = db_in.synonym_keys_begin();
|
||||
while (synkey != db_in.synonym_keys_end()) {
|
||||
string key = *synkey;
|
||||
Xapian::TermIterator syn = db_in.synonyms_begin(key);
|
||||
while (syn != db_in.synonyms_end(key)) {
|
||||
db_out.add_synonym(key, *syn);
|
||||
++syn;
|
||||
}
|
||||
++synkey;
|
||||
}
|
||||
cout << " done." << endl;
|
||||
|
||||
cout << "Copying user metadata..." << flush;
|
||||
Xapian::TermIterator metakey = db_in.metadata_keys_begin();
|
||||
while (metakey != db_in.metadata_keys_end()) {
|
||||
string key = *metakey;
|
||||
db_out.set_metadata(key, db_in.get_metadata(key));
|
||||
++metakey;
|
||||
}
|
||||
cout << " done." << endl;
|
||||
}
|
||||
|
||||
cout << "Committing..." << flush;
|
||||
// Commit explicitly so that any error is reported.
|
||||
db_out.commit();
|
||||
cout << " done." << endl;
|
||||
cout << "Turning into single file..." << endl;
|
||||
compact(dest, argv[argc - 1]);
|
||||
cout << "All finished. Cleaning up..." << endl;
|
||||
cleaner(dest);
|
||||
cout << "Done!" << endl;
|
||||
} catch (const Xapian::Error & e) {
|
||||
cerr << '\n' << argv[0] << ": " << e.get_description() << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user