493 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			493 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*-
 | |
|  * Copyright (c) 2008 Joerg Sonnenberger
 | |
|  * All rights reserved.
 | |
|  *
 | |
|  * Redistribution and use in source and binary forms, with or without
 | |
|  * modification, are permitted provided that the following conditions
 | |
|  * are met:
 | |
|  * 1. Redistributions of source code must retain the above copyright
 | |
|  *    notice, this list of conditions and the following disclaimer.
 | |
|  * 2. Redistributions in binary form must reproduce the above copyright
 | |
|  *    notice, this list of conditions and the following disclaimer in the
 | |
|  *    documentation and/or other materials provided with the distribution.
 | |
|  *
 | |
|  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 | |
|  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | |
|  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | |
|  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 | |
|  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | |
|  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | |
|  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | |
|  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | |
|  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | |
|  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
|  */
 | |
| 
 | |
| /*-
 | |
|  * Copyright (c) 1985, 1986, 1992, 1993
 | |
|  *	The Regents of the University of California.  All rights reserved.
 | |
|  *
 | |
|  * This code is derived from software contributed to Berkeley by
 | |
|  * Diomidis Spinellis and James A. Woods, derived from original
 | |
|  * work by Spencer Thomas and Joseph Orost.
 | |
|  *
 | |
|  * Redistribution and use in source and binary forms, with or without
 | |
|  * modification, are permitted provided that the following conditions
 | |
|  * are met:
 | |
|  * 1. Redistributions of source code must retain the above copyright
 | |
|  *    notice, this list of conditions and the following disclaimer.
 | |
|  * 2. Redistributions in binary form must reproduce the above copyright
 | |
|  *    notice, this list of conditions and the following disclaimer in the
 | |
|  *    documentation and/or other materials provided with the distribution.
 | |
|  * 3. Neither the name of the University nor the names of its contributors
 | |
|  *    may be used to endorse or promote products derived from this software
 | |
|  *    without specific prior written permission.
 | |
|  *
 | |
|  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 | |
|  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | |
|  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | |
|  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 | |
|  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | |
|  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 | |
|  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 | |
|  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 | |
|  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 | |
|  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 | |
|  * SUCH DAMAGE.
 | |
|  */
 | |
| 
 | |
| #include "archive_platform.h"
 | |
| 
 | |
| __FBSDID("$FreeBSD: head/lib/libarchive/archive_write_set_compression_compress.c 201111 2009-12-28 03:33:05Z kientzle $");
 | |
| 
 | |
| #ifdef HAVE_ERRNO_H
 | |
| #include <errno.h>
 | |
| #endif
 | |
| #ifdef HAVE_STDLIB_H
 | |
| #include <stdlib.h>
 | |
| #endif
 | |
| #ifdef HAVE_STRING_H
 | |
| #include <string.h>
 | |
| #endif
 | |
| 
 | |
| #include "archive.h"
 | |
| #include "archive_private.h"
 | |
| #include "archive_write_private.h"
 | |
| 
 | |
/* Hash table geometry and tuning knobs for the LZW encoder. */
#define	HSIZE		69001	/* Hash table slots (95% occupancy). */
#define	HSHIFT		8	/* 8 - trunc(log2(HSIZE / 65536)) */
#define	CHECK_GAP	10000	/* Input bytes between ratio checks. */

/* Largest value that fits in a code of the given bit width. */
#define	MAXCODE(bits)	((1 << (bits)) - 1)

/*
 * Reserved codes.  These should not be changed lightly, as they must
 * not lie within the contiguous general code space.
 */
#define	CLEAR	256		/* Table clear output code. */
#define	FIRST	257		/* First free entry. */
 | |
| 
 | |
| struct private_data {
 | |
| 	off_t in_count, out_count, checkpoint;
 | |
| 
 | |
| 	int code_len;			/* Number of bits/code. */
 | |
| 	int cur_maxcode;		/* Maximum code, given n_bits. */
 | |
| 	int max_maxcode;		/* Should NEVER generate this code. */
 | |
| 	int hashtab [HSIZE];
 | |
| 	unsigned short codetab [HSIZE];
 | |
| 	int first_free;		/* First unused entry. */
 | |
| 	int compress_ratio;
 | |
| 
 | |
| 	int cur_code, cur_fcode;
 | |
| 
 | |
| 	int bit_offset;
 | |
| 	unsigned char bit_buf;
 | |
| 
 | |
| 	unsigned char	*compressed;
 | |
| 	size_t		 compressed_buffer_size;
 | |
| 	size_t		 compressed_offset;
 | |
| };
 | |
| 
 | |
/* Compressor callbacks, declared in the order they run. */
static int	archive_compressor_compress_init(struct archive_write *a);
static int	archive_compressor_compress_write(struct archive_write *a,
		    const void *buff, size_t length);
static int	archive_compressor_compress_finish(struct archive_write *a);
 | |
| 
 | |
| /*
 | |
|  * Allocate, initialize and return a archive object.
 | |
|  */
 | |
| int
 | |
| archive_write_set_compression_compress(struct archive *_a)
 | |
| {
 | |
| 	struct archive_write *a = (struct archive_write *)_a;
 | |
| 	__archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC,
 | |
| 	    ARCHIVE_STATE_NEW, "archive_write_set_compression_compress");
 | |
| 	a->compressor.init = &archive_compressor_compress_init;
 | |
| 	a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS;
 | |
| 	a->archive.compression_name = "compress";
 | |
| 	return (ARCHIVE_OK);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Setup callback.
 | |
|  */
 | |
| static int
 | |
| archive_compressor_compress_init(struct archive_write *a)
 | |
| {
 | |
| 	int ret;
 | |
| 	struct private_data *state;
 | |
| 
 | |
| 	a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS;
 | |
| 	a->archive.compression_name = "compress";
 | |
| 
 | |
| 	if (a->bytes_per_block < 4) {
 | |
| 		archive_set_error(&a->archive, EINVAL,
 | |
| 		    "Can't write Compress header as single block");
 | |
| 		return (ARCHIVE_FATAL);
 | |
| 	}
 | |
| 
 | |
| 	if (a->client_opener != NULL) {
 | |
| 		ret = (a->client_opener)(&a->archive, a->client_data);
 | |
| 		if (ret != ARCHIVE_OK)
 | |
| 			return (ret);
 | |
| 	}
 | |
| 
 | |
| 	state = (struct private_data *)malloc(sizeof(*state));
 | |
| 	if (state == NULL) {
 | |
| 		archive_set_error(&a->archive, ENOMEM,
 | |
| 		    "Can't allocate data for compression");
 | |
| 		return (ARCHIVE_FATAL);
 | |
| 	}
 | |
| 	memset(state, 0, sizeof(*state));
 | |
| 
 | |
| 	state->compressed_buffer_size = a->bytes_per_block;
 | |
| 	state->compressed = malloc(state->compressed_buffer_size);
 | |
| 
 | |
| 	if (state->compressed == NULL) {
 | |
| 		archive_set_error(&a->archive, ENOMEM,
 | |
| 		    "Can't allocate data for compression buffer");
 | |
| 		free(state);
 | |
| 		return (ARCHIVE_FATAL);
 | |
| 	}
 | |
| 
 | |
| 	a->compressor.write = archive_compressor_compress_write;
 | |
| 	a->compressor.finish = archive_compressor_compress_finish;
 | |
| 
 | |
| 	state->max_maxcode = 0x10000;	/* Should NEVER generate this code. */
 | |
| 	state->in_count = 0;		/* Length of input. */
 | |
| 	state->bit_buf = 0;
 | |
| 	state->bit_offset = 0;
 | |
| 	state->out_count = 3;		/* Includes 3-byte header mojo. */
 | |
| 	state->compress_ratio = 0;
 | |
| 	state->checkpoint = CHECK_GAP;
 | |
| 	state->code_len = 9;
 | |
| 	state->cur_maxcode = MAXCODE(state->code_len);
 | |
| 	state->first_free = FIRST;
 | |
| 
 | |
| 	memset(state->hashtab, 0xff, sizeof(state->hashtab));
 | |
| 
 | |
| 	/* Prime output buffer with a gzip header. */
 | |
| 	state->compressed[0] = 0x1f; /* Compress */
 | |
| 	state->compressed[1] = 0x9d;
 | |
| 	state->compressed[2] = 0x90; /* Block mode, 16bit max */
 | |
| 	state->compressed_offset = 3;
 | |
| 
 | |
| 	a->compressor.data = state;
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
/*-
 * Code output (see output_code() below).
 * Inputs:
 * 	code:	A code_len-bit integer.
 * Outputs:
 * 	Appends the code to the compressed stream, low-order bits first.
 * Assumptions:
 *	Chars are 8 bits long.
 * Algorithm:
 * 	Codes are packed back to back in groups of eight, so that a
 * group always ends on a byte boundary.  Whenever the code width
 * changes, the unfinished group is padded out to its full length.
 *
 * rmask[n] keeps the n low-order bits of a byte (n = 0..8).  The table
 * is only ever read, so it is const.
 */

static const unsigned char rmask[9] =
	{0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
 | |
| 
 | |
| static int
 | |
| output_byte(struct archive_write *a, unsigned char c)
 | |
| {
 | |
| 	struct private_data *state = a->compressor.data;
 | |
| 	ssize_t bytes_written;
 | |
| 
 | |
| 	state->compressed[state->compressed_offset++] = c;
 | |
| 	++state->out_count;
 | |
| 
 | |
| 	if (state->compressed_buffer_size == state->compressed_offset) {
 | |
| 		bytes_written = (a->client_writer)(&a->archive,
 | |
| 		    a->client_data,
 | |
| 		    state->compressed, state->compressed_buffer_size);
 | |
| 		if (bytes_written <= 0)
 | |
| 			return ARCHIVE_FATAL;
 | |
| 		a->archive.raw_position += bytes_written;
 | |
| 		state->compressed_offset = 0;
 | |
| 	}
 | |
| 
 | |
| 	return ARCHIVE_OK;
 | |
| }
 | |
| 
 | |
| static int
 | |
| output_code(struct archive_write *a, int ocode)
 | |
| {
 | |
| 	struct private_data *state = a->compressor.data;
 | |
| 	int bits, ret, clear_flg, bit_offset;
 | |
| 
 | |
| 	clear_flg = ocode == CLEAR;
 | |
| 
 | |
| 	/*
 | |
| 	 * Since ocode is always >= 8 bits, only need to mask the first
 | |
| 	 * hunk on the left.
 | |
| 	 */
 | |
| 	bit_offset = state->bit_offset % 8;
 | |
| 	state->bit_buf |= (ocode << bit_offset) & 0xff;
 | |
| 	output_byte(a, state->bit_buf);
 | |
| 
 | |
| 	bits = state->code_len - (8 - bit_offset);
 | |
| 	ocode >>= 8 - bit_offset;
 | |
| 	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
 | |
| 	if (bits >= 8) {
 | |
| 		output_byte(a, ocode & 0xff);
 | |
| 		ocode >>= 8;
 | |
| 		bits -= 8;
 | |
| 	}
 | |
| 	/* Last bits. */
 | |
| 	state->bit_offset += state->code_len;
 | |
| 	state->bit_buf = ocode & rmask[bits];
 | |
| 	if (state->bit_offset == state->code_len * 8)
 | |
| 		state->bit_offset = 0;
 | |
| 
 | |
| 	/*
 | |
| 	 * If the next entry is going to be too big for the ocode size,
 | |
| 	 * then increase it, if possible.
 | |
| 	 */
 | |
| 	if (clear_flg || state->first_free > state->cur_maxcode) {
 | |
| 	       /*
 | |
| 		* Write the whole buffer, because the input side won't
 | |
| 		* discover the size increase until after it has read it.
 | |
| 		*/
 | |
| 		if (state->bit_offset > 0) {
 | |
| 			while (state->bit_offset < state->code_len * 8) {
 | |
| 				ret = output_byte(a, state->bit_buf);
 | |
| 				if (ret != ARCHIVE_OK)
 | |
| 					return ret;
 | |
| 				state->bit_offset += 8;
 | |
| 				state->bit_buf = 0;
 | |
| 			}
 | |
| 		}
 | |
| 		state->bit_buf = 0;
 | |
| 		state->bit_offset = 0;
 | |
| 
 | |
| 		if (clear_flg) {
 | |
| 			state->code_len = 9;
 | |
| 			state->cur_maxcode = MAXCODE(state->code_len);
 | |
| 		} else {
 | |
| 			state->code_len++;
 | |
| 			if (state->code_len == 16)
 | |
| 				state->cur_maxcode = state->max_maxcode;
 | |
| 			else
 | |
| 				state->cur_maxcode = MAXCODE(state->code_len);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return (ARCHIVE_OK);
 | |
| }
 | |
| 
 | |
| static int
 | |
| output_flush(struct archive_write *a)
 | |
| {
 | |
| 	struct private_data *state = a->compressor.data;
 | |
| 	int ret;
 | |
| 
 | |
| 	/* At EOF, write the rest of the buffer. */
 | |
| 	if (state->bit_offset % 8) {
 | |
| 		state->code_len = (state->bit_offset % 8 + 7) / 8;
 | |
| 		ret = output_byte(a, state->bit_buf);
 | |
| 		if (ret != ARCHIVE_OK)
 | |
| 			return ret;
 | |
| 	}
 | |
| 
 | |
| 	return (ARCHIVE_OK);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Write data to the compressed stream.
 | |
|  */
 | |
| static int
 | |
| archive_compressor_compress_write(struct archive_write *a, const void *buff,
 | |
|     size_t length)
 | |
| {
 | |
| 	struct private_data *state;
 | |
| 	int i;
 | |
| 	int ratio;
 | |
| 	int c, disp, ret;
 | |
| 	const unsigned char *bp;
 | |
| 
 | |
| 	state = (struct private_data *)a->compressor.data;
 | |
| 	if (a->client_writer == NULL) {
 | |
| 		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
 | |
| 		    "No write callback is registered?  "
 | |
| 		    "This is probably an internal programming error.");
 | |
| 		return (ARCHIVE_FATAL);
 | |
| 	}
 | |
| 
 | |
| 	if (length == 0)
 | |
| 		return ARCHIVE_OK;
 | |
| 
 | |
| 	bp = buff;
 | |
| 
 | |
| 	if (state->in_count == 0) {
 | |
| 		state->cur_code = *bp++;
 | |
| 		++state->in_count;
 | |
| 		--length;
 | |
| 	}
 | |
| 
 | |
| 	while (length--) {
 | |
| 		c = *bp++;
 | |
| 		state->in_count++;
 | |
| 		state->cur_fcode = (c << 16) + state->cur_code;
 | |
| 		i = ((c << HSHIFT) ^ state->cur_code);	/* Xor hashing. */
 | |
| 
 | |
| 		if (state->hashtab[i] == state->cur_fcode) {
 | |
| 			state->cur_code = state->codetab[i];
 | |
| 			continue;
 | |
| 		}
 | |
| 		if (state->hashtab[i] < 0)	/* Empty slot. */
 | |
| 			goto nomatch;
 | |
| 		/* Secondary hash (after G. Knott). */
 | |
| 		if (i == 0)
 | |
| 			disp = 1;
 | |
| 		else
 | |
| 			disp = HSIZE - i;
 | |
|  probe:		
 | |
| 		if ((i -= disp) < 0)
 | |
| 			i += HSIZE;
 | |
| 
 | |
| 		if (state->hashtab[i] == state->cur_fcode) {
 | |
| 			state->cur_code = state->codetab[i];
 | |
| 			continue;
 | |
| 		}
 | |
| 		if (state->hashtab[i] >= 0)
 | |
| 			goto probe;
 | |
|  nomatch:	
 | |
| 		ret = output_code(a, state->cur_code);
 | |
| 		if (ret != ARCHIVE_OK)
 | |
| 			return ret;
 | |
| 		state->cur_code = c;
 | |
| 		if (state->first_free < state->max_maxcode) {
 | |
| 			state->codetab[i] = state->first_free++;	/* code -> hashtable */
 | |
| 			state->hashtab[i] = state->cur_fcode;
 | |
| 			continue;
 | |
| 		}
 | |
| 		if (state->in_count < state->checkpoint)
 | |
| 			continue;
 | |
| 
 | |
| 		state->checkpoint = state->in_count + CHECK_GAP;
 | |
| 
 | |
| 		if (state->in_count <= 0x007fffff)
 | |
| 			ratio = state->in_count * 256 / state->out_count;
 | |
| 		else if ((ratio = state->out_count / 256) == 0)
 | |
| 			ratio = 0x7fffffff;
 | |
| 		else
 | |
| 			ratio = state->in_count / ratio;
 | |
| 
 | |
| 		if (ratio > state->compress_ratio)
 | |
| 			state->compress_ratio = ratio;
 | |
| 		else {
 | |
| 			state->compress_ratio = 0;
 | |
| 			memset(state->hashtab, 0xff, sizeof(state->hashtab));
 | |
| 			state->first_free = FIRST;
 | |
| 			ret = output_code(a, CLEAR);
 | |
| 			if (ret != ARCHIVE_OK)
 | |
| 				return ret;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return (ARCHIVE_OK);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * Finish the compression...
 | |
|  */
 | |
| static int
 | |
| archive_compressor_compress_finish(struct archive_write *a)
 | |
| {
 | |
| 	ssize_t block_length, target_block_length, bytes_written;
 | |
| 	int ret;
 | |
| 	struct private_data *state;
 | |
| 	size_t tocopy;
 | |
| 
 | |
| 	state = (struct private_data *)a->compressor.data;
 | |
| 	if (a->client_writer == NULL) {
 | |
| 		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
 | |
| 		    "No write callback is registered?  "
 | |
| 		    "This is probably an internal programming error.");
 | |
| 		ret = ARCHIVE_FATAL;
 | |
| 		goto cleanup;
 | |
| 	}
 | |
| 
 | |
| 	/* By default, always pad the uncompressed data. */
 | |
| 	if (a->pad_uncompressed) {
 | |
| 		while (state->in_count % a->bytes_per_block != 0) {
 | |
| 			tocopy = a->bytes_per_block -
 | |
| 			    (state->in_count % a->bytes_per_block);
 | |
| 			if (tocopy > a->null_length)
 | |
| 				tocopy = a->null_length;
 | |
| 			ret = archive_compressor_compress_write(a, a->nulls,
 | |
| 			    tocopy);
 | |
| 			if (ret != ARCHIVE_OK)
 | |
| 				goto cleanup;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	ret = output_code(a, state->cur_code);
 | |
| 	if (ret != ARCHIVE_OK)
 | |
| 		goto cleanup;
 | |
| 	ret = output_flush(a);
 | |
| 	if (ret != ARCHIVE_OK)
 | |
| 		goto cleanup;
 | |
| 
 | |
| 	/* Optionally, pad the final compressed block. */
 | |
| 	block_length = state->compressed_offset;
 | |
| 
 | |
| 	/* Tricky calculation to determine size of last block. */
 | |
| 	if (a->bytes_in_last_block <= 0)
 | |
| 		/* Default or Zero: pad to full block */
 | |
| 		target_block_length = a->bytes_per_block;
 | |
| 	else
 | |
| 		/* Round length to next multiple of bytes_in_last_block. */
 | |
| 		target_block_length = a->bytes_in_last_block *
 | |
| 		    ( (block_length + a->bytes_in_last_block - 1) /
 | |
| 			a->bytes_in_last_block);
 | |
| 	if (target_block_length > a->bytes_per_block)
 | |
| 		target_block_length = a->bytes_per_block;
 | |
| 	if (block_length < target_block_length) {
 | |
| 		memset(state->compressed + state->compressed_offset, 0,
 | |
| 		    target_block_length - block_length);
 | |
| 		block_length = target_block_length;
 | |
| 	}
 | |
| 
 | |
| 	/* Write the last block */
 | |
| 	bytes_written = (a->client_writer)(&a->archive, a->client_data,
 | |
| 	    state->compressed, block_length);
 | |
| 	if (bytes_written <= 0)
 | |
| 		ret = ARCHIVE_FATAL;
 | |
| 	else
 | |
| 		a->archive.raw_position += bytes_written;
 | |
| 
 | |
| cleanup:
 | |
| 	free(state->compressed);
 | |
| 	free(state);
 | |
| 	return (ret);
 | |
| }
 | 
