2026-02-10 00:37:00 +08:00 · 2015-01-15 14:24:20 +01:00
parent 8d2a562af1
commit 85d3162f45
4 changed files with 341 additions and 1 deletions
							
							
								
							
							
						
@@ -78,7 +78,7 @@ ocserv_SOURCES = main.c main-auth.c worker-vpn.c worker-auth.c tlslib.c \
	sup-config/file.c sup-config/file.h \
	sup-config/radius.c sup-config/radius.h \
	worker-bandwidth.c worker-bandwidth.h ctl.h main-ctl.h \
	vasprintf.c vasprintf.h \
	vasprintf.c vasprintf.h lzs.c lzs.h \
	proc-search.c proc-search.h \
	str.c str.h gettime.h $(CCAN_SOURCES) $(HTTP_PARSER_SOURCES) \
	$(PROTOBUF_SOURCES)
							
								
							
							
							
						
 
							
							
							
						
@@ -0,0 +1,330 @@
/*
 * OpenConnect (SSL + DTLS) VPN client
 *
 * Copyright © 2008-2014 Intel Corporation.
 * Copyright © 2008 Nick Andrew <nick@nick-andrew.net>
 *
 * Author: David Woodhouse <dwmw2@infradead.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * version 2.1, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 */
#include <config.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include "lzs.h"
#define GET_BITS(bits)							\
do {									\
	/* Strictly speaking, this check ought to be on			\
	 * (srclen < 1 + (bits_left < bits)). However, when bits == 9	\
	 * the (bits_left < bits) comparison is always true so it	\
	 * always comes out as (srclen < 2).				\
	 * And bits is only anything *other* than 9 when we're reading	\
	 * reading part of a match encoding. And in that case, there	\
	 * damn well ought to be an end marker (7 more bits) after	\
	 * what we're reading now, so it's perfectly OK to use		\
	 * (srclen < 2) in that case too. And a *lot* cheaper. */	\
	if (srclen < 2)							\
		return -EINVAL;						\
	/* Explicit comparison with 8 to optimise it into a tautology	\
	 * in the the bits == 9 case, because the compiler doesn't	\
	 * know that bits_left can never be larger than 8. */		\
	if (bits >= 8 || bits >= bits_left) {				\
		/* We need *all* the bits that are left in the current	\
		 * byte. Take them and bump the input pointer. */	\
		data = (src[0] << (bits - bits_left)) & ((1ULL << bits) - 1); \
		src++;							\
		srclen--;						\
		bits_left += 8 - bits;					\
		if (bits > 8 || bits_left < 8) {			\
			/* We need bits from the next byte too... */	\
			data |= src[0] >> bits_left;			\
			/* ...if we used *all* of them then (which can	\
			 * only happen if bits > 8), then bump the	\
			 * input pointer again so we never leave	\
			 * bits_left == 0. */				\
			if (bits > 8 && !bits_left) {			\
				bits_left = 8;				\
				src++;					\
				srclen--;				\
			}						\
		}							\
	} else {							\
		/* We need fewer bits than are left in the current byte */ \
		data = (src[0] >> (bits_left - bits)) & ((1ULL << bits) - 1); \
		bits_left -= bits;					\
	}								\
} while (0)
int lzs_decompress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen)
{
	int outlen = 0;
	int bits_left = 8; /* Bits left in the current byte at *src */
	uint32_t data;
	uint16_t offset, length;
	while (1) {
		/* Get 9 bits, which is the minimum and a common case */
		GET_BITS(9);
		/* 0bbbbbbbb is a literal byte. The loop gives a hint to
		 * the compiler that we expect to see a few of these. */
		while (data < 0x100) {
			if (outlen == dstlen)
				return -EFBIG;
			dst[outlen++] = data;
			GET_BITS(9);
		}
		/* 110000000 is the end marker */
		if (data == 0x180)
			return outlen;
		/* 11bbbbbbb is a 7-bit offset */
		offset = data & 0x7f;
		/* 10bbbbbbbbbbb is an 11-bit offset, so get the next 4 bits */
		if (data < 0x180) {
			GET_BITS(4);
			offset <<= 4;
			offset |= data;
		}
		/* This is a compressed sequence; now get the length */
		GET_BITS(2);
		if (data != 3) {
			/* 00, 01, 10 ==> 2, 3, 4 */
			length = data + 2;
		} else {
			GET_BITS(2);
			if (data != 3) {
				/* 1100, 1101, 1110 => 5, 6, 7 */
				length = data + 5;
			} else {
				/* For each 1111 prefix add 15 to the length. Then add
				   the value of final nybble. */
				length = 8;
				while (1) {
					GET_BITS(4);
					if (data != 15) {
						length += data;
						break;
					}
					length += 15;
				}
			}
		}
		if (offset > outlen)
			return -EINVAL;
		if (length + outlen > dstlen)
			return -EFBIG;
		while (length) {
			dst[outlen] = dst[outlen - offset];
			outlen++;
			length--;
		}
	}
	return -EINVAL;
}
#define PUT_BITS(nr, bits)					\
do {								\
	outbits <<= (nr);					\
	outbits |= (bits);					\
	nr_outbits += (nr);					\
	if ((nr) > 8) {						\
		nr_outbits -= 8;				\
		if (outpos == dstlen)				\
			return -EFBIG;				\
		dst[outpos++] = outbits >> nr_outbits;		\
	}							\
	if (nr_outbits >= 8) {					\
		nr_outbits -= 8;				\
		if (outpos == dstlen)				\
			return -EFBIG;				\
		dst[outpos++] = outbits >> nr_outbits;		\
	}							\
} while (0)
/*
 * Much of the compression algorithm used here is based very loosely on ideas
 * from isdn_lzscomp.c by Andre Beck: http://micky.ibh.de/~beck/stuff/lzs4i4l/
 */
int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen)
{
	int length, offset;
	int inpos = 0, outpos = 0;
	uint16_t longest_match_len;
	uint16_t hofs, longest_match_ofs;
	uint16_t hash;
	uint32_t outbits = 0;
	int nr_outbits = 0;
	/*
	 * This is theoretically a hash. But RAM is cheap and just loading the
	 * 16-bit value and using it as a hash is *much* faster.
	 */
#define HASH_BITS 16
#define HASH_TABLE_SIZE (1ULL << HASH_BITS)
#define HASH(p) (*(uint16_t *)(p))
	/*
	 * There are two data structures for tracking the history. The first
	 * is the true hash table, an array indexed by the hash value described
	 * above. It yields the offset in the input buffer at which the given
	 * hash was most recently seen. We use INVALID_OFS (0xffff) for none
	 * since we know IP packets are limited to 64KiB and we can never be
	 * *starting* a match at the penultimate byte of the packet.
	 */
#define INVALID_OFS 0xffff
	uint16_t hash_table[HASH_TABLE_SIZE]; /* Buffer offset for first match */
	/*
	 * The second data structure allows us to find the previous occurrences
	 * of the same hash value. It is a ring buffer containing links only for
	 * the latest MAX_HISTORY bytes of the input. The lookup for a given
	 * offset will yield the previous offset at which the same data hash
	 * value was found.
	 */
#define MAX_HISTORY (1<<11) /* Highest offset LZS can represent is 11 bits */
	uint16_t hash_chain[MAX_HISTORY];
	/* Just in case anyone tries to use this in a more general-purpose
	 * scenario... */
	if (srclen > INVALID_OFS + 1)
		return -EFBIG;
	/* No need to initialise hash_chain since we can only ever follow
	 * links to it that have already been initialised. */
	memset(hash_table, 0xff, sizeof(hash_table));
	while (inpos < srclen - 2) {
		hash = HASH(src + inpos);
		hofs = hash_table[hash];
		hash_chain[inpos & (MAX_HISTORY - 1)] = hofs;
		hash_table[hash] = inpos;
		if (hofs == INVALID_OFS || hofs + MAX_HISTORY <= inpos) {
			PUT_BITS(9, src[inpos]);
			inpos++;
			continue;
		}
		/* Since the hash is 16-bits, we *know* the first two bytes match */
		longest_match_len = 2;
		longest_match_ofs = hofs;
		for (; hofs != INVALID_OFS && hofs + MAX_HISTORY > inpos;
		     hofs = hash_chain[hofs & (MAX_HISTORY - 1)]) {
			/* We only get here if longest_match_len is >= 2. We need to find
			   a match of longest_match_len + 1 for it to be interesting. */
			if (!memcmp(src + hofs + 2, src + inpos + 2, longest_match_len - 1)) {
				longest_match_ofs = hofs;
				do {
					longest_match_len++;
					/* If we cannot *have* a longer match because we're at the
					 * end of the input, stop looking */
					if (longest_match_len + inpos == srclen)
						goto got_match;
				} while (src[longest_match_len + inpos] == src[longest_match_len + hofs]);
			}
			/* Typical compressor tuning would have a break out of the loop
			   here depending on the number of potential match locations we've
			   tried, or a value of longest_match_len that's considered "good
			   enough" so we stop looking for something better. We could also
			   do a hybrid where we count the total bytes compared, so 5
			   attempts to find a match better than 10 bytes is worth the same
			   as 10 attempts to find a match better than 5 bytes. Or
			   something. Anyway, we currently don't give up until we run out
			   of reachable history — maximal compression. */
		}
	got_match:
		/* Output offset, as 7-bit or 11-bit as appropriate */
		offset = inpos - longest_match_ofs;
		length = longest_match_len;
		if (offset < 0x80)
			PUT_BITS(9, 0x180 | offset);
		else
			PUT_BITS(13, 0x1000 | offset);
		/* Output length */
		if (length < 5)
			PUT_BITS(2, length - 2);
		else if (length < 8)
			PUT_BITS(4, length + 7);
		else {
			length += 7;
			while (length >= 30) {
				PUT_BITS(8, 0xff);
				length -= 30;
			}
			if (length >= 15)
				PUT_BITS(8, 0xf0 + length - 15);
			else
				PUT_BITS(4, length);
		}
		/* If we're already done, don't bother updating the hash tables. */
		if (inpos + longest_match_len >= srclen - 2) {
			inpos += longest_match_len;
			break;
		}
		/* We already added the first byte to the hash tables. Add the rest. */
		inpos++;
		while (--longest_match_len) {
			hash = HASH(src + inpos);
			hash_chain[inpos & (MAX_HISTORY - 1)] = hash_table[hash];
			hash_table[hash] = inpos++;
		}
	}
	/* Special cases at the end */
	if (inpos == srclen - 2) {
		hash = HASH(src + inpos);
		hofs = hash_table[hash];
		if (hofs != INVALID_OFS && hofs + MAX_HISTORY > inpos) {
			offset = inpos - hofs;
			if (offset < 0x80)
				PUT_BITS(9, 0x180 | offset);
			else
				PUT_BITS(13, 0x1000 | offset);
			/* The length is 2 bytes */
			PUT_BITS(2, 0);
		} else {
			PUT_BITS(9, src[inpos]);
			PUT_BITS(9, src[inpos + 1]);
		}
	} else if (inpos == srclen - 1) {
		PUT_BITS(9, src[inpos]);
	}
	/* End marker, with 7 trailing zero bits to ensure that it's flushed. */
	PUT_BITS(16, 0xc000);
	return outpos;
}
							
							
							
						
@@ -0,0 +1,2 @@
int lzs_decompress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen);
							
							
								
							
							
						
@@ -36,6 +36,7 @@
#ifdef HAVE_LZ4
# include <lz4.h>
#endif
#include "lzs.h"
#include <base64.h>
#include <c-strcase.h>
							
								
							
							
								
							
							
						
@@ -315,6 +316,13 @@ struct compression_method_st comp_methods[] = {
		.server_prio = 90,
	},
#endif
	{
		.id = OC_COMP_LZS,
		.name = "lzs",
		.decompress = (decompress_fn)lzs_decompress,
		.compress = (compress_fn)lzs_compress,
		.server_prio = 80,
	}
};