/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Tom Truscott.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef CRYPT_H
#define CRYPT_H 1
/* ===== Configuration ==================== */
#ifdef CHAR_BITS
#if CHAR_BITS != 8
#error C_block structure assumes 8 bit characters
#endif
#endif
#ifndef LONG_LONG
# if SIZEOF_LONG_LONG > 0
# define LONG_LONG long long
# elif SIZEOF___INT64 > 0
# define HAVE_LONG_LONG 1
# define LONG_LONG __int64
# undef SIZEOF_LONG_LONG
# define SIZEOF_LONG_LONG SIZEOF___INT64
# endif
#endif
/*
* define "LONG_IS_32_BITS" only if sizeof(long)==4.
* This avoids use of bit fields (your compiler may be sloppy with them).
*/
#if SIZEOF_LONG == 4
#define LONG_IS_32_BITS
#endif
/*
* define "B64" to be the declaration for a 64 bit integer.
* XXX this feature is currently unused, see "endian" comment below.
*/
#if SIZEOF_LONG == 8
#define B64 long
#elif SIZEOF_LONG_LONG == 8
#define B64 LONG_LONG
#endif
/*
* define "LARGEDATA" to get faster permutations, by using about 72 kilobytes
* of lookup tables. This speeds up des_setkey() and des_cipher(), but has
* little effect on crypt().
*/
#if defined(notdef)
#define LARGEDATA
#endif
/* compile with "-DSTATIC=int" when profiling */
#ifndef STATIC
#define STATIC static
#endif
/* ==================================== */
/*
* Cipher-block representation (Bob Baldwin):
*
* DES operates on groups of 64 bits, numbered 1..64 (sigh). One
* representation is to store one bit per byte in an array of bytes. Bit N of
* the NBS spec is stored as the LSB of the Nth byte (index N-1) in the array.
* Another representation stores the 64 bits in 8 bytes, with bits 1..8 in the
* first byte, 9..16 in the second, and so on. The DES spec apparently has
* bit 1 in the MSB of the first byte, but that is particularly noxious so we
* bit-reverse each byte so that bit 1 is the LSB of the first byte, bit 8 is
* the MSB of the first byte. Specifically, the 64-bit input data and key are
* converted to LSB format, and the output 64-bit block is converted back into
* MSB format.
*
* DES operates internally on groups of 32 bits which are expanded to 48 bits
* by permutation E and shrunk back to 32 bits by the S boxes. To speed up
* the computation, the expansion is applied only once, the expanded
* representation is maintained during the encryption, and a compression
* permutation is applied only at the end. To speed up the S-box lookups,
* the 48 bits are maintained as eight 6 bit groups, one per byte, which
* directly feed the eight S-boxes. Within each byte, the 6 bits are the
* most significant ones. The low two bits of each byte are zero. (Thus,
* bit 1 of the 48 bit E expansion is stored as the "4"-valued bit of the
* first byte in the eight byte representation, bit 2 of the 48 bit value is
* the "8"-valued bit, and so on.) In fact, a combined "SPE"-box lookup is
* used, in which the output is the 64 bit result of an S-box lookup which
* has been permuted by P and expanded by E, and is ready for use in the next
* iteration. Two 32-bit wide tables, SPE[0] and SPE[1], are used for this
* lookup. Since each byte in the 48 bit path is a multiple of four, indexed
* lookup of SPE[0] and SPE[1] is simple and fast. The key schedule and
* "salt" are also converted to this 8*(6+2) format. The SPE table size is
* 8*64*8 = 4K bytes.
*
* To speed up bit-parallel operations (such as XOR), the 8 byte
* representation is "union"ed with 32 bit values "i0" and "i1", and, on
* machines which support it, a 64 bit value "b64". This data structure,
* "C_block", has two problems. First, alignment restrictions must be
* honored. Second, the byte-order (e.g. little-endian or big-endian) of
* the architecture becomes visible.
*
* The byte-order problem is unfortunate, since on the one hand it is good
* to have a machine-independent C_block representation (bits 1..8 in the
* first byte, etc.), and on the other hand it is good for the LSB of the
* first byte to be the LSB of i0. We cannot have both these things, so we
* currently use the "little-endian" representation and avoid any multi-byte
* operations that depend on byte order. This largely precludes use of the
* 64-bit datatype since the relative order of i0 and i1 are unknown. It
* also inhibits grouping the SPE table to look up 12 bits at a time. (The
* 12 bits can be stored in a 16-bit field with 3 low-order zeroes and 1
* high-order zero, providing fast indexing into a 64-bit wide SPE.) On the
* other hand, 64-bit datatypes are currently rare, and a 12-bit SPE lookup
* requires a 128 kilobyte table, so perhaps this is not a big loss.
*
* Permutation representation (Jim Gillogly):
*
* A transformation is defined by its effect on each of the 8 bytes of the
* 64-bit input. For each byte we give a 64-bit output that has the bits in
* the input distributed appropriately. The transformation is then the OR
* of the 8 sets of 64-bits. This uses 8*256*8 = 16K bytes of storage for
* each transformation. Unless LARGEDATA is defined, however, a more compact
* table is used which looks up 16 4-bit "chunks" rather than 8 8-bit chunks.
* The smaller table uses 16*16*8 = 2K bytes for each transformation. This
* is slower but tolerable, particularly for password encryption in which
* the SPE transformation is iterated many times. The small tables total 9K
* bytes, the large tables total 72K bytes.
*
* The transformations used are:
* IE3264: MSB->LSB conversion, initial permutation, and expansion.
* This is done by collecting the 32 even-numbered bits and applying
* a 32->64 bit transformation, and then collecting the 32 odd-numbered
* bits and applying the same transformation. Since there are only
* 32 input bits, the IE3264 transformation table is half the size of
* the usual table.
* CF6464: Compression, final permutation, and LSB->MSB conversion.
* This is done by two trivial 48->32 bit compressions to obtain
* a 64-bit block (the bit numbering is given in the "CIFP" table)
* followed by a 64->64 bit "cleanup" transformation. (It would
* be possible to group the bits in the 64-bit block so that 2
* identical 32->32 bit transformations could be used instead,
* saving a factor of 4 in space and possibly 2 in time, but
* byte-ordering and other complications rear their ugly head.
* Similar opportunities/problems arise in the key schedule
* transforms.)
* PC1ROT: MSB->LSB, PC1 permutation, rotate, and PC2 permutation.
* This admittedly baroque 64->64 bit transformation is used to
* produce the first code (in 8*(6+2) format) of the key schedule.
* PC2ROT[0]: Inverse PC2 permutation, rotate, and PC2 permutation.
* It would be possible to define 15 more transformations, each
* with a different rotation, to generate the entire key schedule.
* To save space, however, we instead permute each code into the
* next by using a transformation that "undoes" the PC2 permutation,
* rotates the code, and then applies PC2. Unfortunately, PC2
* transforms 56 bits into 48 bits, dropping 8 bits, so PC2 is not
* invertible. We get around that problem by using a modified PC2
* which retains the 8 otherwise-lost bits in the unused low-order
* bits of each byte. The low-order bits are cleared when the
* codes are stored into the key schedule.
* PC2ROT[1]: Same as PC2ROT[0], but with two rotations.
* This is faster than applying PC2ROT[0] twice,
*
* The Bell Labs "salt" (Bob Baldwin):
*
* The salting is a simple permutation applied to the 48-bit result of E.
* Specifically, if bit i (1 <= i <= 24) of the salt is set then bits i and
* i+24 of the result are swapped. The salt is thus a 24 bit number, with
* 16777216 possible values. (The original salt was 12 bits and could not
* swap bits 13..24 with 36..48.)
*
* It is possible, but ugly, to warp the SPE table to account for the salt
* permutation. Fortunately, the conditional bit swapping requires only
* about four machine instructions and can be done on-the-fly with about an
* 8% performance penalty.
*/
typedef union {
unsigned char b[8];
struct {
#if defined(LONG_IS_32_BITS)
/* long is often faster than a 32-bit bit field */
long i0;
long i1;
#else
long i0: 32;
long i1: 32;
#endif
} b32;
#if defined(B64)
B64 b64;
#endif
} C_block;
#if defined(LARGEDATA)
/* Waste memory like crazy. Also, do permutations in line */
#define LGCHUNKBITS 3
#define CHUNKBITS (1<