/*	$NetBSD: aes_via.c,v 1.6 2020/07/28 14:01:35 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.6 2020/07/28 14:01:35 riastradh Exp $");

#ifdef _KERNEL
#include <sys/types.h>
#include <sys/evcnt.h>
#include <sys/systm.h>
#else
#include <assert.h>
#include <err.h>
#include <stdint.h>
#include <string.h>
#define	KASSERT			assert
#define	panic(fmt, args...)	err(1, fmt, args)
struct evcnt { uint64_t ev_count; };
#define	EVCNT_INITIALIZER(a,b,c,d)	{0}
#define	EVCNT_ATTACH_STATIC(name)	static char name##_attach __unused = 0
#endif

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>
#include <crypto/aes/aes_impl.h>

#ifdef _KERNEL
#include <x86/cpufunc.h>
#include <x86/cpuvar.h>
#include <x86/fpu.h>
#include <x86/specialreg.h>
#include <x86/via_padlock.h>
#else
#include <cpuid.h>
#define	fpu_kern_enter()	((void)0)
#define	fpu_kern_leave()	((void)0)
#define	C3_CRYPT_CWLO_ROUND_M		0x0000000f
#define	C3_CRYPT_CWLO_ALG_M		0x00000070
#define	C3_CRYPT_CWLO_ALG_AES		0x00000000
#define	C3_CRYPT_CWLO_KEYGEN_M		0x00000080
#define	C3_CRYPT_CWLO_KEYGEN_HW		0x00000000
#define	C3_CRYPT_CWLO_KEYGEN_SW		0x00000080
#define	C3_CRYPT_CWLO_NORMAL		0x00000000
#define	C3_CRYPT_CWLO_INTERMEDIATE	0x00000100
#define	C3_CRYPT_CWLO_ENCRYPT		0x00000000
#define	C3_CRYPT_CWLO_DECRYPT		0x00000200
#define	C3_CRYPT_CWLO_KEY128		0x0000000a	/* 128bit, 10 rds */
#define	C3_CRYPT_CWLO_KEY192		0x0000040c	/* 192bit, 12 rds */
#define	C3_CRYPT_CWLO_KEY256		0x0000080e	/* 256bit, 14 rds */
#endif

static void
aesvia_reload_keys(void)
{
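	/*
	 * The PadLock unit caches the expanded key between XCRYPT
	 * operations.  Loading EFLAGS -- even with an unchanged value,
	 * as this no-op pushf/popf pair does -- tells the unit that the
	 * key material at the address in %ebx may have changed, so the
	 * next XCRYPT reloads it instead of reusing the cached
	 * schedule.  Every entry point below calls this once, after
	 * fpu_kern_enter and before issuing any XCRYPT instructions.
	 */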
	asm volatile("pushf; popf");
}

static uint32_t
aesvia_keylen_cw0(unsigned nrounds)
{

	/*
	 * Determine the control word bits for the key size / number of
	 * rounds.  For AES-128, the hardware can do key expansion on
	 * the fly; for AES-192 and AES-256, software must do it.
	 */
	switch (nrounds) {
	case AES_128_NROUNDS:
		return C3_CRYPT_CWLO_KEY128;
	case AES_192_NROUNDS:
		return C3_CRYPT_CWLO_KEY192 | C3_CRYPT_CWLO_KEYGEN_SW;
	case AES_256_NROUNDS:
		return C3_CRYPT_CWLO_KEY256 | C3_CRYPT_CWLO_KEYGEN_SW;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
}

static void
aesvia_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		enc->aese_aes.aes_rk[0] = le32dec(key + 4*0);
		enc->aese_aes.aes_rk[1] = le32dec(key + 4*1);
		enc->aese_aes.aes_rk[2] = le32dec(key + 4*2);
		enc->aese_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stdenc(enc->aese_aes.aes_rk, key, key_len);
}

static void
aesvia_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		dec->aesd_aes.aes_rk[0] = le32dec(key + 4*0);
		dec->aesd_aes.aes_rk[1] = le32dec(key + 4*1);
		dec->aesd_aes.aes_rk[2] = le32dec(key + 4*2);
		dec->aesd_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stddec(dec->aesd_aes.aes_rk, key, key_len);
}

static inline void
aesvia_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(dec), "d"(cw)
	    : "memory", "cc");
}

static struct evcnt enc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc aligned");
EVCNT_ATTACH_STATIC(enc_aligned_evcnt);
static struct evcnt enc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc unaligned");
EVCNT_ATTACH_STATIC(enc_unaligned_evcnt);
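/*
 * aesvia_enc and aesvia_dec drive the hardware in place only when the
 * input and output are both 16-byte aligned and the input block does
 * not occupy the last 16 bytes of a page (the (in & 0xff0) == 0xff0
 * case), because xcrypt-ecb may read one block past the one it is
 * working on.  Otherwise the block is bounced through aligned buffers
 * on the stack.
 */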
static void
aesvia_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		enc_aligned_evcnt.ev_count++;
		aesvia_encN(enc, in, out, 1, cw0);
	} else {
		enc_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_encN(enc, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static struct evcnt dec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec aligned");
EVCNT_ATTACH_STATIC(dec_aligned_evcnt);
static struct evcnt dec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec unaligned");
EVCNT_ATTACH_STATIC(dec_unaligned_evcnt);

static void
aesvia_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		dec_aligned_evcnt.ev_count++;
		aesvia_decN(dec, in, out, 1, cw0);
	} else {
		dec_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_decN(dec, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static inline void
aesvia_cbc_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t **ivp, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)*ivp & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * - Updates *ivp to point at the last block of out.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out), "+a"(*ivp)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}
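/*
 * Note the asymmetry in how the hardware hands back the chaining
 * value: for CBC encryption, %eax is advanced to the last ciphertext
 * block written, which is the next IV, so aesvia_cbc_encN updates
 * *ivp; for CBC decryption, the next IV is the last *input* block,
 * which the hardware stores at the address passed in %eax, so
 * aesvia_cbc_decN passes iv unchanged and relies on that store to
 * update it in place.
 */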
static inline void
aesvia_cbc_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t iv[static 16],
    uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)iv & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * Memory side effects:
	 * - Writes what was the last block of in at the address iv.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "a"(iv), "b"(dec), "d"(cw)
	    : "memory", "cc");
}

static inline void
xor128(void *x, const void *a, const void *b)
{
	uint32_t *x32 = x;
	const uint32_t *a32 = a;
	const uint32_t *b32 = b;

	x32[0] = a32[0] ^ b32[0];
	x32[1] = a32[1] ^ b32[1];
	x32[2] = a32[2] ^ b32[2];
	x32[3] = a32[3] ^ b32[3];
}

static struct evcnt cbcenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc aligned");
EVCNT_ATTACH_STATIC(cbcenc_aligned_evcnt);
static struct evcnt cbcenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc unaligned");
EVCNT_ATTACH_STATIC(cbcenc_unaligned_evcnt);

static void
aesvia_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);

	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcenc_aligned_evcnt.ev_count++;
		uint8_t *ivp = iv;
		aesvia_cbc_encN(enc, in, out, nbytes/16, &ivp, cw0);
		memcpy(iv, ivp, 16);
	} else {
		cbcenc_unaligned_evcnt.ev_count++;
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(cv, iv, 16);
		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(tmp, in, 16);
			xor128(tmp, tmp, cv);
			aesvia_encN(enc, tmp, cv, 1, cw0);
			memcpy(out, cv, 16);
		}
		memcpy(iv, cv, 16);
	}
	fpu_kern_leave();
}

static struct evcnt cbcdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec aligned");
EVCNT_ATTACH_STATIC(cbcdec_aligned_evcnt);
static struct evcnt cbcdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec unaligned");
EVCNT_ATTACH_STATIC(cbcdec_unaligned_evcnt);

static void
aesvia_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);

	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcdec_aligned_evcnt.ev_count++;
		aesvia_cbc_decN(dec, in, out, nbytes/16, iv, cw0);
	} else {
		cbcdec_unaligned_evcnt.ev_count++;
		uint8_t iv0[16] __aligned(16);
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(iv0, iv, 16);
		memcpy(cv, in + nbytes - 16, 16);
		memcpy(iv, cv, 16);

		for (;;) {
			aesvia_decN(dec, cv, tmp, 1, cw0);
			if ((nbytes -= 16) == 0)
				break;
			memcpy(cv, in + nbytes - 16, 16);
			xor128(tmp, tmp, cv);
			memcpy(out + nbytes, tmp, 16);
		}
		xor128(tmp, tmp, iv0);
		memcpy(out, tmp, 16);

		explicit_memset(tmp, 0, sizeof tmp);
	}
	fpu_kern_leave();
}

static inline void
aesvia_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t s0, s1, s2, s3;

	s0 = *t0 >> 31;
	s1 = *t1 >> 31;
	s2 = *t2 >> 31;
	s3 = *t3 >> 31;
	*t0 = (*t0 << 1) ^ (-s3 & 0x87);
	*t1 = (*t1 << 1) ^ s0;
	*t2 = (*t2 << 1) ^ s1;
	*t3 = (*t3 << 1) ^ s2;
}
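/*
 * aesvia_xts_update multiplies the 128-bit tweak by x in GF(2^128)
 * modulo the XTS polynomial x^128 + x^7 + x^2 + x + 1.  The tweak is
 * held as four 32-bit little-endian limbs, t0 least significant, so
 * doubling shifts each limb left by one, carries the top bit of each
 * limb into the next, and folds the bit shifted out of t3 back into
 * t0 as 0x87 (x^7 + x^2 + x + 1).  The self-test below exercises each
 * carry position.  The XTS routines then compute
 * C_i = E_k(P_i ^ T_i) ^ T_i, advancing the tweak with this update
 * after every block.
 */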
static int
aesvia_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	unsigned i;
	uint32_t t0, t1, t2, t3;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		t0 = cases[i].in[0];
		t1 = cases[i].in[1];
		t2 = cases[i].in[2];
		t3 = cases[i].in[3];
		aesvia_xts_update(&t0, &t1, &t2, &t3);
		if (t0 != cases[i].out[0] ||
		    t1 != cases[i].out[1] ||
		    t2 != cases[i].out[2] ||
		    t3 != cases[i].out[3])
			return -1;
	}

	/* Success!  */
	return 0;
}

static struct evcnt xtsenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc aligned");
EVCNT_ATTACH_STATIC(xtsenc_aligned_evcnt);
static struct evcnt xtsenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc unaligned");
EVCNT_ATTACH_STATIC(xtsenc_unaligned_evcnt);

static void
aesvia_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsenc_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_encN(enc, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;

			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_encN(enc, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_encN(enc, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsenc_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_encN(enc, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static struct evcnt xtsdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec aligned");
EVCNT_ATTACH_STATIC(xtsdec_aligned_evcnt);
static struct evcnt xtsdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec unaligned");
EVCNT_ATTACH_STATIC(xtsdec_unaligned_evcnt);
static void
aesvia_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsdec_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_decN(dec, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;

			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_decN(dec, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_decN(dec, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsdec_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_decN(dec, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static struct evcnt cbcmac_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcmac aligned");
EVCNT_ATTACH_STATIC(cbcmac_aligned_evcnt);
static struct evcnt cbcmac_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcmac unaligned");
EVCNT_ATTACH_STATIC(cbcmac_unaligned_evcnt);

static void
aesvia_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authbuf[16] __aligned(16);
	uint8_t *auth = auth0;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	if ((uintptr_t)auth0 & 0xf) {
		memcpy(authbuf, auth0, 16);
		auth = authbuf;
		cbcmac_unaligned_evcnt.ev_count++;
	} else {
		cbcmac_aligned_evcnt.ev_count++;
	}

	fpu_kern_enter();
	aesvia_reload_keys();
	for (; nbytes; nbytes -= 16, in += 16) {
		xor128(auth, auth, in);
		aesvia_encN(enc, auth, auth, 1, cw0);
	}
	fpu_kern_leave();

	if ((uintptr_t)auth0 & 0xf) {
		memcpy(auth0, authbuf, 16);
		explicit_memset(authbuf, 0, sizeof authbuf);
	}
}

static struct evcnt ccmenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmenc aligned");
EVCNT_ATTACH_STATIC(ccmenc_aligned_evcnt);
static struct evcnt ccmenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmenc unaligned");
EVCNT_ATTACH_STATIC(ccmenc_unaligned_evcnt);
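/*
 * For the CCM routines, authctr0[0..15] is the running CBC-MAC state
 * and authctr0[16..31] is the current counter block: its first twelve
 * bytes are carried unchanged in c0..c2, and the 32-bit block counter
 * lives big-endian in the last four bytes (c3).  Keeping both halves
 * in one 32-byte buffer lets a single two-block REP XCRYPTECB advance
 * the CBC-MAC and produce the next keystream block at the same time.
 */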
static void
aesvia_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authctrbuf[32] __aligned(16);
	uint8_t *authctr;
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctrbuf, authctr0, 16);
		authctr = authctrbuf;
		ccmenc_unaligned_evcnt.ev_count++;
	} else {
		authctr = authctr0;
		ccmenc_aligned_evcnt.ev_count++;
	}
	c0 = le32dec(authctr0 + 16 + 4*0);
	c1 = le32dec(authctr0 + 16 + 4*1);
	c2 = le32dec(authctr0 + 16 + 4*2);
	c3 = be32dec(authctr0 + 16 + 4*3);

	/*
	 * In principle we could use REP XCRYPTCTR here, but that
	 * doesn't help to compute the CBC-MAC step, and certain VIA
	 * CPUs have some weird errata with REP XCRYPTCTR that make it
	 * kind of a pain to use.  So let's just use REP XCRYPTECB to
	 * simultaneously compute the CBC-MAC step and the CTR step.
	 * (Maybe some VIA CPUs will compute REP XCRYPTECB in parallel,
	 * who knows...)
	 */
	fpu_kern_enter();
	aesvia_reload_keys();
	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
		xor128(authctr, authctr, in);
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		be32enc(authctr + 16 + 4*3, ++c3);
		aesvia_encN(enc, authctr, authctr, 2, cw0);
		xor128(out, in, authctr + 16);
	}
	fpu_kern_leave();

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctr0, authctrbuf, 16);
		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
	}
	le32enc(authctr0 + 16 + 4*0, c0);
	le32enc(authctr0 + 16 + 4*1, c1);
	le32enc(authctr0 + 16 + 4*2, c2);
	be32enc(authctr0 + 16 + 4*3, c3);
}

static struct evcnt ccmdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmdec aligned");
EVCNT_ATTACH_STATIC(ccmdec_aligned_evcnt);
static struct evcnt ccmdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmdec unaligned");
EVCNT_ATTACH_STATIC(ccmdec_unaligned_evcnt);

static void
aesvia_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authctrbuf[32] __aligned(16);
	uint8_t *authctr;
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	c0 = le32dec(authctr0 + 16 + 4*0);
	c1 = le32dec(authctr0 + 16 + 4*1);
	c2 = le32dec(authctr0 + 16 + 4*2);
	c3 = be32dec(authctr0 + 16 + 4*3);

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctrbuf, authctr0, 16);
		authctr = authctrbuf;
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		ccmdec_unaligned_evcnt.ev_count++;
	} else {
		authctr = authctr0;
		ccmdec_aligned_evcnt.ev_count++;
	}

	fpu_kern_enter();
	aesvia_reload_keys();
	be32enc(authctr + 16 + 4*3, ++c3);
	aesvia_encN(enc, authctr + 16, authctr + 16, 1, cw0);
	for (;; in += 16, out += 16) {
		xor128(out, authctr + 16, in);
		xor128(authctr, authctr, out);
		if ((nbytes -= 16) == 0)
			break;
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		be32enc(authctr + 16 + 4*3, ++c3);
		aesvia_encN(enc, authctr, authctr, 2, cw0);
	}
	aesvia_encN(enc, authctr, authctr, 1, cw0);
	fpu_kern_leave();

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctr0, authctrbuf, 16);
		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
	}
	le32enc(authctr0 + 16 + 4*0, c0);
	le32enc(authctr0 + 16 + 4*1, c1);
	le32enc(authctr0 + 16 + 4*2, c2);
	be32enc(authctr0 + 16 + 4*3, c3);
}
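/*
 * Probe strategy: in the kernel, consult the cached CPUID feature
 * words; in userland, walk CPUID directly -- leaf 0 must return the
 * Centaur vendor signature, and extended leaf 0xc0000001 %edx
 * advertises the PadLock units (bits 6/7: ACE present/enabled, bits
 * 8/9: ACE2 present/enabled, hence the 0xc0 and 0x300 masks below).
 * Either way, finish with the XTS tweak update self-test.
 */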
static int
aesvia_probe(void)
{

	/* Verify that the CPU advertises VIA ACE support.  */
#ifdef _KERNEL
	if ((cpu_feature[4] & CPUID_VIA_HAS_ACE) == 0)
		return -1;
#else
	/*
	 * From the VIA PadLock Programming Guide:
	 * http://linux.via.com.tw/support/beginDownload.action?eleid=181&fid=261
	 */
	unsigned eax, ebx, ecx, edx;
	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return -1;
	if (ebx != signature_CENTAUR_ebx ||
	    ecx != signature_CENTAUR_ecx ||
	    edx != signature_CENTAUR_edx)
		return -1;
	if (eax < 0xc0000000)
		return -1;
	if (!__get_cpuid(0xc0000000, &eax, &ebx, &ecx, &edx))
		return -1;
	if (eax < 0xc0000001)
		return -1;
	if (!__get_cpuid(0xc0000001, &eax, &ebx, &ecx, &edx))
		return -1;
	/* Check that ACE and ACE2 are both supported and enabled.  */
	if ((edx & 0x000000c0) != 0x000000c0 ||
	    (edx & 0x00000300) != 0x00000300)
		return -1;
#endif

	/* Verify that our XTS tweak update logic works.  */
	if (aesvia_xts_update_selftest())
		return -1;

	/* Success!  */
	return 0;
}

struct aes_impl aes_via_impl = {
	.ai_name = "VIA ACE",
	.ai_probe = aesvia_probe,
	.ai_setenckey = aesvia_setenckey,
	.ai_setdeckey = aesvia_setdeckey,
	.ai_enc = aesvia_enc,
	.ai_dec = aesvia_dec,
	.ai_cbc_enc = aesvia_cbc_enc,
	.ai_cbc_dec = aesvia_cbc_dec,
	.ai_xts_enc = aesvia_xts_enc,
	.ai_xts_dec = aesvia_xts_dec,
	.ai_cbcmac_update1 = aesvia_cbcmac_update1,
	.ai_ccm_enc1 = aesvia_ccm_enc1,
	.ai_ccm_dec1 = aesvia_ccm_dec1,
};