From df257a8e8a0df4415c61c2a93f6e4193ac7c6464 Mon Sep 17 00:00:00 2001 From: Sergey Lyubka Date: Sun, 9 Jun 2024 10:21:24 +0100 Subject: [PATCH] Add mtls and chacha20 --- mongoose.c | 1923 +++++++++++++++++++++++++++++++++++++------- mongoose.h | 188 ++++- src/net.c | 2 +- src/tls_builtin.c | 564 ++++++------- src/tls_chacha20.c | 1337 ++++++++++++++++++++++++++++++ src/tls_chacha20.h | 110 +++ src/tls_openssl.c | 16 + test/Makefile | 2 +- test/unit_test.c | 3 +- 9 files changed, 3504 insertions(+), 641 deletions(-) create mode 100644 src/tls_chacha20.c create mode 100644 src/tls_chacha20.h diff --git a/mongoose.c b/mongoose.c index e02c7f80..7311af6b 100644 --- a/mongoose.c +++ b/mongoose.c @@ -4705,7 +4705,7 @@ static bool mg_aton6(struct mg_str str, struct mg_addr *addr) { if ((str.buf[i] >= '0' && str.buf[i] <= '9') || (str.buf[i] >= 'a' && str.buf[i] <= 'f') || (str.buf[i] >= 'A' && str.buf[i] <= 'F')) { - unsigned long val; // TODO(): This loops on chars, refactor + unsigned long val = 0; // TODO(): This loops on chars, refactor if (i > j + 3) return false; // MG_DEBUG(("%lu %lu [%.*s]", i, j, (int) (i - j + 1), &str.buf[j])); mg_str_to_num(mg_str_n(&str.buf[j], i - j + 1), 16, &val, sizeof(val)); @@ -9421,8 +9421,16 @@ int mg_aes_gcm_decrypt(unsigned char *output, const unsigned char *input, + + + + + + #if MG_TLS == MG_TLS_BUILTIN +#define CHACHA20 1 + /* TLS 1.3 Record Content Type (RFC8446 B.1) */ #define MG_TLS_CHANGE_CIPHER 20 #define MG_TLS_ALERT 21 @@ -9435,6 +9443,7 @@ int mg_aes_gcm_decrypt(unsigned char *output, const unsigned char *input, #define MG_TLS_SERVER_HELLO 2 #define MG_TLS_ENCRYPTED_EXTENSIONS 8 #define MG_TLS_CERTIFICATE 11 +#define MG_TLS_CERTIFICATE_REQUEST 13 #define MG_TLS_CERTIFICATE_VERIFY 15 #define MG_TLS_FINISHED 20 @@ -9456,6 +9465,20 @@ enum mg_tls_hs_state { MG_TLS_STATE_SERVER_CONNECTED // Done }; +// encryption keys for a TLS connection +struct tls_enc { + uint32_t sseq; // server sequence number, used in encryption + 
uint32_t cseq; // client sequence number, used in decryption + // keys for AES encryption or ChaCha20 + uint8_t handshake_secret[32]; + uint8_t server_write_key[32]; + uint8_t server_write_iv[12]; + uint8_t server_finished_key[32]; + uint8_t client_write_key[32]; + uint8_t client_write_iv[12]; + uint8_t client_finished_key[32]; +}; + // per-connection TLS data struct tls_data { enum mg_tls_hs_state state; // keep track of connection handshake progress @@ -9468,31 +9491,22 @@ struct tls_data { mg_sha256_ctx sha256; // incremental SHA-256 hash for TLS handshake - uint32_t sseq; // server sequence number, used in encryption - uint32_t cseq; // client sequence number, used in decryption - uint8_t random[32]; // client random from ClientHello uint8_t session_id[32]; // client session ID between the handshake states uint8_t x25519_cli[32]; // client X25519 key between the handshake states uint8_t x25519_sec[32]; // x25519 secret between the handshake states - int skip_verification; // perform checks on server certificate? - struct mg_str server_cert_der; // server certificate in DER format - uint8_t server_key[32]; // server EC private key - char hostname[254]; // server hostname (client extension) + int skip_verification; // perform checks on server certificate? + int cert_requested; // client received a CertificateRequest? 
+ struct mg_str cert_der; // certificate in DER format + uint8_t ec_key[32]; // EC private key + char hostname[254]; // server hostname (client extension) uint8_t certhash[32]; // certificate message hash uint8_t pubkey[64]; // server EC public key to verify cert uint8_t sighash[32]; // server EC public key to verify cert - // keys for AES encryption - uint8_t handshake_secret[32]; - uint8_t server_write_key[16]; - uint8_t server_write_iv[12]; - uint8_t server_finished_key[32]; - uint8_t client_write_key[16]; - uint8_t client_write_iv[12]; - uint8_t client_finished_key[32]; + struct tls_enc enc; }; #define MG_LOAD_BE16(p) ((uint16_t) ((MG_U8P(p)[0] << 8U) | MG_U8P(p)[1])) @@ -9507,15 +9521,7 @@ struct tls_data { #define TLS_RECHDR_SIZE 5 // 1 byte type, 2 bytes version, 2 bytes length #define TLS_MSGHDR_SIZE 4 // 1 byte type, 3 bytes length -#if 1 -static void mg_ssl_key_log(const char *label, uint8_t client_random[32], - uint8_t *secret, size_t secretsz) { - (void) label; - (void) client_random; - (void) secret; - (void) secretsz; -} -#else +#ifdef MG_TLS_SSLKEYLOGFILE #include static void mg_ssl_key_log(const char *label, uint8_t client_random[32], uint8_t *secret, size_t secretsz) { @@ -9662,14 +9668,19 @@ static void mg_tls_generate_handshake_keys(struct mg_connection *c) { uint8_t hello_hash[32]; uint8_t server_hs_secret[32]; uint8_t client_hs_secret[32]; +#if CHACHA20 + const size_t keysz = 32; +#else + const size_t keysz = 16; +#endif mg_hmac_sha256(early_secret, NULL, 0, zeros, sizeof(zeros)); mg_tls_derive_secret("tls13 derived", early_secret, 32, zeros_sha256_digest, 32, pre_extract_secret, 32); - mg_hmac_sha256(tls->handshake_secret, pre_extract_secret, + mg_hmac_sha256(tls->enc.handshake_secret, pre_extract_secret, sizeof(pre_extract_secret), tls->x25519_sec, sizeof(tls->x25519_sec)); - mg_tls_hexdump("hs secret", tls->handshake_secret, 32); + mg_tls_hexdump("hs secret", tls->enc.handshake_secret, 32); // mg_sha256_final is not idempotent, need to copy 
sha256 context to calculate // the digest @@ -9678,37 +9689,40 @@ static void mg_tls_generate_handshake_keys(struct mg_connection *c) { mg_tls_hexdump("hello hash", hello_hash, 32); // derive keys needed for the rest of the handshake - mg_tls_derive_secret("tls13 s hs traffic", tls->handshake_secret, 32, + mg_tls_derive_secret("tls13 s hs traffic", tls->enc.handshake_secret, 32, hello_hash, 32, server_hs_secret, 32); - mg_tls_derive_secret("tls13 key", server_hs_secret, 32, NULL, 0, - tls->server_write_key, 16); - mg_tls_derive_secret("tls13 iv", server_hs_secret, 32, NULL, 0, - tls->server_write_iv, 12); - mg_tls_derive_secret("tls13 finished", server_hs_secret, 32, NULL, 0, - tls->server_finished_key, 32); - - mg_tls_derive_secret("tls13 c hs traffic", tls->handshake_secret, 32, + mg_tls_derive_secret("tls13 c hs traffic", tls->enc.handshake_secret, 32, hello_hash, 32, client_hs_secret, 32); + + mg_tls_derive_secret("tls13 key", server_hs_secret, 32, NULL, 0, + tls->enc.server_write_key, keysz); + mg_tls_derive_secret("tls13 iv", server_hs_secret, 32, NULL, 0, + tls->enc.server_write_iv, 12); + mg_tls_derive_secret("tls13 finished", server_hs_secret, 32, NULL, 0, + tls->enc.server_finished_key, 32); + mg_tls_derive_secret("tls13 key", client_hs_secret, 32, NULL, 0, - tls->client_write_key, 16); + tls->enc.client_write_key, keysz); mg_tls_derive_secret("tls13 iv", client_hs_secret, 32, NULL, 0, - tls->client_write_iv, 12); + tls->enc.client_write_iv, 12); mg_tls_derive_secret("tls13 finished", client_hs_secret, 32, NULL, 0, - tls->client_finished_key, 32); + tls->enc.client_finished_key, 32); mg_tls_hexdump("s hs traffic", server_hs_secret, 32); - mg_tls_hexdump("s key", tls->server_write_key, 16); - mg_tls_hexdump("s iv", tls->server_write_iv, 12); - mg_tls_hexdump("s finished", tls->server_finished_key, 32); + mg_tls_hexdump("s key", tls->enc.server_write_key, keysz); + mg_tls_hexdump("s iv", tls->enc.server_write_iv, 12); + mg_tls_hexdump("s finished", 
tls->enc.server_finished_key, 32); mg_tls_hexdump("c hs traffic", client_hs_secret, 32); - mg_tls_hexdump("c key", tls->client_write_key, 16); - mg_tls_hexdump("c iv", tls->client_write_iv, 16); - mg_tls_hexdump("c finished", tls->client_finished_key, 32); + mg_tls_hexdump("c key", tls->enc.client_write_key, keysz); + mg_tls_hexdump("c iv", tls->enc.client_write_iv, 12); + mg_tls_hexdump("c finished", tls->enc.client_finished_key, 32); +#ifdef MG_TLS_SSLKEYLOGFILE mg_ssl_key_log("SERVER_HANDSHAKE_TRAFFIC_SECRET", tls->random, server_hs_secret, 32); mg_ssl_key_log("CLIENT_HANDSHAKE_TRAFFIC_SECRET", tls->random, client_hs_secret, 32); +#endif } static void mg_tls_generate_application_keys(struct mg_connection *c) { @@ -9718,40 +9732,47 @@ static void mg_tls_generate_application_keys(struct mg_connection *c) { uint8_t master_secret[32]; uint8_t server_secret[32]; uint8_t client_secret[32]; +#if CHACHA20 + const size_t keysz = 32; +#else + const size_t keysz = 16; +#endif mg_sha256_ctx sha256; memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx)); mg_sha256_final(hash, &sha256); - mg_tls_derive_secret("tls13 derived", tls->handshake_secret, 32, + mg_tls_derive_secret("tls13 derived", tls->enc.handshake_secret, 32, zeros_sha256_digest, 32, premaster_secret, 32); mg_hmac_sha256(master_secret, premaster_secret, 32, zeros, 32); mg_tls_derive_secret("tls13 s ap traffic", master_secret, 32, hash, 32, server_secret, 32); mg_tls_derive_secret("tls13 key", server_secret, 32, NULL, 0, - tls->server_write_key, 16); + tls->enc.server_write_key, keysz); mg_tls_derive_secret("tls13 iv", server_secret, 32, NULL, 0, - tls->server_write_iv, 12); + tls->enc.server_write_iv, 12); mg_tls_derive_secret("tls13 c ap traffic", master_secret, 32, hash, 32, client_secret, 32); mg_tls_derive_secret("tls13 key", client_secret, 32, NULL, 0, - tls->client_write_key, 16); + tls->enc.client_write_key, keysz); mg_tls_derive_secret("tls13 iv", client_secret, 32, NULL, 0, - tls->client_write_iv, 12); + 
tls->enc.client_write_iv, 12); mg_tls_hexdump("s ap traffic", server_secret, 32); - mg_tls_hexdump("s key", tls->server_write_key, 16); - mg_tls_hexdump("s iv", tls->server_write_iv, 12); - mg_tls_hexdump("s finished", tls->server_finished_key, 32); + mg_tls_hexdump("s key", tls->enc.server_write_key, keysz); + mg_tls_hexdump("s iv", tls->enc.server_write_iv, 12); + mg_tls_hexdump("s finished", tls->enc.server_finished_key, 32); mg_tls_hexdump("c ap traffic", client_secret, 32); - mg_tls_hexdump("c key", tls->client_write_key, 16); - mg_tls_hexdump("c iv", tls->client_write_iv, 16); - mg_tls_hexdump("c finished", tls->client_finished_key, 32); - tls->sseq = tls->cseq = 0; + mg_tls_hexdump("c key", tls->enc.client_write_key, keysz); + mg_tls_hexdump("c iv", tls->enc.client_write_iv, 12); + mg_tls_hexdump("c finished", tls->enc.client_finished_key, 32); + tls->enc.sseq = tls->enc.cseq = 0; +#ifdef MG_TLS_SSLKEYLOGFILE mg_ssl_key_log("SERVER_TRAFFIC_SECRET_0", tls->random, server_secret, 32); mg_ssl_key_log("CLIENT_TRAFFIC_SECRET_0", tls->random, client_secret, 32); +#endif } // AES GCM encryption of the message + put encoded data into the write buffer @@ -9769,21 +9790,21 @@ static void mg_tls_encrypt(struct mg_connection *c, const uint8_t *msg, (uint8_t) (encsz & 0xff)}; uint8_t nonce[12]; - mg_gcm_initialize(); + uint32_t seq = c->is_client ? tls->enc.cseq : tls->enc.sseq; + uint8_t *key = + c->is_client ? tls->enc.client_write_key : tls->enc.server_write_key; + uint8_t *iv = + c->is_client ? 
tls->enc.client_write_iv : tls->enc.server_write_iv; - if (c->is_client) { - memmove(nonce, tls->client_write_iv, sizeof(tls->client_write_iv)); - nonce[8] ^= (uint8_t) ((tls->cseq >> 24) & 255U); - nonce[9] ^= (uint8_t) ((tls->cseq >> 16) & 255U); - nonce[10] ^= (uint8_t) ((tls->cseq >> 8) & 255U); - nonce[11] ^= (uint8_t) ((tls->cseq) & 255U); - } else { - memmove(nonce, tls->server_write_iv, sizeof(tls->server_write_iv)); - nonce[8] ^= (uint8_t) ((tls->sseq >> 24) & 255U); - nonce[9] ^= (uint8_t) ((tls->sseq >> 16) & 255U); - nonce[10] ^= (uint8_t) ((tls->sseq >> 8) & 255U); - nonce[11] ^= (uint8_t) ((tls->sseq) & 255U); - } +#if !CHACHA20 + mg_gcm_initialize(); +#endif + + memmove(nonce, iv, sizeof(nonce)); + nonce[8] ^= (uint8_t) ((seq >> 24) & 255U); + nonce[9] ^= (uint8_t) ((seq >> 16) & 255U); + nonce[10] ^= (uint8_t) ((seq >> 8) & 255U); + nonce[11] ^= (uint8_t) ((seq) & 255U); mg_iobuf_add(wio, wio->len, hdr, sizeof(hdr)); mg_iobuf_resize(wio, wio->len + encsz); @@ -9791,17 +9812,18 @@ static void mg_tls_encrypt(struct mg_connection *c, const uint8_t *msg, tag = wio->buf + wio->len + msgsz + 1; memmove(outmsg, msg, msgsz); outmsg[msgsz] = msgtype; - if (c->is_client) { - mg_aes_gcm_encrypt(outmsg, outmsg, msgsz + 1, tls->client_write_key, - sizeof(tls->client_write_key), nonce, sizeof(nonce), - associated_data, sizeof(associated_data), tag, 16); - tls->cseq++; - } else { - mg_aes_gcm_encrypt(outmsg, outmsg, msgsz + 1, tls->server_write_key, - sizeof(tls->server_write_key), nonce, sizeof(nonce), - associated_data, sizeof(associated_data), tag, 16); - tls->sseq++; - } +#if CHACHA20 + (void) tag; // tag is only used in aes gcm + uint8_t enc[8192]; + size_t n = + mg_chacha20_poly1305_encrypt(enc, key, nonce, associated_data, + sizeof(associated_data), outmsg, msgsz + 1); + memmove(outmsg, enc, n); +#else + mg_aes_gcm_encrypt(outmsg, outmsg, msgsz + 1, key, 16, nonce, sizeof(nonce), + associated_data, sizeof(associated_data), tag, 16); +#endif + c->is_client ? 
tls->enc.cseq++ : tls->enc.sseq++; wio->len += encsz; } @@ -9813,6 +9835,13 @@ static int mg_tls_recv_record(struct mg_connection *c) { uint8_t *msg; uint8_t nonce[12]; int r; + + uint32_t seq = c->is_client ? tls->enc.sseq : tls->enc.cseq; + uint8_t *key = + c->is_client ? tls->enc.server_write_key : tls->enc.client_write_key; + uint8_t *iv = + c->is_client ? tls->enc.server_write_iv : tls->enc.client_write_iv; + if (tls->recv.len > 0) { return 0; /* some data from previous record is still present */ } @@ -9834,43 +9863,47 @@ static int mg_tls_recv_record(struct mg_connection *c) { } } +#if !CHACHA20 mg_gcm_initialize(); +#endif + msgsz = MG_LOAD_BE16(rio->buf + 3); msg = rio->buf + 5; - if (c->is_client) { - memmove(nonce, tls->server_write_iv, sizeof(tls->server_write_iv)); - nonce[8] ^= (uint8_t) ((tls->sseq >> 24) & 255U); - nonce[9] ^= (uint8_t) ((tls->sseq >> 16) & 255U); - nonce[10] ^= (uint8_t) ((tls->sseq >> 8) & 255U); - nonce[11] ^= (uint8_t) ((tls->sseq) & 255U); - mg_aes_gcm_decrypt(msg, msg, msgsz - 16, tls->server_write_key, - sizeof(tls->server_write_key), nonce, sizeof(nonce)); - tls->sseq++; - } else { - memmove(nonce, tls->client_write_iv, sizeof(tls->client_write_iv)); - nonce[8] ^= (uint8_t) ((tls->cseq >> 24) & 255U); - nonce[9] ^= (uint8_t) ((tls->cseq >> 16) & 255U); - nonce[10] ^= (uint8_t) ((tls->cseq >> 8) & 255U); - nonce[11] ^= (uint8_t) ((tls->cseq) & 255U); - mg_aes_gcm_decrypt(msg, msg, msgsz - 16, tls->client_write_key, - sizeof(tls->client_write_key), nonce, sizeof(nonce)); - tls->cseq++; - } + memmove(nonce, iv, sizeof(nonce)); + nonce[8] ^= (uint8_t) ((seq >> 24) & 255U); + nonce[9] ^= (uint8_t) ((seq >> 16) & 255U); + nonce[10] ^= (uint8_t) ((seq >> 8) & 255U); + nonce[11] ^= (uint8_t) ((seq) & 255U); +#if CHACHA20 + uint8_t dec[8192]; + size_t n = mg_chacha20_poly1305_decrypt(dec, key, nonce, msg, msgsz); + memmove(msg, dec, n); +#else + mg_aes_gcm_decrypt(msg, msg, msgsz - 16, key, 16, nonce, sizeof(nonce)); +#endif r = msgsz 
- 16 - 1; tls->content_type = msg[msgsz - 16 - 1]; tls->recv.buf = msg; tls->recv.size = tls->recv.len = msgsz - 16 - 1; + c->is_client ? tls->enc.sseq++ : tls->enc.cseq++; return r; } static void mg_tls_calc_cert_verify_hash(struct mg_connection *c, - uint8_t hash[32]) { + uint8_t hash[32], int is_client) { struct tls_data *tls = (struct tls_data *) c->tls; - uint8_t sig_content[130] = { - " " - " " - "TLS 1.3, server CertificateVerify\0"}; + uint8_t server_context[34] = "TLS 1.3, server CertificateVerify"; + uint8_t client_context[34] = "TLS 1.3, client CertificateVerify"; + uint8_t sig_content[130]; mg_sha256_ctx sha256; + + memset(sig_content, 0x20, 64); + if (is_client) { + memmove(sig_content + 64, client_context, sizeof(client_context)); + } else { + memmove(sig_content + 64, server_context, sizeof(server_context)); + } + memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx)); mg_sha256_final(sig_content + 98, &sha256); @@ -9947,51 +9980,28 @@ static void mg_tls_server_send_hello(struct mg_connection *c) { struct tls_data *tls = (struct tls_data *) c->tls; struct mg_iobuf *wio = &tls->send; + // clang-format off uint8_t msg_server_hello[122] = { - // server hello, tls 1.2 - 0x02, - 0x00, - 0x00, - 0x76, - 0x03, - 0x03, - // random (32 bytes) - PLACEHOLDER_32B, - // session ID length + session ID (32 bytes) - 0x20, - PLACEHOLDER_32B, + // server hello, tls 1.2 + 0x02, 0x00, 0x00, 0x76, 0x03, 0x03, + // random (32 bytes) + PLACEHOLDER_32B, + // session ID length + session ID (32 bytes) + 0x20, PLACEHOLDER_32B, #if defined(CHACHA20) && CHACHA20 - // TLS_CHACHA20_POLY1305_SHA256 + no compression - 0x13, - 0x03, - 0x00, + // TLS_CHACHA20_POLY1305_SHA256 + no compression + 0x13, 0x03, 0x00, #else - // TLS_AES_128_GCM_SHA256 + no compression - 0x13, - 0x01, - 0x00, + // TLS_AES_128_GCM_SHA256 + no compression + 0x13, 0x01, 0x00, #endif - // extensions + keyshare - 0x00, - 0x2e, - 0x00, - 0x33, - 0x00, - 0x24, - 0x00, - 0x1d, - 0x00, - 0x20, - // x25519 keyshare - 
PLACEHOLDER_32B, - // supported versions (tls1.3 == 0x304) - 0x00, - 0x2b, - 0x00, - 0x02, - 0x03, - 0x04 - }; + // extensions + keyshare + 0x00, 0x2e, 0x00, 0x33, 0x00, 0x24, 0x00, 0x1d, 0x00, 0x20, + // x25519 keyshare + PLACEHOLDER_32B, + // supported versions (tls1.3 == 0x304) + 0x00, 0x2b, 0x00, 0x02, 0x03, 0x04}; + // clang-format on // calculate keyshare uint8_t x25519_pub[X25519_BYTES]; @@ -10026,7 +10036,7 @@ static void mg_tls_server_send_ext(struct mg_connection *c) { static void mg_tls_server_send_cert(struct mg_connection *c) { struct tls_data *tls = (struct tls_data *) c->tls; // server DER certificate (empty) - size_t n = tls->server_cert_der.len; + size_t n = tls->cert_der.len; uint8_t *cert = (uint8_t *) calloc(1, 13 + n); if (cert == NULL) { mg_error(c, "tls cert oom"); @@ -10045,7 +10055,7 @@ static void mg_tls_server_send_cert(struct mg_connection *c) { cert[9] = (uint8_t) (((n) >> 8) & 255U); cert[10] = (uint8_t) (n & 255U); // bytes 11+ are certificate in DER format - memmove(cert + 11, tls->server_cert_der.buf, n); + memmove(cert + 11, tls->cert_der.buf, n); cert[11 + n] = cert[12 + n] = 0; // certificate extensions (none) mg_sha256_update(&tls->sha256, cert, 13 + n); mg_tls_encrypt(c, cert, 13 + n, MG_TLS_HANDSHAKE); @@ -10074,7 +10084,7 @@ static void finish_SHA256(const MG_UECC_HashContext *base, mg_sha256_final(hash_result, &c->ctx); } -static void mg_tls_server_send_cert_verify(struct mg_connection *c) { +static void mg_tls_send_cert_verify(struct mg_connection *c, int is_client) { struct tls_data *tls = (struct tls_data *) c->tls; // server certificate verify packet uint8_t verify[82] = {0x0f, 0x00, 0x00, 0x00, 0x04, 0x03, 0x00, 0x00}; @@ -10086,10 +10096,10 @@ static void mg_tls_server_send_cert_verify(struct mg_connection *c) { int neg1, neg2; uint8_t sig[64] = {0}; - mg_tls_calc_cert_verify_hash(c, (uint8_t *) hash); + mg_tls_calc_cert_verify_hash(c, (uint8_t *) hash, is_client); - mg_uecc_sign_deterministic(tls->server_key, hash, 
sizeof(hash), &ctx.uECC, - sig, mg_uecc_secp256r1()); + mg_uecc_sign_deterministic(tls->ec_key, hash, sizeof(hash), &ctx.uECC, sig, + mg_uecc_secp256r1()); neg1 = !!(sig[0] & 0x80); neg2 = !!(sig[32] & 0x80); @@ -10119,7 +10129,7 @@ static void mg_tls_server_send_finish(struct mg_connection *c) { uint8_t finish[36] = {0x14, 0, 0, 32}; memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx)); mg_sha256_final(hash, &sha256); - mg_hmac_sha256(finish + 4, tls->server_finished_key, 32, hash, 32); + mg_hmac_sha256(finish + 4, tls->enc.server_finished_key, 32, hash, 32); mg_tls_encrypt(c, finish, sizeof(finish), MG_TLS_HANDSHAKE); mg_io_send(c, wio->buf, wio->len); wio->len = 0; @@ -10151,140 +10161,73 @@ static void mg_tls_client_send_hello(struct mg_connection *c) { struct tls_data *tls = (struct tls_data *) c->tls; struct mg_iobuf *wio = &tls->send; - const char *hostname = tls->hostname; - size_t hostnamesz = strlen(tls->hostname); uint8_t x25519_pub[X25519_BYTES]; - uint8_t msg_client_hello[162 + 32] = { - // TLS Client Hello header reported as TLS1.2 (5) - 0x16, - 0x03, - 0x01, - 0x00, - 0xfe, - // server hello, tls 1.2 (6) - 0x01, - 0x00, - 0x00, - 0x8c, - 0x03, - 0x03, - // random (32 bytes) - PLACEHOLDER_32B, - // session ID length + session ID (32 bytes) - 0x20, - PLACEHOLDER_32B, -#if defined(CHACHA20) && CHACHA20 - // TLS_CHACHA20_POLY1305_SHA256 + no compression - 0x13, - 0x03, - 0x00, -#else - 0x00, - 0x02, // size = 2 bytes - 0x13, - 0x01, // TLS_AES_128_GCM_SHA256 - 0x01, - 0x00, // no compression -#endif - - // extensions + keyshare - 0x00, - 0xfe, - // x25519 keyshare - 0x00, - 0x33, - 0x00, - 0x26, - 0x00, - 0x24, - 0x00, - 0x1d, - 0x00, - 0x20, - PLACEHOLDER_32B, - // supported groups (x25519) - 0x00, - 0x0a, - 0x00, - 0x04, - 0x00, - 0x02, - 0x00, - 0x1d, - // supported versions (tls1.3 == 0x304) - 0x00, - 0x2b, - 0x00, - 0x03, - 0x02, - 0x03, - 0x04, - // session ticket (none) - 0x00, - 0x23, - 0x00, - 0x00, - // signature algorithms (we don't care, so 
list all the common ones) - 0x00, - 0x0d, - 0x00, - 0x24, - 0x00, - 0x22, - 0x04, - 0x03, - 0x05, - 0x03, - 0x06, - 0x03, - 0x08, - 0x07, - 0x08, - 0x08, - 0x08, - 0x1a, - 0x08, - 0x1b, - 0x08, - 0x1c, - 0x08, - 0x09, - 0x08, - 0x0a, - 0x08, - 0x0b, - 0x08, - 0x04, - 0x08, - 0x05, - 0x08, - 0x06, - 0x04, - 0x01, - 0x05, - 0x01, - 0x06, - 0x01, - // server name - 0x00, - 0x00, - 0x00, - 0xfe, - 0x00, - 0xfe, - 0x00, - 0x00, - 0xfe + // the only signature algorithm we actually support + uint8_t secp256r1_sig_algs[8] = { + 0x00, 0x0d, 0x00, 0x04, 0x00, 0x02, 0x04, 0x03, }; + // all popular signature algorithms (if we don't care about verification) + uint8_t all_sig_algs[34] = { + 0x00, 0x0d, 0x00, 0x1e, 0x00, 0x1c, 0x04, 0x03, 0x05, 0x03, 0x06, 0x03, + 0x08, 0x07, 0x08, 0x08, 0x08, 0x09, 0x08, 0x0a, 0x08, 0x0b, 0x08, 0x04, + 0x08, 0x05, 0x08, 0x06, 0x04, 0x01, 0x05, 0x01, 0x06, 0x01}; + uint8_t server_name_ext[9] = {0x00, 0x00, 0x00, 0xfe, 0x00, + 0xfe, 0x00, 0x00, 0xfe}; - // patch ClientHello with correct hostname length + offset: - MG_STORE_BE16(msg_client_hello + 3, hostnamesz + 189); - MG_STORE_BE16(msg_client_hello + 7, hostnamesz + 185); - MG_STORE_BE16(msg_client_hello + 82, hostnamesz + 110); - MG_STORE_BE16(msg_client_hello + 187, hostnamesz + 5); - MG_STORE_BE16(msg_client_hello + 189, hostnamesz + 3); - MG_STORE_BE16(msg_client_hello + 192, hostnamesz); + // clang-format off + uint8_t msg_client_hello[145] = { + // TLS Client Hello header reported as TLS1.2 (5) + 0x16, 0x03, 0x03, 0x00, 0xfe, + // client hello, tls 1.2 (6) + 0x01, 0x00, 0x00, 0x8c, 0x03, 0x03, + // random (32 bytes) + PLACEHOLDER_32B, + // session ID length + session ID (32 bytes) + 0x20, PLACEHOLDER_32B, 0x00, + 0x02, // size = 2 bytes +#if defined(CHACHA20) && CHACHA20 + // TLS_CHACHA20_POLY1305_SHA256 + 0x13, 0x03, +#else + // TLS_AES_128_GCM_SHA256 + 0x13, 0x01, +#endif + // no compression + 0x01, 0x00, + // extensions + keyshare + 0x00, 0xfe, + // x25519 keyshare + 0x00, 0x33, 0x00, 
0x26, 0x00, 0x24, 0x00, 0x1d, 0x00, 0x20, + PLACEHOLDER_32B, + // supported groups (x25519) + 0x00, 0x0a, 0x00, 0x04, 0x00, 0x02, 0x00, 0x1d, + // supported versions (tls1.3 == 0x304) + 0x00, 0x2b, 0x00, 0x03, 0x02, 0x03, 0x04, + // session ticket (none) + 0x00, 0x23, 0x00, 0x00, // 144 bytes till here + }; + // clang-format on + const char *hostname = tls->hostname; + size_t hostnamesz = strlen(tls->hostname); + size_t hostname_extsz = hostnamesz ? hostnamesz + 9 : 0; + uint8_t *sig_alg = tls->skip_verification ? all_sig_algs : secp256r1_sig_algs; + size_t sig_alg_sz = tls->skip_verification ? sizeof(all_sig_algs) + : sizeof(secp256r1_sig_algs); + + // patch ClientHello with correct hostname ext length (if any) + MG_STORE_BE16(msg_client_hello + 3, + hostname_extsz + 183 - 9 - 34 + sig_alg_sz); + MG_STORE_BE16(msg_client_hello + 7, + hostname_extsz + 179 - 9 - 34 + sig_alg_sz); + MG_STORE_BE16(msg_client_hello + 82, + hostname_extsz + 104 - 9 - 34 + sig_alg_sz); + + if (hostnamesz > 0) { + MG_STORE_BE16(server_name_ext + 2, hostnamesz + 5); + MG_STORE_BE16(server_name_ext + 4, hostnamesz + 3); + MG_STORE_BE16(server_name_ext + 7, hostnamesz); + } // calculate keyshare mg_random(tls->x25519_cli, sizeof(tls->x25519_cli)); @@ -10297,12 +10240,18 @@ static void mg_tls_client_send_hello(struct mg_connection *c) { memmove(msg_client_hello + 44, tls->session_id, sizeof(tls->session_id)); memmove(msg_client_hello + 94, x25519_pub, sizeof(x25519_pub)); - // server hello message + // client hello message mg_iobuf_add(wio, wio->len, msg_client_hello, sizeof(msg_client_hello)); - mg_iobuf_add(wio, wio->len, hostname, strlen(hostname)); mg_sha256_update(&tls->sha256, msg_client_hello + 5, sizeof(msg_client_hello) - 5); - mg_sha256_update(&tls->sha256, (uint8_t *) hostname, strlen(hostname)); + mg_iobuf_add(wio, wio->len, sig_alg, sig_alg_sz); + mg_sha256_update(&tls->sha256, sig_alg, sig_alg_sz); + if (hostnamesz > 0) { + mg_iobuf_add(wio, wio->len, server_name_ext, 
sizeof(server_name_ext)); + mg_iobuf_add(wio, wio->len, hostname, hostnamesz); + mg_sha256_update(&tls->sha256, server_name_ext, sizeof(server_name_ext)); + mg_sha256_update(&tls->sha256, (uint8_t *) hostname, hostnamesz); + } // change cipher message mg_iobuf_add(wio, wio->len, (const char *) "\x14\x03\x03\x00\x01\x01", 6); @@ -10392,6 +10341,12 @@ static int mg_tls_client_recv_cert(struct mg_connection *c) { if (mg_tls_recv_record(c) < 0) { return -1; } + if (tls->recv.buf[0] == MG_TLS_CERTIFICATE_REQUEST) { + MG_VERBOSE(("got certificate request")); + mg_tls_drop_message(c); + tls->cert_requested = 1; + return -1; + } if (tls->recv.buf[0] != MG_TLS_CERTIFICATE) { mg_error(c, "expected server certificate but got msg 0x%02x", tls->recv.buf[0]); @@ -10478,7 +10433,7 @@ static int mg_tls_client_recv_cert(struct mg_connection *c) { } while (0); mg_tls_drop_message(c); - mg_tls_calc_cert_verify_hash(c, tls->sighash); + mg_tls_calc_cert_verify_hash(c, tls->sighash, 0); return 0; } @@ -10560,7 +10515,7 @@ static void mg_tls_client_send_finish(struct mg_connection *c) { uint8_t finish[36] = {0x14, 0, 0, 32}; memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx)); mg_sha256_final(hash, &sha256); - mg_hmac_sha256(finish + 4, tls->client_finished_key, 32, hash, 32); + mg_hmac_sha256(finish + 4, tls->enc.client_finished_key, 32, hash, 32); mg_tls_encrypt(c, finish, sizeof(finish), MG_TLS_HANDSHAKE); mg_io_send(c, wio->buf, wio->len); wio->len = 0; @@ -10601,8 +10556,23 @@ static void mg_tls_client_handshake(struct mg_connection *c) { if (mg_tls_client_recv_finish(c) < 0) { break; } - mg_tls_client_send_finish(c); - mg_tls_generate_application_keys(c); + if (tls->cert_requested) { + /* for mTLS we should generate application keys at this point + * but then restore handshake keys and continue with + * the rest of the handshake */ + struct tls_enc app_keys; + struct tls_enc hs_keys = tls->enc; + mg_tls_generate_application_keys(c); + app_keys = tls->enc; + tls->enc = hs_keys; + 
mg_tls_server_send_cert(c); + mg_tls_send_cert_verify(c, 1); + mg_tls_client_send_finish(c); + tls->enc = app_keys; + } else { + mg_tls_client_send_finish(c); + mg_tls_generate_application_keys(c); + } tls->state = MG_TLS_STATE_CLIENT_CONNECTED; c->is_tls_hs = 0; break; @@ -10621,7 +10591,7 @@ static void mg_tls_server_handshake(struct mg_connection *c) { mg_tls_generate_handshake_keys(c); mg_tls_server_send_ext(c); mg_tls_server_send_cert(c); - mg_tls_server_send_cert_verify(c); + mg_tls_send_cert_verify(c, 0); mg_tls_server_send_finish(c); tls->state = MG_TLS_STATE_SERVER_NEGOTIATED; // fallthrough @@ -10705,14 +10675,13 @@ void mg_tls_init(struct mg_connection *c, const struct mg_tls_opts *opts) { tls->hostname[opts->name.len] = 0; } - if (c->is_client) { - tls->server_cert_der.buf = NULL; + if (opts->cert.buf == NULL) { + MG_VERBOSE(("no certificate provided")); return; } // parse PEM or DER certificate - if (mg_parse_pem(opts->cert, mg_str_s("CERTIFICATE"), &tls->server_cert_der) < - 0) { + if (mg_parse_pem(opts->cert, mg_str_s("CERTIFICATE"), &tls->cert_der) < 0) { MG_ERROR(("Failed to load certificate")); return; } @@ -10737,7 +10706,7 @@ void mg_tls_init(struct mg_connection *c, const struct mg_tls_opts *opts) { if (memcmp(key.buf + 2, "\x02\x01\x01\x04\x20", 5) != 0) { MG_ERROR(("EC private key: ASN.1 bad data")); } - memmove(tls->server_key, key.buf + 7, 32); + memmove(tls->ec_key, key.buf + 7, 32); free((void *) key.buf); } else if (mg_parse_pem(opts->key, mg_str_s("PRIVATE KEY"), &key) == 0) { mg_error(c, "PKCS8 private key format is not supported"); @@ -10750,7 +10719,7 @@ void mg_tls_free(struct mg_connection *c) { struct tls_data *tls = (struct tls_data *) c->tls; if (tls != NULL) { mg_iobuf_free(&tls->send); - free((void *) tls->server_cert_der.buf); + free((void *) tls->cert_der.buf); } free(c->tls); c->tls = NULL; @@ -10778,6 +10747,7 @@ long mg_tls_recv(struct mg_connection *c, void *buf, size_t len) { if (r < 0) { return r; } + if 
(tls->content_type != MG_TLS_APP_DATA) { tls->recv.len = 0; mg_tls_drop_record(c); @@ -10806,6 +10776,1347 @@ void mg_tls_ctx_free(struct mg_mgr *mgr) { } #endif +#ifdef MG_ENABLE_LINES +#line 1 "src/tls_chacha20.c" +#endif +// portable8439 v1.0.1 +// Source: https://github.com/DavyLandman/portable8439 +// Licensed under CC0-1.0 +// Contains poly1305-donna e6ad6e091d30d7f4ec2d4f978be1fcfcbce72781 (Public +// Domain) + + + + +#if MG_TLS == MG_TLS_BUILTIN +// ******* BEGIN: chacha-portable/chacha-portable.h ******** + +#if !defined(__cplusplus) && !defined(_MSC_VER) && \ + (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) +#error "C99 or newer required" +#endif + +#define CHACHA20_KEY_SIZE (32) +#define CHACHA20_NONCE_SIZE (12) + +#if defined(_MSC_VER) || defined(__cplusplus) +// add restrict support +#if (defined(_MSC_VER) && _MSC_VER >= 1900) || defined(__clang__) || \ + defined(__GNUC__) +#define restrict __restrict +#else +#define restrict +#endif +#endif + +// xor data with a ChaCha20 keystream as per RFC8439 +static PORTABLE_8439_DECL void chacha20_xor_stream( + uint8_t *restrict dest, const uint8_t *restrict source, size_t length, + const uint8_t key[CHACHA20_KEY_SIZE], + const uint8_t nonce[CHACHA20_NONCE_SIZE], uint32_t counter); + +static PORTABLE_8439_DECL void rfc8439_keygen( + uint8_t poly_key[32], const uint8_t key[CHACHA20_KEY_SIZE], + const uint8_t nonce[CHACHA20_NONCE_SIZE]); + +// ******* END: chacha-portable/chacha-portable.h ******** +// ******* BEGIN: poly1305-donna/poly1305-donna.h ******** + +#include + +typedef struct poly1305_context { + size_t aligner; + unsigned char opaque[136]; +} poly1305_context; + +static PORTABLE_8439_DECL void poly1305_init(poly1305_context *ctx, + const unsigned char key[32]); +static PORTABLE_8439_DECL void poly1305_update(poly1305_context *ctx, + const unsigned char *m, + size_t bytes); +static PORTABLE_8439_DECL void poly1305_finish(poly1305_context *ctx, + unsigned char mac[16]); + +// ******* END: 
// ******* BEGIN: chacha-portable.c ********

#include <assert.h>
#include <string.h>

// ChaCha20 stream cipher written from the description in RFC 8439.
// Properties of this implementation:
// - pure C99
// - works on big and little endian hosts
// - never performs an unaligned 32-bit load/store (memcpy or byte access only)
//
// It aims to be as fast as possible without resorting to inline assembly.

// Endianness detection, based on
// https://sourceforge.net/p/predef/wiki/Endianness/
//
// When the compiler publishes __BYTE_ORDER__ we trust it and nothing else.
// The legacy macros are consulted only as a fallback. LITTLE_ENDIAN on its own
// proves nothing: BSD/glibc <endian.h> defines both LITTLE_ENDIAN and
// BIG_ENDIAN as constants on every host, so it only counts when it is the sole
// flag defined or when BYTE_ORDER actually equals it. A wrong guess here would
// select the memcpy fast path on a big endian CPU and corrupt the keystream;
// a missed guess merely selects the (always correct) portable path.
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define __HAVE_LITTLE_ENDIAN 1
#endif
#elif defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) ||                  \
    defined(__THUMBEL__) || defined(__AARCH64EL__) || defined(_MIPSEL) ||  \
    defined(__MIPSEL) || defined(__MIPSEL__) || defined(__XTENSA_EL__) ||  \
    defined(__AVR__) ||                                                    \
    (defined(LITTLE_ENDIAN) &&                                             \
     (!defined(BIG_ENDIAN) ||                                              \
      (defined(BYTE_ORDER) && BYTE_ORDER == LITTLE_ENDIAN)))
#define __HAVE_LITTLE_ENDIAN 1
#endif

// Define TEST_SLOW_PATH to force the byte-wise code even on little endian.
#ifndef TEST_SLOW_PATH
#if defined(__HAVE_LITTLE_ENDIAN)
#define FAST_PATH
#endif
#endif

// Normally supplied by the ChaCha20 header; the fallbacks only keep this
// section self-contained and use the sizes fixed by RFC 8439.
#ifndef CHACHA20_KEY_SIZE
#define CHACHA20_KEY_SIZE (32)
#endif
#ifndef CHACHA20_NONCE_SIZE
#define CHACHA20_NONCE_SIZE (12)
#endif

#define CHACHA20_STATE_WORDS (16)
#define CHACHA20_BLOCK_SIZE (CHACHA20_STATE_WORDS * sizeof(uint32_t))

// Load 4 little endian bytes at `source` into the uint32_t lvalue `target`.
#ifdef FAST_PATH
#define store_32_le(target, source) \
  memcpy(&(target), (source), sizeof(uint32_t))
#else
#define store_32_le(target, source)                                      \
  ((target) = (uint32_t) (source)[0] | ((uint32_t) (source)[1]) << 8 |   \
              ((uint32_t) (source)[2]) << 16 | ((uint32_t) (source)[3]) << 24)
#endif

// Build the initial 16-word ChaCha20 state: 4 constant words, 8 key words,
// the 32-bit block counter and 3 nonce words (RFC 8439, section 2.3).
static void initialize_state(uint32_t state[CHACHA20_STATE_WORDS],
                             const uint8_t key[CHACHA20_KEY_SIZE],
                             const uint8_t nonce[CHACHA20_NONCE_SIZE],
                             uint32_t counter) {
  unsigned int i;
#ifdef static_assert
  static_assert(sizeof(uint32_t) == 4,
                "We don't support systems that do not conform to standard of "
                "uint32_t being exact 32bit wide");
#endif
  // "expand 32-byte k"
  state[0] = 0x61707865;
  state[1] = 0x3320646e;
  state[2] = 0x79622d32;
  state[3] = 0x6b206574;
  for (i = 0; i < 8; i++) {
    store_32_le(state[4 + i], key + 4 * i);
  }
  state[12] = counter;
  for (i = 0; i < 3; i++) {
    store_32_le(state[13 + i], nonce + 4 * i);
  }
}

#define increment_counter(state) ((state)[12]++)

// Rotate left; n must be in 1..31. Source: http://blog.regehr.org/archives/1063
#define rotl32a(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

// ChaCha quarter round on four uint32_t lvalues (RFC 8439, section 2.1).
#define Qround(a, b, c, d) \
  do {                     \
    a += b;                \
    d ^= a;                \
    d = rotl32a(d, 16);    \
    c += d;                \
    b ^= c;                \
    b = rotl32a(b, 12);    \
    a += b;                \
    d ^= a;                \
    d = rotl32a(d, 8);     \
    c += d;                \
    b ^= c;                \
    b = rotl32a(b, 7);     \
  } while (0)

#define TIMES16(x)                                                          \
  x(0) x(1) x(2) x(3) x(4) x(5) x(6) x(7) x(8) x(9) x(10) x(11) x(12) x(13) \
      x(14) x(15)

// Run the 20-round ChaCha20 block function on `start` and write the 16
// resulting words (working state + input state) to `output`.
static void core_block(const uint32_t *restrict start,
                       uint32_t *restrict output) {
// Instead of working on the output array we give the compiler 16 scalar
// locals, which it can keep in registers.
#define CHACHA_LOAD(i) uint32_t x##i = start[i];
#define CHACHA_QR(a, b, c, d) Qround(x##a, x##b, x##c, x##d)
#define CHACHA_STORE(i) output[i] = start[i] + x##i;
  TIMES16(CHACHA_LOAD)

  // 10 double rounds: 4 column rounds followed by 4 diagonal rounds
  for (int round = 0; round < 10; round++) {
    CHACHA_QR(0, 4, 8, 12);
    CHACHA_QR(1, 5, 9, 13);
    CHACHA_QR(2, 6, 10, 14);
    CHACHA_QR(3, 7, 11, 15);
    CHACHA_QR(0, 5, 10, 15);
    CHACHA_QR(1, 6, 11, 12);
    CHACHA_QR(2, 7, 8, 13);
    CHACHA_QR(3, 4, 9, 14);
  }

  TIMES16(CHACHA_STORE)
#undef CHACHA_LOAD
#undef CHACHA_QR
#undef CHACHA_STORE
}

#define U8(x) ((uint8_t) ((x) & 0xFF))

// dst[0..3] = src[0..3] XOR (keystream word *pad serialized little endian)
#ifdef FAST_PATH
#define xor32_le(dst, src, pad)                  \
  do {                                           \
    uint32_t xor32_tmp;                          \
    memcpy(&xor32_tmp, (src), sizeof(uint32_t)); \
    xor32_tmp ^= *(pad);                         \
    memcpy((dst), &xor32_tmp, sizeof(uint32_t)); \
  } while (0)
#else
#define xor32_le(dst, src, pad)             \
  do {                                      \
    (dst)[0] = (src)[0] ^ U8(*(pad));       \
    (dst)[1] = (src)[1] ^ U8(*(pad) >> 8);  \
    (dst)[2] = (src)[2] ^ U8(*(pad) >> 16); \
    (dst)[3] = (src)[3] ^ U8(*(pad) >> 24); \
  } while (0)
#endif

// Byte pointer to the ix-th 32-bit word of byte buffer a
#define index8_32(a, ix) ((a) + ((ix) * sizeof(uint32_t)))

// XOR `words` 32-bit words of `source` with `pad` into `dest`
#define xor32_blocks(dest, source, pad, words)                          \
  do {                                                                  \
    for (unsigned int xb_i = 0; xb_i < (words); xb_i++) {               \
      xor32_le(index8_32(dest, xb_i), index8_32(source, xb_i),          \
               (pad) + xb_i);                                           \
    }                                                                   \
  } while (0)

// XOR a partial block: chunk_size bytes of `source` with the keystream words
// in `pad`, result in `dest`. The caller passes chunk_size smaller than one
// ChaCha20 block.
static void xor_block(uint8_t *restrict dest, const uint8_t *restrict source,
                      const uint32_t *restrict pad, unsigned int chunk_size) {
  unsigned int full_blocks = chunk_size / sizeof(uint32_t);
  // We are going back from uint32 to uint8 here, so endianness matters again.
  xor32_blocks(dest, source, pad, full_blocks);

  dest += full_blocks * sizeof(uint32_t);
  source += full_blocks * sizeof(uint32_t);
  pad += full_blocks;

  // 0..3 trailing bytes come from the low bytes of the next keystream word
  switch (chunk_size % sizeof(uint32_t)) {
    case 1:
      dest[0] = source[0] ^ U8(*pad);
      break;
    case 2:
      dest[0] = source[0] ^ U8(*pad);
      dest[1] = source[1] ^ U8(*pad >> 8);
      break;
    case 3:
      dest[0] = source[0] ^ U8(*pad);
      dest[1] = source[1] ^ U8(*pad >> 8);
      dest[2] = source[2] ^ U8(*pad >> 16);
      break;
    default:
      break;
  }
}

// XOR `length` bytes of `source` with the ChaCha20 keystream generated from
// key/nonce, starting at block number `counter`, and store them in `dest`.
// Encryption and decryption are the same operation. `dest` and `source` must
// not overlap (both are restrict-qualified).
static void chacha20_xor_stream(uint8_t *restrict dest,
                                const uint8_t *restrict source, size_t length,
                                const uint8_t key[CHACHA20_KEY_SIZE],
                                const uint8_t nonce[CHACHA20_NONCE_SIZE],
                                uint32_t counter) {
  uint32_t state[CHACHA20_STATE_WORDS];
  uint32_t pad[CHACHA20_STATE_WORDS];
  size_t full_blocks = length / CHACHA20_BLOCK_SIZE;
  unsigned int last_block = (unsigned int) (length % CHACHA20_BLOCK_SIZE);

  initialize_state(state, key, nonce, counter);

  for (size_t b = 0; b < full_blocks; b++) {
    core_block(state, pad);
    increment_counter(state);
    xor32_blocks(dest, source, pad, CHACHA20_STATE_WORDS);
    dest += CHACHA20_BLOCK_SIZE;
    source += CHACHA20_BLOCK_SIZE;
  }
  if (last_block > 0) {
    core_block(state, pad);
    xor_block(dest, source, pad, last_block);
  }
}

// serialize(): write the first 8 keystream words of `result` to the 32-byte
// buffer `poly_key` in little endian byte order.
#ifdef FAST_PATH
#define serialize(poly_key, result) memcpy((poly_key), (result), 32)
#else
#define store32_le(target, source)      \
  do {                                  \
    (target)[0] = U8(*(source));        \
    (target)[1] = U8(*(source) >> 8);   \
    (target)[2] = U8(*(source) >> 16);  \
    (target)[3] = U8(*(source) >> 24);  \
  } while (0)

#define serialize(poly_key, result)                                 \
  do {                                                              \
    for (unsigned int sr_i = 0; sr_i < 32 / sizeof(uint32_t);       \
         sr_i++) {                                                  \
      store32_le(index8_32(poly_key, sr_i), (result) + sr_i);       \
    }                                                               \
  } while (0)
#endif

// Derive the one-time Poly1305 key (RFC 8439, section 2.6): the first 32
// bytes of the ChaCha20 block computed with block counter 0.
static void rfc8439_keygen(uint8_t poly_key[32],
                           const uint8_t key[CHACHA20_KEY_SIZE],
                           const uint8_t nonce[CHACHA20_NONCE_SIZE]) {
  uint32_t state[CHACHA20_STATE_WORDS];
  uint32_t result[CHACHA20_STATE_WORDS];
  initialize_state(state, key, nonce, 0);
  core_block(state, result);
  serialize(poly_key, result);
}
// ******* END: chacha-portable.c ********
/* h += c, both 17-byte little endian numbers; the carry out of byte 16 is
 * dropped */
static void poly1305_add(unsigned char h[17], const unsigned char c[17]) {
  unsigned short carry = 0;
  unsigned int idx = 0;
  while (idx < 17) {
    carry = (unsigned short) (carry + h[idx] + c[idx]);
    h[idx] = (unsigned char) (carry & 0xff);
    carry = (unsigned short) (carry >> 8);
    idx++;
  }
}

/* Carry-propagate the 17 wide limbs in hr into the byte array h and fold
 * everything above bit 130 back in multiplied by 5 (2^130 == 5 mod p). */
static void poly1305_squeeze(unsigned char h[17], unsigned long hr[17]) {
  unsigned long acc = 0;
  unsigned int idx;

  /* first pass: normalise limbs 0..15 to single bytes */
  for (idx = 0; idx < 16; idx++) {
    acc += hr[idx];
    h[idx] = (unsigned char) (acc & 0xff);
    acc >>= 8;
  }

  /* top limb keeps only bits 128..129, the rest wraps around times 5 */
  acc += hr[16];
  h[16] = (unsigned char) (acc & 0x03);
  acc >>= 2;
  acc *= 5;

  /* second pass: add the wrapped part back into the low 16 bytes */
  for (idx = 0; idx < 16; idx++) {
    acc += h[idx];
    h[idx] = (unsigned char) (acc & 0xff);
    acc >>= 8;
  }
  h[16] = (unsigned char) (h[16] + (unsigned char) acc);
}

/* Final reduction: replace h by h - p when h >= p (p = 2^130 - 5), otherwise
 * leave it unchanged. The choice is made with a byte mask instead of a
 * branch. */
static void poly1305_freeze(unsigned char h[17]) {
  /* 2^136 - p, i.e. "-p" in 17-byte arithmetic */
  static const unsigned char minus_p[17] = {0x05, 0x00, 0x00, 0x00, 0x00, 0x00,
                                            0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                                            0x00, 0x00, 0x00, 0x00, 0xfc};
  unsigned char saved[17];
  unsigned char keep_saved;
  unsigned int idx;

  for (idx = 0; idx < 17; idx++) saved[idx] = h[idx];
  poly1305_add(h, minus_p); /* h = h - p (mod 2^136) */

  /* top bit set means h - p went negative: 0xff restores the saved value */
  keep_saved = (unsigned char) (0 - (h[16] >> 7));
  for (idx = 0; idx < 17; idx++) {
    h[idx] = (unsigned char) (h[idx] ^ (keep_saved & (saved[idx] ^ h[idx])));
  }
}
(bytes >= poly1305_block_size) { + unsigned long hr[17], u; + unsigned char c[17]; + unsigned int i, j; + + /* h += m */ + for (i = 0; i < 16; i++) c[i] = m[i]; + c[16] = hibit; + poly1305_add(st->h, c); + + /* h *= r */ + for (i = 0; i < 17; i++) { + u = 0; + for (j = 0; j <= i; j++) { + u += (unsigned short) st->h[j] * st->r[i - j]; + } + for (j = i + 1; j < 17; j++) { + unsigned long v = (unsigned short) st->h[j] * st->r[i + 17 - j]; + v = ((v << 8) + (v << 6)); /* v *= (5 << 6); */ + u += v; + } + hr[i] = u; + } + + /* (partial) h %= p */ + poly1305_squeeze(st->h, hr); + + m += poly1305_block_size; + bytes -= poly1305_block_size; + } +} + +static POLY1305_NOINLINE void poly1305_finish(poly1305_context *ctx, + unsigned char mac[16]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *) ctx; + size_t i; + + /* process the remaining block */ + if (st->leftover) { + size_t i = st->leftover; + st->buffer[i++] = 1; + for (; i < poly1305_block_size; i++) st->buffer[i] = 0; + st->final = 1; + poly1305_blocks(st, st->buffer, poly1305_block_size); + } + + /* fully reduce h */ + poly1305_freeze(st->h); + + /* h = (h + pad) % (1 << 128) */ + poly1305_add(st->h, st->pad); + for (i = 0; i < 16; i++) mac[i] = st->h[i]; + + /* zero out the state */ + for (i = 0; i < 17; i++) st->h[i] = 0; + for (i = 0; i < 17; i++) st->r[i] = 0; + for (i = 0; i < 17; i++) st->pad[i] = 0; +} +#elif defined(POLY1305_16BIT) +/* + poly1305 implementation using 16 bit * 16 bit = 32 bit multiplication +and 32 bit addition static */ + +#if defined(_MSC_VER) +#define POLY1305_NOINLINE __declspec(noinline) +#elif defined(__GNUC__) +#define POLY1305_NOINLINE __attribute__((noinline)) +#else +#define POLY1305_NOINLINE +#endif + +#define poly1305_block_size 16 + +/* 17 + sizeof(size_t) + 18*sizeof(unsigned short) */ +typedef struct poly1305_state_internal_t { + unsigned char buffer[poly1305_block_size]; + size_t leftover; + unsigned short r[10]; + unsigned short h[10]; + unsigned short pad[8]; 
/* Read two bytes at p as one little endian 16 bit unsigned integer */
static unsigned short U8TO16(const unsigned char *p) {
  unsigned int low = p[0] & 0xffu;
  unsigned int high = p[1] & 0xffu;
  return (unsigned short) (low | (high << 8));
}

/* Write the 16 bit unsigned integer v to p[0..1] in little endian order */
static void U16TO8(unsigned char *p, unsigned short v) {
  unsigned int k;
  for (k = 0; k < 2; k++) {
    p[k] = (unsigned char) ((v >> (8 * k)) & 0xff);
  }
}
0 : (1 << 11); /* 1 << 128 */ + unsigned short t0, t1, t2, t3, t4, t5, t6, t7; + unsigned long d[10]; + unsigned long c; + + while (bytes >= poly1305_block_size) { + size_t i, j; + + /* h += m[i] */ + t0 = U8TO16(&m[0]); + st->h[0] += (t0) & 0x1fff; + t1 = U8TO16(&m[2]); + st->h[1] += ((t0 >> 13) | (t1 << 3)) & 0x1fff; + t2 = U8TO16(&m[4]); + st->h[2] += ((t1 >> 10) | (t2 << 6)) & 0x1fff; + t3 = U8TO16(&m[6]); + st->h[3] += ((t2 >> 7) | (t3 << 9)) & 0x1fff; + t4 = U8TO16(&m[8]); + st->h[4] += ((t3 >> 4) | (t4 << 12)) & 0x1fff; + st->h[5] += ((t4 >> 1)) & 0x1fff; + t5 = U8TO16(&m[10]); + st->h[6] += ((t4 >> 14) | (t5 << 2)) & 0x1fff; + t6 = U8TO16(&m[12]); + st->h[7] += ((t5 >> 11) | (t6 << 5)) & 0x1fff; + t7 = U8TO16(&m[14]); + st->h[8] += ((t6 >> 8) | (t7 << 8)) & 0x1fff; + st->h[9] += ((t7 >> 5)) | hibit; + + /* h *= r, (partial) h %= p */ + for (i = 0, c = 0; i < 10; i++) { + d[i] = c; + for (j = 0; j < 10; j++) { + d[i] += (unsigned long) st->h[j] * + ((j <= i) ? st->r[i - j] : (5 * st->r[i + 10 - j])); + /* Sum(h[i] * r[i] * 5) will overflow slightly above 6 products with an + * unclamped r, so carry at 5 */ + if (j == 4) { + c = (d[i] >> 13); + d[i] &= 0x1fff; + } + } + c += (d[i] >> 13); + d[i] &= 0x1fff; + } + c = ((c << 2) + c); /* c *= 5 */ + c += d[0]; + d[0] = ((unsigned short) c & 0x1fff); + c = (c >> 13); + d[1] += c; + + for (i = 0; i < 10; i++) st->h[i] = (unsigned short) d[i]; + + m += poly1305_block_size; + bytes -= poly1305_block_size; + } +} + +static POLY1305_NOINLINE void poly1305_finish(poly1305_context *ctx, + unsigned char mac[16]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *) ctx; + unsigned short c; + unsigned short g[10]; + unsigned short mask; + unsigned long f; + size_t i; + + /* process the remaining block */ + if (st->leftover) { + size_t i = st->leftover; + st->buffer[i++] = 1; + for (; i < poly1305_block_size; i++) st->buffer[i] = 0; + st->final = 1; + poly1305_blocks(st, st->buffer, poly1305_block_size); + } + 
+ /* fully carry h */ + c = st->h[1] >> 13; + st->h[1] &= 0x1fff; + for (i = 2; i < 10; i++) { + st->h[i] += c; + c = st->h[i] >> 13; + st->h[i] &= 0x1fff; + } + st->h[0] += (c * 5); + c = st->h[0] >> 13; + st->h[0] &= 0x1fff; + st->h[1] += c; + c = st->h[1] >> 13; + st->h[1] &= 0x1fff; + st->h[2] += c; + + /* compute h + -p */ + g[0] = st->h[0] + 5; + c = g[0] >> 13; + g[0] &= 0x1fff; + for (i = 1; i < 10; i++) { + g[i] = st->h[i] + c; + c = g[i] >> 13; + g[i] &= 0x1fff; + } + + /* select h if h < p, or h + -p if h >= p */ + mask = (c ^ 1) - 1; + for (i = 0; i < 10; i++) g[i] &= mask; + mask = ~mask; + for (i = 0; i < 10; i++) st->h[i] = (st->h[i] & mask) | g[i]; + + /* h = h % (2^128) */ + st->h[0] = ((st->h[0]) | (st->h[1] << 13)) & 0xffff; + st->h[1] = ((st->h[1] >> 3) | (st->h[2] << 10)) & 0xffff; + st->h[2] = ((st->h[2] >> 6) | (st->h[3] << 7)) & 0xffff; + st->h[3] = ((st->h[3] >> 9) | (st->h[4] << 4)) & 0xffff; + st->h[4] = ((st->h[4] >> 12) | (st->h[5] << 1) | (st->h[6] << 14)) & 0xffff; + st->h[5] = ((st->h[6] >> 2) | (st->h[7] << 11)) & 0xffff; + st->h[6] = ((st->h[7] >> 5) | (st->h[8] << 8)) & 0xffff; + st->h[7] = ((st->h[8] >> 8) | (st->h[9] << 5)) & 0xffff; + + /* mac = (h + pad) % (2^128) */ + f = (unsigned long) st->h[0] + st->pad[0]; + st->h[0] = (unsigned short) f; + for (i = 1; i < 8; i++) { + f = (unsigned long) st->h[i] + st->pad[i] + (f >> 16); + st->h[i] = (unsigned short) f; + } + + for (i = 0; i < 8; i++) U16TO8(mac + (i * 2), st->h[i]); + + /* zero out the state */ + for (i = 0; i < 10; i++) st->h[i] = 0; + for (i = 0; i < 10; i++) st->r[i] = 0; + for (i = 0; i < 8; i++) st->pad[i] = 0; +} +#elif defined(POLY1305_32BIT) || \ + (!defined(POLY1305_64BIT) && defined(__GUESS32)) +/* + poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication +and 64 bit addition static */ + +#if defined(_MSC_VER) +#define POLY1305_NOINLINE __declspec(noinline) +#elif defined(__GNUC__) +#define POLY1305_NOINLINE __attribute__((noinline)) +#else 
/* Read four bytes at p as one little endian 32 bit unsigned integer */
static unsigned long U8TO32(const unsigned char *p) {
  unsigned long word = 0;
  int k;
  /* most significant byte first, so each step shifts the earlier ones up */
  for (k = 3; k >= 0; k--) {
    word = (word << 8) | (unsigned long) (p[k] & 0xff);
  }
  return word;
}

/* Write the low 32 bits of v to p[0..3] in little endian order */
static void U32TO8(unsigned char *p, unsigned long v) {
  unsigned int k;
  for (k = 0; k < 4; k++) {
    p[k] = (unsigned char) (v & 0xff);
    v >>= 8;
  }
}
0 : (1UL << 24); /* 1 << 128 */ + unsigned long r0, r1, r2, r3, r4; + unsigned long s1, s2, s3, s4; + unsigned long h0, h1, h2, h3, h4; + unsigned long long d0, d1, d2, d3, d4; + unsigned long c; + + r0 = st->r[0]; + r1 = st->r[1]; + r2 = st->r[2]; + r3 = st->r[3]; + r4 = st->r[4]; + + s1 = r1 * 5; + s2 = r2 * 5; + s3 = r3 * 5; + s4 = r4 * 5; + + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + while (bytes >= poly1305_block_size) { + /* h += m[i] */ + h0 += (U8TO32(m + 0)) & 0x3ffffff; + h1 += (U8TO32(m + 3) >> 2) & 0x3ffffff; + h2 += (U8TO32(m + 6) >> 4) & 0x3ffffff; + h3 += (U8TO32(m + 9) >> 6) & 0x3ffffff; + h4 += (U8TO32(m + 12) >> 8) | hibit; + + /* h *= r */ + d0 = ((unsigned long long) h0 * r0) + ((unsigned long long) h1 * s4) + + ((unsigned long long) h2 * s3) + ((unsigned long long) h3 * s2) + + ((unsigned long long) h4 * s1); + d1 = ((unsigned long long) h0 * r1) + ((unsigned long long) h1 * r0) + + ((unsigned long long) h2 * s4) + ((unsigned long long) h3 * s3) + + ((unsigned long long) h4 * s2); + d2 = ((unsigned long long) h0 * r2) + ((unsigned long long) h1 * r1) + + ((unsigned long long) h2 * r0) + ((unsigned long long) h3 * s4) + + ((unsigned long long) h4 * s3); + d3 = ((unsigned long long) h0 * r3) + ((unsigned long long) h1 * r2) + + ((unsigned long long) h2 * r1) + ((unsigned long long) h3 * r0) + + ((unsigned long long) h4 * s4); + d4 = ((unsigned long long) h0 * r4) + ((unsigned long long) h1 * r3) + + ((unsigned long long) h2 * r2) + ((unsigned long long) h3 * r1) + + ((unsigned long long) h4 * r0); + + /* (partial) h %= p */ + c = (unsigned long) (d0 >> 26); + h0 = (unsigned long) d0 & 0x3ffffff; + d1 += c; + c = (unsigned long) (d1 >> 26); + h1 = (unsigned long) d1 & 0x3ffffff; + d2 += c; + c = (unsigned long) (d2 >> 26); + h2 = (unsigned long) d2 & 0x3ffffff; + d3 += c; + c = (unsigned long) (d3 >> 26); + h3 = (unsigned long) d3 & 0x3ffffff; + d4 += c; + c = (unsigned long) (d4 >> 26); + h4 = 
(unsigned long) d4 & 0x3ffffff; + h0 += c * 5; + c = (h0 >> 26); + h0 = h0 & 0x3ffffff; + h1 += c; + + m += poly1305_block_size; + bytes -= poly1305_block_size; + } + + st->h[0] = h0; + st->h[1] = h1; + st->h[2] = h2; + st->h[3] = h3; + st->h[4] = h4; +} + +static POLY1305_NOINLINE void poly1305_finish(poly1305_context *ctx, + unsigned char mac[16]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *) ctx; + unsigned long h0, h1, h2, h3, h4, c; + unsigned long g0, g1, g2, g3, g4; + unsigned long long f; + unsigned long mask; + + /* process the remaining block */ + if (st->leftover) { + size_t i = st->leftover; + st->buffer[i++] = 1; + for (; i < poly1305_block_size; i++) st->buffer[i] = 0; + st->final = 1; + poly1305_blocks(st, st->buffer, poly1305_block_size); + } + + /* fully carry h */ + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + c = h1 >> 26; + h1 = h1 & 0x3ffffff; + h2 += c; + c = h2 >> 26; + h2 = h2 & 0x3ffffff; + h3 += c; + c = h3 >> 26; + h3 = h3 & 0x3ffffff; + h4 += c; + c = h4 >> 26; + h4 = h4 & 0x3ffffff; + h0 += c * 5; + c = h0 >> 26; + h0 = h0 & 0x3ffffff; + h1 += c; + + /* compute h + -p */ + g0 = h0 + 5; + c = g0 >> 26; + g0 &= 0x3ffffff; + g1 = h1 + c; + c = g1 >> 26; + g1 &= 0x3ffffff; + g2 = h2 + c; + c = g2 >> 26; + g2 &= 0x3ffffff; + g3 = h3 + c; + c = g3 >> 26; + g3 &= 0x3ffffff; + g4 = h4 + c - (1UL << 26); + + /* select h if h < p, or h + -p if h >= p */ + mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1; + g0 &= mask; + g1 &= mask; + g2 &= mask; + g3 &= mask; + g4 &= mask; + mask = ~mask; + h0 = (h0 & mask) | g0; + h1 = (h1 & mask) | g1; + h2 = (h2 & mask) | g2; + h3 = (h3 & mask) | g3; + h4 = (h4 & mask) | g4; + + /* h = h % (2^128) */ + h0 = ((h0) | (h1 << 26)) & 0xffffffff; + h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; + h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; + h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; + + /* mac = (h + pad) % (2^128) */ + f = (unsigned long long) h0 + 
st->pad[0]; + h0 = (unsigned long) f; + f = (unsigned long long) h1 + st->pad[1] + (f >> 32); + h1 = (unsigned long) f; + f = (unsigned long long) h2 + st->pad[2] + (f >> 32); + h2 = (unsigned long) f; + f = (unsigned long long) h3 + st->pad[3] + (f >> 32); + h3 = (unsigned long) f; + + U32TO8(mac + 0, h0); + U32TO8(mac + 4, h1); + U32TO8(mac + 8, h2); + U32TO8(mac + 12, h3); + + /* zero out the state */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; + st->r[0] = 0; + st->r[1] = 0; + st->r[2] = 0; + st->r[3] = 0; + st->r[4] = 0; + st->pad[0] = 0; + st->pad[1] = 0; + st->pad[2] = 0; + st->pad[3] = 0; +} + +#else +/* + poly1305 implementation using 64 bit * 64 bit = 128 bit multiplication +and 128 bit addition static */ + +#if defined(_MSC_VER) + +typedef struct uint128_t { + uint64_t lo; + uint64_t hi; +} uint128_t; + +#define MUL128(out, x, y) out.lo = _umul128((x), (y), &out.hi) +#define ADD(out, in) \ + { \ + unsigned long long t = out.lo; \ + out.lo += in.lo; \ + out.hi += (out.lo < t) + in.hi; \ + } +#define ADDLO(out, in) \ + { \ + unsigned long long t = out.lo; \ + out.lo += in; \ + out.hi += (out.lo < t); \ + } +#define SHR(in, shift) (__shiftright128(in.lo, in.hi, (shift))) +#define LO(in) (in.lo) + +#define POLY1305_NOINLINE __declspec(noinline) +#elif defined(__GNUC__) +#if defined(__SIZEOF_INT128__) +// Get rid of GCC warning "ISO C does not support '__int128' types" +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +typedef unsigned __int128 uint128_t; +#pragma GCC diagnostic pop +#else +typedef unsigned uint128_t __attribute__((mode(TI))); +#endif + +#define MUL128(out, x, y) out = ((uint128_t) x * y) +#define ADD(out, in) out += in +#define ADDLO(out, in) out += in +#define SHR(in, shift) (unsigned long long) (in >> (shift)) +#define LO(in) (unsigned long long) (in) + +#define POLY1305_NOINLINE __attribute__((noinline)) +#endif + +#define poly1305_block_size 16 + +/* 17 + sizeof(size_t) + 
/* Read eight bytes at p as one little endian 64 bit unsigned integer */
static unsigned long long U8TO64(const unsigned char *p) {
  unsigned long long word = 0;
  int k;
  /* most significant byte first, so each step shifts the earlier ones up */
  for (k = 7; k >= 0; k--) {
    word = (word << 8) | (unsigned long long) (p[k] & 0xff);
  }
  return word;
}

/* Write the low 64 bits of v to p[0..7] in little endian order */
static void U64TO8(unsigned char *p, unsigned long long v) {
  unsigned int k;
  for (k = 0; k < 8; k++) {
    p[k] = (unsigned char) (v & 0xff);
    v >>= 8;
  }
}
*st, + const unsigned char *m, size_t bytes) { + const unsigned long long hibit = + (st->final) ? 0 : ((unsigned long long) 1 << 40); /* 1 << 128 */ + unsigned long long r0, r1, r2; + unsigned long long s1, s2; + unsigned long long h0, h1, h2; + unsigned long long c; + uint128_t d0, d1, d2, d; + + r0 = st->r[0]; + r1 = st->r[1]; + r2 = st->r[2]; + + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + + s1 = r1 * (5 << 2); + s2 = r2 * (5 << 2); + + while (bytes >= poly1305_block_size) { + unsigned long long t0, t1; + + /* h += m[i] */ + t0 = U8TO64(&m[0]); + t1 = U8TO64(&m[8]); + + h0 += ((t0) & 0xfffffffffff); + h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff); + h2 += (((t1 >> 24)) & 0x3ffffffffff) | hibit; + + /* h *= r */ + MUL128(d0, h0, r0); + MUL128(d, h1, s2); + ADD(d0, d); + MUL128(d, h2, s1); + ADD(d0, d); + MUL128(d1, h0, r1); + MUL128(d, h1, r0); + ADD(d1, d); + MUL128(d, h2, s2); + ADD(d1, d); + MUL128(d2, h0, r2); + MUL128(d, h1, r1); + ADD(d2, d); + MUL128(d, h2, r0); + ADD(d2, d); + + /* (partial) h %= p */ + c = SHR(d0, 44); + h0 = LO(d0) & 0xfffffffffff; + ADDLO(d1, c); + c = SHR(d1, 44); + h1 = LO(d1) & 0xfffffffffff; + ADDLO(d2, c); + c = SHR(d2, 42); + h2 = LO(d2) & 0x3ffffffffff; + h0 += c * 5; + c = (h0 >> 44); + h0 = h0 & 0xfffffffffff; + h1 += c; + + m += poly1305_block_size; + bytes -= poly1305_block_size; + } + + st->h[0] = h0; + st->h[1] = h1; + st->h[2] = h2; +} + +static POLY1305_NOINLINE void poly1305_finish(poly1305_context *ctx, + unsigned char mac[16]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *) ctx; + unsigned long long h0, h1, h2, c; + unsigned long long g0, g1, g2; + unsigned long long t0, t1; + + /* process the remaining block */ + if (st->leftover) { + size_t i = st->leftover; + st->buffer[i] = 1; + for (i = i + 1; i < poly1305_block_size; i++) st->buffer[i] = 0; + st->final = 1; + poly1305_blocks(st, st->buffer, poly1305_block_size); + } + + /* fully carry h */ + h0 = st->h[0]; + h1 = st->h[1]; + h2 = 
st->h[2]; + + c = (h1 >> 44); + h1 &= 0xfffffffffff; + h2 += c; + c = (h2 >> 42); + h2 &= 0x3ffffffffff; + h0 += c * 5; + c = (h0 >> 44); + h0 &= 0xfffffffffff; + h1 += c; + c = (h1 >> 44); + h1 &= 0xfffffffffff; + h2 += c; + c = (h2 >> 42); + h2 &= 0x3ffffffffff; + h0 += c * 5; + c = (h0 >> 44); + h0 &= 0xfffffffffff; + h1 += c; + + /* compute h + -p */ + g0 = h0 + 5; + c = (g0 >> 44); + g0 &= 0xfffffffffff; + g1 = h1 + c; + c = (g1 >> 44); + g1 &= 0xfffffffffff; + g2 = h2 + c - ((unsigned long long) 1 << 42); + + /* select h if h < p, or h + -p if h >= p */ + c = (g2 >> ((sizeof(unsigned long long) * 8) - 1)) - 1; + g0 &= c; + g1 &= c; + g2 &= c; + c = ~c; + h0 = (h0 & c) | g0; + h1 = (h1 & c) | g1; + h2 = (h2 & c) | g2; + + /* h = (h + pad) */ + t0 = st->pad[0]; + t1 = st->pad[1]; + + h0 += ((t0) & 0xfffffffffff); + c = (h0 >> 44); + h0 &= 0xfffffffffff; + h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c; + c = (h1 >> 44); + h1 &= 0xfffffffffff; + h2 += (((t1 >> 24)) & 0x3ffffffffff) + c; + h2 &= 0x3ffffffffff; + + /* mac = h % (2^128) */ + h0 = ((h0) | (h1 << 44)); + h1 = ((h1 >> 20) | (h2 << 24)); + + U64TO8(&mac[0], h0); + U64TO8(&mac[8], h1); + + /* zero out the state */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->r[0] = 0; + st->r[1] = 0; + st->r[2] = 0; + st->pad[0] = 0; + st->pad[1] = 0; +} + +#endif + +static void poly1305_update(poly1305_context *ctx, const unsigned char *m, + size_t bytes) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *) ctx; + size_t i; + + /* handle leftover */ + if (st->leftover) { + size_t want = (poly1305_block_size - st->leftover); + if (want > bytes) want = bytes; + for (i = 0; i < want; i++) st->buffer[st->leftover + i] = m[i]; + bytes -= want; + m += want; + st->leftover += want; + if (st->leftover < poly1305_block_size) return; + poly1305_blocks(st, st->buffer, poly1305_block_size); + st->leftover = 0; + } + + /* process full blocks */ + if (bytes >= poly1305_block_size) { + size_t want = 
(bytes & (size_t) ~(poly1305_block_size - 1)); + poly1305_blocks(st, m, want); + m += want; + bytes -= want; + } + + /* store leftover */ + if (bytes) { + for (i = 0; i < bytes; i++) st->buffer[st->leftover + i] = m[i]; + st->leftover += bytes; + } +} + +// ******* END: poly1305-donna.c ******** +// ******* BEGIN: portable8439.c ******** + +#define __CHACHA20_BLOCK_SIZE (64) +#define __POLY1305_KEY_SIZE (32) + +static PORTABLE_8439_DECL uint8_t __ZEROES[16] = {0}; +static PORTABLE_8439_DECL void pad_if_needed(poly1305_context *ctx, + size_t size) { + size_t padding = size % 16; + if (padding != 0) { + poly1305_update(ctx, __ZEROES, 16 - padding); + } +} + +#define __u8(v) ((uint8_t) ((v) & 0xFF)) + +// TODO: make this depending on the unaligned/native read size possible +static PORTABLE_8439_DECL void write_64bit_int(poly1305_context *ctx, + uint64_t value) { + uint8_t result[8]; + result[0] = __u8(value); + result[1] = __u8(value >> 8); + result[2] = __u8(value >> 16); + result[3] = __u8(value >> 24); + result[4] = __u8(value >> 32); + result[5] = __u8(value >> 40); + result[6] = __u8(value >> 48); + result[7] = __u8(value >> 56); + poly1305_update(ctx, result, 8); +} + +static PORTABLE_8439_DECL void poly1305_calculate_mac( + uint8_t *mac, const uint8_t *cipher_text, size_t cipher_text_size, + const uint8_t key[RFC_8439_KEY_SIZE], + const uint8_t nonce[RFC_8439_NONCE_SIZE], const uint8_t *ad, + size_t ad_size) { + // init poly key (section 2.6) + uint8_t poly_key[__POLY1305_KEY_SIZE] = {0}; + rfc8439_keygen(poly_key, key, nonce); + // start poly1305 mac + poly1305_context poly_ctx; + poly1305_init(&poly_ctx, poly_key); + + if (ad != NULL && ad_size > 0) { + // write AD if present + poly1305_update(&poly_ctx, ad, ad_size); + pad_if_needed(&poly_ctx, ad_size); + } + + // now write the cipher text + poly1305_update(&poly_ctx, cipher_text, cipher_text_size); + pad_if_needed(&poly_ctx, cipher_text_size); + + // write sizes + write_64bit_int(&poly_ctx, ad_size); + 
write_64bit_int(&poly_ctx, cipher_text_size); + + // calculate MAC + poly1305_finish(&poly_ctx, mac); +} + +#define PM(p) ((size_t) (p)) + +// pointers overlap if the smaller either ahead of the end, +// or its end is before the start of the other +// +// s_size should be smaller or equal to b_size +#define OVERLAPPING(s, s_size, b, b_size) \ + (PM(s) < PM((b) + (b_size))) && (PM(b) < PM((s) + (s_size))) + +PORTABLE_8439_DECL size_t mg_chacha20_poly1305_encrypt( + uint8_t *restrict cipher_text, const uint8_t key[RFC_8439_KEY_SIZE], + const uint8_t nonce[RFC_8439_NONCE_SIZE], const uint8_t *restrict ad, + size_t ad_size, const uint8_t *restrict plain_text, + size_t plain_text_size) { + size_t new_size = plain_text_size + RFC_8439_TAG_SIZE; + if (OVERLAPPING(plain_text, plain_text_size, cipher_text, new_size)) { + return (size_t) -1; + } + chacha20_xor_stream(cipher_text, plain_text, plain_text_size, key, nonce, 1); + poly1305_calculate_mac(cipher_text + plain_text_size, cipher_text, + plain_text_size, key, nonce, ad, ad_size); + return new_size; +} + +PORTABLE_8439_DECL size_t mg_chacha20_poly1305_decrypt( + uint8_t *restrict plain_text, const uint8_t key[RFC_8439_KEY_SIZE], + const uint8_t nonce[RFC_8439_NONCE_SIZE], + const uint8_t *restrict cipher_text, size_t cipher_text_size) { + // first we calculate the mac and see if it lines up, only then do we decrypt + size_t actual_size = cipher_text_size - RFC_8439_TAG_SIZE; + if (OVERLAPPING(plain_text, actual_size, cipher_text, cipher_text_size)) { + return (size_t) -1; + } + + chacha20_xor_stream(plain_text, cipher_text, actual_size, key, nonce, 1); + return actual_size; +} +// ******* END: portable8439.c ******** +#endif // MG_TLS == MG_TLS_BUILTIN + #ifdef MG_ENABLE_LINES #line 1 "src/tls_dummy.c" #endif @@ -11173,6 +12484,19 @@ static int mg_bio_write(BIO *bio, const char *buf, int len) { return len; } +#ifdef MG_TLS_SSLKEYLOGFILE +static void ssl_keylog_cb(const SSL *ssl, const char *line) { + char *keylogfile = 
getenv("SSLKEYLOGFILE"); + if (keylogfile == NULL) { + return; + } + FILE *f = fopen(keylogfile, "a"); + fprintf(f, "%s\n", line); + fflush(f); + fclose(f); +} +#endif + void mg_tls_init(struct mg_connection *c, const struct mg_tls_opts *opts) { struct mg_tls *tls = (struct mg_tls *) calloc(1, sizeof(*tls)); const char *id = "mongoose"; @@ -11192,6 +12516,9 @@ void mg_tls_init(struct mg_connection *c, const struct mg_tls_opts *opts) { MG_DEBUG(("%lu Setting TLS", c->id)); tls->ctx = c->is_client ? SSL_CTX_new(SSLv23_client_method()) : SSL_CTX_new(SSLv23_server_method()); +#if MG_TLS_SSLKEYLOGFILE + SSL_CTX_set_keylog_callback(tls->ctx, ssl_keylog_cb); +#endif if ((tls->ssl = SSL_new(tls->ctx)) == NULL) { mg_error(c, "SSL_new"); goto fail; diff --git a/mongoose.h b/mongoose.h index ae5bf136..f538b32f 100644 --- a/mongoose.h +++ b/mongoose.h @@ -1936,6 +1936,116 @@ typedef uint64_t mg_uecc_word_t; #endif /* _UECC_TYPES_H_ */ // End of uecc BSD-2 +// portable8439 v1.0.1 +// Source: https://github.com/DavyLandman/portable8439 +// Licensed under CC0-1.0 +// Contains poly1305-donna e6ad6e091d30d7f4ec2d4f978be1fcfcbce72781 (Public +// Domain) + + + + +#ifndef __PORTABLE_8439_H +#define __PORTABLE_8439_H +#if defined(__cplusplus) +extern "C" { +#endif + +// provide your own decl specifier like -DPORTABLE_8439_DECL=ICACHE_RAM_ATTR +#ifndef PORTABLE_8439_DECL +#define PORTABLE_8439_DECL +#endif + +/* + This library implements RFC 8439 a.k.a. ChaCha20-Poly1305 AEAD + + You can use this library to avoid attackers mutating or reusing your + encrypted messages. This does assume you never reuse a nonce+key pair and, + if possible, carefully pick your associated data.
+*/ + +// Make sure we are either nested in C++ or running in a C99+ compiler +#if !defined(__cplusplus) && !defined(_MSC_VER) && \ + (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) +#error "C99 or newer required" +#endif + +// #if CHAR_BIT > 8 +// # error "Systems without native octets not supported" +// #endif + +#if defined(_MSC_VER) || defined(__cplusplus) +// add restrict support if possible +#if (defined(_MSC_VER) && _MSC_VER >= 1900) || defined(__clang__) || \ + defined(__GNUC__) +#define restrict __restrict +#else +#define restrict +#endif +#endif + +#define RFC_8439_TAG_SIZE (16) +#define RFC_8439_KEY_SIZE (32) +#define RFC_8439_NONCE_SIZE (12) + +/* + Encrypt/Seal plain text bytes into a cipher text that can only be + decrypted by knowing the key, nonce and associated data. + + input: + - key: RFC_8439_KEY_SIZE bytes that all parties have agreed + upon beforehand + - nonce: RFC_8439_NONCE_SIZE bytes that should never be repeated + for the same key. A counter or a pseudo-random value are fine. + - ad: associated data to include with calculating the tag of the + cipher text. Can be null for empty. + - plain_text: data to be encrypted, pointer + size should not overlap + with cipher_text pointer + + output: + - cipher_text: encrypted plain_text with a tag appended. Make sure to + allocate at least plain_text_size + RFC_8439_TAG_SIZE + + returns: + - size of bytes written to cipher_text, can be -1 if overlapping + pointers are passed for plain_text and cipher_text +*/ +PORTABLE_8439_DECL size_t mg_chacha20_poly1305_encrypt( + uint8_t *restrict cipher_text, const uint8_t key[RFC_8439_KEY_SIZE], + const uint8_t nonce[RFC_8439_NONCE_SIZE], const uint8_t *restrict ad, + size_t ad_size, const uint8_t *restrict plain_text, size_t plain_text_size); + +/* + Decrypt/unseal cipher text given the right key and nonce (no additional data).
+ + input: + - key: RFC_8439_KEY_SIZE bytes that all parties have agreed + upon beforehand + - nonce: RFC_8439_NONCE_SIZE bytes that should never be repeated for + the same key. A counter or a pseudo-random value are fine. + - ad: not an argument of this function; the associated data is + not needed because the tag is not recomputed here. + - cipher_text: encrypted message, including the trailing tag. + + output: + - plain_text: decrypted data, pointer + size should not overlap + with cipher_text pointer, leave at least enough room for + cipher_text_size - RFC_8439_TAG_SIZE + + returns: + - size of bytes written to plain_text, or -1 when overlapping + pointers are passed for plain_text and cipher_text. + NOTE: the Poly1305 tag is not verified, so an incorrect key/nonce + or a corrupted cipher_text is NOT detected by this function. +*/ +PORTABLE_8439_DECL size_t mg_chacha20_poly1305_decrypt( + uint8_t *restrict plain_text, const uint8_t key[RFC_8439_KEY_SIZE], + const uint8_t nonce[RFC_8439_NONCE_SIZE], + const uint8_t *restrict cipher_text, size_t cipher_text_size); +#if defined(__cplusplus) +} +#endif +#endif struct mg_connection; @@ -2940,49 +3050,6 @@ struct mg_tcpip_driver_tm4c_data { #endif -#if MG_ENABLE_TCPIP && defined(MG_ENABLE_DRIVER_W5500) && MG_ENABLE_DRIVER_W5500 - -#undef MG_ENABLE_TCPIP_DRIVER_INIT -#define MG_ENABLE_TCPIP_DRIVER_INIT 0 - -#endif - - -#if MG_ENABLE_TCPIP && defined(MG_ENABLE_DRIVER_XMC7) && MG_ENABLE_DRIVER_XMC7 - -struct mg_tcpip_driver_xmc7_data { - int mdc_cr; // Valid values: -1, 0, 1, 2, 3, 4, 5 - uint8_t phy_addr; -}; - -#ifndef MG_TCPIP_PHY_ADDR -#define MG_TCPIP_PHY_ADDR 0 -#endif - -#ifndef MG_DRIVER_MDC_CR -#define MG_DRIVER_MDC_CR 3 -#endif - -#define MG_TCPIP_DRIVER_INIT(mgr) \ - do { \ - static struct mg_tcpip_driver_xmc7_data driver_data_; \ - static struct mg_tcpip_if mif_; \ - driver_data_.mdc_cr = MG_DRIVER_MDC_CR; \ - driver_data_.phy_addr = MG_TCPIP_PHY_ADDR; \ - mif_.ip = MG_TCPIP_IP; \ - mif_.mask = MG_TCPIP_MASK; \ - mif_.gw = MG_TCPIP_GW; \ - mif_.driver = &mg_tcpip_driver_xmc7; \ - mif_.driver_data =
&driver_data_; \ - MG_SET_MAC_ADDRESS(mif_.mac); \ - mg_tcpip_init(mgr, &mif_); \ - MG_INFO(("Driver: xmc7, MAC: %M", mg_print_mac, mif_.mac)); \ - } while (0) - -#endif - - - #if MG_ENABLE_TCPIP && defined(MG_ENABLE_DRIVER_XMC) && MG_ENABLE_DRIVER_XMC struct mg_tcpip_driver_xmc_data { @@ -3029,6 +3096,41 @@ struct mg_tcpip_driver_xmc_data { #endif + +#if MG_ENABLE_TCPIP && defined(MG_ENABLE_DRIVER_XMC7) && MG_ENABLE_DRIVER_XMC7 + +struct mg_tcpip_driver_xmc7_data { + int mdc_cr; // Valid values: -1, 0, 1, 2, 3, 4, 5 + uint8_t phy_addr; +}; + +#ifndef MG_TCPIP_PHY_ADDR +#define MG_TCPIP_PHY_ADDR 0 +#endif + +#ifndef MG_DRIVER_MDC_CR +#define MG_DRIVER_MDC_CR 3 +#endif + +#define MG_TCPIP_DRIVER_INIT(mgr) \ + do { \ + static struct mg_tcpip_driver_xmc7_data driver_data_; \ + static struct mg_tcpip_if mif_; \ + driver_data_.mdc_cr = MG_DRIVER_MDC_CR; \ + driver_data_.phy_addr = MG_TCPIP_PHY_ADDR; \ + mif_.ip = MG_TCPIP_IP; \ + mif_.mask = MG_TCPIP_MASK; \ + mif_.gw = MG_TCPIP_GW; \ + mif_.driver = &mg_tcpip_driver_xmc7; \ + mif_.driver_data = &driver_data_; \ + MG_SET_MAC_ADDRESS(mif_.mac); \ + mg_tcpip_init(mgr, &mif_); \ + MG_INFO(("Driver: xmc7, MAC: %M", mg_print_mac, mif_.mac)); \ + } while (0) + +#endif + + #ifdef __cplusplus } #endif diff --git a/src/net.c b/src/net.c index de2d87b9..2bc12c41 100644 --- a/src/net.c +++ b/src/net.c @@ -85,7 +85,7 @@ static bool mg_aton6(struct mg_str str, struct mg_addr *addr) { if ((str.buf[i] >= '0' && str.buf[i] <= '9') || (str.buf[i] >= 'a' && str.buf[i] <= 'f') || (str.buf[i] >= 'A' && str.buf[i] <= 'F')) { - unsigned long val; // TODO(): This loops on chars, refactor + unsigned long val = 0; // TODO(): This loops on chars, refactor if (i > j + 3) return false; // MG_DEBUG(("%lu %lu [%.*s]", i, j, (int) (i - j + 1), &str.buf[j])); mg_str_to_num(mg_str_n(&str.buf[j], i - j + 1), 16, &val, sizeof(val)); diff --git a/src/tls_builtin.c b/src/tls_builtin.c index 781fdb9d..920f9273 100644 --- a/src/tls_builtin.c +++ 
b/src/tls_builtin.c @@ -1,9 +1,17 @@ +#include "base64.h" +#include "config.h" +#include "printf.h" +#include "sha256.h" #include "tls.h" #include "tls_aes128.h" +#include "tls_chacha20.h" +#include "tls_uecc.h" #include "tls_x25519.h" #if MG_TLS == MG_TLS_BUILTIN +#define CHACHA20 1 + /* TLS 1.3 Record Content Type (RFC8446 B.1) */ #define MG_TLS_CHANGE_CIPHER 20 #define MG_TLS_ALERT 21 @@ -16,6 +24,7 @@ #define MG_TLS_SERVER_HELLO 2 #define MG_TLS_ENCRYPTED_EXTENSIONS 8 #define MG_TLS_CERTIFICATE 11 +#define MG_TLS_CERTIFICATE_REQUEST 13 #define MG_TLS_CERTIFICATE_VERIFY 15 #define MG_TLS_FINISHED 20 @@ -37,6 +46,20 @@ enum mg_tls_hs_state { MG_TLS_STATE_SERVER_CONNECTED // Done }; +// encryption keys for a TLS connection +struct tls_enc { + uint32_t sseq; // server sequence number, used in encryption + uint32_t cseq; // client sequence number, used in decryption + // keys for AES encryption or ChaCha20 + uint8_t handshake_secret[32]; + uint8_t server_write_key[32]; + uint8_t server_write_iv[12]; + uint8_t server_finished_key[32]; + uint8_t client_write_key[32]; + uint8_t client_write_iv[12]; + uint8_t client_finished_key[32]; +}; + // per-connection TLS data struct tls_data { enum mg_tls_hs_state state; // keep track of connection handshake progress @@ -49,31 +72,22 @@ struct tls_data { mg_sha256_ctx sha256; // incremental SHA-256 hash for TLS handshake - uint32_t sseq; // server sequence number, used in encryption - uint32_t cseq; // client sequence number, used in decryption - uint8_t random[32]; // client random from ClientHello uint8_t session_id[32]; // client session ID between the handshake states uint8_t x25519_cli[32]; // client X25519 key between the handshake states uint8_t x25519_sec[32]; // x25519 secret between the handshake states - int skip_verification; // perform checks on server certificate? 
- struct mg_str server_cert_der; // server certificate in DER format - uint8_t server_key[32]; // server EC private key - char hostname[254]; // server hostname (client extension) + int skip_verification; // perform checks on server certificate? + int cert_requested; // client received a CertificateRequest? + struct mg_str cert_der; // certificate in DER format + uint8_t ec_key[32]; // EC private key + char hostname[254]; // server hostname (client extension) uint8_t certhash[32]; // certificate message hash uint8_t pubkey[64]; // server EC public key to verify cert uint8_t sighash[32]; // server EC public key to verify cert - // keys for AES encryption - uint8_t handshake_secret[32]; - uint8_t server_write_key[16]; - uint8_t server_write_iv[12]; - uint8_t server_finished_key[32]; - uint8_t client_write_key[16]; - uint8_t client_write_iv[12]; - uint8_t client_finished_key[32]; + struct tls_enc enc; }; #define MG_LOAD_BE16(p) ((uint16_t) ((MG_U8P(p)[0] << 8U) | MG_U8P(p)[1])) @@ -88,15 +102,7 @@ struct tls_data { #define TLS_RECHDR_SIZE 5 // 1 byte type, 2 bytes version, 2 bytes length #define TLS_MSGHDR_SIZE 4 // 1 byte type, 3 bytes length -#if 1 -static void mg_ssl_key_log(const char *label, uint8_t client_random[32], - uint8_t *secret, size_t secretsz) { - (void) label; - (void) client_random; - (void) secret; - (void) secretsz; -} -#else +#ifdef MG_TLS_SSLKEYLOGFILE #include static void mg_ssl_key_log(const char *label, uint8_t client_random[32], uint8_t *secret, size_t secretsz) { @@ -243,14 +249,19 @@ static void mg_tls_generate_handshake_keys(struct mg_connection *c) { uint8_t hello_hash[32]; uint8_t server_hs_secret[32]; uint8_t client_hs_secret[32]; +#if CHACHA20 + const size_t keysz = 32; +#else + const size_t keysz = 16; +#endif mg_hmac_sha256(early_secret, NULL, 0, zeros, sizeof(zeros)); mg_tls_derive_secret("tls13 derived", early_secret, 32, zeros_sha256_digest, 32, pre_extract_secret, 32); - mg_hmac_sha256(tls->handshake_secret, pre_extract_secret, + 
mg_hmac_sha256(tls->enc.handshake_secret, pre_extract_secret, sizeof(pre_extract_secret), tls->x25519_sec, sizeof(tls->x25519_sec)); - mg_tls_hexdump("hs secret", tls->handshake_secret, 32); + mg_tls_hexdump("hs secret", tls->enc.handshake_secret, 32); // mg_sha256_final is not idempotent, need to copy sha256 context to calculate // the digest @@ -259,37 +270,40 @@ static void mg_tls_generate_handshake_keys(struct mg_connection *c) { mg_tls_hexdump("hello hash", hello_hash, 32); // derive keys needed for the rest of the handshake - mg_tls_derive_secret("tls13 s hs traffic", tls->handshake_secret, 32, + mg_tls_derive_secret("tls13 s hs traffic", tls->enc.handshake_secret, 32, hello_hash, 32, server_hs_secret, 32); - mg_tls_derive_secret("tls13 key", server_hs_secret, 32, NULL, 0, - tls->server_write_key, 16); - mg_tls_derive_secret("tls13 iv", server_hs_secret, 32, NULL, 0, - tls->server_write_iv, 12); - mg_tls_derive_secret("tls13 finished", server_hs_secret, 32, NULL, 0, - tls->server_finished_key, 32); - - mg_tls_derive_secret("tls13 c hs traffic", tls->handshake_secret, 32, + mg_tls_derive_secret("tls13 c hs traffic", tls->enc.handshake_secret, 32, hello_hash, 32, client_hs_secret, 32); + + mg_tls_derive_secret("tls13 key", server_hs_secret, 32, NULL, 0, + tls->enc.server_write_key, keysz); + mg_tls_derive_secret("tls13 iv", server_hs_secret, 32, NULL, 0, + tls->enc.server_write_iv, 12); + mg_tls_derive_secret("tls13 finished", server_hs_secret, 32, NULL, 0, + tls->enc.server_finished_key, 32); + mg_tls_derive_secret("tls13 key", client_hs_secret, 32, NULL, 0, - tls->client_write_key, 16); + tls->enc.client_write_key, keysz); mg_tls_derive_secret("tls13 iv", client_hs_secret, 32, NULL, 0, - tls->client_write_iv, 12); + tls->enc.client_write_iv, 12); mg_tls_derive_secret("tls13 finished", client_hs_secret, 32, NULL, 0, - tls->client_finished_key, 32); + tls->enc.client_finished_key, 32); mg_tls_hexdump("s hs traffic", server_hs_secret, 32); - mg_tls_hexdump("s 
key", tls->server_write_key, 16); - mg_tls_hexdump("s iv", tls->server_write_iv, 12); - mg_tls_hexdump("s finished", tls->server_finished_key, 32); + mg_tls_hexdump("s key", tls->enc.server_write_key, keysz); + mg_tls_hexdump("s iv", tls->enc.server_write_iv, 12); + mg_tls_hexdump("s finished", tls->enc.server_finished_key, 32); mg_tls_hexdump("c hs traffic", client_hs_secret, 32); - mg_tls_hexdump("c key", tls->client_write_key, 16); - mg_tls_hexdump("c iv", tls->client_write_iv, 16); - mg_tls_hexdump("c finished", tls->client_finished_key, 32); + mg_tls_hexdump("c key", tls->enc.client_write_key, keysz); + mg_tls_hexdump("c iv", tls->enc.client_write_iv, 12); + mg_tls_hexdump("c finished", tls->enc.client_finished_key, 32); +#ifdef MG_TLS_SSLKEYLOGFILE mg_ssl_key_log("SERVER_HANDSHAKE_TRAFFIC_SECRET", tls->random, server_hs_secret, 32); mg_ssl_key_log("CLIENT_HANDSHAKE_TRAFFIC_SECRET", tls->random, client_hs_secret, 32); +#endif } static void mg_tls_generate_application_keys(struct mg_connection *c) { @@ -299,40 +313,47 @@ static void mg_tls_generate_application_keys(struct mg_connection *c) { uint8_t master_secret[32]; uint8_t server_secret[32]; uint8_t client_secret[32]; +#if CHACHA20 + const size_t keysz = 32; +#else + const size_t keysz = 16; +#endif mg_sha256_ctx sha256; memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx)); mg_sha256_final(hash, &sha256); - mg_tls_derive_secret("tls13 derived", tls->handshake_secret, 32, + mg_tls_derive_secret("tls13 derived", tls->enc.handshake_secret, 32, zeros_sha256_digest, 32, premaster_secret, 32); mg_hmac_sha256(master_secret, premaster_secret, 32, zeros, 32); mg_tls_derive_secret("tls13 s ap traffic", master_secret, 32, hash, 32, server_secret, 32); mg_tls_derive_secret("tls13 key", server_secret, 32, NULL, 0, - tls->server_write_key, 16); + tls->enc.server_write_key, keysz); mg_tls_derive_secret("tls13 iv", server_secret, 32, NULL, 0, - tls->server_write_iv, 12); + tls->enc.server_write_iv, 12); 
mg_tls_derive_secret("tls13 c ap traffic", master_secret, 32, hash, 32, client_secret, 32); mg_tls_derive_secret("tls13 key", client_secret, 32, NULL, 0, - tls->client_write_key, 16); + tls->enc.client_write_key, keysz); mg_tls_derive_secret("tls13 iv", client_secret, 32, NULL, 0, - tls->client_write_iv, 12); + tls->enc.client_write_iv, 12); mg_tls_hexdump("s ap traffic", server_secret, 32); - mg_tls_hexdump("s key", tls->server_write_key, 16); - mg_tls_hexdump("s iv", tls->server_write_iv, 12); - mg_tls_hexdump("s finished", tls->server_finished_key, 32); + mg_tls_hexdump("s key", tls->enc.server_write_key, keysz); + mg_tls_hexdump("s iv", tls->enc.server_write_iv, 12); + mg_tls_hexdump("s finished", tls->enc.server_finished_key, 32); mg_tls_hexdump("c ap traffic", client_secret, 32); - mg_tls_hexdump("c key", tls->client_write_key, 16); - mg_tls_hexdump("c iv", tls->client_write_iv, 16); - mg_tls_hexdump("c finished", tls->client_finished_key, 32); - tls->sseq = tls->cseq = 0; + mg_tls_hexdump("c key", tls->enc.client_write_key, keysz); + mg_tls_hexdump("c iv", tls->enc.client_write_iv, 12); + mg_tls_hexdump("c finished", tls->enc.client_finished_key, 32); + tls->enc.sseq = tls->enc.cseq = 0; +#ifdef MG_TLS_SSLKEYLOGFILE mg_ssl_key_log("SERVER_TRAFFIC_SECRET_0", tls->random, server_secret, 32); mg_ssl_key_log("CLIENT_TRAFFIC_SECRET_0", tls->random, client_secret, 32); +#endif } // AES GCM encryption of the message + put encoded data into the write buffer @@ -350,21 +371,21 @@ static void mg_tls_encrypt(struct mg_connection *c, const uint8_t *msg, (uint8_t) (encsz & 0xff)}; uint8_t nonce[12]; - mg_gcm_initialize(); + uint32_t seq = c->is_client ? tls->enc.cseq : tls->enc.sseq; + uint8_t *key = + c->is_client ? tls->enc.client_write_key : tls->enc.server_write_key; + uint8_t *iv = + c->is_client ? 
tls->enc.client_write_iv : tls->enc.server_write_iv; - if (c->is_client) { - memmove(nonce, tls->client_write_iv, sizeof(tls->client_write_iv)); - nonce[8] ^= (uint8_t) ((tls->cseq >> 24) & 255U); - nonce[9] ^= (uint8_t) ((tls->cseq >> 16) & 255U); - nonce[10] ^= (uint8_t) ((tls->cseq >> 8) & 255U); - nonce[11] ^= (uint8_t) ((tls->cseq) & 255U); - } else { - memmove(nonce, tls->server_write_iv, sizeof(tls->server_write_iv)); - nonce[8] ^= (uint8_t) ((tls->sseq >> 24) & 255U); - nonce[9] ^= (uint8_t) ((tls->sseq >> 16) & 255U); - nonce[10] ^= (uint8_t) ((tls->sseq >> 8) & 255U); - nonce[11] ^= (uint8_t) ((tls->sseq) & 255U); - } +#if !CHACHA20 + mg_gcm_initialize(); +#endif + + memmove(nonce, iv, sizeof(nonce)); + nonce[8] ^= (uint8_t) ((seq >> 24) & 255U); + nonce[9] ^= (uint8_t) ((seq >> 16) & 255U); + nonce[10] ^= (uint8_t) ((seq >> 8) & 255U); + nonce[11] ^= (uint8_t) ((seq) & 255U); mg_iobuf_add(wio, wio->len, hdr, sizeof(hdr)); mg_iobuf_resize(wio, wio->len + encsz); @@ -372,17 +393,18 @@ static void mg_tls_encrypt(struct mg_connection *c, const uint8_t *msg, tag = wio->buf + wio->len + msgsz + 1; memmove(outmsg, msg, msgsz); outmsg[msgsz] = msgtype; - if (c->is_client) { - mg_aes_gcm_encrypt(outmsg, outmsg, msgsz + 1, tls->client_write_key, - sizeof(tls->client_write_key), nonce, sizeof(nonce), - associated_data, sizeof(associated_data), tag, 16); - tls->cseq++; - } else { - mg_aes_gcm_encrypt(outmsg, outmsg, msgsz + 1, tls->server_write_key, - sizeof(tls->server_write_key), nonce, sizeof(nonce), - associated_data, sizeof(associated_data), tag, 16); - tls->sseq++; - } +#if CHACHA20 + (void) tag; // tag is only used in aes gcm + uint8_t enc[8192]; + size_t n = + mg_chacha20_poly1305_encrypt(enc, key, nonce, associated_data, + sizeof(associated_data), outmsg, msgsz + 1); + memmove(outmsg, enc, n); +#else + mg_aes_gcm_encrypt(outmsg, outmsg, msgsz + 1, key, 16, nonce, sizeof(nonce), + associated_data, sizeof(associated_data), tag, 16); +#endif + c->is_client ? 
tls->enc.cseq++ : tls->enc.sseq++; wio->len += encsz; } @@ -394,6 +416,13 @@ static int mg_tls_recv_record(struct mg_connection *c) { uint8_t *msg; uint8_t nonce[12]; int r; + + uint32_t seq = c->is_client ? tls->enc.sseq : tls->enc.cseq; + uint8_t *key = + c->is_client ? tls->enc.server_write_key : tls->enc.client_write_key; + uint8_t *iv = + c->is_client ? tls->enc.server_write_iv : tls->enc.client_write_iv; + if (tls->recv.len > 0) { return 0; /* some data from previous record is still present */ } @@ -415,43 +444,47 @@ static int mg_tls_recv_record(struct mg_connection *c) { } } +#if !CHACHA20 mg_gcm_initialize(); +#endif + msgsz = MG_LOAD_BE16(rio->buf + 3); msg = rio->buf + 5; - if (c->is_client) { - memmove(nonce, tls->server_write_iv, sizeof(tls->server_write_iv)); - nonce[8] ^= (uint8_t) ((tls->sseq >> 24) & 255U); - nonce[9] ^= (uint8_t) ((tls->sseq >> 16) & 255U); - nonce[10] ^= (uint8_t) ((tls->sseq >> 8) & 255U); - nonce[11] ^= (uint8_t) ((tls->sseq) & 255U); - mg_aes_gcm_decrypt(msg, msg, msgsz - 16, tls->server_write_key, - sizeof(tls->server_write_key), nonce, sizeof(nonce)); - tls->sseq++; - } else { - memmove(nonce, tls->client_write_iv, sizeof(tls->client_write_iv)); - nonce[8] ^= (uint8_t) ((tls->cseq >> 24) & 255U); - nonce[9] ^= (uint8_t) ((tls->cseq >> 16) & 255U); - nonce[10] ^= (uint8_t) ((tls->cseq >> 8) & 255U); - nonce[11] ^= (uint8_t) ((tls->cseq) & 255U); - mg_aes_gcm_decrypt(msg, msg, msgsz - 16, tls->client_write_key, - sizeof(tls->client_write_key), nonce, sizeof(nonce)); - tls->cseq++; - } + memmove(nonce, iv, sizeof(nonce)); + nonce[8] ^= (uint8_t) ((seq >> 24) & 255U); + nonce[9] ^= (uint8_t) ((seq >> 16) & 255U); + nonce[10] ^= (uint8_t) ((seq >> 8) & 255U); + nonce[11] ^= (uint8_t) ((seq) & 255U); +#if CHACHA20 + uint8_t dec[8192]; + size_t n = mg_chacha20_poly1305_decrypt(dec, key, nonce, msg, msgsz); + memmove(msg, dec, n); +#else + mg_aes_gcm_decrypt(msg, msg, msgsz - 16, key, 16, nonce, sizeof(nonce)); +#endif r = msgsz - 
16 - 1; tls->content_type = msg[msgsz - 16 - 1]; tls->recv.buf = msg; tls->recv.size = tls->recv.len = msgsz - 16 - 1; + c->is_client ? tls->enc.sseq++ : tls->enc.cseq++; return r; } static void mg_tls_calc_cert_verify_hash(struct mg_connection *c, - uint8_t hash[32]) { + uint8_t hash[32], int is_client) { struct tls_data *tls = (struct tls_data *) c->tls; - uint8_t sig_content[130] = { - " " - " " - "TLS 1.3, server CertificateVerify\0"}; + uint8_t server_context[34] = "TLS 1.3, server CertificateVerify"; + uint8_t client_context[34] = "TLS 1.3, client CertificateVerify"; + uint8_t sig_content[130]; mg_sha256_ctx sha256; + + memset(sig_content, 0x20, 64); + if (is_client) { + memmove(sig_content + 64, client_context, sizeof(client_context)); + } else { + memmove(sig_content + 64, server_context, sizeof(server_context)); + } + memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx)); mg_sha256_final(sig_content + 98, &sha256); @@ -528,51 +561,28 @@ static void mg_tls_server_send_hello(struct mg_connection *c) { struct tls_data *tls = (struct tls_data *) c->tls; struct mg_iobuf *wio = &tls->send; + // clang-format off uint8_t msg_server_hello[122] = { - // server hello, tls 1.2 - 0x02, - 0x00, - 0x00, - 0x76, - 0x03, - 0x03, - // random (32 bytes) - PLACEHOLDER_32B, - // session ID length + session ID (32 bytes) - 0x20, - PLACEHOLDER_32B, + // server hello, tls 1.2 + 0x02, 0x00, 0x00, 0x76, 0x03, 0x03, + // random (32 bytes) + PLACEHOLDER_32B, + // session ID length + session ID (32 bytes) + 0x20, PLACEHOLDER_32B, #if defined(CHACHA20) && CHACHA20 - // TLS_CHACHA20_POLY1305_SHA256 + no compression - 0x13, - 0x03, - 0x00, + // TLS_CHACHA20_POLY1305_SHA256 + no compression + 0x13, 0x03, 0x00, #else - // TLS_AES_128_GCM_SHA256 + no compression - 0x13, - 0x01, - 0x00, + // TLS_AES_128_GCM_SHA256 + no compression + 0x13, 0x01, 0x00, #endif - // extensions + keyshare - 0x00, - 0x2e, - 0x00, - 0x33, - 0x00, - 0x24, - 0x00, - 0x1d, - 0x00, - 0x20, - // x25519 keyshare - 
PLACEHOLDER_32B, - // supported versions (tls1.3 == 0x304) - 0x00, - 0x2b, - 0x00, - 0x02, - 0x03, - 0x04 - }; + // extensions + keyshare + 0x00, 0x2e, 0x00, 0x33, 0x00, 0x24, 0x00, 0x1d, 0x00, 0x20, + // x25519 keyshare + PLACEHOLDER_32B, + // supported versions (tls1.3 == 0x304) + 0x00, 0x2b, 0x00, 0x02, 0x03, 0x04}; + // clang-format on // calculate keyshare uint8_t x25519_pub[X25519_BYTES]; @@ -607,7 +617,7 @@ static void mg_tls_server_send_ext(struct mg_connection *c) { static void mg_tls_server_send_cert(struct mg_connection *c) { struct tls_data *tls = (struct tls_data *) c->tls; // server DER certificate (empty) - size_t n = tls->server_cert_der.len; + size_t n = tls->cert_der.len; uint8_t *cert = (uint8_t *) calloc(1, 13 + n); if (cert == NULL) { mg_error(c, "tls cert oom"); @@ -626,7 +636,7 @@ static void mg_tls_server_send_cert(struct mg_connection *c) { cert[9] = (uint8_t) (((n) >> 8) & 255U); cert[10] = (uint8_t) (n & 255U); // bytes 11+ are certificate in DER format - memmove(cert + 11, tls->server_cert_der.buf, n); + memmove(cert + 11, tls->cert_der.buf, n); cert[11 + n] = cert[12 + n] = 0; // certificate extensions (none) mg_sha256_update(&tls->sha256, cert, 13 + n); mg_tls_encrypt(c, cert, 13 + n, MG_TLS_HANDSHAKE); @@ -655,7 +665,7 @@ static void finish_SHA256(const MG_UECC_HashContext *base, mg_sha256_final(hash_result, &c->ctx); } -static void mg_tls_server_send_cert_verify(struct mg_connection *c) { +static void mg_tls_send_cert_verify(struct mg_connection *c, int is_client) { struct tls_data *tls = (struct tls_data *) c->tls; // server certificate verify packet uint8_t verify[82] = {0x0f, 0x00, 0x00, 0x00, 0x04, 0x03, 0x00, 0x00}; @@ -667,10 +677,10 @@ static void mg_tls_server_send_cert_verify(struct mg_connection *c) { int neg1, neg2; uint8_t sig[64] = {0}; - mg_tls_calc_cert_verify_hash(c, (uint8_t *) hash); + mg_tls_calc_cert_verify_hash(c, (uint8_t *) hash, is_client); - mg_uecc_sign_deterministic(tls->server_key, hash, sizeof(hash), 
&ctx.uECC, - sig, mg_uecc_secp256r1()); + mg_uecc_sign_deterministic(tls->ec_key, hash, sizeof(hash), &ctx.uECC, sig, + mg_uecc_secp256r1()); neg1 = !!(sig[0] & 0x80); neg2 = !!(sig[32] & 0x80); @@ -700,7 +710,7 @@ static void mg_tls_server_send_finish(struct mg_connection *c) { uint8_t finish[36] = {0x14, 0, 0, 32}; memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx)); mg_sha256_final(hash, &sha256); - mg_hmac_sha256(finish + 4, tls->server_finished_key, 32, hash, 32); + mg_hmac_sha256(finish + 4, tls->enc.server_finished_key, 32, hash, 32); mg_tls_encrypt(c, finish, sizeof(finish), MG_TLS_HANDSHAKE); mg_io_send(c, wio->buf, wio->len); wio->len = 0; @@ -732,140 +742,73 @@ static void mg_tls_client_send_hello(struct mg_connection *c) { struct tls_data *tls = (struct tls_data *) c->tls; struct mg_iobuf *wio = &tls->send; - const char *hostname = tls->hostname; - size_t hostnamesz = strlen(tls->hostname); uint8_t x25519_pub[X25519_BYTES]; - uint8_t msg_client_hello[162 + 32] = { - // TLS Client Hello header reported as TLS1.2 (5) - 0x16, - 0x03, - 0x01, - 0x00, - 0xfe, - // server hello, tls 1.2 (6) - 0x01, - 0x00, - 0x00, - 0x8c, - 0x03, - 0x03, - // random (32 bytes) - PLACEHOLDER_32B, - // session ID length + session ID (32 bytes) - 0x20, - PLACEHOLDER_32B, -#if defined(CHACHA20) && CHACHA20 - // TLS_CHACHA20_POLY1305_SHA256 + no compression - 0x13, - 0x03, - 0x00, -#else - 0x00, - 0x02, // size = 2 bytes - 0x13, - 0x01, // TLS_AES_128_GCM_SHA256 - 0x01, - 0x00, // no compression -#endif - - // extensions + keyshare - 0x00, - 0xfe, - // x25519 keyshare - 0x00, - 0x33, - 0x00, - 0x26, - 0x00, - 0x24, - 0x00, - 0x1d, - 0x00, - 0x20, - PLACEHOLDER_32B, - // supported groups (x25519) - 0x00, - 0x0a, - 0x00, - 0x04, - 0x00, - 0x02, - 0x00, - 0x1d, - // supported versions (tls1.3 == 0x304) - 0x00, - 0x2b, - 0x00, - 0x03, - 0x02, - 0x03, - 0x04, - // session ticket (none) - 0x00, - 0x23, - 0x00, - 0x00, - // signature algorithms (we don't care, so list all the common 
ones) - 0x00, - 0x0d, - 0x00, - 0x24, - 0x00, - 0x22, - 0x04, - 0x03, - 0x05, - 0x03, - 0x06, - 0x03, - 0x08, - 0x07, - 0x08, - 0x08, - 0x08, - 0x1a, - 0x08, - 0x1b, - 0x08, - 0x1c, - 0x08, - 0x09, - 0x08, - 0x0a, - 0x08, - 0x0b, - 0x08, - 0x04, - 0x08, - 0x05, - 0x08, - 0x06, - 0x04, - 0x01, - 0x05, - 0x01, - 0x06, - 0x01, - // server name - 0x00, - 0x00, - 0x00, - 0xfe, - 0x00, - 0xfe, - 0x00, - 0x00, - 0xfe + // the only signature algorithm we actually support + uint8_t secp256r1_sig_algs[8] = { + 0x00, 0x0d, 0x00, 0x04, 0x00, 0x02, 0x04, 0x03, }; + // all popular signature algorithms (if we don't care about verification) + uint8_t all_sig_algs[34] = { + 0x00, 0x0d, 0x00, 0x1e, 0x00, 0x1c, 0x04, 0x03, 0x05, 0x03, 0x06, 0x03, + 0x08, 0x07, 0x08, 0x08, 0x08, 0x09, 0x08, 0x0a, 0x08, 0x0b, 0x08, 0x04, + 0x08, 0x05, 0x08, 0x06, 0x04, 0x01, 0x05, 0x01, 0x06, 0x01}; + uint8_t server_name_ext[9] = {0x00, 0x00, 0x00, 0xfe, 0x00, + 0xfe, 0x00, 0x00, 0xfe}; - // patch ClientHello with correct hostname length + offset: - MG_STORE_BE16(msg_client_hello + 3, hostnamesz + 189); - MG_STORE_BE16(msg_client_hello + 7, hostnamesz + 185); - MG_STORE_BE16(msg_client_hello + 82, hostnamesz + 110); - MG_STORE_BE16(msg_client_hello + 187, hostnamesz + 5); - MG_STORE_BE16(msg_client_hello + 189, hostnamesz + 3); - MG_STORE_BE16(msg_client_hello + 192, hostnamesz); + // clang-format off + uint8_t msg_client_hello[145] = { + // TLS Client Hello header reported as TLS1.2 (5) + 0x16, 0x03, 0x03, 0x00, 0xfe, + // client hello, tls 1.2 (6) + 0x01, 0x00, 0x00, 0x8c, 0x03, 0x03, + // random (32 bytes) + PLACEHOLDER_32B, + // session ID length + session ID (32 bytes) + 0x20, PLACEHOLDER_32B, 0x00, + 0x02, // size = 2 bytes +#if defined(CHACHA20) && CHACHA20 + // TLS_CHACHA20_POLY1305_SHA256 + 0x13, 0x03, +#else + // TLS_AES_128_GCM_SHA256 + 0x13, 0x01, +#endif + // no compression + 0x01, 0x00, + // extensions + keyshare + 0x00, 0xfe, + // x25519 keyshare + 0x00, 0x33, 0x00, 0x26, 0x00, 0x24, 
0x00, 0x1d, 0x00, 0x20, + PLACEHOLDER_32B, + // supported groups (x25519) + 0x00, 0x0a, 0x00, 0x04, 0x00, 0x02, 0x00, 0x1d, + // supported versions (tls1.3 == 0x304) + 0x00, 0x2b, 0x00, 0x03, 0x02, 0x03, 0x04, + // session ticket (none) + 0x00, 0x23, 0x00, 0x00, // 144 bytes till here + }; + // clang-format on + const char *hostname = tls->hostname; + size_t hostnamesz = strlen(tls->hostname); + size_t hostname_extsz = hostnamesz ? hostnamesz + 9 : 0; + uint8_t *sig_alg = tls->skip_verification ? all_sig_algs : secp256r1_sig_algs; + size_t sig_alg_sz = tls->skip_verification ? sizeof(all_sig_algs) + : sizeof(secp256r1_sig_algs); + + // patch ClientHello with correct hostname ext length (if any) + MG_STORE_BE16(msg_client_hello + 3, + hostname_extsz + 183 - 9 - 34 + sig_alg_sz); + MG_STORE_BE16(msg_client_hello + 7, + hostname_extsz + 179 - 9 - 34 + sig_alg_sz); + MG_STORE_BE16(msg_client_hello + 82, + hostname_extsz + 104 - 9 - 34 + sig_alg_sz); + + if (hostnamesz > 0) { + MG_STORE_BE16(server_name_ext + 2, hostnamesz + 5); + MG_STORE_BE16(server_name_ext + 4, hostnamesz + 3); + MG_STORE_BE16(server_name_ext + 7, hostnamesz); + } // calculate keyshare mg_random(tls->x25519_cli, sizeof(tls->x25519_cli)); @@ -878,12 +821,18 @@ static void mg_tls_client_send_hello(struct mg_connection *c) { memmove(msg_client_hello + 44, tls->session_id, sizeof(tls->session_id)); memmove(msg_client_hello + 94, x25519_pub, sizeof(x25519_pub)); - // server hello message + // client hello message mg_iobuf_add(wio, wio->len, msg_client_hello, sizeof(msg_client_hello)); - mg_iobuf_add(wio, wio->len, hostname, strlen(hostname)); mg_sha256_update(&tls->sha256, msg_client_hello + 5, sizeof(msg_client_hello) - 5); - mg_sha256_update(&tls->sha256, (uint8_t *) hostname, strlen(hostname)); + mg_iobuf_add(wio, wio->len, sig_alg, sig_alg_sz); + mg_sha256_update(&tls->sha256, sig_alg, sig_alg_sz); + if (hostnamesz > 0) { + mg_iobuf_add(wio, wio->len, server_name_ext, sizeof(server_name_ext)); + 
mg_iobuf_add(wio, wio->len, hostname, hostnamesz); + mg_sha256_update(&tls->sha256, server_name_ext, sizeof(server_name_ext)); + mg_sha256_update(&tls->sha256, (uint8_t *) hostname, hostnamesz); + } // change cipher message mg_iobuf_add(wio, wio->len, (const char *) "\x14\x03\x03\x00\x01\x01", 6); @@ -973,6 +922,12 @@ static int mg_tls_client_recv_cert(struct mg_connection *c) { if (mg_tls_recv_record(c) < 0) { return -1; } + if (tls->recv.buf[0] == MG_TLS_CERTIFICATE_REQUEST) { + MG_VERBOSE(("got certificate request")); + mg_tls_drop_message(c); + tls->cert_requested = 1; + return -1; + } if (tls->recv.buf[0] != MG_TLS_CERTIFICATE) { mg_error(c, "expected server certificate but got msg 0x%02x", tls->recv.buf[0]); @@ -1059,7 +1014,7 @@ static int mg_tls_client_recv_cert(struct mg_connection *c) { } while (0); mg_tls_drop_message(c); - mg_tls_calc_cert_verify_hash(c, tls->sighash); + mg_tls_calc_cert_verify_hash(c, tls->sighash, 0); return 0; } @@ -1141,7 +1096,7 @@ static void mg_tls_client_send_finish(struct mg_connection *c) { uint8_t finish[36] = {0x14, 0, 0, 32}; memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx)); mg_sha256_final(hash, &sha256); - mg_hmac_sha256(finish + 4, tls->client_finished_key, 32, hash, 32); + mg_hmac_sha256(finish + 4, tls->enc.client_finished_key, 32, hash, 32); mg_tls_encrypt(c, finish, sizeof(finish), MG_TLS_HANDSHAKE); mg_io_send(c, wio->buf, wio->len); wio->len = 0; @@ -1182,8 +1137,23 @@ static void mg_tls_client_handshake(struct mg_connection *c) { if (mg_tls_client_recv_finish(c) < 0) { break; } - mg_tls_client_send_finish(c); - mg_tls_generate_application_keys(c); + if (tls->cert_requested) { + /* for mTLS we should generate application keys at this point + * but then restore handshake keys and continue with + * the rest of the handshake */ + struct tls_enc app_keys; + struct tls_enc hs_keys = tls->enc; + mg_tls_generate_application_keys(c); + app_keys = tls->enc; + tls->enc = hs_keys; + mg_tls_server_send_cert(c); + 
mg_tls_send_cert_verify(c, 1); + mg_tls_client_send_finish(c); + tls->enc = app_keys; + } else { + mg_tls_client_send_finish(c); + mg_tls_generate_application_keys(c); + } tls->state = MG_TLS_STATE_CLIENT_CONNECTED; c->is_tls_hs = 0; break; @@ -1202,7 +1172,7 @@ static void mg_tls_server_handshake(struct mg_connection *c) { mg_tls_generate_handshake_keys(c); mg_tls_server_send_ext(c); mg_tls_server_send_cert(c); - mg_tls_server_send_cert_verify(c); + mg_tls_send_cert_verify(c, 0); mg_tls_server_send_finish(c); tls->state = MG_TLS_STATE_SERVER_NEGOTIATED; // fallthrough @@ -1286,14 +1256,13 @@ void mg_tls_init(struct mg_connection *c, const struct mg_tls_opts *opts) { tls->hostname[opts->name.len] = 0; } - if (c->is_client) { - tls->server_cert_der.buf = NULL; + if (opts->cert.buf == NULL) { + MG_VERBOSE(("no certificate provided")); return; } // parse PEM or DER certificate - if (mg_parse_pem(opts->cert, mg_str_s("CERTIFICATE"), &tls->server_cert_der) < - 0) { + if (mg_parse_pem(opts->cert, mg_str_s("CERTIFICATE"), &tls->cert_der) < 0) { MG_ERROR(("Failed to load certificate")); return; } @@ -1318,7 +1287,7 @@ void mg_tls_init(struct mg_connection *c, const struct mg_tls_opts *opts) { if (memcmp(key.buf + 2, "\x02\x01\x01\x04\x20", 5) != 0) { MG_ERROR(("EC private key: ASN.1 bad data")); } - memmove(tls->server_key, key.buf + 7, 32); + memmove(tls->ec_key, key.buf + 7, 32); free((void *) key.buf); } else if (mg_parse_pem(opts->key, mg_str_s("PRIVATE KEY"), &key) == 0) { mg_error(c, "PKCS8 private key format is not supported"); @@ -1331,7 +1300,7 @@ void mg_tls_free(struct mg_connection *c) { struct tls_data *tls = (struct tls_data *) c->tls; if (tls != NULL) { mg_iobuf_free(&tls->send); - free((void *) tls->server_cert_der.buf); + free((void *) tls->cert_der.buf); } free(c->tls); c->tls = NULL; @@ -1359,6 +1328,7 @@ long mg_tls_recv(struct mg_connection *c, void *buf, size_t len) { if (r < 0) { return r; } + if (tls->content_type != MG_TLS_APP_DATA) { tls->recv.len 
= 0; mg_tls_drop_record(c); diff --git a/src/tls_chacha20.c b/src/tls_chacha20.c new file mode 100644 index 00000000..1def3aae --- /dev/null +++ b/src/tls_chacha20.c @@ -0,0 +1,1337 @@ +// portable8439 v1.0.1 +// Source: https://github.com/DavyLandman/portable8439 +// Licensed under CC0-1.0 +// Contains poly1305-donna e6ad6e091d30d7f4ec2d4f978be1fcfcbce72781 (Public +// Domain) + +#include "tls.h" +#include "tls_chacha20.h" + +#if MG_TLS == MG_TLS_BUILTIN +// ******* BEGIN: chacha-portable/chacha-portable.h ******** + +#if !defined(__cplusplus) && !defined(_MSC_VER) && \ + (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) +#error "C99 or newer required" +#endif + +#define CHACHA20_KEY_SIZE (32) +#define CHACHA20_NONCE_SIZE (12) + +#if defined(_MSC_VER) || defined(__cplusplus) +// add restrict support +#if (defined(_MSC_VER) && _MSC_VER >= 1900) || defined(__clang__) || \ + defined(__GNUC__) +#define restrict __restrict +#else +#define restrict +#endif +#endif + +// xor data with a ChaCha20 keystream as per RFC8439 +static PORTABLE_8439_DECL void chacha20_xor_stream( + uint8_t *restrict dest, const uint8_t *restrict source, size_t length, + const uint8_t key[CHACHA20_KEY_SIZE], + const uint8_t nonce[CHACHA20_NONCE_SIZE], uint32_t counter); + +static PORTABLE_8439_DECL void rfc8439_keygen( + uint8_t poly_key[32], const uint8_t key[CHACHA20_KEY_SIZE], + const uint8_t nonce[CHACHA20_NONCE_SIZE]); + +// ******* END: chacha-portable/chacha-portable.h ******** +// ******* BEGIN: poly1305-donna/poly1305-donna.h ******** + +#include <stddef.h> + +typedef struct poly1305_context { + size_t aligner; + unsigned char opaque[136]; +} poly1305_context; + +static PORTABLE_8439_DECL void poly1305_init(poly1305_context *ctx, + const unsigned char key[32]); +static PORTABLE_8439_DECL void poly1305_update(poly1305_context *ctx, + const unsigned char *m, + size_t bytes); +static PORTABLE_8439_DECL void poly1305_finish(poly1305_context *ctx, + unsigned char mac[16]); + +// ******* END: 
poly1305-donna/poly1305-donna.h ******** +// ******* BEGIN: chacha-portable.c ******** + +#include <assert.h> +#include <string.h> + +// this is a fresh implementation of chacha20, based on the description in +// rfc8439; it's such a nice compact algorithm that it is easy to do. Compared +// to other C implementations, this implementation is: +// - pure c99 +// - big & little endian support +// - safe for architectures that don't support unaligned reads +// +// Next to this, we try to be as fast as possible without resorting to inline +// assembly. + +// based on https://sourceforge.net/p/predef/wiki/Endianness/ +#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define __HAVE_LITTLE_ENDIAN 1 +#elif defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || \ + defined(__THUMBEL__) || defined(__AARCH64EL__) || defined(_MIPSEL) || \ + defined(__MIPSEL) || defined(__MIPSEL__) || defined(__XTENSA_EL__) || \ + defined(__AVR__) || defined(LITTLE_ENDIAN) +#define __HAVE_LITTLE_ENDIAN 1 +#endif + +#ifndef TEST_SLOW_PATH +#if defined(__HAVE_LITTLE_ENDIAN) +#define FAST_PATH +#endif +#endif + +#define CHACHA20_STATE_WORDS (16) +#define CHACHA20_BLOCK_SIZE (CHACHA20_STATE_WORDS * sizeof(uint32_t)) + +#ifdef FAST_PATH +#define store_32_le(target, source) memcpy(&(target), source, sizeof(uint32_t)) +#else +#define store_32_le(target, source) \ + target = (uint32_t) (source)[0] | ((uint32_t) (source)[1]) << 8 | \ + ((uint32_t) (source)[2]) << 16 | ((uint32_t) (source)[3]) << 24 +#endif + +static void initialize_state(uint32_t state[CHACHA20_STATE_WORDS], + const uint8_t key[CHACHA20_KEY_SIZE], + const uint8_t nonce[CHACHA20_NONCE_SIZE], + uint32_t counter) { +#ifdef static_assert + static_assert(sizeof(uint32_t) == 4, + "We don't support systems that do not conform to standard of " + "uint32_t being exact 32bit wide"); +#endif + state[0] = 0x61707865; + state[1] = 0x3320646e; + state[2] = 0x79622d32; + state[3] = 0x6b206574; + store_32_le(state[4], 
key); + store_32_le(state[5], key + 4); + store_32_le(state[6], key + 8); + store_32_le(state[7], key + 12); + store_32_le(state[8], key + 16); + store_32_le(state[9], key + 20); + store_32_le(state[10], key + 24); + store_32_le(state[11], key + 28); + state[12] = counter; + store_32_le(state[13], nonce); + store_32_le(state[14], nonce + 4); + store_32_le(state[15], nonce + 8); +} + +#define increment_counter(state) (state)[12]++ + +// source: http://blog.regehr.org/archives/1063 +#define rotl32a(x, n) ((x) << (n)) | ((x) >> (32 - (n))) + +#define Qround(a, b, c, d) \ + a += b; \ + d ^= a; \ + d = rotl32a(d, 16); \ + c += d; \ + b ^= c; \ + b = rotl32a(b, 12); \ + a += b; \ + d ^= a; \ + d = rotl32a(d, 8); \ + c += d; \ + b ^= c; \ + b = rotl32a(b, 7); + +#define TIMES16(x) \ + x(0) x(1) x(2) x(3) x(4) x(5) x(6) x(7) x(8) x(9) x(10) x(11) x(12) x(13) \ + x(14) x(15) + +static void core_block(const uint32_t *restrict start, + uint32_t *restrict output) { +// instead of working on the output array, +// we let the compiler allocate 16 local variables on the stack +#define __LV(i) uint32_t __s##i = start[i]; + TIMES16(__LV) + +#define __Q(a, b, c, d) Qround(__s##a, __s##b, __s##c, __s##d) + + for (int i = 0; i < 10; i++) { + __Q(0, 4, 8, 12); + __Q(1, 5, 9, 13); + __Q(2, 6, 10, 14); + __Q(3, 7, 11, 15); + __Q(0, 5, 10, 15); + __Q(1, 6, 11, 12); + __Q(2, 7, 8, 13); + __Q(3, 4, 9, 14); + } + +#define __FIN(i) output[i] = start[i] + __s##i; + TIMES16(__FIN) +} + +#define U8(x) ((uint8_t) ((x) & 0xFF)) + +#ifdef FAST_PATH +#define xor32_le(dst, src, pad) \ + uint32_t __value; \ + memcpy(&__value, src, sizeof(uint32_t)); \ + __value ^= *(pad); \ + memcpy(dst, &__value, sizeof(uint32_t)); +#else +#define xor32_le(dst, src, pad) \ + (dst)[0] = (src)[0] ^ U8(*(pad)); \ + (dst)[1] = (src)[1] ^ U8(*(pad) >> 8); \ + (dst)[2] = (src)[2] ^ U8(*(pad) >> 16); \ + (dst)[3] = (src)[3] ^ U8(*(pad) >> 24); +#endif + +#define index8_32(a, ix) ((a) + ((ix) * sizeof(uint32_t))) + +#define 
xor32_blocks(dest, source, pad, words) \ + for (unsigned int __i = 0; __i < words; __i++) { \ + xor32_le(index8_32(dest, __i), index8_32(source, __i), (pad) + __i) \ + } + +static void xor_block(uint8_t *restrict dest, const uint8_t *restrict source, + const uint32_t *restrict pad, unsigned int chunk_size) { + unsigned int full_blocks = chunk_size / sizeof(uint32_t); + // have to be careful, we are going back from uint32 to uint8, so endianness + // matters again + xor32_blocks(dest, source, pad, full_blocks) + + dest += full_blocks * sizeof(uint32_t); + source += full_blocks * sizeof(uint32_t); + pad += full_blocks; + + switch (chunk_size % sizeof(uint32_t)) { + case 1: dest[0] = source[0] ^ U8(*pad); break; + case 2: + dest[0] = source[0] ^ U8(*pad); + dest[1] = source[1] ^ U8(*pad >> 8); + break; + case 3: + dest[0] = source[0] ^ U8(*pad); + dest[1] = source[1] ^ U8(*pad >> 8); + dest[2] = source[2] ^ U8(*pad >> 16); + break; + } +} + +static void chacha20_xor_stream(uint8_t *restrict dest, + const uint8_t *restrict source, size_t length, + const uint8_t key[CHACHA20_KEY_SIZE], + const uint8_t nonce[CHACHA20_NONCE_SIZE], + uint32_t counter) { + uint32_t state[CHACHA20_STATE_WORDS]; + initialize_state(state, key, nonce, counter); + + uint32_t pad[CHACHA20_STATE_WORDS]; + size_t full_blocks = length / CHACHA20_BLOCK_SIZE; + for (size_t b = 0; b < full_blocks; b++) { + core_block(state, pad); + increment_counter(state); + xor32_blocks(dest, source, pad, CHACHA20_STATE_WORDS) dest += + CHACHA20_BLOCK_SIZE; + source += CHACHA20_BLOCK_SIZE; + } + unsigned int last_block = (unsigned int) (length % CHACHA20_BLOCK_SIZE); + if (last_block > 0) { + core_block(state, pad); + xor_block(dest, source, pad, last_block); + } +} + +#ifdef FAST_PATH +#define serialize(poly_key, result) memcpy(poly_key, result, 32) +#else +#define store32_le(target, source) \ + (target)[0] = U8(*(source)); \ + (target)[1] = U8(*(source) >> 8); \ + (target)[2] = U8(*(source) >> 16); \ + (target)[3] 
= U8(*(source) >> 24); + +#define serialize(poly_key, result) \ + for (unsigned int i = 0; i < 32 / sizeof(uint32_t); i++) { \ + store32_le(index8_32(poly_key, i), result + i); \ + } +#endif + +static void rfc8439_keygen(uint8_t poly_key[32], + const uint8_t key[CHACHA20_KEY_SIZE], + const uint8_t nonce[CHACHA20_NONCE_SIZE]) { + uint32_t state[CHACHA20_STATE_WORDS]; + uint32_t result[CHACHA20_STATE_WORDS]; + initialize_state(state, key, nonce, 0); + core_block(state, result); + serialize(poly_key, result); +} +// ******* END: chacha-portable.c ******** +// ******* BEGIN: poly1305-donna.c ******** + +/* auto detect between 32bit / 64bit */ +#if /* uint128 available on 64bit system*/ \ + (defined(__SIZEOF_INT128__) && \ + defined(__LP64__)) /* MSVC 64bit compiler */ \ + || (defined(_MSC_VER) && defined(_M_X64)) /* gcc >= 4.4 64bit */ \ + || (defined(__GNUC__) && defined(__LP64__) && \ + ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)))) +#define __GUESS64 +#else +#define __GUESS32 +#endif + +#if defined(POLY1305_8BIT) +/* + poly1305 implementation using 8 bit * 8 bit = 16 bit multiplication and +32 bit addition + + based on the public domain reference version in supercop by djb +static */ + +#if defined(_MSC_VER) +#define POLY1305_NOINLINE __declspec(noinline) +#elif defined(__GNUC__) +#define POLY1305_NOINLINE __attribute__((noinline)) +#else +#define POLY1305_NOINLINE +#endif + +#define poly1305_block_size 16 + +/* 17 + sizeof(size_t) + 51*sizeof(unsigned char) */ +typedef struct poly1305_state_internal_t { + unsigned char buffer[poly1305_block_size]; + size_t leftover; + unsigned char h[17]; + unsigned char r[17]; + unsigned char pad[17]; + unsigned char final; +} poly1305_state_internal_t; + +static void poly1305_init(poly1305_context *ctx, const unsigned char key[32]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *) ctx; + size_t i; + + st->leftover = 0; + + /* h = 0 */ + for (i = 0; i < 17; i++) st->h[i] = 0; + + /* r &= 
0xffffffc0ffffffc0ffffffc0fffffff */ + st->r[0] = key[0] & 0xff; + st->r[1] = key[1] & 0xff; + st->r[2] = key[2] & 0xff; + st->r[3] = key[3] & 0x0f; + st->r[4] = key[4] & 0xfc; + st->r[5] = key[5] & 0xff; + st->r[6] = key[6] & 0xff; + st->r[7] = key[7] & 0x0f; + st->r[8] = key[8] & 0xfc; + st->r[9] = key[9] & 0xff; + st->r[10] = key[10] & 0xff; + st->r[11] = key[11] & 0x0f; + st->r[12] = key[12] & 0xfc; + st->r[13] = key[13] & 0xff; + st->r[14] = key[14] & 0xff; + st->r[15] = key[15] & 0x0f; + st->r[16] = 0; + + /* save pad for later */ + for (i = 0; i < 16; i++) st->pad[i] = key[i + 16]; + st->pad[16] = 0; + + st->final = 0; +} + +static void poly1305_add(unsigned char h[17], const unsigned char c[17]) { + unsigned short u; + unsigned int i; + for (u = 0, i = 0; i < 17; i++) { + u += (unsigned short) h[i] + (unsigned short) c[i]; + h[i] = (unsigned char) u & 0xff; + u >>= 8; + } +} + +static void poly1305_squeeze(unsigned char h[17], unsigned long hr[17]) { + unsigned long u; + unsigned int i; + u = 0; + for (i = 0; i < 16; i++) { + u += hr[i]; + h[i] = (unsigned char) u & 0xff; + u >>= 8; + } + u += hr[16]; + h[16] = (unsigned char) u & 0x03; + u >>= 2; + u += (u << 2); /* u *= 5; */ + for (i = 0; i < 16; i++) { + u += h[i]; + h[i] = (unsigned char) u & 0xff; + u >>= 8; + } + h[16] += (unsigned char) u; +} + +static void poly1305_freeze(unsigned char h[17]) { + const unsigned char minusp[17] = {0x05, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xfc}; + unsigned char horig[17], negative; + unsigned int i; + + /* compute h + -p */ + for (i = 0; i < 17; i++) horig[i] = h[i]; + poly1305_add(h, minusp); + + /* select h if h < p, or h + -p if h >= p */ + negative = -(h[16] >> 7); + for (i = 0; i < 17; i++) h[i] ^= negative & (horig[i] ^ h[i]); +} + +static void poly1305_blocks(poly1305_state_internal_t *st, + const unsigned char *m, size_t bytes) { + const unsigned char hibit = st->final ^ 1; /* 1 << 128 */ + + while 
(bytes >= poly1305_block_size) { + unsigned long hr[17], u; + unsigned char c[17]; + unsigned int i, j; + + /* h += m */ + for (i = 0; i < 16; i++) c[i] = m[i]; + c[16] = hibit; + poly1305_add(st->h, c); + + /* h *= r */ + for (i = 0; i < 17; i++) { + u = 0; + for (j = 0; j <= i; j++) { + u += (unsigned short) st->h[j] * st->r[i - j]; + } + for (j = i + 1; j < 17; j++) { + unsigned long v = (unsigned short) st->h[j] * st->r[i + 17 - j]; + v = ((v << 8) + (v << 6)); /* v *= (5 << 6); */ + u += v; + } + hr[i] = u; + } + + /* (partial) h %= p */ + poly1305_squeeze(st->h, hr); + + m += poly1305_block_size; + bytes -= poly1305_block_size; + } +} + +static POLY1305_NOINLINE void poly1305_finish(poly1305_context *ctx, + unsigned char mac[16]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *) ctx; + size_t i; + + /* process the remaining block */ + if (st->leftover) { + size_t i = st->leftover; + st->buffer[i++] = 1; + for (; i < poly1305_block_size; i++) st->buffer[i] = 0; + st->final = 1; + poly1305_blocks(st, st->buffer, poly1305_block_size); + } + + /* fully reduce h */ + poly1305_freeze(st->h); + + /* h = (h + pad) % (1 << 128) */ + poly1305_add(st->h, st->pad); + for (i = 0; i < 16; i++) mac[i] = st->h[i]; + + /* zero out the state */ + for (i = 0; i < 17; i++) st->h[i] = 0; + for (i = 0; i < 17; i++) st->r[i] = 0; + for (i = 0; i < 17; i++) st->pad[i] = 0; +} +#elif defined(POLY1305_16BIT) +/* + poly1305 implementation using 16 bit * 16 bit = 32 bit multiplication +and 32 bit addition static */ + +#if defined(_MSC_VER) +#define POLY1305_NOINLINE __declspec(noinline) +#elif defined(__GNUC__) +#define POLY1305_NOINLINE __attribute__((noinline)) +#else +#define POLY1305_NOINLINE +#endif + +#define poly1305_block_size 16 + +/* 17 + sizeof(size_t) + 18*sizeof(unsigned short) */ +typedef struct poly1305_state_internal_t { + unsigned char buffer[poly1305_block_size]; + size_t leftover; + unsigned short r[10]; + unsigned short h[10]; + unsigned short pad[8]; 
+ unsigned char final; +} poly1305_state_internal_t; + +/* interpret two 8 bit unsigned integers as a 16 bit unsigned integer in little + * endian */ +static unsigned short U8TO16(const unsigned char *p) { + return (((unsigned short) (p[0] & 0xff)) | + ((unsigned short) (p[1] & 0xff) << 8)); +} + +/* store a 16 bit unsigned integer as two 8 bit unsigned integers in little + * endian */ +static void U16TO8(unsigned char *p, unsigned short v) { + p[0] = (v) & 0xff; + p[1] = (v >> 8) & 0xff; +} + +static void poly1305_init(poly1305_context *ctx, const unsigned char key[32]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *) ctx; + unsigned short t0, t1, t2, t3, t4, t5, t6, t7; + size_t i; + + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + t0 = U8TO16(&key[0]); + st->r[0] = (t0) & 0x1fff; + t1 = U8TO16(&key[2]); + st->r[1] = ((t0 >> 13) | (t1 << 3)) & 0x1fff; + t2 = U8TO16(&key[4]); + st->r[2] = ((t1 >> 10) | (t2 << 6)) & 0x1f03; + t3 = U8TO16(&key[6]); + st->r[3] = ((t2 >> 7) | (t3 << 9)) & 0x1fff; + t4 = U8TO16(&key[8]); + st->r[4] = ((t3 >> 4) | (t4 << 12)) & 0x00ff; + st->r[5] = ((t4 >> 1)) & 0x1ffe; + t5 = U8TO16(&key[10]); + st->r[6] = ((t4 >> 14) | (t5 << 2)) & 0x1fff; + t6 = U8TO16(&key[12]); + st->r[7] = ((t5 >> 11) | (t6 << 5)) & 0x1f81; + t7 = U8TO16(&key[14]); + st->r[8] = ((t6 >> 8) | (t7 << 8)) & 0x1fff; + st->r[9] = ((t7 >> 5)) & 0x007f; + + /* h = 0 */ + for (i = 0; i < 10; i++) st->h[i] = 0; + + /* save pad for later */ + for (i = 0; i < 8; i++) st->pad[i] = U8TO16(&key[16 + (2 * i)]); + + st->leftover = 0; + st->final = 0; +} + +static void poly1305_blocks(poly1305_state_internal_t *st, + const unsigned char *m, size_t bytes) { + const unsigned short hibit = (st->final) ? 
0 : (1 << 11); /* 1 << 128 */ + unsigned short t0, t1, t2, t3, t4, t5, t6, t7; + unsigned long d[10]; + unsigned long c; + + while (bytes >= poly1305_block_size) { + size_t i, j; + + /* h += m[i] */ + t0 = U8TO16(&m[0]); + st->h[0] += (t0) & 0x1fff; + t1 = U8TO16(&m[2]); + st->h[1] += ((t0 >> 13) | (t1 << 3)) & 0x1fff; + t2 = U8TO16(&m[4]); + st->h[2] += ((t1 >> 10) | (t2 << 6)) & 0x1fff; + t3 = U8TO16(&m[6]); + st->h[3] += ((t2 >> 7) | (t3 << 9)) & 0x1fff; + t4 = U8TO16(&m[8]); + st->h[4] += ((t3 >> 4) | (t4 << 12)) & 0x1fff; + st->h[5] += ((t4 >> 1)) & 0x1fff; + t5 = U8TO16(&m[10]); + st->h[6] += ((t4 >> 14) | (t5 << 2)) & 0x1fff; + t6 = U8TO16(&m[12]); + st->h[7] += ((t5 >> 11) | (t6 << 5)) & 0x1fff; + t7 = U8TO16(&m[14]); + st->h[8] += ((t6 >> 8) | (t7 << 8)) & 0x1fff; + st->h[9] += ((t7 >> 5)) | hibit; + + /* h *= r, (partial) h %= p */ + for (i = 0, c = 0; i < 10; i++) { + d[i] = c; + for (j = 0; j < 10; j++) { + d[i] += (unsigned long) st->h[j] * + ((j <= i) ? st->r[i - j] : (5 * st->r[i + 10 - j])); + /* Sum(h[i] * r[i] * 5) will overflow slightly above 6 products with an + * unclamped r, so carry at 5 */ + if (j == 4) { + c = (d[i] >> 13); + d[i] &= 0x1fff; + } + } + c += (d[i] >> 13); + d[i] &= 0x1fff; + } + c = ((c << 2) + c); /* c *= 5 */ + c += d[0]; + d[0] = ((unsigned short) c & 0x1fff); + c = (c >> 13); + d[1] += c; + + for (i = 0; i < 10; i++) st->h[i] = (unsigned short) d[i]; + + m += poly1305_block_size; + bytes -= poly1305_block_size; + } +} + +static POLY1305_NOINLINE void poly1305_finish(poly1305_context *ctx, + unsigned char mac[16]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *) ctx; + unsigned short c; + unsigned short g[10]; + unsigned short mask; + unsigned long f; + size_t i; + + /* process the remaining block */ + if (st->leftover) { + size_t i = st->leftover; + st->buffer[i++] = 1; + for (; i < poly1305_block_size; i++) st->buffer[i] = 0; + st->final = 1; + poly1305_blocks(st, st->buffer, poly1305_block_size); + } + 
+ /* fully carry h */ + c = st->h[1] >> 13; + st->h[1] &= 0x1fff; + for (i = 2; i < 10; i++) { + st->h[i] += c; + c = st->h[i] >> 13; + st->h[i] &= 0x1fff; + } + st->h[0] += (c * 5); + c = st->h[0] >> 13; + st->h[0] &= 0x1fff; + st->h[1] += c; + c = st->h[1] >> 13; + st->h[1] &= 0x1fff; + st->h[2] += c; + + /* compute h + -p */ + g[0] = st->h[0] + 5; + c = g[0] >> 13; + g[0] &= 0x1fff; + for (i = 1; i < 10; i++) { + g[i] = st->h[i] + c; + c = g[i] >> 13; + g[i] &= 0x1fff; + } + + /* select h if h < p, or h + -p if h >= p */ + mask = (c ^ 1) - 1; + for (i = 0; i < 10; i++) g[i] &= mask; + mask = ~mask; + for (i = 0; i < 10; i++) st->h[i] = (st->h[i] & mask) | g[i]; + + /* h = h % (2^128) */ + st->h[0] = ((st->h[0]) | (st->h[1] << 13)) & 0xffff; + st->h[1] = ((st->h[1] >> 3) | (st->h[2] << 10)) & 0xffff; + st->h[2] = ((st->h[2] >> 6) | (st->h[3] << 7)) & 0xffff; + st->h[3] = ((st->h[3] >> 9) | (st->h[4] << 4)) & 0xffff; + st->h[4] = ((st->h[4] >> 12) | (st->h[5] << 1) | (st->h[6] << 14)) & 0xffff; + st->h[5] = ((st->h[6] >> 2) | (st->h[7] << 11)) & 0xffff; + st->h[6] = ((st->h[7] >> 5) | (st->h[8] << 8)) & 0xffff; + st->h[7] = ((st->h[8] >> 8) | (st->h[9] << 5)) & 0xffff; + + /* mac = (h + pad) % (2^128) */ + f = (unsigned long) st->h[0] + st->pad[0]; + st->h[0] = (unsigned short) f; + for (i = 1; i < 8; i++) { + f = (unsigned long) st->h[i] + st->pad[i] + (f >> 16); + st->h[i] = (unsigned short) f; + } + + for (i = 0; i < 8; i++) U16TO8(mac + (i * 2), st->h[i]); + + /* zero out the state */ + for (i = 0; i < 10; i++) st->h[i] = 0; + for (i = 0; i < 10; i++) st->r[i] = 0; + for (i = 0; i < 8; i++) st->pad[i] = 0; +} +#elif defined(POLY1305_32BIT) || \ + (!defined(POLY1305_64BIT) && defined(__GUESS32)) +/* + poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication +and 64 bit addition static */ + +#if defined(_MSC_VER) +#define POLY1305_NOINLINE __declspec(noinline) +#elif defined(__GNUC__) +#define POLY1305_NOINLINE __attribute__((noinline)) +#else 
+#define POLY1305_NOINLINE +#endif + +#define poly1305_block_size 16 + +/* 17 + sizeof(size_t) + 14*sizeof(unsigned long) */ +typedef struct poly1305_state_internal_t { + unsigned long r[5]; + unsigned long h[5]; + unsigned long pad[4]; + size_t leftover; + unsigned char buffer[poly1305_block_size]; + unsigned char final; +} poly1305_state_internal_t; + +/* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little + * endian */ +static unsigned long U8TO32(const unsigned char *p) { + return (((unsigned long) (p[0] & 0xff)) | + ((unsigned long) (p[1] & 0xff) << 8) | + ((unsigned long) (p[2] & 0xff) << 16) | + ((unsigned long) (p[3] & 0xff) << 24)); +} + +/* store a 32 bit unsigned integer as four 8 bit unsigned integers in little + * endian */ +static void U32TO8(unsigned char *p, unsigned long v) { + p[0] = (unsigned char) ((v) & 0xff); + p[1] = (unsigned char) ((v >> 8) & 0xff); + p[2] = (unsigned char) ((v >> 16) & 0xff); + p[3] = (unsigned char) ((v >> 24) & 0xff); +} + +static void poly1305_init(poly1305_context *ctx, const unsigned char key[32]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *) ctx; + + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + st->r[0] = (U8TO32(&key[0])) & 0x3ffffff; + st->r[1] = (U8TO32(&key[3]) >> 2) & 0x3ffff03; + st->r[2] = (U8TO32(&key[6]) >> 4) & 0x3ffc0ff; + st->r[3] = (U8TO32(&key[9]) >> 6) & 0x3f03fff; + st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff; + + /* h = 0 */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; + + /* save pad for later */ + st->pad[0] = U8TO32(&key[16]); + st->pad[1] = U8TO32(&key[20]); + st->pad[2] = U8TO32(&key[24]); + st->pad[3] = U8TO32(&key[28]); + + st->leftover = 0; + st->final = 0; +} + +static void poly1305_blocks(poly1305_state_internal_t *st, + const unsigned char *m, size_t bytes) { + const unsigned long hibit = (st->final) ? 
0 : (1UL << 24); /* 1 << 128 */ + unsigned long r0, r1, r2, r3, r4; + unsigned long s1, s2, s3, s4; + unsigned long h0, h1, h2, h3, h4; + unsigned long long d0, d1, d2, d3, d4; + unsigned long c; + + r0 = st->r[0]; + r1 = st->r[1]; + r2 = st->r[2]; + r3 = st->r[3]; + r4 = st->r[4]; + + s1 = r1 * 5; + s2 = r2 * 5; + s3 = r3 * 5; + s4 = r4 * 5; + + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + while (bytes >= poly1305_block_size) { + /* h += m[i] */ + h0 += (U8TO32(m + 0)) & 0x3ffffff; + h1 += (U8TO32(m + 3) >> 2) & 0x3ffffff; + h2 += (U8TO32(m + 6) >> 4) & 0x3ffffff; + h3 += (U8TO32(m + 9) >> 6) & 0x3ffffff; + h4 += (U8TO32(m + 12) >> 8) | hibit; + + /* h *= r */ + d0 = ((unsigned long long) h0 * r0) + ((unsigned long long) h1 * s4) + + ((unsigned long long) h2 * s3) + ((unsigned long long) h3 * s2) + + ((unsigned long long) h4 * s1); + d1 = ((unsigned long long) h0 * r1) + ((unsigned long long) h1 * r0) + + ((unsigned long long) h2 * s4) + ((unsigned long long) h3 * s3) + + ((unsigned long long) h4 * s2); + d2 = ((unsigned long long) h0 * r2) + ((unsigned long long) h1 * r1) + + ((unsigned long long) h2 * r0) + ((unsigned long long) h3 * s4) + + ((unsigned long long) h4 * s3); + d3 = ((unsigned long long) h0 * r3) + ((unsigned long long) h1 * r2) + + ((unsigned long long) h2 * r1) + ((unsigned long long) h3 * r0) + + ((unsigned long long) h4 * s4); + d4 = ((unsigned long long) h0 * r4) + ((unsigned long long) h1 * r3) + + ((unsigned long long) h2 * r2) + ((unsigned long long) h3 * r1) + + ((unsigned long long) h4 * r0); + + /* (partial) h %= p */ + c = (unsigned long) (d0 >> 26); + h0 = (unsigned long) d0 & 0x3ffffff; + d1 += c; + c = (unsigned long) (d1 >> 26); + h1 = (unsigned long) d1 & 0x3ffffff; + d2 += c; + c = (unsigned long) (d2 >> 26); + h2 = (unsigned long) d2 & 0x3ffffff; + d3 += c; + c = (unsigned long) (d3 >> 26); + h3 = (unsigned long) d3 & 0x3ffffff; + d4 += c; + c = (unsigned long) (d4 >> 26); + h4 = 
(unsigned long) d4 & 0x3ffffff; + h0 += c * 5; + c = (h0 >> 26); + h0 = h0 & 0x3ffffff; + h1 += c; + + m += poly1305_block_size; + bytes -= poly1305_block_size; + } + + st->h[0] = h0; + st->h[1] = h1; + st->h[2] = h2; + st->h[3] = h3; + st->h[4] = h4; +} + +static POLY1305_NOINLINE void poly1305_finish(poly1305_context *ctx, + unsigned char mac[16]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *) ctx; + unsigned long h0, h1, h2, h3, h4, c; + unsigned long g0, g1, g2, g3, g4; + unsigned long long f; + unsigned long mask; + + /* process the remaining block */ + if (st->leftover) { + size_t i = st->leftover; + st->buffer[i++] = 1; + for (; i < poly1305_block_size; i++) st->buffer[i] = 0; + st->final = 1; + poly1305_blocks(st, st->buffer, poly1305_block_size); + } + + /* fully carry h */ + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + c = h1 >> 26; + h1 = h1 & 0x3ffffff; + h2 += c; + c = h2 >> 26; + h2 = h2 & 0x3ffffff; + h3 += c; + c = h3 >> 26; + h3 = h3 & 0x3ffffff; + h4 += c; + c = h4 >> 26; + h4 = h4 & 0x3ffffff; + h0 += c * 5; + c = h0 >> 26; + h0 = h0 & 0x3ffffff; + h1 += c; + + /* compute h + -p */ + g0 = h0 + 5; + c = g0 >> 26; + g0 &= 0x3ffffff; + g1 = h1 + c; + c = g1 >> 26; + g1 &= 0x3ffffff; + g2 = h2 + c; + c = g2 >> 26; + g2 &= 0x3ffffff; + g3 = h3 + c; + c = g3 >> 26; + g3 &= 0x3ffffff; + g4 = h4 + c - (1UL << 26); + + /* select h if h < p, or h + -p if h >= p */ + mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1; + g0 &= mask; + g1 &= mask; + g2 &= mask; + g3 &= mask; + g4 &= mask; + mask = ~mask; + h0 = (h0 & mask) | g0; + h1 = (h1 & mask) | g1; + h2 = (h2 & mask) | g2; + h3 = (h3 & mask) | g3; + h4 = (h4 & mask) | g4; + + /* h = h % (2^128) */ + h0 = ((h0) | (h1 << 26)) & 0xffffffff; + h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; + h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; + h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; + + /* mac = (h + pad) % (2^128) */ + f = (unsigned long long) h0 + 
st->pad[0]; + h0 = (unsigned long) f; + f = (unsigned long long) h1 + st->pad[1] + (f >> 32); + h1 = (unsigned long) f; + f = (unsigned long long) h2 + st->pad[2] + (f >> 32); + h2 = (unsigned long) f; + f = (unsigned long long) h3 + st->pad[3] + (f >> 32); + h3 = (unsigned long) f; + + U32TO8(mac + 0, h0); + U32TO8(mac + 4, h1); + U32TO8(mac + 8, h2); + U32TO8(mac + 12, h3); + + /* zero out the state */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; + st->r[0] = 0; + st->r[1] = 0; + st->r[2] = 0; + st->r[3] = 0; + st->r[4] = 0; + st->pad[0] = 0; + st->pad[1] = 0; + st->pad[2] = 0; + st->pad[3] = 0; +} + +#else +/* + poly1305 implementation using 64 bit * 64 bit = 128 bit multiplication +and 128 bit addition static */ + +#if defined(_MSC_VER) + +typedef struct uint128_t { + uint64_t lo; + uint64_t hi; +} uint128_t; + +#define MUL128(out, x, y) out.lo = _umul128((x), (y), &out.hi) +#define ADD(out, in) \ + { \ + unsigned long long t = out.lo; \ + out.lo += in.lo; \ + out.hi += (out.lo < t) + in.hi; \ + } +#define ADDLO(out, in) \ + { \ + unsigned long long t = out.lo; \ + out.lo += in; \ + out.hi += (out.lo < t); \ + } +#define SHR(in, shift) (__shiftright128(in.lo, in.hi, (shift))) +#define LO(in) (in.lo) + +#define POLY1305_NOINLINE __declspec(noinline) +#elif defined(__GNUC__) +#if defined(__SIZEOF_INT128__) +// Get rid of GCC warning "ISO C does not support '__int128' types" +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +typedef unsigned __int128 uint128_t; +#pragma GCC diagnostic pop +#else +typedef unsigned uint128_t __attribute__((mode(TI))); +#endif + +#define MUL128(out, x, y) out = ((uint128_t) x * y) +#define ADD(out, in) out += in +#define ADDLO(out, in) out += in +#define SHR(in, shift) (unsigned long long) (in >> (shift)) +#define LO(in) (unsigned long long) (in) + +#define POLY1305_NOINLINE __attribute__((noinline)) +#endif + +#define poly1305_block_size 16 + +/* 17 + sizeof(size_t) + 
8*sizeof(unsigned long long) */ +typedef struct poly1305_state_internal_t { + unsigned long long r[3]; + unsigned long long h[3]; + unsigned long long pad[2]; + size_t leftover; + unsigned char buffer[poly1305_block_size]; + unsigned char final; +} poly1305_state_internal_t; + +/* interpret eight 8 bit unsigned integers as a 64 bit unsigned integer in + * little endian */ +static unsigned long long U8TO64(const unsigned char *p) { + return (((unsigned long long) (p[0] & 0xff)) | + ((unsigned long long) (p[1] & 0xff) << 8) | + ((unsigned long long) (p[2] & 0xff) << 16) | + ((unsigned long long) (p[3] & 0xff) << 24) | + ((unsigned long long) (p[4] & 0xff) << 32) | + ((unsigned long long) (p[5] & 0xff) << 40) | + ((unsigned long long) (p[6] & 0xff) << 48) | + ((unsigned long long) (p[7] & 0xff) << 56)); +} + +/* store a 64 bit unsigned integer as eight 8 bit unsigned integers in little + * endian */ +static void U64TO8(unsigned char *p, unsigned long long v) { + p[0] = (unsigned char) ((v) & 0xff); + p[1] = (unsigned char) ((v >> 8) & 0xff); + p[2] = (unsigned char) ((v >> 16) & 0xff); + p[3] = (unsigned char) ((v >> 24) & 0xff); + p[4] = (unsigned char) ((v >> 32) & 0xff); + p[5] = (unsigned char) ((v >> 40) & 0xff); + p[6] = (unsigned char) ((v >> 48) & 0xff); + p[7] = (unsigned char) ((v >> 56) & 0xff); +} + +static void poly1305_init(poly1305_context *ctx, const unsigned char key[32]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *) ctx; + unsigned long long t0, t1; + + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + t0 = U8TO64(&key[0]); + t1 = U8TO64(&key[8]); + + st->r[0] = (t0) & 0xffc0fffffff; + st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff; + st->r[2] = ((t1 >> 24)) & 0x00ffffffc0f; + + /* h = 0 */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + + /* save pad for later */ + st->pad[0] = U8TO64(&key[16]); + st->pad[1] = U8TO64(&key[24]); + + st->leftover = 0; + st->final = 0; +} + +static void poly1305_blocks(poly1305_state_internal_t 
*st, + const unsigned char *m, size_t bytes) { + const unsigned long long hibit = + (st->final) ? 0 : ((unsigned long long) 1 << 40); /* 1 << 128 */ + unsigned long long r0, r1, r2; + unsigned long long s1, s2; + unsigned long long h0, h1, h2; + unsigned long long c; + uint128_t d0, d1, d2, d; + + r0 = st->r[0]; + r1 = st->r[1]; + r2 = st->r[2]; + + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + + s1 = r1 * (5 << 2); + s2 = r2 * (5 << 2); + + while (bytes >= poly1305_block_size) { + unsigned long long t0, t1; + + /* h += m[i] */ + t0 = U8TO64(&m[0]); + t1 = U8TO64(&m[8]); + + h0 += ((t0) & 0xfffffffffff); + h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff); + h2 += (((t1 >> 24)) & 0x3ffffffffff) | hibit; + + /* h *= r */ + MUL128(d0, h0, r0); + MUL128(d, h1, s2); + ADD(d0, d); + MUL128(d, h2, s1); + ADD(d0, d); + MUL128(d1, h0, r1); + MUL128(d, h1, r0); + ADD(d1, d); + MUL128(d, h2, s2); + ADD(d1, d); + MUL128(d2, h0, r2); + MUL128(d, h1, r1); + ADD(d2, d); + MUL128(d, h2, r0); + ADD(d2, d); + + /* (partial) h %= p */ + c = SHR(d0, 44); + h0 = LO(d0) & 0xfffffffffff; + ADDLO(d1, c); + c = SHR(d1, 44); + h1 = LO(d1) & 0xfffffffffff; + ADDLO(d2, c); + c = SHR(d2, 42); + h2 = LO(d2) & 0x3ffffffffff; + h0 += c * 5; + c = (h0 >> 44); + h0 = h0 & 0xfffffffffff; + h1 += c; + + m += poly1305_block_size; + bytes -= poly1305_block_size; + } + + st->h[0] = h0; + st->h[1] = h1; + st->h[2] = h2; +} + +static POLY1305_NOINLINE void poly1305_finish(poly1305_context *ctx, + unsigned char mac[16]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *) ctx; + unsigned long long h0, h1, h2, c; + unsigned long long g0, g1, g2; + unsigned long long t0, t1; + + /* process the remaining block */ + if (st->leftover) { + size_t i = st->leftover; + st->buffer[i] = 1; + for (i = i + 1; i < poly1305_block_size; i++) st->buffer[i] = 0; + st->final = 1; + poly1305_blocks(st, st->buffer, poly1305_block_size); + } + + /* fully carry h */ + h0 = st->h[0]; + h1 = st->h[1]; + h2 = 
st->h[2]; + + c = (h1 >> 44); + h1 &= 0xfffffffffff; + h2 += c; + c = (h2 >> 42); + h2 &= 0x3ffffffffff; + h0 += c * 5; + c = (h0 >> 44); + h0 &= 0xfffffffffff; + h1 += c; + c = (h1 >> 44); + h1 &= 0xfffffffffff; + h2 += c; + c = (h2 >> 42); + h2 &= 0x3ffffffffff; + h0 += c * 5; + c = (h0 >> 44); + h0 &= 0xfffffffffff; + h1 += c; + + /* compute g = h + -p: add 5, then subtract 1 << 42 from the top limb */ + g0 = h0 + 5; + c = (g0 >> 44); + g0 &= 0xfffffffffff; + g1 = h1 + c; + c = (g1 >> 44); + g1 &= 0xfffffffffff; + g2 = h2 + c - ((unsigned long long) 1 << 42); + + /* select h if h < p, or h + -p if h >= p, without branching: c becomes + * all ones when the top bit of g2 is clear, and zero otherwise */ + c = (g2 >> ((sizeof(unsigned long long) * 8) - 1)) - 1; + g0 &= c; + g1 &= c; + g2 &= c; + c = ~c; + h0 = (h0 & c) | g0; + h1 = (h1 & c) | g1; + h2 = (h2 & c) | g2; + + /* h = (h + pad) */ + t0 = st->pad[0]; + t1 = st->pad[1]; + + h0 += ((t0) & 0xfffffffffff); + c = (h0 >> 44); + h0 &= 0xfffffffffff; + h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c; + c = (h1 >> 44); + h1 &= 0xfffffffffff; + h2 += (((t1 >> 24)) & 0x3ffffffffff) + c; + h2 &= 0x3ffffffffff; + + /* mac = h % (2^128): repack the limbs into two 64 bit words and store + * them little endian */ + h0 = ((h0) | (h1 << 44)); + h1 = ((h1 >> 20) | (h2 << 24)); + + U64TO8(&mac[0], h0); + U64TO8(&mac[8], h1); + + /* zero out the h, r and pad fields of the state */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->r[0] = 0; + st->r[1] = 0; + st->r[2] = 0; + st->pad[0] = 0; + st->pad[1] = 0; +} + +#endif + +static void poly1305_update(poly1305_context *ctx, const unsigned char *m, + size_t bytes) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *) ctx; + size_t i; + + /* handle leftover: top up a partially filled buffer first and process it + * once it holds a whole block */ + if (st->leftover) { + size_t want = (poly1305_block_size - st->leftover); + if (want > bytes) want = bytes; + for (i = 0; i < want; i++) st->buffer[st->leftover + i] = m[i]; + bytes -= want; + m += want; + st->leftover += want; + if (st->leftover < poly1305_block_size) return; + poly1305_blocks(st, st->buffer, poly1305_block_size); + st->leftover = 0; + } + + /* process full blocks directly from the input */ + if (bytes >= poly1305_block_size) { + size_t want =
(bytes & (size_t) ~(poly1305_block_size - 1)); + poly1305_blocks(st, m, want); + m += want; + bytes -= want; + } + + /* store leftover: keep the trailing partial block in the buffer for the + * next call */ + if (bytes) { + for (i = 0; i < bytes; i++) st->buffer[st->leftover + i] = m[i]; + st->leftover += bytes; + } +} + +// ******* END: poly1305-donna.c ******** +// ******* BEGIN: portable8439.c ******** + +#define __CHACHA20_BLOCK_SIZE (64) +#define __POLY1305_KEY_SIZE (32) + +static PORTABLE_8439_DECL uint8_t __ZEROES[16] = {0}; +static PORTABLE_8439_DECL void pad_if_needed(poly1305_context *ctx, + size_t size) { + size_t padding = size % 16; + if (padding != 0) { + poly1305_update(ctx, __ZEROES, 16 - padding); + } +} + +#define __u8(v) ((uint8_t) ((v) & 0xFF)) + +// Feeds value to the MAC as 8 bytes, least significant byte first. +// TODO: make this depend on the unaligned/native read size where possible +static PORTABLE_8439_DECL void write_64bit_int(poly1305_context *ctx, + uint64_t value) { + uint8_t result[8]; + result[0] = __u8(value); + result[1] = __u8(value >> 8); + result[2] = __u8(value >> 16); + result[3] = __u8(value >> 24); + result[4] = __u8(value >> 32); + result[5] = __u8(value >> 40); + result[6] = __u8(value >> 48); + result[7] = __u8(value >> 56); + poly1305_update(ctx, result, 8); +} + +static PORTABLE_8439_DECL void poly1305_calculate_mac( + uint8_t *mac, const uint8_t *cipher_text, size_t cipher_text_size, + const uint8_t key[RFC_8439_KEY_SIZE], + const uint8_t nonce[RFC_8439_NONCE_SIZE], const uint8_t *ad, + size_t ad_size) { + // init poly key (RFC 8439 section 2.6) + uint8_t poly_key[__POLY1305_KEY_SIZE] = {0}; + rfc8439_keygen(poly_key, key, nonce); + // start poly1305 mac + poly1305_context poly_ctx; + poly1305_init(&poly_ctx, poly_key); + + if (ad != NULL && ad_size > 0) { + // write AD if present, zero-padded to a multiple of 16 bytes + poly1305_update(&poly_ctx, ad, ad_size); + pad_if_needed(&poly_ctx, ad_size); + } + + // now write the cipher text, padded the same way + poly1305_update(&poly_ctx, cipher_text, cipher_text_size); + pad_if_needed(&poly_ctx, cipher_text_size); + + // write sizes + write_64bit_int(&poly_ctx, ad_size); +
write_64bit_int(&poly_ctx, cipher_text_size); + + // calculate MAC + poly1305_finish(&poly_ctx, mac); +} + +#define PM(p) ((size_t) (p)) + +// two ranges overlap when each one starts before the other one ends +// +// s_size should be smaller or equal to b_size +// the expansion is fully parenthesized so that it stays one expression when +// combined with other operators (e.g. when negated) +#define OVERLAPPING(s, s_size, b, b_size) \ + ((PM(s) < PM((b) + (b_size))) && (PM(b) < PM((s) + (s_size)))) + +// XORs plain_text with the ChaCha20 stream into cipher_text and appends the +// RFC_8439_TAG_SIZE byte tag computed over ad and the cipher text. +// Returns plain_text_size + RFC_8439_TAG_SIZE, or (size_t) -1 when the two +// buffers overlap. +PORTABLE_8439_DECL size_t mg_chacha20_poly1305_encrypt( + uint8_t *restrict cipher_text, const uint8_t key[RFC_8439_KEY_SIZE], + const uint8_t nonce[RFC_8439_NONCE_SIZE], const uint8_t *restrict ad, + size_t ad_size, const uint8_t *restrict plain_text, + size_t plain_text_size) { + size_t new_size = plain_text_size + RFC_8439_TAG_SIZE; + if (OVERLAPPING(plain_text, plain_text_size, cipher_text, new_size)) { + return (size_t) -1; + } + chacha20_xor_stream(cipher_text, plain_text, plain_text_size, key, nonce, 1); + poly1305_calculate_mac(cipher_text + plain_text_size, cipher_text, + plain_text_size, key, nonce, ad, ad_size); + return new_size; +} + +// XORs cipher_text, minus its trailing RFC_8439_TAG_SIZE byte tag, with the +// ChaCha20 stream into plain_text. +// Returns the number of plain text bytes written, or (size_t) -1 when +// cipher_text_size is smaller than the tag or the two buffers overlap. +// NOTE(review): the tag is stripped but NOT verified here - no MAC is computed +// and this signature carries no associated data - so the returned plain text +// is unauthenticated. Checking it needs an interface change. +PORTABLE_8439_DECL size_t mg_chacha20_poly1305_decrypt( + uint8_t *restrict plain_text, const uint8_t key[RFC_8439_KEY_SIZE], + const uint8_t nonce[RFC_8439_NONCE_SIZE], + const uint8_t *restrict cipher_text, size_t cipher_text_size) { + size_t actual_size; + // a message shorter than the tag cannot be valid; reject it before the + // subtraction below wraps around to a huge size + if (cipher_text_size < RFC_8439_TAG_SIZE) { + return (size_t) -1; + } + actual_size = cipher_text_size - RFC_8439_TAG_SIZE; + if (OVERLAPPING(plain_text, actual_size, cipher_text, cipher_text_size)) { + return (size_t) -1; + } + + chacha20_xor_stream(plain_text, cipher_text, actual_size, key, nonce, 1); + return actual_size; +} +// ******* END: portable8439.c ******** +#endif // MG_TLS == MG_TLS_BUILTIN diff --git a/src/tls_chacha20.h b/src/tls_chacha20.h new file mode 100644 index 00000000..4071a199 --- /dev/null +++ b/src/tls_chacha20.h @@ -0,0 +1,110 @@ +// portable8439 v1.0.1 +// Source: https://github.com/DavyLandman/portable8439 +// Licensed under CC0-1.0 +//
Contains poly1305-donna e6ad6e091d30d7f4ec2d4f978be1fcfcbce72781 (Public +// Domain) + +#include "arch.h" +#include "config.h" + +#ifndef __PORTABLE_8439_H +#define __PORTABLE_8439_H +#if defined(__cplusplus) +extern "C" { +#endif + +// provide your own decl specifier like -DPORTABLE_8439_DECL=ICACHE_RAM_ATTR +#ifndef PORTABLE_8439_DECL +#define PORTABLE_8439_DECL +#endif + +/* + This library implements RFC 8439 a.k.a. ChaCha20-Poly1305 AEAD + + You can use this library to avoid attackers mutating or reusing your + encrypted messages. This does assume you never reuse a nonce+key pair and, + if possible, carefully pick your associated data. +*/ + +// Make sure we are either nested in C++ or running in a C99+ compiler +#if !defined(__cplusplus) && !defined(_MSC_VER) && \ + (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) +#error "C99 or newer required" +#endif + +// #if CHAR_BIT > 8 +// # error "Systems without native octets not supported" +// #endif + +#if defined(_MSC_VER) || defined(__cplusplus) +// add restrict support if possible +#if (defined(_MSC_VER) && _MSC_VER >= 1900) || defined(__clang__) || \ + defined(__GNUC__) +#define restrict __restrict +#else +#define restrict +#endif +#endif + +#define RFC_8439_TAG_SIZE (16) +#define RFC_8439_KEY_SIZE (32) +#define RFC_8439_NONCE_SIZE (12) + +/* + Encrypt/Seal plain text bytes into a cipher text that can only be + decrypted by knowing the key, nonce and associated data. + + input: + - key: RFC_8439_KEY_SIZE bytes that all parties have agreed + upon beforehand + - nonce: RFC_8439_NONCE_SIZE bytes that should never be repeated + for the same key. A counter or a pseudo-random value are fine. + - ad: associated data to include with calculating the tag of the + cipher text. Can be null for empty. + - plain_text: data to be encrypted, pointer + size should not overlap + with cipher_text pointer + + output: + - cipher_text: encrypted plain_text with a tag appended.
Make sure to + allocate at least plain_text_size + RFC_8439_TAG_SIZE + + returns: + - size of bytes written to cipher_text, can be -1 if overlapping + pointers are passed for plain_text and cipher_text +*/ +PORTABLE_8439_DECL size_t mg_chacha20_poly1305_encrypt( + uint8_t *restrict cipher_text, const uint8_t key[RFC_8439_KEY_SIZE], + const uint8_t nonce[RFC_8439_NONCE_SIZE], const uint8_t *restrict ad, + size_t ad_size, const uint8_t *restrict plain_text, size_t plain_text_size); + +/* + Decrypt/unseal cipher text given the right key and nonce. + + input: + - key: RFC_8439_KEY_SIZE bytes that all parties have agreed + upon beforehand + - nonce: RFC_8439_NONCE_SIZE bytes that should never be repeated for + the same key. A counter or a pseudo-random value are fine. + - cipher_text: encrypted message, including its trailing + RFC_8439_TAG_SIZE byte tag. + + Unlike the encrypt call, this function takes no associated data (ad) + parameter. + + output: + - plain_text: decrypted data, pointer + size should not overlap + with cipher_text pointer, leave at least enough room for + cipher_text_size - RFC_8439_TAG_SIZE + + returns: + - size of bytes written to plain_text, or -1 on invalid arguments, such + as overlapping pointers passed for plain_text and cipher_text + + NOTE(review): the definition in tls_chacha20.c does not compare the tag, + so an incorrect key/nonce or a corrupted cipher_text is not reported - + confirm before relying on the return value for authentication. +*/ +PORTABLE_8439_DECL size_t mg_chacha20_poly1305_decrypt( + uint8_t *restrict plain_text, const uint8_t key[RFC_8439_KEY_SIZE], + const uint8_t nonce[RFC_8439_NONCE_SIZE], + const uint8_t *restrict cipher_text, size_t cipher_text_size); +#if defined(__cplusplus) +} +#endif +#endif diff --git a/src/tls_openssl.c b/src/tls_openssl.c index 9d73aa9f..38d8a6f4 100644 --- a/src/tls_openssl.c +++ b/src/tls_openssl.c @@ -93,6 +93,19 @@ static int mg_bio_write(BIO *bio, const char *buf, int len) { return len; } +#ifdef MG_TLS_SSLKEYLOGFILE +static void ssl_keylog_cb(const SSL *ssl, const char *line) { + char *keylogfile = getenv("SSLKEYLOGFILE"); + if (keylogfile == NULL) { + return; + }
+ FILE *f = fopen(keylogfile, "a"); + /* fopen() fails on a bad path or missing permissions; skip logging rather + * than hand a NULL stream to fprintf() */ + if (f == NULL) { + return; + } + fprintf(f, "%s\n", line); + fflush(f); + fclose(f); +} +#endif + void mg_tls_init(struct mg_connection *c, const struct mg_tls_opts *opts) { struct mg_tls *tls = (struct mg_tls *) calloc(1, sizeof(*tls)); const char *id = "mongoose"; @@ -112,6 +125,9 @@ void mg_tls_init(struct mg_connection *c, const struct mg_tls_opts *opts) { MG_DEBUG(("%lu Setting TLS", c->id)); tls->ctx = c->is_client ? SSL_CTX_new(SSLv23_client_method()) : SSL_CTX_new(SSLv23_server_method()); +#ifdef MG_TLS_SSLKEYLOGFILE + /* same guard as the ssl_keylog_cb definition, so both appear together */ + SSL_CTX_set_keylog_callback(tls->ctx, ssl_keylog_cb); +#endif if ((tls->ssl = SSL_new(tls->ctx)) == NULL) { mg_error(c, "SSL_new"); goto fail; diff --git a/test/Makefile b/test/Makefile index fca1c48f..60a764f0 100644 --- a/test/Makefile +++ b/test/Makefile @@ -202,7 +202,7 @@ mongoose.c: Makefile $(wildcard ../src/*.c) $(wildcard ../src/drivers/*.c) cd .. && (export LC_ALL=C ; cat src/license.h; echo; echo '#include "mongoose.h"' ; (for F in src/*.c src/drivers/*.c ; do echo; echo '#ifdef MG_ENABLE_LINES'; echo "#line 1 \"$$F\""; echo '#endif'; cat $$F | sed -e 's,#include ".*,,'; done))> $@ mongoose.h: $(HDRS) Makefile - cd .. && (cat src/license.h; echo; echo '#ifndef MONGOOSE_H'; echo '#define MONGOOSE_H'; echo; cat src/version.h ; echo; echo '#ifdef __cplusplus'; echo 'extern "C" {'; echo '#endif'; cat src/arch.h src/arch_*.h src/net_ft.h src/net_lwip.h src/net_rl.h src/config.h src/str.h src/queue.h src/fmt.h src/printf.h src/log.h src/timer.h src/fs.h src/util.h src/url.h src/iobuf.h src/base64.h src/md5.h src/sha1.h src/sha256.h src/tls_x25519.h src/tls_aes128.h src/tls_uecc.h src/event.h src/net.h src/http.h src/ssi.h src/tls.h src/tls_mbed.h src/tls_openssl.h src/ws.h src/sntp.h src/mqtt.h src/dns.h src/json.h src/rpc.h src/ota.h src/device.h src/net_builtin.h src/profile.h src/drivers/*.h | sed -e '/keep/!
s,#include ".*,,' -e 's,^#pragma once,,'; echo; echo '#ifdef __cplusplus'; echo '}'; echo '#endif'; echo '#endif // MONGOOSE_H')> $@ + cd .. && (cat src/license.h; echo; echo '#ifndef MONGOOSE_H'; echo '#define MONGOOSE_H'; echo; cat src/version.h ; echo; echo '#ifdef __cplusplus'; echo 'extern "C" {'; echo '#endif'; cat src/arch.h src/arch_*.h src/net_ft.h src/net_lwip.h src/net_rl.h src/config.h src/str.h src/queue.h src/fmt.h src/printf.h src/log.h src/timer.h src/fs.h src/util.h src/url.h src/iobuf.h src/base64.h src/md5.h src/sha1.h src/sha256.h src/tls_x25519.h src/tls_aes128.h src/tls_uecc.h src/tls_chacha20.h src/event.h src/net.h src/http.h src/ssi.h src/tls.h src/tls_mbed.h src/tls_openssl.h src/ws.h src/sntp.h src/mqtt.h src/dns.h src/json.h src/rpc.h src/ota.h src/device.h src/net_builtin.h src/profile.h src/drivers/*.h | sed -e '/keep/! s,#include ".*,,' -e 's,^#pragma once,,'; echo; echo '#ifdef __cplusplus'; echo '}'; echo '#endif'; echo '#endif // MONGOOSE_H')> $@ clean: clean_examples clean_refprojs clean_tutorials clean_examples_embedded diff --git a/test/unit_test.c b/test/unit_test.c index 8e90f706..e23d5ae9 100644 --- a/test/unit_test.c +++ b/test/unit_test.c @@ -2361,7 +2361,8 @@ static void test_util(void) { { uint32_t val, max = (uint32_t) -1; ASSERT(mg_str_to_num(mg_str("123"), 10, &val, sizeof(uint32_t)) && val == 123); - mg_snprintf(buf, sizeof(buf), "%lu", max); + mg_snprintf(buf, sizeof(buf), "%lu", (unsigned long) max); + ASSERT(strcmp(buf, "4294967295") == 0); ASSERT(mg_str_to_num(mg_str(buf), 10, &val, sizeof(uint32_t)) && val == max); ASSERT(mg_str_to_num(mg_str("01111011"), 2, &val, sizeof(uint32_t)) && val == 123); ASSERT(mg_str_to_num(mg_str("11111111111111111111111111111111"), 2, &val, sizeof(uint32_t)) && val == max);