Only shift unsigned values to avoid implementation-defined behavior.

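This replaces the arithmetic right shifts of signed values with logical right shifts of unsigned values, negating the shifted-out sign bit to rebuild the all-ones/all-zeros mask. It is based in part on a Stack Overflow answer by John Schultz: https://stackoverflow.com/questions/4533076/google-protocol-buffers-zigzag-encoding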
2024-12-27 13:31:02 +08:00 · 2022-06-06 13:57:38 -06:00 · 2022-06-06 13:57:38 -06:00 · 6e389ce2c3
commit 6e389ce2c3
parent f224ab2eeb
1 changed files with 11 additions and 12 deletions
--- a/protobuf-c/protobuf-c.c
+++ b/protobuf-c/protobuf-c.c
@ -316,9 +316,8 @@ int32_size(int32_t v)
 static inline uint32_t
 zigzag32(int32_t v)
 {
-	// Note:  the right-shift must be arithmetic
-	// Note:  left shift must be unsigned because of overflow
-	return ((uint32_t)(v) << 1) ^ (uint32_t)(v >> 31);
+	// Note:  Using unsigned types prevents undefined behavior
+	return ((uint32_t)v << 1) ^ -((uint32_t)v >> 31);
 }

 /**
@ -380,9 +379,8 @@ uint64_size(uint64_t v)
 static inline uint64_t
 zigzag64(int64_t v)
 {
-	// Note:  the right-shift must be arithmetic
-	// Note:  left shift must be unsigned because of overflow
-	return ((uint64_t)(v) << 1) ^ (uint64_t)(v >> 63);
+	// Note:  Using unsigned types prevents undefined behavior
+	return ((uint64_t)v << 1) ^ -((uint64_t)v >> 63);
 }

 /**
@ -802,7 +800,8 @@ uint32_pack(uint32_t value, uint8_t *out)
 }

 /**
- * Pack a signed 32-bit integer and return the number of bytes written.
+ * Pack a signed 32-bit integer, passed as unsigned to avoid
+ * implementation-defined behavior, and return the number of bytes written.
 * Negative numbers are encoded as two's complement 64-bit integers.
 *
 * \param value
@ -813,14 +812,14 @@ uint32_pack(uint32_t value, uint8_t *out)
 *      Number of bytes written to `out`.
 */
 static inline size_t
-int32_pack(int32_t value, uint8_t *out)
+int32_pack(uint32_t value, uint8_t *out)
 {
-	if (value < 0) {
+	if ((int32_t)value < 0) {
 		out[0] = value | 0x80;
 		out[1] = (value >> 7) | 0x80;
 		out[2] = (value >> 14) | 0x80;
 		out[3] = (value >> 21) | 0x80;
-		out[4] = (value >> 28) | 0x80;
+		out[4] = (value >> 28) | 0xf0;
 		out[5] = out[6] = out[7] = out[8] = 0xff;
 		out[9] = 0x01;
 		return 10;
@ -2425,7 +2424,7 @@ static inline int32_t
 unzigzag32(uint32_t v)
 {
 	// Note:  Using unsigned types prevents undefined behavior
-	return (int32_t)((v >> 1) ^ (~(v & 1) + 1));
+	return (int32_t)((v >> 1) ^ -(v & 1));
 }

 static inline uint32_t
@ -2467,7 +2466,7 @@ static inline int64_t
 unzigzag64(uint64_t v)
 {
 	// Note:  Using unsigned types prevents undefined behavior
-	return (int64_t)((v >> 1) ^ (~(v & 1) + 1));
+	return (int64_t)((v >> 1) ^ -(v & 1));
 }

 static inline uint64_t