Optimize HTTP2 HPack huffman decoding (#43603)

Optimized to use a tree of 8-bit lookup tables, resulting in about 0.35× the CPU utilization of the former version.
The decoding table is lazily generated as a ushort[].
This commit is contained in:
Roman Konecny 2020-11-27 10:02:08 +01:00 committed by GitHub
parent be0efd3584
commit 7171407822
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 335 additions and 153 deletions

3
.gitignore vendored
View File

@ -133,6 +133,9 @@ AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# BenchmarkDotNet
BenchmarkDotNet.Artifacts/
# Installshield output folder
[Ee]xpress/

View File

@ -1,13 +1,15 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
#nullable enable
using System.Diagnostics;
using System.Threading;
namespace System.Net.Http.HPack
{
internal class Huffman
internal static class Huffman
{
// TODO: this can be constructed from _decodingTable
// HPack static huffman code. see: https://httpwg.org/specs/rfc7541.html#huffman.code
private static readonly (uint code, int bitLength)[] _encodingTable = new (uint code, int bitLength)[]
{
(0b11111111_11000000_00000000_00000000, 13),
@ -269,36 +271,129 @@ namespace System.Net.Http.HPack
(0b11111111_11111111_11111111_11111100, 30)
};
private static readonly (int codeLength, int[] codes)[] _decodingTable = new[]
{
(5, new[] { 48, 49, 50, 97, 99, 101, 105, 111, 115, 116 }),
(6, new[] { 32, 37, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 61, 65, 95, 98, 100, 102, 103, 104, 108, 109, 110, 112, 114, 117 }),
(7, new[] { 58, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 89, 106, 107, 113, 118, 119, 120, 121, 122 }),
(8, new[] { 38, 42, 44, 59, 88, 90 }),
(10, new[] { 33, 34, 40, 41, 63 }),
(11, new[] { 39, 43, 124 }),
(12, new[] { 35, 62 }),
(13, new[] { 0, 36, 64, 91, 93, 126 }),
(14, new[] { 94, 125 }),
(15, new[] { 60, 96, 123 }),
(19, new[] { 92, 195, 208 }),
(20, new[] { 128, 130, 131, 162, 184, 194, 224, 226 }),
(21, new[] { 153, 161, 167, 172, 176, 177, 179, 209, 216, 217, 227, 229, 230 }),
(22, new[] { 129, 132, 133, 134, 136, 146, 154, 156, 160, 163, 164, 169, 170, 173, 178, 181, 185, 186, 187, 189, 190, 196, 198, 228, 232, 233 }),
(23, new[] { 1, 135, 137, 138, 139, 140, 141, 143, 147, 149, 150, 151, 152, 155, 157, 158, 165, 166, 168, 174, 175, 180, 182, 183, 188, 191, 197, 231, 239 }),
(24, new[] { 9, 142, 144, 145, 148, 159, 171, 206, 215, 225, 236, 237 }),
(25, new[] { 199, 207, 234, 235 }),
(26, new[] { 192, 193, 200, 201, 202, 205, 210, 213, 218, 219, 238, 240, 242, 243, 255 }),
(27, new[] { 203, 204, 211, 212, 214, 221, 222, 223, 241, 244, 245, 246, 247, 248, 250, 251, 252, 253, 254 }),
(28, new[] { 2, 3, 4, 5, 6, 7, 8, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 220, 249 }),
(30, new[] { 10, 13, 22, 256 })
};
private static readonly ushort[] s_decodingTree = GenerateDecodingLookupTree();
/// <summary>
/// Looks up the HPack static Huffman code for a symbol.
/// </summary>
/// <param name="data">
/// Index into the encoding table. Within this file it is called with 0..256,
/// where 256 stands for EOS. An index outside the table throws <see cref="IndexOutOfRangeException"/>.
/// </param>
/// <returns>
/// The code stored left-aligned (starting at the most significant bit) in a 32-bit word,
/// together with the number of bits that make up the code.
/// </returns>
public static (uint encoded, int bitLength) Encode(int data) => _encodingTable[data];
/// <summary>
/// Builds the flattened tree of 8-bit lookup tables used to decode the HPack static Huffman code.
/// </summary>
/// <returns>An array holding 15 consecutive lookup tables of 256 entries each; table 0 is the root.</returns>
private static ushort[] GenerateDecodingLookupTree()
{
    // Layout
    // ------
    // The tree is stored in a single ushort array to keep allocations down. Table t occupies
    // elements [t * 256 .. t * 256 + 255] and an entry is addressed as
    //
    //     tree[(tableIndex << 8) + lookupIndex]
    //
    // where lookupIndex is the next 8 bits of the Huffman code, most significant bit first.
    // When fewer than 8 bits of the code remain, the code bits sit in the high bits of
    // lookupIndex and the low bits may hold anything (they belong to whatever follows).
    //
    // Every entry is one of:
    //
    //    15  14  13  12  11  10   9   8   7   6   5   4   3   2   1   0
    //   +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
    //   | 1 |   next_lookup_table_index |            not_used           |   branch
    //   +---+---------------------------+-------------------------------+
    //   | 0 |    number_of_used_bits    |             octet             |   leaf
    //   +---+---------------------------+-------------------------------+
    //
    // Leaf (bit 15 clear): a symbol is complete. For example 0x0241 would mean "octet 0x41,
    // and only the 2 most significant bits of lookupIndex belonged to this code"; the other
    // bits are the beginning of the next code.
    //
    // Branch (bit 15 set): the code continues; the next 8 bits are looked up in the table named
    // by next_lookup_table_index. Table 0 is the root and can never be a branch target, so a
    // branch with next_lookup_table_index == 0 is reserved to signal a decoding error.
    //
    // The code being indexed is the static one from RFC 7541
    // (https://httpwg.org/specs/rfc7541.html#huffman.code); for that code the tree consists of
    // exactly 15 tables, root included, which is why the array size is fixed.
    ushort[] tree = new ushort[15 * 256];
    int lastAllocatedTable = 0;

    // Lay down a path for every symbol 0..256; 256 is EOS,
    // see http://httpwg.org/specs/rfc7541.html#rfc.section.5.2
    for (int symbol = 0; symbol <= 256; symbol++)
    {
        (uint remainingCode, int codeLength) = Encode(symbol);
        int table = 0;

        // Consume the code 8 bits per step; the iterator drops the bits just handled.
        for (int remainingBits = codeLength; remainingBits > 0; remainingBits -= 8, remainingCode <<= 8)
        {
            int slot = (int)(remainingCode >> 24);
            int tableBase = table << 8;

            if (remainingBits > 8)
            {
                // The code continues beyond this table, so this slot must be a branch.
                ushort link = tree[tableBase + slot];
                if (link != 0)
                {
                    // A previously processed symbol shares this prefix: follow its branch.
                    table = (link & 0x7f00) >> 8;
                    continue;
                }

                // A zero entry means "not initialised yet" (a real branch never targets table 0),
                // so claim the next free table and link it in.
                lastAllocatedTable++;
                tree[tableBase + slot] = (ushort)((0x80 | lastAllocatedTable) << 8);
                table = lastAllocatedTable;
                continue;
            }

            // Final table for this code. EOS must never decode successfully
            // ("A Huffman-encoded string literal containing the EOS symbol MUST be treated as a
            // decoding error."), so it is stored as a branch to table 0 (0x80ff), which the
            // decoder treats as an error. Every other symbol becomes a regular leaf.
            ushort entry = symbol == 256
                ? (ushort)0x80ff
                : (ushort)((remainingBits << 8) | symbol);

            // Only the top remainingBits bits of the slot belong to this code. The low
            // (8 - remainingBits) bits will contain the start of the next code during decoding,
            // so the same entry is written for every possible value of those bits.
            int unusedCombinations = 1 << (8 - remainingBits);
            for (int filler = 0; filler < unusedCombinations; filler++)
            {
                tree[tableBase + (slot | filler)] = entry;
            }
        }
    }

    return tree;
}
/// <summary>
/// Decodes a Huffman encoded string from a byte array.
/// </summary>
@ -307,50 +402,111 @@ namespace System.Net.Http.HPack
/// <returns>The number of decoded symbols.</returns>
public static int Decode(ReadOnlySpan<byte> src, ref byte[] dstArray)
{
// The code below implements the decoding logic for an HPack huffman encoded literal values.
// https://httpwg.org/specs/rfc7541.html#string.literal.representation
//
// To decode a symbol, we traverse the decoding lookup table tree by 8 bits for each lookup
// until we found a leaf - which contains decoded symbol (octet)
//
// see comments in GenerateDecodingLookupTree() describing decoding table
Span<byte> dst = dstArray;
Debug.Assert(dst != null && dst.Length > 0);
ushort[] decodingTree = s_decodingTree;
int lookupTableIndex = 0;
int lookupIndex;
uint acc = 0;
int bitsInAcc = 0;
int i = 0;
int j = 0;
int lastDecodedBits = 0;
while (i < src.Length)
{
// Note that if lastDecodeBits is 3 or more, then we will only get 5 bits (or less)
// from src[i]. Thus we need to read 5 bytes here to ensure that we always have
// at least 30 bits available for decoding.
// TODO https://github.com/dotnet/runtime/issues/1506:
// Rework this as part of Huffman perf improvements
uint next = (uint)(src[i] << 24 + lastDecodedBits);
next |= (i + 1 < src.Length ? (uint)(src[i + 1] << 16 + lastDecodedBits) : 0);
next |= (i + 2 < src.Length ? (uint)(src[i + 2] << 8 + lastDecodedBits) : 0);
next |= (i + 3 < src.Length ? (uint)(src[i + 3] << lastDecodedBits) : 0);
next |= (i + 4 < src.Length ? (uint)(src[i + 4] >> (8 - lastDecodedBits)) : 0);
// Load next 8 bits into accumulator.
acc <<= 8;
acc |= src[i++];
bitsInAcc += 8;
uint ones = (uint)(int.MinValue >> (8 - lastDecodedBits - 1));
if (i == src.Length - 1 && lastDecodedBits > 0 && (next & ones) == ones)
// Decode bits in accumulator.
do
{
// The remaining 7 or less bits are all 1, which is padding.
// We specifically check that lastDecodedBits > 0 because padding
// longer than 7 bits should be treated as a decoding error.
// http://httpwg.org/specs/rfc7541.html#rfc.section.5.2
break;
lookupIndex = (byte)(acc >> (bitsInAcc - 8));
int lookupValue = decodingTree[(lookupTableIndex << 8) + lookupIndex];
if (lookupValue < 0x80_00)
{
// Octet found.
// +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
// | 0 | number_of_used_bits | octet |
// +---+---------------------------+-------------------------------+
if (j == dst.Length)
{
Array.Resize(ref dstArray, dst.Length * 2);
dst = dstArray;
}
dst[j++] = (byte)lookupValue;
// The longest possible symbol size is 30 bits. If we're at the last 4 bytes
// of the input, we need to make sure we pass the correct number of valid bits
// left, otherwise the trailing 0s in next may form a valid symbol.
int validBits = Math.Min(30, (8 - lastDecodedBits) + (src.Length - i - 1) * 8);
int ch = DecodeValue(next, validBits, out int decodedBits);
if (ch == -1)
// Start lookup of next symbol
lookupTableIndex = 0;
bitsInAcc -= lookupValue >> 8;
}
else
{
// No valid symbol could be decoded with the bits in next
// Traverse to next lookup table.
// +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
// | 1 | next_lookup_table_index | not_used |
// +---+---------------------------+-------------------------------+
lookupTableIndex = (lookupValue & 0x7f00) >> 8;
if (lookupTableIndex == 0)
{
// No valid symbol could be decoded or EOS was decoded
throw new HuffmanDecodingException(SR.net_http_hpack_huffman_decode_failed);
}
else if (ch == 256)
bitsInAcc -= 8;
}
} while (bitsInAcc >= 8);
}
// Finish decoding last < 8 bits of src.
// Processing of the last byte has to handle several corner cases
// so it's extracted outside of the main loop for performance reasons.
while (bitsInAcc > 0)
{
// A Huffman-encoded string literal containing the EOS symbol MUST be treated as a decoding error.
// http://httpwg.org/specs/rfc7541.html#rfc.section.5.2
Debug.Assert(bitsInAcc < 8);
// Check for correct EOS, which is padding with ones till end of byte
// when we STARTED new huffman code in last 8 bits (lookupTableIndex was reset to 0 -> root lookup table).
if (lookupTableIndex == 0)
{
// Check if all remaining bits are ones.
uint ones = uint.MaxValue >> (32 - bitsInAcc);
if ((acc & ones) == ones)
{
// Is it a EOS. See: http://httpwg.org/specs/rfc7541.html#rfc.section.5.2
break;
}
}
// Lookup index has to be 8 bits aligned to MSB
lookupIndex = (byte)(acc << (8 - bitsInAcc));
int lookupValue = decodingTree[(lookupTableIndex << 8) + lookupIndex];
if (lookupValue < 0x80_00)
{
// Octet found.
// +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
// | 0 | number_of_used_bits | octet |
// +---+---------------------------+-------------------------------+
bitsInAcc -= lookupValue >> 8;
if (bitsInAcc < 0)
{
// Last looked up code had more bits than was left in accumulator which indicated invalid or incomplete source
throw new HuffmanDecodingException(SR.net_http_hpack_huffman_decode_failed);
}
@ -359,76 +515,26 @@ namespace System.Net.Http.HPack
Array.Resize(ref dstArray, dst.Length * 2);
dst = dstArray;
}
dst[j++] = (byte)lookupValue;
dst[j++] = (byte)ch;
// Set table index to root - start of new huffman code.
lookupTableIndex = 0;
}
else
{
// Src was depleted in middle of lookup tree or EOS was decoded.
throw new HuffmanDecodingException(SR.net_http_hpack_huffman_decode_failed);
}
}
// If we crossed a byte boundary, advance i so we start at the next byte that's not fully decoded.
lastDecodedBits += decodedBits;
i += lastDecodedBits / 8;
// Modulo 8 since we only care about how many bits were decoded in the last byte that we processed.
lastDecodedBits %= 8;
if (lookupTableIndex != 0)
{
// Finished in middle of traversing - no valid symbol could be decoded
// or too long EOS padding (7 bits plus). See: http://httpwg.org/specs/rfc7541.html#rfc.section.5.2
throw new HuffmanDecodingException(SR.net_http_hpack_huffman_decode_failed);
}
return j;
}
/// <summary>
/// Decodes one Huffman symbol from the most significant bits of a 32-bit word.
/// </summary>
/// <param name="data">A 32-bit word whose high-order bits start with a Huffman encoded symbol.</param>
/// <param name="validBits">
/// Upper limit on the code length that is considered. It is not the length of the symbol itself;
/// it only stops the low-order bits of <paramref name="data"/> from being interpreted when they
/// do not carry encoded data.
/// </param>
/// <param name="decodedBits">
/// Receives the bit length of the decoded symbol, or 0 when nothing could be decoded.
/// </param>
/// <returns>The decoded symbol (256 is EOS), or -1 when no code of at most <paramref name="validBits"/> bits matches.</returns>
internal static int DecodeValue(uint data, int validBits, out int decodedBits)
{
    // Canonical Huffman decoding over a table sorted by ascending code length.
    //
    // For each code length b we keep "upperBound": one more than the largest b-bit code.
    // Moving from one length to the next, upperBound is shifted left by the difference in
    // lengths and then increased by the number of symbols that have the new length.
    // If the top b bits of data, read as a number v, are below upperBound, then v is a code of
    // length b, and its symbol sits (upperBound - v) places from the end of that length's list.
    int upperBound = 0;
    int previousLength = 0;

    foreach ((int length, int[] symbols) in _decodingTable)
    {
        if (length > validBits)
        {
            // Lengths only grow from here on, so nothing further can fit.
            break;
        }

        // On the first entry upperBound is still 0, so the shift leaves it unchanged.
        upperBound <<= length - previousLength;
        upperBound += symbols.Length;
        previousLength = length;

        // Top 'length' bits of data as a non-negative number.
        long prefix = data >> (32 - length);
        if (prefix < upperBound)
        {
            decodedBits = length;
            return symbols[symbols.Length - (upperBound - prefix)];
        }
    }

    decodedBits = 0;
    return -1;
}
}
}

View File

@ -237,7 +237,9 @@ namespace System.Net.Http.Unit.Tests.HPack
// Sequences that uncovered errors
{ new byte[] { 0xb6, 0xb9, 0xac, 0x1c, 0x85, 0x58, 0xd5, 0x20, 0xa4, 0xb6, 0xc2, 0xad, 0x61, 0x7b, 0x5a, 0x54, 0x25, 0x1f }, Encoding.ASCII.GetBytes("upgrade-insecure-requests") },
{ new byte[] { 0xfe, 0x53 }, Encoding.ASCII.GetBytes("\"t") }
{ new byte[] { 0xfe, 0x53 }, Encoding.ASCII.GetBytes("\"t") },
{ new byte[] { 0xff, 0xff, 0xf6, 0xff, 0xff, 0xfd, 0x68 }, new byte[] { 0xcf, 0xf0, 0x73 } },
{ new byte[] { 0xff, 0xff, 0xf9, 0xff, 0xff, 0xfd, 0x86 }, new byte[] { 0xd5, 0xc7, 0x69 } },
};
[Theory]
@ -354,29 +356,6 @@ namespace System.Net.Http.Unit.Tests.HPack
Assert.Equal(expectedBitLength, bitLength);
}
[Theory]
[MemberData(nameof(HuffmanData))]
public void HuffmanDecode(int code, uint encoded, int bitLength)
{
Assert.Equal(code, Huffman.DecodeValue(encoded, bitLength, out int decodedBits));
Assert.Equal(bitLength, decodedBits);
}
[Theory]
[MemberData(nameof(HuffmanData))]
public void HuffmanEncodeDecode(
int code,
// Suppresses the warning about an unused theory parameter because
// this test shares data with other methods
#pragma warning disable xUnit1026
uint encoded,
#pragma warning restore xUnit1026
int bitLength)
{
Assert.Equal(code, Huffman.DecodeValue(Huffman.Encode(code).encoded, bitLength, out int decodedBits));
Assert.Equal(bitLength, decodedBits);
}
public static TheoryData<int, uint, int> HuffmanData
{
get

View File

@ -0,0 +1,22 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net5.0</TargetFramework>
<StringResourcesPath>../../../src/Resources/Strings.resx</StringResourcesPath>
<Nullable>enable</Nullable>
<LangVersion>9.0</LangVersion>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(CommonPath)System\Net\Http\aspnetcore\Http2\Hpack\Huffman.cs"
Link="Common\System\Net\Http\aspnetcore\Http2\Hpack\Huffman.cs" />
<Compile Include="$(CommonPath)System\Net\Http\aspnetcore\Http2\Hpack\HuffmanDecodingException.cs"
Link="Common\System\Net\Http\aspnetcore\Http2\Hpack\HuffmanDecodingException.cs" />
<Compile Include="Program.cs" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.12.1" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,25 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.30709.64
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "HPackHuffmanBenchmark", "HPackHuffmanBenchmark.csproj", "{FBC03CC3-4D63-4C17-8C09-ACE6E698B3C8}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{FBC03CC3-4D63-4C17-8C09-ACE6E698B3C8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{FBC03CC3-4D63-4C17-8C09-ACE6E698B3C8}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FBC03CC3-4D63-4C17-8C09-ACE6E698B3C8}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FBC03CC3-4D63-4C17-8C09-ACE6E698B3C8}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {1CD6E426-3019-4B48-B2DB-1A6679912054}
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,47 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System;
using System.Collections.Generic;
using System.Net.Http.HPack;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
namespace System.Net.Http.Tests.Performance
{
/// <summary>
/// BenchmarkDotNet benchmark that runs <c>Huffman.Decode</c> over a fixed set of encoded samples.
/// </summary>
public class HPackHuffmanBenchmark
{
    // Encoded inputs; DataIndex selects which one a run decodes.
    private List<byte[]> _data;

    // Destination buffer reused by every invocation. It is handed to Decode by reference,
    // so it cannot be declared readonly.
    private byte[] _buffer;

    /// <summary>Creates the sample inputs and the output buffer.</summary>
    public HPackHuffmanBenchmark()
    {
        var samples = new List<byte[]>();

        // www.example.com
        samples.Add(new byte[] { 0xf1, 0xe3, 0xc2, 0xe5, 0xf2, 0x3a, 0x6b, 0xa0, 0xab, 0x90, 0xf4, 0xff });

        // no-cache
        samples.Add(new byte[] { 0xa8, 0xeb, 0x10, 0x64, 0x9c, 0xbf });

        // upgrade-insecure-requests
        samples.Add(new byte[] { 0xb6, 0xb9, 0xac, 0x1c, 0x85, 0x58, 0xd5, 0x20, 0xa4, 0xb6, 0xc2, 0xad, 0x61, 0x7b, 0x5a, 0x54, 0x25, 0x1f });

        // mEO7bfwFStBMwJWfW4pmg2XL25AswjrVlfcfYbxkcS2ssduZmiKoipMH9XwoTGkb+Qnq9bcjwWbwDQzsea/vMQ==
        samples.Add(Convert.FromBase64String("pwanY5fGHcm7o8ZeUvJqumYX5nE3Cjx0s40Skl5x+epNwkIkt/aTZjmr0Y3/zwffi6x/72Vdn4ydPHKPxf2e0FGx30bIIA=="));

        _data = samples;
        _buffer = new byte[10000];
    }

    /// <summary>Index of the sample to decode; one benchmark case is run per listed value.</summary>
    [Params(0, 1, 2, 3)]
    public int DataIndex { get; set; }

    /// <summary>Decodes the sample selected by <see cref="DataIndex"/> into the shared buffer.</summary>
    [Benchmark]
    public void Decode()
    {
        byte[] encoded = _data[DataIndex];
        Huffman.Decode(encoded, ref _buffer);
    }
}
/// <summary>Entry point of the benchmark executable.</summary>
public class Program
{
    /// <summary>Runs every benchmark declared in this assembly.</summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args)
    {
        // The value returned by Run was stored in a local that nothing ever read,
        // so the call is made for its effect only.
        BenchmarkRunner.Run(typeof(Program).Assembly);
    }
}
}