Skip to content
This repository was archived by the owner on Jan 13, 2021. It is now read-only.

Commit 8cab7eb

Browse files
committed
Merge pull request #91 from Lukasa/headerdict
Do headers properly.
2 parents 4bb3a89 + 3c7981d commit 8cab7eb

File tree

5 files changed

+482
-0
lines changed

5 files changed

+482
-0
lines changed

docs/source/api.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@ Primary HTTP/2 Interface
1919
.. autoclass:: hyper.HTTP20Push
2020
:inherited-members:
2121

22+
Headers
23+
-------
24+
25+
.. autoclass:: hyper.common.headers.HTTPHeaderMap
26+
:inherited-members:
27+
2228
Requests Transport Adapter
2329
--------------------------
2430

hyper/common/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
hyper/common
4+
~~~~~~~~~~~~
5+
6+
Common code in hyper.
7+
"""

hyper/common/headers.py

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
hyper/common/headers
4+
~~~~~~~~~~~~~~~~~~~~~
5+
6+
Contains hyper's structures for storing and working with HTTP headers.
7+
"""
8+
import collections

try:
    # Python 3.3+: the ABCs live in ``collections.abc``; the aliases on
    # ``collections`` itself were removed in Python 3.10.
    from collections.abc import MutableMapping
except ImportError:  # Python 2
    from collections import MutableMapping

from hyper.compat import unicode, bytes, imap
11+
12+
13+
class HTTPHeaderMap(MutableMapping):
    """
    A structure that contains HTTP headers.

    HTTP headers are a curious beast. At the surface level they look roughly
    like a name-value set, but in practice they have many variations that
    make them tricky:

    - duplicate keys are allowed
    - keys are compared case-insensitively
    - duplicate keys are isomorphic to comma-separated values, *except when
      they aren't*!
    - they logically contain a form of ordering

    This data structure is an attempt to preserve all of that information
    while being as user-friendly as possible. It retains all of the mapping
    convenience methods (allowing by-name indexing), while avoiding using a
    dictionary for storage.

    When iterated over, this structure returns headers in 'canonical form'.
    This form is a two-tuple, where the first entry is the header name (in
    lower-case), and the second entry is a single header value (in original
    case). A header that carries several comma-separated values is therefore
    emitted as several tuples sharing the same name.

    The mapping always emits both names and values in the form of bytestrings:
    never unicode strings. It can accept names and values in unicode form, and
    will automatically encode them to bytestrings using UTF-8. The reason for
    what appears to be a user-unfriendly decision here is primarily to allow
    the broadest-possible compatibility (to make it possible to send headers in
    unusual encodings) while ensuring that users are never confused about what
    type of data they will receive.

    .. warning:: Note that this data structure makes none of the performance
                 guarantees of a dictionary. Lookup and deletion is not an O(1)
                 operation. Inserting a new value *is* O(1), all other
                 operations are O(n), including *replacing* a header entirely.
    """
    def __init__(self, *args, **kwargs):
        """
        Build the map from any number of iterables of ``(name, value)`` pairs
        followed by keyword arguments, preserving the order given.

        Each positional argument is iterated directly and every element is
        unpacked as a pair, so a plain ``dict`` (which iterates over its keys
        only) is not a suitable positional argument.
        """
        # The meat of the structure. In practice, headers are an ordered list
        # of tuples. This early version of the data structure simply uses this
        # directly under the covers.
        #
        # An important curiosity here is that the headers are not stored in
        # 'canonical form', but are instead stored in the form they were
        # provided in. This is to ensure that it is always possible to
        # reproduce the original header structure if necessary. This leads to
        # some unfortunate performance costs on structure access where it is
        # often necessary to transform the data into canonical form on access.
        # This cost is judged acceptable in low-level code like `hyper`, but
        # higher-level abstractions should consider if they really require this
        # logic.
        self._items = []

        for arg in args:
            self._items.extend(_to_bytestring_tuple(*pair) for pair in arg)

        for k, v in kwargs.items():
            self._items.append(_to_bytestring_tuple(k, v))

    def __getitem__(self, key):
        """
        Unlike the dict __getitem__, this returns a list of items in the order
        they were added. These items are returned in 'canonical form', meaning
        that comma-separated values are split into multiple values.

        Raises ``KeyError`` if no header matches ``key`` (compared
        case-insensitively).
        """
        key = _to_bytestring(key)
        values = []

        for k, v in self._items:
            if _keys_equal(k, key):
                # canonical_form yields (name, value) pairs; keep the values.
                values.extend(x[1] for x in canonical_form(k, v))

        if not values:
            raise KeyError("Nonexistent header key: {}".format(key))

        return values

    def __setitem__(self, key, value):
        """
        Unlike the dict __setitem__, this appends to the list of items.
        """
        self._items.append(_to_bytestring_tuple(key, value))

    def __delitem__(self, key):
        """
        Delete *all* headers with the given key, raising ``KeyError`` if
        there are none.

        The stored list is rebuilt in a single pass rather than popping each
        matching index, which would shift the tail of the list once per match.
        """
        key = _to_bytestring(key)
        survivors = [
            (k, v) for k, v in self._items if not _keys_equal(k, key)
        ]

        # Nothing was filtered out, so the key was never present.
        if len(survivors) == len(self._items):
            raise KeyError("Nonexistent header key: {}".format(key))

        # Slice-assign so the existing list object is updated in place.
        self._items[:] = survivors

    def __iter__(self):
        """
        This mapping iterates like the list of tuples it is. The headers are
        returned in canonical form.
        """
        for pair in self._items:
            for value in canonical_form(*pair):
                yield value

    def __len__(self):
        """
        The length of this mapping is the number of individual headers in
        canonical form. Sadly, this is a somewhat expensive operation.
        """
        return sum(1 for _ in self)

    def __contains__(self, key):
        """
        If any header is present with this key, returns True.
        """
        key = _to_bytestring(key)
        return any(_keys_equal(key, k) for k, _ in self._items)

    def keys(self):
        """
        Returns an iterable of the header keys in the mapping. This explicitly
        does not filter duplicates, ensuring that it's the same length as
        len().
        """
        for n, _ in self:
            yield n

    def items(self):
        """
        This mapping iterates like the list of tuples it is.
        """
        return iter(self)

    def values(self):
        """
        This is an almost nonsensical query on a header dictionary, but we
        satisfy it in the exact same way we satisfy 'keys'.
        """
        for _, v in self:
            yield v

    def get(self, name, default=None):
        """
        Unlike the dict get, this returns a list of items in the order
        they were added, or ``default`` if the header is absent.
        """
        try:
            return self[name]
        except KeyError:
            return default

    def iter_raw(self):
        """
        Allows iterating over the headers in 'raw' form: that is, the form in
        which they were added to the structure. This iteration is in order,
        and can be used to rebuild the original headers (e.g. to determine
        exactly what a server sent).
        """
        for item in self._items:
            yield item

    def __eq__(self, other):
        """
        Two header maps are equal when they hold the same raw headers in the
        same order. Comparison with any other type returns ``NotImplemented``
        so Python can fall back to the other operand instead of failing with
        an ``AttributeError``.
        """
        if not isinstance(other, HTTPHeaderMap):
            return NotImplemented
        return self._items == other._items

    def __ne__(self, other):
        """
        Inverse of ``__eq__``; defined explicitly because Python 2 does not
        derive ``!=`` from ``==``.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
189+
190+
191+
# Lower-cased names of headers whose values must never be split on commas.
# Built once at import time rather than on every call to canonical_form.
_UNSPLITTABLE_HEADERS = frozenset((b'set-cookie', b'set-cookie2'))


def canonical_form(k, v):
    """
    Returns an iterable of key-value-pairs corresponding to the header in
    canonical form. This means that the header is split on commas unless for
    any reason it's a super-special snowflake (I'm looking at you Set-Cookie).

    :param k: The header name, as a bytestring. It is lower-cased before
        being emitted.
    :param v: The header value, as a bytestring.
    :returns: A generator of ``(lower-cased name, value)`` tuples. Exempt
        headers produce exactly one tuple with the value untouched; every
        other header produces one tuple per comma-separated piece, with
        surrounding whitespace stripped from each piece.
    """
    k = k.lower()

    if k in _UNSPLITTABLE_HEADERS:
        yield k, v
    else:
        for sub_val in v.split(b','):
            yield k, sub_val.strip()
206+
207+
208+
def _to_bytestring(element):
    """
    Return ``element`` as a bytestring.

    Unicode strings are encoded with UTF-8 and bytestrings are handed back
    untouched. Anything else is rejected with a ``ValueError``.
    """
    if isinstance(element, unicode):
        return element.encode('utf-8')

    if isinstance(element, bytes):
        return element

    raise ValueError("Non string type.")
218+
219+
220+
def _to_bytestring_tuple(*x):
    """
    Build a tuple holding the bytestring form of every positional argument,
    in the order the arguments were given.
    """
    return tuple(_to_bytestring(part) for part in x)
226+
227+
228+
def _keys_equal(x, y):
229+
"""
230+
Returns 'True' if the two keys are equal by the laws of HTTP headers.
231+
"""
232+
return x.lower() == y.lower()

hyper/compat.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def ignore_missing():
3636

3737
from urllib import urlencode
3838
from urlparse import urlparse, urlsplit
39+
from itertools import imap
3940

4041
def to_byte(char):
4142
return ord(char)
@@ -52,9 +53,14 @@ def zlib_compressobj(level=6, method=zlib.DEFLATED, wbits=15, memlevel=8,
5253
strategy=zlib.Z_DEFAULT_STRATEGY):
5354
return zlib.compressobj(level, method, wbits, memlevel, strategy)
5455

56+
unicode = unicode
57+
bytes = str
58+
5559
elif is_py3:
5660
from urllib.parse import urlencode, urlparse, urlsplit
5761

62+
imap = map
63+
5864
def to_byte(char):
5965
return char
6066

@@ -71,3 +77,6 @@ def write_to_stdout(data):
7177
ssl = ssl_compat
7278
else:
7379
import ssl
80+
81+
unicode = str
82+
bytes = bytes

0 commit comments

Comments
 (0)