Add files via upload

This commit is contained in:
Roy 2024-01-03 20:19:09 -08:00 committed by GitHub
parent 2630969ff9
commit 4f9a84d624
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 5633 additions and 0 deletions

View File

@ -1 +1,149 @@
"""
Python HTTP library with thread-safe connection pooling, file post support, user friendly, and more
"""
from __future__ import annotations
# Set default logging handler to avoid "No handler found" warnings.
import logging
import typing
import warnings
from logging import NullHandler
from . import exceptions
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from ._version import __version__
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url
from .filepost import _TYPE_FIELDS, encode_multipart_formdata
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
from .response import BaseHTTPResponse, HTTPResponse
from .util.request import make_headers
from .util.retry import Retry
from .util.timeout import Timeout
# Ensure that Python is compiled with OpenSSL 1.1.1+
# If the 'ssl' module isn't available at all that's
# fine, we only care if the module is available.
try:
import ssl
except ImportError:
pass
else:
if not ssl.OPENSSL_VERSION.startswith("OpenSSL "): # Defensive:
warnings.warn(
"urllib3 v2 only supports OpenSSL 1.1.1+, currently "
f"the 'ssl' module is compiled with {ssl.OPENSSL_VERSION!r}. "
"See: https://github.com/urllib3/urllib3/issues/3020",
exceptions.NotOpenSSLWarning,
)
elif ssl.OPENSSL_VERSION_INFO < (1, 1, 1): # Defensive:
raise ImportError(
"urllib3 v2 only supports OpenSSL 1.1.1+, currently "
f"the 'ssl' module is compiled with {ssl.OPENSSL_VERSION!r}. "
"See: https://github.com/urllib3/urllib3/issues/2168"
)
__author__ = "Andrey Petrov (andrey.petrov@shazow.net)"
__license__ = "MIT"
__version__ = __version__
__all__ = (
"HTTPConnectionPool",
"HTTPHeaderDict",
"HTTPSConnectionPool",
"PoolManager",
"ProxyManager",
"HTTPResponse",
"Retry",
"Timeout",
"add_stderr_logger",
"connection_from_url",
"disable_warnings",
"encode_multipart_formdata",
"make_headers",
"proxy_from_url",
"request",
"BaseHTTPResponse",
)
logging.getLogger(__name__).addHandler(NullHandler())
def add_stderr_logger(
level: int = logging.DEBUG,
) -> logging.StreamHandler[typing.TextIO]:
"""
Helper for quickly adding a StreamHandler to the logger. Useful for
debugging.
Returns the handler after adding it.
"""
# This method needs to be in this __init__.py to get the __name__ correct
# even if urllib3 is vendored within another package.
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
logger.addHandler(handler)
logger.setLevel(level)
logger.debug("Added a stderr logging handler to logger: %s", __name__)
return handler
# ... Clean up.
del NullHandler
# All warning filters *must* be appended unless you're really certain that they
# shouldn't be: otherwise, it's very hard for users to use most Python
# mechanisms to silence them.
# SecurityWarning's always go off by default.
warnings.simplefilter("always", exceptions.SecurityWarning, append=True)
# InsecurePlatformWarning's don't vary between requests, so we keep it default.
warnings.simplefilter("default", exceptions.InsecurePlatformWarning, append=True)
def disable_warnings(category: type[Warning] = exceptions.HTTPWarning) -> None:
"""
Helper for quickly disabling all urllib3 warnings.
"""
warnings.simplefilter("ignore", category)
_DEFAULT_POOL = PoolManager()
def request(
method: str,
url: str,
*,
body: _TYPE_BODY | None = None,
fields: _TYPE_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
preload_content: bool | None = True,
decode_content: bool | None = True,
redirect: bool | None = True,
retries: Retry | bool | int | None = None,
timeout: Timeout | float | int | None = 3,
json: typing.Any | None = None,
) -> BaseHTTPResponse:
"""
A convenience, top-level request method. It uses a module-global ``PoolManager`` instance.
Therefore, its side effects could be shared across dependencies relying on it.
To avoid side effects create a new ``PoolManager`` instance and use it instead.
The method does not accept low-level ``**urlopen_kw`` keyword arguments.
"""
return _DEFAULT_POOL.request(
method,
url,
body=body,
fields=fields,
headers=headers,
preload_content=preload_content,
decode_content=decode_content,
redirect=redirect,
retries=retries,
timeout=timeout,
json=json,
)

View File

@ -0,0 +1,172 @@
from __future__ import annotations
import typing
from .util.connection import _TYPE_SOCKET_OPTIONS
from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT
from .util.url import Url
_TYPE_BODY = typing.Union[bytes, typing.IO[typing.Any], typing.Iterable[bytes], str]
class ProxyConfig(typing.NamedTuple):
ssl_context: ssl.SSLContext | None
use_forwarding_for_https: bool
assert_hostname: None | str | Literal[False]
assert_fingerprint: str | None
class _ResponseOptions(typing.NamedTuple):
# TODO: Remove this in favor of a better
# HTTP request/response lifecycle tracking.
request_method: str
request_url: str
preload_content: bool
decode_content: bool
enforce_content_length: bool
if typing.TYPE_CHECKING:
import ssl
from typing import Literal, Protocol
from .response import BaseHTTPResponse
class BaseHTTPConnection(Protocol):
default_port: typing.ClassVar[int]
default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS]
host: str
port: int
timeout: None | (
float
) # Instance doesn't store _DEFAULT_TIMEOUT, must be resolved.
blocksize: int
source_address: tuple[str, int] | None
socket_options: _TYPE_SOCKET_OPTIONS | None
proxy: Url | None
proxy_config: ProxyConfig | None
is_verified: bool
proxy_is_verified: bool | None
def __init__(
self,
host: str,
port: int | None = None,
*,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
blocksize: int = 8192,
socket_options: _TYPE_SOCKET_OPTIONS | None = ...,
proxy: Url | None = None,
proxy_config: ProxyConfig | None = None,
) -> None:
...
def set_tunnel(
self,
host: str,
port: int | None = None,
headers: typing.Mapping[str, str] | None = None,
scheme: str = "http",
) -> None:
...
def connect(self) -> None:
...
def request(
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
# We know *at least* botocore is depending on the order of the
# first 3 parameters so to be safe we only mark the later ones
# as keyword-only to ensure we have space to extend.
*,
chunked: bool = False,
preload_content: bool = True,
decode_content: bool = True,
enforce_content_length: bool = True,
) -> None:
...
def getresponse(self) -> BaseHTTPResponse:
...
def close(self) -> None:
...
@property
def is_closed(self) -> bool:
"""Whether the connection either is brand new or has been previously closed.
If this property is True then both ``is_connected`` and ``has_connected_to_proxy``
properties must be False.
"""
@property
def is_connected(self) -> bool:
"""Whether the connection is actively connected to any origin (proxy or target)"""
@property
def has_connected_to_proxy(self) -> bool:
"""Whether the connection has successfully connected to its proxy.
This returns False if no proxy is in use. Used to determine whether
errors are coming from the proxy layer or from tunnelling to the target origin.
"""
class BaseHTTPSConnection(BaseHTTPConnection, Protocol):
default_port: typing.ClassVar[int]
default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS]
# Certificate verification methods
cert_reqs: int | str | None
assert_hostname: None | str | Literal[False]
assert_fingerprint: str | None
ssl_context: ssl.SSLContext | None
# Trusted CAs
ca_certs: str | None
ca_cert_dir: str | None
ca_cert_data: None | str | bytes
# TLS version
ssl_minimum_version: int | None
ssl_maximum_version: int | None
ssl_version: int | str | None # Deprecated
# Client certificates
cert_file: str | None
key_file: str | None
key_password: str | None
def __init__(
self,
host: str,
port: int | None = None,
*,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
blocksize: int = 16384,
socket_options: _TYPE_SOCKET_OPTIONS | None = ...,
proxy: Url | None = None,
proxy_config: ProxyConfig | None = None,
cert_reqs: int | str | None = None,
assert_hostname: None | str | Literal[False] = None,
assert_fingerprint: str | None = None,
server_hostname: str | None = None,
ssl_context: ssl.SSLContext | None = None,
ca_certs: str | None = None,
ca_cert_dir: str | None = None,
ca_cert_data: None | str | bytes = None,
ssl_minimum_version: int | None = None,
ssl_maximum_version: int | None = None,
ssl_version: int | str | None = None, # Deprecated
cert_file: str | None = None,
key_file: str | None = None,
key_password: str | None = None,
) -> None:
...

View File

@ -0,0 +1,483 @@
from __future__ import annotations
import typing
from collections import OrderedDict
from enum import Enum, auto
from threading import RLock
if typing.TYPE_CHECKING:
# We can only import Protocol if TYPE_CHECKING because it's a development
# dependency, and is not available at runtime.
from typing import Protocol
from typing_extensions import Self
class HasGettableStringKeys(Protocol):
def keys(self) -> typing.Iterator[str]:
...
def __getitem__(self, key: str) -> str:
...
__all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"]
# Key type
_KT = typing.TypeVar("_KT")
# Value type
_VT = typing.TypeVar("_VT")
# Default type
_DT = typing.TypeVar("_DT")
ValidHTTPHeaderSource = typing.Union[
"HTTPHeaderDict",
typing.Mapping[str, str],
typing.Iterable[typing.Tuple[str, str]],
"HasGettableStringKeys",
]
class _Sentinel(Enum):
not_passed = auto()
def ensure_can_construct_http_header_dict(
potential: object,
) -> ValidHTTPHeaderSource | None:
if isinstance(potential, HTTPHeaderDict):
return potential
elif isinstance(potential, typing.Mapping):
# Full runtime checking of the contents of a Mapping is expensive, so for the
# purposes of typechecking, we assume that any Mapping is the right shape.
return typing.cast(typing.Mapping[str, str], potential)
elif isinstance(potential, typing.Iterable):
# Similarly to Mapping, full runtime checking of the contents of an Iterable is
# expensive, so for the purposes of typechecking, we assume that any Iterable
# is the right shape.
return typing.cast(typing.Iterable[typing.Tuple[str, str]], potential)
elif hasattr(potential, "keys") and hasattr(potential, "__getitem__"):
return typing.cast("HasGettableStringKeys", potential)
else:
return None
class RecentlyUsedContainer(typing.Generic[_KT, _VT], typing.MutableMapping[_KT, _VT]):
"""
Provides a thread-safe dict-like container which maintains up to
``maxsize`` keys while throwing away the least-recently-used keys beyond
``maxsize``.
:param maxsize:
Maximum number of recent elements to retain.
:param dispose_func:
Every time an item is evicted from the container,
``dispose_func(value)`` is called. Callback which will get called
"""
_container: typing.OrderedDict[_KT, _VT]
_maxsize: int
dispose_func: typing.Callable[[_VT], None] | None
lock: RLock
def __init__(
self,
maxsize: int = 10,
dispose_func: typing.Callable[[_VT], None] | None = None,
) -> None:
super().__init__()
self._maxsize = maxsize
self.dispose_func = dispose_func
self._container = OrderedDict()
self.lock = RLock()
def __getitem__(self, key: _KT) -> _VT:
# Re-insert the item, moving it to the end of the eviction line.
with self.lock:
item = self._container.pop(key)
self._container[key] = item
return item
def __setitem__(self, key: _KT, value: _VT) -> None:
evicted_item = None
with self.lock:
# Possibly evict the existing value of 'key'
try:
# If the key exists, we'll overwrite it, which won't change the
# size of the pool. Because accessing a key should move it to
# the end of the eviction line, we pop it out first.
evicted_item = key, self._container.pop(key)
self._container[key] = value
except KeyError:
# When the key does not exist, we insert the value first so that
# evicting works in all cases, including when self._maxsize is 0
self._container[key] = value
if len(self._container) > self._maxsize:
# If we didn't evict an existing value, and we've hit our maximum
# size, then we have to evict the least recently used item from
# the beginning of the container.
evicted_item = self._container.popitem(last=False)
# After releasing the lock on the pool, dispose of any evicted value.
if evicted_item is not None and self.dispose_func:
_, evicted_value = evicted_item
self.dispose_func(evicted_value)
def __delitem__(self, key: _KT) -> None:
with self.lock:
value = self._container.pop(key)
if self.dispose_func:
self.dispose_func(value)
def __len__(self) -> int:
with self.lock:
return len(self._container)
def __iter__(self) -> typing.NoReturn:
raise NotImplementedError(
"Iteration over this class is unlikely to be threadsafe."
)
def clear(self) -> None:
with self.lock:
# Copy pointers to all values, then wipe the mapping
values = list(self._container.values())
self._container.clear()
if self.dispose_func:
for value in values:
self.dispose_func(value)
def keys(self) -> set[_KT]: # type: ignore[override]
with self.lock:
return set(self._container.keys())
class HTTPHeaderDictItemView(typing.Set[typing.Tuple[str, str]]):
"""
HTTPHeaderDict is unusual for a Mapping[str, str] in that it has two modes of
address.
If we directly try to get an item with a particular name, we will get a string
back that is the concatenated version of all the values:
>>> d['X-Header-Name']
'Value1, Value2, Value3'
However, if we iterate over an HTTPHeaderDict's items, we will optionally combine
these values based on whether combine=True was called when building up the dictionary
>>> d = HTTPHeaderDict({"A": "1", "B": "foo"})
>>> d.add("A", "2", combine=True)
>>> d.add("B", "bar")
>>> list(d.items())
[
('A', '1, 2'),
('B', 'foo'),
('B', 'bar'),
]
This class conforms to the interface required by the MutableMapping ABC while
also giving us the nonstandard iteration behavior we want; items with duplicate
keys, ordered by time of first insertion.
"""
_headers: HTTPHeaderDict
def __init__(self, headers: HTTPHeaderDict) -> None:
self._headers = headers
def __len__(self) -> int:
return len(list(self._headers.iteritems()))
def __iter__(self) -> typing.Iterator[tuple[str, str]]:
return self._headers.iteritems()
def __contains__(self, item: object) -> bool:
if isinstance(item, tuple) and len(item) == 2:
passed_key, passed_val = item
if isinstance(passed_key, str) and isinstance(passed_val, str):
return self._headers._has_value_for_header(passed_key, passed_val)
return False
class HTTPHeaderDict(typing.MutableMapping[str, str]):
"""
:param headers:
An iterable of field-value pairs. Must not contain multiple field names
when compared case-insensitively.
:param kwargs:
Additional field-value pairs to pass in to ``dict.update``.
A ``dict`` like container for storing HTTP Headers.
Field names are stored and compared case-insensitively in compliance with
RFC 7230. Iteration provides the first case-sensitive key seen for each
case-insensitive pair.
Using ``__setitem__`` syntax overwrites fields that compare equal
case-insensitively in order to maintain ``dict``'s api. For fields that
compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
in a loop.
If multiple fields that are equal case-insensitively are passed to the
constructor or ``.update``, the behavior is undefined and some will be
lost.
>>> headers = HTTPHeaderDict()
>>> headers.add('Set-Cookie', 'foo=bar')
>>> headers.add('set-cookie', 'baz=quxx')
>>> headers['content-length'] = '7'
>>> headers['SET-cookie']
'foo=bar, baz=quxx'
>>> headers['Content-Length']
'7'
"""
_container: typing.MutableMapping[str, list[str]]
def __init__(self, headers: ValidHTTPHeaderSource | None = None, **kwargs: str):
super().__init__()
self._container = {} # 'dict' is insert-ordered
if headers is not None:
if isinstance(headers, HTTPHeaderDict):
self._copy_from(headers)
else:
self.extend(headers)
if kwargs:
self.extend(kwargs)
def __setitem__(self, key: str, val: str) -> None:
# avoid a bytes/str comparison by decoding before httplib
if isinstance(key, bytes):
key = key.decode("latin-1")
self._container[key.lower()] = [key, val]
def __getitem__(self, key: str) -> str:
val = self._container[key.lower()]
return ", ".join(val[1:])
def __delitem__(self, key: str) -> None:
del self._container[key.lower()]
def __contains__(self, key: object) -> bool:
if isinstance(key, str):
return key.lower() in self._container
return False
def setdefault(self, key: str, default: str = "") -> str:
return super().setdefault(key, default)
def __eq__(self, other: object) -> bool:
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return False
else:
other_as_http_header_dict = type(self)(maybe_constructable)
return {k.lower(): v for k, v in self.itermerged()} == {
k.lower(): v for k, v in other_as_http_header_dict.itermerged()
}
def __ne__(self, other: object) -> bool:
return not self.__eq__(other)
def __len__(self) -> int:
return len(self._container)
def __iter__(self) -> typing.Iterator[str]:
# Only provide the originally cased names
for vals in self._container.values():
yield vals[0]
def discard(self, key: str) -> None:
try:
del self[key]
except KeyError:
pass
def add(self, key: str, val: str, *, combine: bool = False) -> None:
"""Adds a (name, value) pair, doesn't overwrite the value if it already
exists.
If this is called with combine=True, instead of adding a new header value
as a distinct item during iteration, this will instead append the value to
any existing header value with a comma. If no existing header value exists
for the key, then the value will simply be added, ignoring the combine parameter.
>>> headers = HTTPHeaderDict(foo='bar')
>>> headers.add('Foo', 'baz')
>>> headers['foo']
'bar, baz'
>>> list(headers.items())
[('foo', 'bar'), ('foo', 'baz')]
>>> headers.add('foo', 'quz', combine=True)
>>> list(headers.items())
[('foo', 'bar, baz, quz')]
"""
# avoid a bytes/str comparison by decoding before httplib
if isinstance(key, bytes):
key = key.decode("latin-1")
key_lower = key.lower()
new_vals = [key, val]
# Keep the common case aka no item present as fast as possible
vals = self._container.setdefault(key_lower, new_vals)
if new_vals is not vals:
# if there are values here, then there is at least the initial
# key/value pair
assert len(vals) >= 2
if combine:
vals[-1] = vals[-1] + ", " + val
else:
vals.append(val)
def extend(self, *args: ValidHTTPHeaderSource, **kwargs: str) -> None:
"""Generic import function for any type of header-like object.
Adapted version of MutableMapping.update in order to insert items
with self.add instead of self.__setitem__
"""
if len(args) > 1:
raise TypeError(
f"extend() takes at most 1 positional arguments ({len(args)} given)"
)
other = args[0] if len(args) >= 1 else ()
if isinstance(other, HTTPHeaderDict):
for key, val in other.iteritems():
self.add(key, val)
elif isinstance(other, typing.Mapping):
for key, val in other.items():
self.add(key, val)
elif isinstance(other, typing.Iterable):
other = typing.cast(typing.Iterable[typing.Tuple[str, str]], other)
for key, value in other:
self.add(key, value)
elif hasattr(other, "keys") and hasattr(other, "__getitem__"):
# THIS IS NOT A TYPESAFE BRANCH
# In this branch, the object has a `keys` attr but is not a Mapping or any of
# the other types indicated in the method signature. We do some stuff with
# it as though it partially implements the Mapping interface, but we're not
# doing that stuff safely AT ALL.
for key in other.keys():
self.add(key, other[key])
for key, value in kwargs.items():
self.add(key, value)
@typing.overload
def getlist(self, key: str) -> list[str]:
...
@typing.overload
def getlist(self, key: str, default: _DT) -> list[str] | _DT:
...
def getlist(
self, key: str, default: _Sentinel | _DT = _Sentinel.not_passed
) -> list[str] | _DT:
"""Returns a list of all the values for the named field. Returns an
empty list if the key doesn't exist."""
try:
vals = self._container[key.lower()]
except KeyError:
if default is _Sentinel.not_passed:
# _DT is unbound; empty list is instance of List[str]
return []
# _DT is bound; default is instance of _DT
return default
else:
# _DT may or may not be bound; vals[1:] is instance of List[str], which
# meets our external interface requirement of `Union[List[str], _DT]`.
return vals[1:]
def _prepare_for_method_change(self) -> Self:
"""
Remove content-specific header fields before changing the request
method to GET or HEAD according to RFC 9110, Section 15.4.
"""
content_specific_headers = [
"Content-Encoding",
"Content-Language",
"Content-Location",
"Content-Type",
"Content-Length",
"Digest",
"Last-Modified",
]
for header in content_specific_headers:
self.discard(header)
return self
# Backwards compatibility for httplib
getheaders = getlist
getallmatchingheaders = getlist
iget = getlist
# Backwards compatibility for http.cookiejar
get_all = getlist
def __repr__(self) -> str:
return f"{type(self).__name__}({dict(self.itermerged())})"
def _copy_from(self, other: HTTPHeaderDict) -> None:
for key in other:
val = other.getlist(key)
self._container[key.lower()] = [key, *val]
def copy(self) -> HTTPHeaderDict:
clone = type(self)()
clone._copy_from(self)
return clone
def iteritems(self) -> typing.Iterator[tuple[str, str]]:
"""Iterate over all header lines, including duplicate ones."""
for key in self:
vals = self._container[key.lower()]
for val in vals[1:]:
yield vals[0], val
def itermerged(self) -> typing.Iterator[tuple[str, str]]:
"""Iterate over all headers, merging duplicate ones together."""
for key in self:
val = self._container[key.lower()]
yield val[0], ", ".join(val[1:])
def items(self) -> HTTPHeaderDictItemView: # type: ignore[override]
return HTTPHeaderDictItemView(self)
def _has_value_for_header(self, header_name: str, potential_value: str) -> bool:
if header_name in self:
return potential_value in self._container[header_name.lower()][1:]
return False
def __ior__(self, other: object) -> HTTPHeaderDict:
# Supports extending a header dict in-place using operator |=
# combining items with add instead of __setitem__
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return NotImplemented
self.extend(maybe_constructable)
return self
def __or__(self, other: object) -> HTTPHeaderDict:
# Supports merging header dicts using operator |
# combining items with add instead of __setitem__
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return NotImplemented
result = self.copy()
result.extend(maybe_constructable)
return result
def __ror__(self, other: object) -> HTTPHeaderDict:
# Supports merging header dicts using operator | when other is on left side
# combining items with add instead of __setitem__
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return NotImplemented
result = type(self)(maybe_constructable)
result.extend(self)
return result

View File

@ -0,0 +1,217 @@
from __future__ import annotations
import json as _json
import typing
from urllib.parse import urlencode
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from .filepost import _TYPE_FIELDS, encode_multipart_formdata
from .response import BaseHTTPResponse
__all__ = ["RequestMethods"]
_TYPE_ENCODE_URL_FIELDS = typing.Union[
typing.Sequence[typing.Tuple[str, typing.Union[str, bytes]]],
typing.Mapping[str, typing.Union[str, bytes]],
]
class RequestMethods:
"""
Convenience mixin for classes who implement a :meth:`urlopen` method, such
as :class:`urllib3.HTTPConnectionPool` and
:class:`urllib3.PoolManager`.
Provides behavior for making common types of HTTP request methods and
decides which type of request field encoding to use.
Specifically,
:meth:`.request_encode_url` is for sending requests whose fields are
encoded in the URL (such as GET, HEAD, DELETE).
:meth:`.request_encode_body` is for sending requests whose fields are
encoded in the *body* of the request using multipart or www-form-urlencoded
(such as for POST, PUT, PATCH).
:meth:`.request` is for making any kind of request, it will look up the
appropriate encoding format and use one of the above two methods to make
the request.
Initializer parameters:
:param headers:
Headers to include with all requests, unless other headers are given
explicitly.
"""
_encode_url_methods = {"DELETE", "GET", "HEAD", "OPTIONS"}
def __init__(self, headers: typing.Mapping[str, str] | None = None) -> None:
self.headers = headers or {}
def urlopen(
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
encode_multipart: bool = True,
multipart_boundary: str | None = None,
**kw: typing.Any,
) -> BaseHTTPResponse: # Abstract
raise NotImplementedError(
"Classes extending RequestMethods must implement "
"their own ``urlopen`` method."
)
def request(
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
fields: _TYPE_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
json: typing.Any | None = None,
**urlopen_kw: typing.Any,
) -> BaseHTTPResponse:
"""
Make a request using :meth:`urlopen` with the appropriate encoding of
``fields`` based on the ``method`` used.
This is a convenience method that requires the least amount of manual
effort. It can be used in most situations, while still having the
option to drop down to more specific methods when necessary, such as
:meth:`request_encode_url`, :meth:`request_encode_body`,
or even the lowest level :meth:`urlopen`.
"""
method = method.upper()
if json is not None and body is not None:
raise TypeError(
"request got values for both 'body' and 'json' parameters which are mutually exclusive"
)
if json is not None:
if headers is None:
headers = self.headers.copy() # type: ignore
if not ("content-type" in map(str.lower, headers.keys())):
headers["Content-Type"] = "application/json" # type: ignore
body = _json.dumps(json, separators=(",", ":"), ensure_ascii=False).encode(
"utf-8"
)
if body is not None:
urlopen_kw["body"] = body
if method in self._encode_url_methods:
return self.request_encode_url(
method,
url,
fields=fields, # type: ignore[arg-type]
headers=headers,
**urlopen_kw,
)
else:
return self.request_encode_body(
method, url, fields=fields, headers=headers, **urlopen_kw
)
def request_encode_url(
self,
method: str,
url: str,
fields: _TYPE_ENCODE_URL_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
**urlopen_kw: str,
) -> BaseHTTPResponse:
"""
Make a request using :meth:`urlopen` with the ``fields`` encoded in
the url. This is useful for request methods like GET, HEAD, DELETE, etc.
"""
if headers is None:
headers = self.headers
extra_kw: dict[str, typing.Any] = {"headers": headers}
extra_kw.update(urlopen_kw)
if fields:
url += "?" + urlencode(fields)
return self.urlopen(method, url, **extra_kw)
def request_encode_body(
self,
method: str,
url: str,
fields: _TYPE_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
encode_multipart: bool = True,
multipart_boundary: str | None = None,
**urlopen_kw: str,
) -> BaseHTTPResponse:
"""
Make a request using :meth:`urlopen` with the ``fields`` encoded in
the body. This is useful for request methods like POST, PUT, PATCH, etc.
When ``encode_multipart=True`` (default), then
:func:`urllib3.encode_multipart_formdata` is used to encode
the payload with the appropriate content type. Otherwise
:func:`urllib.parse.urlencode` is used with the
'application/x-www-form-urlencoded' content type.
Multipart encoding must be used when posting files, and it's reasonably
safe to use it in other times too. However, it may break request
signing, such as with OAuth.
Supports an optional ``fields`` parameter of key/value strings AND
key/filetuple. A filetuple is a (filename, data, MIME type) tuple where
the MIME type is optional. For example::
fields = {
'foo': 'bar',
'fakefile': ('foofile.txt', 'contents of foofile'),
'realfile': ('barfile.txt', open('realfile').read()),
'typedfile': ('bazfile.bin', open('bazfile').read(),
'image/jpeg'),
'nonamefile': 'contents of nonamefile field',
}
When uploading a file, providing a filename (the first parameter of the
tuple) is optional but recommended to best mimic behavior of browsers.
Note that if ``headers`` are supplied, the 'Content-Type' header will
be overwritten because it depends on the dynamic random boundary string
which is used to compose the body of the request. The random boundary
string can be explicitly set with the ``multipart_boundary`` parameter.
"""
if headers is None:
headers = self.headers
extra_kw: dict[str, typing.Any] = {"headers": HTTPHeaderDict(headers)}
body: bytes | str
if fields:
if "body" in urlopen_kw:
raise TypeError(
"request got values for both 'fields' and 'body', can only specify one."
)
if encode_multipart:
body, content_type = encode_multipart_formdata(
fields, boundary=multipart_boundary
)
else:
body, content_type = (
urlencode(fields), # type: ignore[arg-type]
"application/x-www-form-urlencoded",
)
extra_kw["body"] = body
extra_kw["headers"].setdefault("Content-Type", content_type)
extra_kw.update(urlopen_kw)
return self.urlopen(method, url, **extra_kw)

View File

@ -0,0 +1,4 @@
# This file is protected via CODEOWNERS
from __future__ import annotations
__version__ = "2.1.0"

View File

@ -0,0 +1,905 @@
from __future__ import annotations
import datetime
import logging
import os
import re
import socket
import sys
import typing
import warnings
from http.client import HTTPConnection as _HTTPConnection
from http.client import HTTPException as HTTPException # noqa: F401
from http.client import ResponseNotReady
from socket import timeout as SocketTimeout
if typing.TYPE_CHECKING:
from typing import Literal
from .response import HTTPResponse
from .util.ssl_ import _TYPE_PEER_CERT_RET_DICT
from .util.ssltransport import SSLTransport
from ._collections import HTTPHeaderDict
from .util.response import assert_header_parsing
from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT, Timeout
from .util.util import to_str
from .util.wait import wait_for_read
try: # Compiled with SSL?
import ssl
BaseSSLError = ssl.SSLError
except (ImportError, AttributeError):
ssl = None # type: ignore[assignment]
class BaseSSLError(BaseException): # type: ignore[no-redef]
pass
from ._base_connection import _TYPE_BODY
from ._base_connection import ProxyConfig as ProxyConfig
from ._base_connection import _ResponseOptions as _ResponseOptions
from ._version import __version__
from .exceptions import (
ConnectTimeoutError,
HeaderParsingError,
NameResolutionError,
NewConnectionError,
ProxyError,
SystemTimeWarning,
)
from .util import SKIP_HEADER, SKIPPABLE_HEADERS, connection, ssl_
from .util.request import body_to_chunks
from .util.ssl_ import assert_fingerprint as _assert_fingerprint
from .util.ssl_ import (
create_urllib3_context,
is_ipaddress,
resolve_cert_reqs,
resolve_ssl_version,
ssl_wrap_socket,
)
from .util.ssl_match_hostname import CertificateError, match_hostname
from .util.url import Url
# Not a no-op, we're adding this to the namespace so it can be imported.
ConnectionError = ConnectionError
BrokenPipeError = BrokenPipeError
log = logging.getLogger(__name__)
port_by_scheme = {"http": 80, "https": 443}
# When it comes time to update this value as a part of regular maintenance
# (ie test_recent_date is failing) update it to ~6 months before the current date.
RECENT_DATE = datetime.date(2022, 1, 1)
_CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]")
_HAS_SYS_AUDIT = hasattr(sys, "audit")
class HTTPConnection(_HTTPConnection):
"""
Based on :class:`http.client.HTTPConnection` but provides an extra constructor
backwards-compatibility layer between older and newer Pythons.
Additional keyword parameters are used to configure attributes of the connection.
Accepted parameters include:
- ``source_address``: Set the source address for the current connection.
- ``socket_options``: Set specific options on the underlying socket. If not specified, then
defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.
For example, if you wish to enable TCP Keep Alive in addition to the defaults,
you might pass:
.. code-block:: python
HTTPConnection.default_socket_options + [
(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
]
Or you may want to disable the defaults by passing an empty list (e.g., ``[]``).
"""
default_port: typing.ClassVar[int] = port_by_scheme["http"] # type: ignore[misc]
#: Disable Nagle's algorithm by default.
#: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
default_socket_options: typing.ClassVar[connection._TYPE_SOCKET_OPTIONS] = [
(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
]
#: Whether this connection verifies the host's certificate.
is_verified: bool = False
#: Whether this proxy connection verified the proxy host's certificate.
# If no proxy is currently connected to the value will be ``None``.
proxy_is_verified: bool | None = None
blocksize: int
source_address: tuple[str, int] | None
socket_options: connection._TYPE_SOCKET_OPTIONS | None
_has_connected_to_proxy: bool
_response_options: _ResponseOptions | None
_tunnel_host: str | None
_tunnel_port: int | None
_tunnel_scheme: str | None
def __init__(
self,
host: str,
port: int | None = None,
*,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
blocksize: int = 16384,
socket_options: None
| (connection._TYPE_SOCKET_OPTIONS) = default_socket_options,
proxy: Url | None = None,
proxy_config: ProxyConfig | None = None,
) -> None:
super().__init__(
host=host,
port=port,
timeout=Timeout.resolve_default_timeout(timeout),
source_address=source_address,
blocksize=blocksize,
)
self.socket_options = socket_options
self.proxy = proxy
self.proxy_config = proxy_config
self._has_connected_to_proxy = False
self._response_options = None
self._tunnel_host: str | None = None
self._tunnel_port: int | None = None
self._tunnel_scheme: str | None = None
# https://github.com/python/mypy/issues/4125
# Mypy treats this as LSP violation, which is considered a bug.
# If `host` is made a property it violates LSP, because a writeable attribute is overridden with a read-only one.
# However, there is also a `host` setter so LSP is not violated.
# Potentially, a `@host.deleter` might be needed depending on how this issue will be fixed.
@property
def host(self) -> str:
"""
Getter method to remove any trailing dots that indicate the hostname is an FQDN.
In general, SSL certificates don't include the trailing dot indicating a
fully-qualified domain name, and thus, they don't validate properly when
checked against a domain name that includes the dot. In addition, some
servers may not expect to receive the trailing dot when provided.
However, the hostname with trailing dot is critical to DNS resolution; doing a
lookup with the trailing dot will properly only resolve the appropriate FQDN,
whereas a lookup without a trailing dot will search the system's search domain
list. Thus, it's important to keep the original host around for use only in
those cases where it's appropriate (i.e., when doing DNS lookup to establish the
actual TCP connection across which we're going to send HTTP requests).
"""
return self._dns_host.rstrip(".")
@host.setter
def host(self, value: str) -> None:
"""
Setter for the `host` property.
We assume that only urllib3 uses the _dns_host attribute; httplib itself
only uses `host`, and it seems reasonable that other libraries follow suit.
"""
self._dns_host = value
def _new_conn(self) -> socket.socket:
"""Establish a socket connection and set nodelay settings on it.
:return: New socket connection.
"""
try:
sock = connection.create_connection(
(self._dns_host, self.port),
self.timeout,
source_address=self.source_address,
socket_options=self.socket_options,
)
except socket.gaierror as e:
raise NameResolutionError(self.host, self, e) from e
except SocketTimeout as e:
raise ConnectTimeoutError(
self,
f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
) from e
except OSError as e:
raise NewConnectionError(
self, f"Failed to establish a new connection: {e}"
) from e
# Audit hooks are only available in Python 3.8+
if _HAS_SYS_AUDIT:
sys.audit("http.client.connect", self, self.host, self.port)
return sock
def set_tunnel(
self,
host: str,
port: int | None = None,
headers: typing.Mapping[str, str] | None = None,
scheme: str = "http",
) -> None:
if scheme not in ("http", "https"):
raise ValueError(
f"Invalid proxy scheme for tunneling: {scheme!r}, must be either 'http' or 'https'"
)
super().set_tunnel(host, port=port, headers=headers)
self._tunnel_scheme = scheme
def connect(self) -> None:
self.sock = self._new_conn()
if self._tunnel_host:
# If we're tunneling it means we're connected to our proxy.
self._has_connected_to_proxy = True
# TODO: Fix tunnel so it doesn't depend on self.sock state.
self._tunnel() # type: ignore[attr-defined]
# If there's a proxy to be connected to we are fully connected.
# This is set twice (once above and here) due to forwarding proxies
# not using tunnelling.
self._has_connected_to_proxy = bool(self.proxy)
@property
def is_closed(self) -> bool:
return self.sock is None
@property
def is_connected(self) -> bool:
if self.sock is None:
return False
return not wait_for_read(self.sock, timeout=0.0)
@property
def has_connected_to_proxy(self) -> bool:
return self._has_connected_to_proxy
def close(self) -> None:
try:
super().close()
finally:
# Reset all stateful properties so connection
# can be re-used without leaking prior configs.
self.sock = None
self.is_verified = False
self.proxy_is_verified = None
self._has_connected_to_proxy = False
self._response_options = None
self._tunnel_host = None
self._tunnel_port = None
self._tunnel_scheme = None
def putrequest(
self,
method: str,
url: str,
skip_host: bool = False,
skip_accept_encoding: bool = False,
) -> None:
""""""
# Empty docstring because the indentation of CPython's implementation
# is broken but we don't want this method in our documentation.
match = _CONTAINS_CONTROL_CHAR_RE.search(method)
if match:
raise ValueError(
f"Method cannot contain non-token characters {method!r} (found at least {match.group()!r})"
)
return super().putrequest(
method, url, skip_host=skip_host, skip_accept_encoding=skip_accept_encoding
)
def putheader(self, header: str, *values: str) -> None:
""""""
if not any(isinstance(v, str) and v == SKIP_HEADER for v in values):
super().putheader(header, *values)
elif to_str(header.lower()) not in SKIPPABLE_HEADERS:
skippable_headers = "', '".join(
[str.title(header) for header in sorted(SKIPPABLE_HEADERS)]
)
raise ValueError(
f"urllib3.util.SKIP_HEADER only supports '{skippable_headers}'"
)
# `request` method's signature intentionally violates LSP.
# urllib3's API is different from `http.client.HTTPConnection` and the subclassing is only incidental.
def request( # type: ignore[override]
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
*,
chunked: bool = False,
preload_content: bool = True,
decode_content: bool = True,
enforce_content_length: bool = True,
) -> None:
# Update the inner socket's timeout value to send the request.
# This only triggers if the connection is re-used.
if self.sock is not None:
self.sock.settimeout(self.timeout)
# Store these values to be fed into the HTTPResponse
# object later. TODO: Remove this in favor of a real
# HTTP lifecycle mechanism.
# We have to store these before we call .request()
# because sometimes we can still salvage a response
# off the wire even if we aren't able to completely
# send the request body.
self._response_options = _ResponseOptions(
request_method=method,
request_url=url,
preload_content=preload_content,
decode_content=decode_content,
enforce_content_length=enforce_content_length,
)
if headers is None:
headers = {}
header_keys = frozenset(to_str(k.lower()) for k in headers)
skip_accept_encoding = "accept-encoding" in header_keys
skip_host = "host" in header_keys
self.putrequest(
method, url, skip_accept_encoding=skip_accept_encoding, skip_host=skip_host
)
# Transform the body into an iterable of sendall()-able chunks
# and detect if an explicit Content-Length is doable.
chunks_and_cl = body_to_chunks(body, method=method, blocksize=self.blocksize)
chunks = chunks_and_cl.chunks
content_length = chunks_and_cl.content_length
# When chunked is explicit set to 'True' we respect that.
if chunked:
if "transfer-encoding" not in header_keys:
self.putheader("Transfer-Encoding", "chunked")
else:
# Detect whether a framing mechanism is already in use. If so
# we respect that value, otherwise we pick chunked vs content-length
# depending on the type of 'body'.
if "content-length" in header_keys:
chunked = False
elif "transfer-encoding" in header_keys:
chunked = True
# Otherwise we go off the recommendation of 'body_to_chunks()'.
else:
chunked = False
if content_length is None:
if chunks is not None:
chunked = True
self.putheader("Transfer-Encoding", "chunked")
else:
self.putheader("Content-Length", str(content_length))
# Now that framing headers are out of the way we send all the other headers.
if "user-agent" not in header_keys:
self.putheader("User-Agent", _get_default_user_agent())
for header, value in headers.items():
self.putheader(header, value)
self.endheaders()
# If we're given a body we start sending that in chunks.
if chunks is not None:
for chunk in chunks:
# Sending empty chunks isn't allowed for TE: chunked
# as it indicates the end of the body.
if not chunk:
continue
if isinstance(chunk, str):
chunk = chunk.encode("utf-8")
if chunked:
self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk))
else:
self.send(chunk)
# Regardless of whether we have a body or not, if we're in
# chunked mode we want to send an explicit empty chunk.
if chunked:
self.send(b"0\r\n\r\n")
def request_chunked(
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
) -> None:
"""
Alternative to the common request method, which sends the
body with chunked encoding and not as one block
"""
warnings.warn(
"HTTPConnection.request_chunked() is deprecated and will be removed "
"in urllib3 v2.1.0. Instead use HTTPConnection.request(..., chunked=True).",
category=DeprecationWarning,
stacklevel=2,
)
self.request(method, url, body=body, headers=headers, chunked=True)
def getresponse( # type: ignore[override]
self,
) -> HTTPResponse:
"""
Get the response from the server.
If the HTTPConnection is in the correct state, returns an instance of HTTPResponse or of whatever object is returned by the response_class variable.
If a request has not been sent or if a previous response has not be handled, ResponseNotReady is raised. If the HTTP response indicates that the connection should be closed, then it will be closed before the response is returned. When the connection is closed, the underlying socket is closed.
"""
# Raise the same error as http.client.HTTPConnection
if self._response_options is None:
raise ResponseNotReady()
# Reset this attribute for being used again.
resp_options = self._response_options
self._response_options = None
# Since the connection's timeout value may have been updated
# we need to set the timeout on the socket.
self.sock.settimeout(self.timeout)
# This is needed here to avoid circular import errors
from .response import HTTPResponse
# Get the response from http.client.HTTPConnection
httplib_response = super().getresponse()
try:
assert_header_parsing(httplib_response.msg)
except (HeaderParsingError, TypeError) as hpe:
log.warning(
"Failed to parse headers (url=%s): %s",
_url_from_connection(self, resp_options.request_url),
hpe,
exc_info=True,
)
headers = HTTPHeaderDict(httplib_response.msg.items())
response = HTTPResponse(
body=httplib_response,
headers=headers,
status=httplib_response.status,
version=httplib_response.version,
reason=httplib_response.reason,
preload_content=resp_options.preload_content,
decode_content=resp_options.decode_content,
original_response=httplib_response,
enforce_content_length=resp_options.enforce_content_length,
request_method=resp_options.request_method,
request_url=resp_options.request_url,
)
return response
class HTTPSConnection(HTTPConnection):
"""
Many of the parameters to this constructor are passed to the underlying SSL
socket by means of :py:func:`urllib3.util.ssl_wrap_socket`.
"""
default_port = port_by_scheme["https"] # type: ignore[misc]
cert_reqs: int | str | None = None
ca_certs: str | None = None
ca_cert_dir: str | None = None
ca_cert_data: None | str | bytes = None
ssl_version: int | str | None = None
ssl_minimum_version: int | None = None
ssl_maximum_version: int | None = None
assert_fingerprint: str | None = None
def __init__(
self,
host: str,
port: int | None = None,
*,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
blocksize: int = 16384,
socket_options: None
| (connection._TYPE_SOCKET_OPTIONS) = HTTPConnection.default_socket_options,
proxy: Url | None = None,
proxy_config: ProxyConfig | None = None,
cert_reqs: int | str | None = None,
assert_hostname: None | str | Literal[False] = None,
assert_fingerprint: str | None = None,
server_hostname: str | None = None,
ssl_context: ssl.SSLContext | None = None,
ca_certs: str | None = None,
ca_cert_dir: str | None = None,
ca_cert_data: None | str | bytes = None,
ssl_minimum_version: int | None = None,
ssl_maximum_version: int | None = None,
ssl_version: int | str | None = None, # Deprecated
cert_file: str | None = None,
key_file: str | None = None,
key_password: str | None = None,
) -> None:
super().__init__(
host,
port=port,
timeout=timeout,
source_address=source_address,
blocksize=blocksize,
socket_options=socket_options,
proxy=proxy,
proxy_config=proxy_config,
)
self.key_file = key_file
self.cert_file = cert_file
self.key_password = key_password
self.ssl_context = ssl_context
self.server_hostname = server_hostname
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
self.ssl_version = ssl_version
self.ssl_minimum_version = ssl_minimum_version
self.ssl_maximum_version = ssl_maximum_version
self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
self.ca_cert_data = ca_cert_data
# cert_reqs depends on ssl_context so calculate last.
if cert_reqs is None:
if self.ssl_context is not None:
cert_reqs = self.ssl_context.verify_mode
else:
cert_reqs = resolve_cert_reqs(None)
self.cert_reqs = cert_reqs
def set_cert(
self,
key_file: str | None = None,
cert_file: str | None = None,
cert_reqs: int | str | None = None,
key_password: str | None = None,
ca_certs: str | None = None,
assert_hostname: None | str | Literal[False] = None,
assert_fingerprint: str | None = None,
ca_cert_dir: str | None = None,
ca_cert_data: None | str | bytes = None,
) -> None:
"""
This method should only be called once, before the connection is used.
"""
warnings.warn(
"HTTPSConnection.set_cert() is deprecated and will be removed "
"in urllib3 v2.1.0. Instead provide the parameters to the "
"HTTPSConnection constructor.",
category=DeprecationWarning,
stacklevel=2,
)
# If cert_reqs is not provided we'll assume CERT_REQUIRED unless we also
# have an SSLContext object in which case we'll use its verify_mode.
if cert_reqs is None:
if self.ssl_context is not None:
cert_reqs = self.ssl_context.verify_mode
else:
cert_reqs = resolve_cert_reqs(None)
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
self.key_password = key_password
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
self.ca_cert_data = ca_cert_data
def connect(self) -> None:
sock: socket.socket | ssl.SSLSocket
self.sock = sock = self._new_conn()
server_hostname: str = self.host
tls_in_tls = False
# Do we need to establish a tunnel?
if self._tunnel_host is not None:
# We're tunneling to an HTTPS origin so need to do TLS-in-TLS.
if self._tunnel_scheme == "https":
self.sock = sock = self._connect_tls_proxy(self.host, sock)
tls_in_tls = True
# If we're tunneling it means we're connected to our proxy.
self._has_connected_to_proxy = True
self._tunnel() # type: ignore[attr-defined]
# Override the host with the one we're requesting data from.
server_hostname = self._tunnel_host
if self.server_hostname is not None:
server_hostname = self.server_hostname
is_time_off = datetime.date.today() < RECENT_DATE
if is_time_off:
warnings.warn(
(
f"System time is way off (before {RECENT_DATE}). This will probably "
"lead to SSL verification errors"
),
SystemTimeWarning,
)
sock_and_verified = _ssl_wrap_socket_and_match_hostname(
sock=sock,
cert_reqs=self.cert_reqs,
ssl_version=self.ssl_version,
ssl_minimum_version=self.ssl_minimum_version,
ssl_maximum_version=self.ssl_maximum_version,
ca_certs=self.ca_certs,
ca_cert_dir=self.ca_cert_dir,
ca_cert_data=self.ca_cert_data,
cert_file=self.cert_file,
key_file=self.key_file,
key_password=self.key_password,
server_hostname=server_hostname,
ssl_context=self.ssl_context,
tls_in_tls=tls_in_tls,
assert_hostname=self.assert_hostname,
assert_fingerprint=self.assert_fingerprint,
)
self.sock = sock_and_verified.socket
self.is_verified = sock_and_verified.is_verified
# If there's a proxy to be connected to we are fully connected.
# This is set twice (once above and here) due to forwarding proxies
# not using tunnelling.
self._has_connected_to_proxy = bool(self.proxy)
def _connect_tls_proxy(self, hostname: str, sock: socket.socket) -> ssl.SSLSocket:
"""
Establish a TLS connection to the proxy using the provided SSL context.
"""
# `_connect_tls_proxy` is called when self._tunnel_host is truthy.
proxy_config = typing.cast(ProxyConfig, self.proxy_config)
ssl_context = proxy_config.ssl_context
sock_and_verified = _ssl_wrap_socket_and_match_hostname(
sock,
cert_reqs=self.cert_reqs,
ssl_version=self.ssl_version,
ssl_minimum_version=self.ssl_minimum_version,
ssl_maximum_version=self.ssl_maximum_version,
ca_certs=self.ca_certs,
ca_cert_dir=self.ca_cert_dir,
ca_cert_data=self.ca_cert_data,
server_hostname=hostname,
ssl_context=ssl_context,
assert_hostname=proxy_config.assert_hostname,
assert_fingerprint=proxy_config.assert_fingerprint,
# Features that aren't implemented for proxies yet:
cert_file=None,
key_file=None,
key_password=None,
tls_in_tls=False,
)
self.proxy_is_verified = sock_and_verified.is_verified
return sock_and_verified.socket # type: ignore[return-value]
class _WrappedAndVerifiedSocket(typing.NamedTuple):
"""
Wrapped socket and whether the connection is
verified after the TLS handshake
"""
socket: ssl.SSLSocket | SSLTransport
is_verified: bool
def _ssl_wrap_socket_and_match_hostname(
sock: socket.socket,
*,
cert_reqs: None | str | int,
ssl_version: None | str | int,
ssl_minimum_version: int | None,
ssl_maximum_version: int | None,
cert_file: str | None,
key_file: str | None,
key_password: str | None,
ca_certs: str | None,
ca_cert_dir: str | None,
ca_cert_data: None | str | bytes,
assert_hostname: None | str | Literal[False],
assert_fingerprint: str | None,
server_hostname: str | None,
ssl_context: ssl.SSLContext | None,
tls_in_tls: bool = False,
) -> _WrappedAndVerifiedSocket:
"""Logic for constructing an SSLContext from all TLS parameters, passing
that down into ssl_wrap_socket, and then doing certificate verification
either via hostname or fingerprint. This function exists to guarantee
that both proxies and targets have the same behavior when connecting via TLS.
"""
default_ssl_context = False
if ssl_context is None:
default_ssl_context = True
context = create_urllib3_context(
ssl_version=resolve_ssl_version(ssl_version),
ssl_minimum_version=ssl_minimum_version,
ssl_maximum_version=ssl_maximum_version,
cert_reqs=resolve_cert_reqs(cert_reqs),
)
else:
context = ssl_context
context.verify_mode = resolve_cert_reqs(cert_reqs)
# In some cases, we want to verify hostnames ourselves
if (
# `ssl` can't verify fingerprints or alternate hostnames
assert_fingerprint
or assert_hostname
# assert_hostname can be set to False to disable hostname checking
or assert_hostname is False
# We still support OpenSSL 1.0.2, which prevents us from verifying
# hostnames easily: https://github.com/pyca/pyopenssl/pull/933
or ssl_.IS_PYOPENSSL
or not ssl_.HAS_NEVER_CHECK_COMMON_NAME
):
context.check_hostname = False
# Try to load OS default certs if none are given. We need to do the hasattr() check
# for custom pyOpenSSL SSLContext objects because they don't support
# load_default_certs().
if (
not ca_certs
and not ca_cert_dir
and not ca_cert_data
and default_ssl_context
and hasattr(context, "load_default_certs")
):
context.load_default_certs()
# Ensure that IPv6 addresses are in the proper format and don't have a
# scope ID. Python's SSL module fails to recognize scoped IPv6 addresses
# and interprets them as DNS hostnames.
if server_hostname is not None:
normalized = server_hostname.strip("[]")
if "%" in normalized:
normalized = normalized[: normalized.rfind("%")]
if is_ipaddress(normalized):
server_hostname = normalized
ssl_sock = ssl_wrap_socket(
sock=sock,
keyfile=key_file,
certfile=cert_file,
key_password=key_password,
ca_certs=ca_certs,
ca_cert_dir=ca_cert_dir,
ca_cert_data=ca_cert_data,
server_hostname=server_hostname,
ssl_context=context,
tls_in_tls=tls_in_tls,
)
try:
if assert_fingerprint:
_assert_fingerprint(
ssl_sock.getpeercert(binary_form=True), assert_fingerprint
)
elif (
context.verify_mode != ssl.CERT_NONE
and not context.check_hostname
and assert_hostname is not False
):
cert: _TYPE_PEER_CERT_RET_DICT = ssl_sock.getpeercert() # type: ignore[assignment]
# Need to signal to our match_hostname whether to use 'commonName' or not.
# If we're using our own constructed SSLContext we explicitly set 'False'
# because PyPy hard-codes 'True' from SSLContext.hostname_checks_common_name.
if default_ssl_context:
hostname_checks_common_name = False
else:
hostname_checks_common_name = (
getattr(context, "hostname_checks_common_name", False) or False
)
_match_hostname(
cert,
assert_hostname or server_hostname, # type: ignore[arg-type]
hostname_checks_common_name,
)
return _WrappedAndVerifiedSocket(
socket=ssl_sock,
is_verified=context.verify_mode == ssl.CERT_REQUIRED
or bool(assert_fingerprint),
)
except BaseException:
ssl_sock.close()
raise
def _match_hostname(
cert: _TYPE_PEER_CERT_RET_DICT | None,
asserted_hostname: str,
hostname_checks_common_name: bool = False,
) -> None:
# Our upstream implementation of ssl.match_hostname()
# only applies this normalization to IP addresses so it doesn't
# match DNS SANs so we do the same thing!
stripped_hostname = asserted_hostname.strip("[]")
if is_ipaddress(stripped_hostname):
asserted_hostname = stripped_hostname
try:
match_hostname(cert, asserted_hostname, hostname_checks_common_name)
except CertificateError as e:
log.warning(
"Certificate did not match expected hostname: %s. Certificate: %s",
asserted_hostname,
cert,
)
# Add cert to exception and reraise so client code can inspect
# the cert when catching the exception, if they want to
e._peer_cert = cert # type: ignore[attr-defined]
raise
def _wrap_proxy_error(err: Exception, proxy_scheme: str | None) -> ProxyError:
# Look for the phrase 'wrong version number', if found
# then we should warn the user that we're very sure that
# this proxy is HTTP-only and they have a configuration issue.
error_normalized = " ".join(re.split("[^a-z]", str(err).lower()))
is_likely_http_proxy = (
"wrong version number" in error_normalized
or "unknown protocol" in error_normalized
)
http_proxy_warning = (
". Your proxy appears to only use HTTP and not HTTPS, "
"try changing your proxy URL to be HTTP. See: "
"https://urllib3.readthedocs.io/en/latest/advanced-usage.html"
"#https-proxy-error-http-proxy"
)
new_err = ProxyError(
f"Unable to connect to proxy"
f"{http_proxy_warning if is_likely_http_proxy and proxy_scheme == 'https' else ''}",
err,
)
new_err.__cause__ = err
return new_err
def _get_default_user_agent() -> str:
return f"python-urllib3/{__version__}"
class DummyConnection:
"""Used to detect a failed ConnectionCls import."""
if not ssl:
HTTPSConnection = DummyConnection # type: ignore[misc, assignment] # noqa: F811
VerifiedHTTPSConnection = HTTPSConnection
def _url_from_connection(
conn: HTTPConnection | HTTPSConnection, path: str | None = None
) -> str:
"""Returns the URL from a given connection. This is mainly used for testing and logging."""
scheme = "https" if isinstance(conn, HTTPSConnection) else "http"
return Url(scheme=scheme, host=conn.host, port=conn.port, path=path).url

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,318 @@
from __future__ import annotations
import socket
import typing
import warnings
from email.errors import MessageDefect
from http.client import IncompleteRead as httplib_IncompleteRead
if typing.TYPE_CHECKING:
from .connection import HTTPConnection
from .connectionpool import ConnectionPool
from .response import HTTPResponse
from .util.retry import Retry
# Base Exceptions
class HTTPError(Exception):
"""Base exception used by this module."""
class HTTPWarning(Warning):
"""Base warning used by this module."""
_TYPE_REDUCE_RESULT = typing.Tuple[
typing.Callable[..., object], typing.Tuple[object, ...]
]
class PoolError(HTTPError):
"""Base exception for errors caused within a pool."""
def __init__(self, pool: ConnectionPool, message: str) -> None:
self.pool = pool
super().__init__(f"{pool}: {message}")
def __reduce__(self) -> _TYPE_REDUCE_RESULT:
# For pickling purposes.
return self.__class__, (None, None)
class RequestError(PoolError):
"""Base exception for PoolErrors that have associated URLs."""
def __init__(self, pool: ConnectionPool, url: str, message: str) -> None:
self.url = url
super().__init__(pool, message)
def __reduce__(self) -> _TYPE_REDUCE_RESULT:
# For pickling purposes.
return self.__class__, (None, self.url, None)
class SSLError(HTTPError):
"""Raised when SSL certificate fails in an HTTPS connection."""
class ProxyError(HTTPError):
"""Raised when the connection to a proxy fails."""
# The original error is also available as __cause__.
original_error: Exception
def __init__(self, message: str, error: Exception) -> None:
super().__init__(message, error)
self.original_error = error
class DecodeError(HTTPError):
"""Raised when automatic decoding based on Content-Type fails."""
class ProtocolError(HTTPError):
"""Raised when something unexpected happens mid-request/response."""
#: Renamed to ProtocolError but aliased for backwards compatibility.
ConnectionError = ProtocolError
# Leaf Exceptions
class MaxRetryError(RequestError):
"""Raised when the maximum number of retries is exceeded.
:param pool: The connection pool
:type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool`
:param str url: The requested Url
:param reason: The underlying error
:type reason: :class:`Exception`
"""
def __init__(
self, pool: ConnectionPool, url: str, reason: Exception | None = None
) -> None:
self.reason = reason
message = f"Max retries exceeded with url: {url} (Caused by {reason!r})"
super().__init__(pool, url, message)
class HostChangedError(RequestError):
"""Raised when an existing pool gets a request for a foreign host."""
def __init__(
self, pool: ConnectionPool, url: str, retries: Retry | int = 3
) -> None:
message = f"Tried to open a foreign host with url: {url}"
super().__init__(pool, url, message)
self.retries = retries
class TimeoutStateError(HTTPError):
"""Raised when passing an invalid state to a timeout"""
class TimeoutError(HTTPError):
"""Raised when a socket timeout error occurs.
Catching this error will catch both :exc:`ReadTimeoutErrors
<ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`.
"""
class ReadTimeoutError(TimeoutError, RequestError):
"""Raised when a socket timeout occurs while receiving data from a server"""
# This timeout error does not have a URL attached and needs to inherit from the
# base HTTPError
class ConnectTimeoutError(TimeoutError):
"""Raised when a socket timeout occurs while connecting to a server"""
class NewConnectionError(ConnectTimeoutError, HTTPError):
"""Raised when we fail to establish a new connection. Usually ECONNREFUSED."""
def __init__(self, conn: HTTPConnection, message: str) -> None:
self.conn = conn
super().__init__(f"{conn}: {message}")
@property
def pool(self) -> HTTPConnection:
warnings.warn(
"The 'pool' property is deprecated and will be removed "
"in urllib3 v2.1.0. Use 'conn' instead.",
DeprecationWarning,
stacklevel=2,
)
return self.conn
class NameResolutionError(NewConnectionError):
"""Raised when host name resolution fails."""
def __init__(self, host: str, conn: HTTPConnection, reason: socket.gaierror):
message = f"Failed to resolve '{host}' ({reason})"
super().__init__(conn, message)
class EmptyPoolError(PoolError):
"""Raised when a pool runs out of connections and no more are allowed."""
class FullPoolError(PoolError):
"""Raised when we try to add a connection to a full pool in blocking mode."""
class ClosedPoolError(PoolError):
"""Raised when a request enters a pool after the pool has been closed."""
class LocationValueError(ValueError, HTTPError):
"""Raised when there is something wrong with a given URL input."""
class LocationParseError(LocationValueError):
"""Raised when get_host or similar fails to parse the URL input."""
def __init__(self, location: str) -> None:
message = f"Failed to parse: {location}"
super().__init__(message)
self.location = location
class URLSchemeUnknown(LocationValueError):
"""Raised when a URL input has an unsupported scheme."""
def __init__(self, scheme: str):
message = f"Not supported URL scheme {scheme}"
super().__init__(message)
self.scheme = scheme
class ResponseError(HTTPError):
"""Used as a container for an error reason supplied in a MaxRetryError."""
GENERIC_ERROR = "too many error responses"
SPECIFIC_ERROR = "too many {status_code} error responses"
class SecurityWarning(HTTPWarning):
"""Warned when performing security reducing actions"""
class InsecureRequestWarning(SecurityWarning):
"""Warned when making an unverified HTTPS request."""
class NotOpenSSLWarning(SecurityWarning):
"""Warned when using unsupported SSL library"""
class SystemTimeWarning(SecurityWarning):
"""Warned when system time is suspected to be wrong"""
class InsecurePlatformWarning(SecurityWarning):
"""Warned when certain TLS/SSL configuration is not available on a platform."""
class DependencyWarning(HTTPWarning):
"""
Warned when an attempt is made to import a module with missing optional
dependencies.
"""
class ResponseNotChunked(ProtocolError, ValueError):
"""Response needs to be chunked in order to read it as chunks."""
class BodyNotHttplibCompatible(HTTPError):
"""
Body should be :class:`http.client.HTTPResponse` like
(have an fp attribute which returns raw chunks) for read_chunked().
"""
class IncompleteRead(HTTPError, httplib_IncompleteRead):
"""
Response length doesn't match expected Content-Length
Subclass of :class:`http.client.IncompleteRead` to allow int value
for ``partial`` to avoid creating large objects on streamed reads.
"""
def __init__(self, partial: int, expected: int) -> None:
self.partial = partial # type: ignore[assignment]
self.expected = expected
def __repr__(self) -> str:
return "IncompleteRead(%i bytes read, %i more expected)" % (
self.partial, # type: ignore[str-format]
self.expected,
)
class InvalidChunkLength(HTTPError, httplib_IncompleteRead):
"""Invalid chunk length in a chunked response."""
def __init__(self, response: HTTPResponse, length: bytes) -> None:
self.partial: int = response.tell() # type: ignore[assignment]
self.expected: int | None = response.length_remaining
self.response = response
self.length = length
def __repr__(self) -> str:
return "InvalidChunkLength(got length %r, %i bytes read)" % (
self.length,
self.partial,
)
class InvalidHeader(HTTPError):
"""The header provided was somehow invalid."""
class ProxySchemeUnknown(AssertionError, URLSchemeUnknown):
"""ProxyManager does not support the supplied scheme"""
# TODO(t-8ch): Stop inheriting from AssertionError in v2.0.
def __init__(self, scheme: str | None) -> None:
# 'localhost' is here because our URL parser parses
# localhost:8080 -> scheme=localhost, remove if we fix this.
if scheme == "localhost":
scheme = None
if scheme is None:
message = "Proxy URL had no scheme, should start with http:// or https://"
else:
message = f"Proxy URL had unsupported scheme {scheme}, should use http:// or https://"
super().__init__(message)
class ProxySchemeUnsupported(ValueError):
"""Fetching HTTPS resources through HTTPS proxies is unsupported"""
class HeaderParsingError(HTTPError):
"""Raised by assert_header_parsing, but we convert it to a log.warning statement."""
def __init__(
self, defects: list[MessageDefect], unparsed_data: bytes | str | None
) -> None:
message = f"{defects or 'Unknown'}, unparsed data: {unparsed_data!r}"
super().__init__(message)
class UnrewindableBodyError(HTTPError):
"""urllib3 encountered an error when trying to rewind a body"""

345
Modules/urllib3/fields.py Normal file
View File

@ -0,0 +1,345 @@
from __future__ import annotations
import email.utils
import mimetypes
import typing
_TYPE_FIELD_VALUE = typing.Union[str, bytes]
_TYPE_FIELD_VALUE_TUPLE = typing.Union[
_TYPE_FIELD_VALUE,
typing.Tuple[str, _TYPE_FIELD_VALUE],
typing.Tuple[str, _TYPE_FIELD_VALUE, str],
]
def guess_content_type(
filename: str | None, default: str = "application/octet-stream"
) -> str:
"""
Guess the "Content-Type" of a file.
:param filename:
The filename to guess the "Content-Type" of using :mod:`mimetypes`.
:param default:
If no "Content-Type" can be guessed, default to `default`.
"""
if filename:
return mimetypes.guess_type(filename)[0] or default
return default
def format_header_param_rfc2231(name: str, value: _TYPE_FIELD_VALUE) -> str:
"""
Helper function to format and quote a single header parameter using the
strategy defined in RFC 2231.
Particularly useful for header parameters which might contain
non-ASCII values, like file names. This follows
`RFC 2388 Section 4.4 <https://tools.ietf.org/html/rfc2388#section-4.4>`_.
:param name:
The name of the parameter, a string expected to be ASCII only.
:param value:
The value of the parameter, provided as ``bytes`` or `str``.
:returns:
An RFC-2231-formatted unicode string.
.. deprecated:: 2.0.0
Will be removed in urllib3 v2.1.0. This is not valid for
``multipart/form-data`` header parameters.
"""
import warnings
warnings.warn(
"'format_header_param_rfc2231' is deprecated and will be "
"removed in urllib3 v2.1.0. This is not valid for "
"multipart/form-data header parameters.",
DeprecationWarning,
stacklevel=2,
)
if isinstance(value, bytes):
value = value.decode("utf-8")
if not any(ch in value for ch in '"\\\r\n'):
result = f'{name}="{value}"'
try:
result.encode("ascii")
except (UnicodeEncodeError, UnicodeDecodeError):
pass
else:
return result
value = email.utils.encode_rfc2231(value, "utf-8")
value = f"{name}*={value}"
return value
def format_multipart_header_param(name: str, value: _TYPE_FIELD_VALUE) -> str:
"""
Format and quote a single multipart header parameter.
This follows the `WHATWG HTML Standard`_ as of 2021/06/10, matching
the behavior of current browser and curl versions. Values are
assumed to be UTF-8. The ``\\n``, ``\\r``, and ``"`` characters are
percent encoded.
.. _WHATWG HTML Standard:
https://html.spec.whatwg.org/multipage/
form-control-infrastructure.html#multipart-form-data
:param name:
The name of the parameter, an ASCII-only ``str``.
:param value:
The value of the parameter, a ``str`` or UTF-8 encoded
``bytes``.
:returns:
A string ``name="value"`` with the escaped value.
.. versionchanged:: 2.0.0
Matches the WHATWG HTML Standard as of 2021/06/10. Control
characters are no longer percent encoded.
.. versionchanged:: 2.0.0
Renamed from ``format_header_param_html5`` and
``format_header_param``. The old names will be removed in
urllib3 v2.1.0.
"""
if isinstance(value, bytes):
value = value.decode("utf-8")
# percent encode \n \r "
value = value.translate({10: "%0A", 13: "%0D", 34: "%22"})
return f'{name}="{value}"'
def format_header_param_html5(name: str, value: _TYPE_FIELD_VALUE) -> str:
"""
.. deprecated:: 2.0.0
Renamed to :func:`format_multipart_header_param`. Will be
removed in urllib3 v2.1.0.
"""
import warnings
warnings.warn(
"'format_header_param_html5' has been renamed to "
"'format_multipart_header_param'. The old name will be "
"removed in urllib3 v2.1.0.",
DeprecationWarning,
stacklevel=2,
)
return format_multipart_header_param(name, value)
def format_header_param(name: str, value: _TYPE_FIELD_VALUE) -> str:
"""
.. deprecated:: 2.0.0
Renamed to :func:`format_multipart_header_param`. Will be
removed in urllib3 v2.1.0.
"""
import warnings
warnings.warn(
"'format_header_param' has been renamed to "
"'format_multipart_header_param'. The old name will be "
"removed in urllib3 v2.1.0.",
DeprecationWarning,
stacklevel=2,
)
return format_multipart_header_param(name, value)
class RequestField:
"""
A data container for request body parameters.
:param name:
The name of this request field. Must be unicode.
:param data:
The data/value body.
:param filename:
An optional filename of the request field. Must be unicode.
:param headers:
An optional dict-like object of headers to initially use for the field.
.. versionchanged:: 2.0.0
The ``header_formatter`` parameter is deprecated and will
be removed in urllib3 v2.1.0.
"""
def __init__(
self,
name: str,
data: _TYPE_FIELD_VALUE,
filename: str | None = None,
headers: typing.Mapping[str, str] | None = None,
header_formatter: typing.Callable[[str, _TYPE_FIELD_VALUE], str] | None = None,
):
self._name = name
self._filename = filename
self.data = data
self.headers: dict[str, str | None] = {}
if headers:
self.headers = dict(headers)
if header_formatter is not None:
import warnings
warnings.warn(
"The 'header_formatter' parameter is deprecated and "
"will be removed in urllib3 v2.1.0.",
DeprecationWarning,
stacklevel=2,
)
self.header_formatter = header_formatter
else:
self.header_formatter = format_multipart_header_param
@classmethod
def from_tuples(
cls,
fieldname: str,
value: _TYPE_FIELD_VALUE_TUPLE,
header_formatter: typing.Callable[[str, _TYPE_FIELD_VALUE], str] | None = None,
) -> RequestField:
"""
A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.
Supports constructing :class:`~urllib3.fields.RequestField` from
parameter of key/value strings AND key/filetuple. A filetuple is a
(filename, data, MIME type) tuple where the MIME type is optional.
For example::
'foo': 'bar',
'fakefile': ('foofile.txt', 'contents of foofile'),
'realfile': ('barfile.txt', open('realfile').read()),
'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
'nonamefile': 'contents of nonamefile field',
Field names and filenames must be unicode.
"""
filename: str | None
content_type: str | None
data: _TYPE_FIELD_VALUE
if isinstance(value, tuple):
if len(value) == 3:
filename, data, content_type = typing.cast(
typing.Tuple[str, _TYPE_FIELD_VALUE, str], value
)
else:
filename, data = typing.cast(
typing.Tuple[str, _TYPE_FIELD_VALUE], value
)
content_type = guess_content_type(filename)
else:
filename = None
content_type = None
data = value
request_param = cls(
fieldname, data, filename=filename, header_formatter=header_formatter
)
request_param.make_multipart(content_type=content_type)
return request_param
def _render_part(self, name: str, value: _TYPE_FIELD_VALUE) -> str:
"""
Override this method to change how each multipart header
parameter is formatted. By default, this calls
:func:`format_multipart_header_param`.
:param name:
The name of the parameter, an ASCII-only ``str``.
:param value:
The value of the parameter, a ``str`` or UTF-8 encoded
``bytes``.
:meta public:
"""
return self.header_formatter(name, value)
def _render_parts(
self,
header_parts: (
dict[str, _TYPE_FIELD_VALUE | None]
| typing.Sequence[tuple[str, _TYPE_FIELD_VALUE | None]]
),
) -> str:
"""
Helper function to format and quote a single header.
Useful for single headers that are composed of multiple items. E.g.,
'Content-Disposition' fields.
:param header_parts:
A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
as `k1="v1"; k2="v2"; ...`.
"""
iterable: typing.Iterable[tuple[str, _TYPE_FIELD_VALUE | None]]
parts = []
if isinstance(header_parts, dict):
iterable = header_parts.items()
else:
iterable = header_parts
for name, value in iterable:
if value is not None:
parts.append(self._render_part(name, value))
return "; ".join(parts)
def render_headers(self) -> str:
"""
Renders the headers for this request field.
"""
lines = []
sort_keys = ["Content-Disposition", "Content-Type", "Content-Location"]
for sort_key in sort_keys:
if self.headers.get(sort_key, False):
lines.append(f"{sort_key}: {self.headers[sort_key]}")
for header_name, header_value in self.headers.items():
if header_name not in sort_keys:
if header_value:
lines.append(f"{header_name}: {header_value}")
lines.append("\r\n")
return "\r\n".join(lines)
def make_multipart(
self,
content_disposition: str | None = None,
content_type: str | None = None,
content_location: str | None = None,
) -> None:
"""
Makes this request field into a multipart request field.
This method overrides "Content-Disposition", "Content-Type" and
"Content-Location" headers to the request parameter.
:param content_disposition:
The 'Content-Disposition' of the request body. Defaults to 'form-data'
:param content_type:
The 'Content-Type' of the request body.
:param content_location:
The 'Content-Location' of the request body.
"""
content_disposition = (content_disposition or "form-data") + "; ".join(
[
"",
self._render_parts(
(("name", self._name), ("filename", self._filename))
),
]
)
self.headers["Content-Disposition"] = content_disposition
self.headers["Content-Type"] = content_type
self.headers["Content-Location"] = content_location

View File

@ -0,0 +1,89 @@
from __future__ import annotations
import binascii
import codecs
import os
import typing
from io import BytesIO
from .fields import _TYPE_FIELD_VALUE_TUPLE, RequestField
writer = codecs.lookup("utf-8")[3]
_TYPE_FIELDS_SEQUENCE = typing.Sequence[
typing.Union[typing.Tuple[str, _TYPE_FIELD_VALUE_TUPLE], RequestField]
]
_TYPE_FIELDS = typing.Union[
_TYPE_FIELDS_SEQUENCE,
typing.Mapping[str, _TYPE_FIELD_VALUE_TUPLE],
]
def choose_boundary() -> str:
"""
Our embarrassingly-simple replacement for mimetools.choose_boundary.
"""
return binascii.hexlify(os.urandom(16)).decode()
def iter_field_objects(fields: _TYPE_FIELDS) -> typing.Iterable[RequestField]:
"""
Iterate over fields.
Supports list of (k, v) tuples and dicts, and lists of
:class:`~urllib3.fields.RequestField`.
"""
iterable: typing.Iterable[RequestField | tuple[str, _TYPE_FIELD_VALUE_TUPLE]]
if isinstance(fields, typing.Mapping):
iterable = fields.items()
else:
iterable = fields
for field in iterable:
if isinstance(field, RequestField):
yield field
else:
yield RequestField.from_tuples(*field)
def encode_multipart_formdata(
fields: _TYPE_FIELDS, boundary: str | None = None
) -> tuple[bytes, str]:
"""
Encode a dictionary of ``fields`` using the multipart/form-data MIME format.
:param fields:
Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).
Values are processed by :func:`urllib3.fields.RequestField.from_tuples`.
:param boundary:
If not specified, then a random boundary will be generated using
:func:`urllib3.filepost.choose_boundary`.
"""
body = BytesIO()
if boundary is None:
boundary = choose_boundary()
for field in iter_field_objects(fields):
body.write(f"--{boundary}\r\n".encode("latin-1"))
writer(body).write(field.render_headers())
data = field.data
if isinstance(data, int):
data = str(data) # Backwards compatibility
if isinstance(data, str):
writer(body).write(data)
else:
body.write(data)
body.write(b"\r\n")
body.write(f"--{boundary}--\r\n".encode("latin-1"))
content_type = f"multipart/form-data; boundary={boundary}"
return body.getvalue(), content_type

View File

@ -0,0 +1,638 @@
from __future__ import annotations
import functools
import logging
import typing
import warnings
from types import TracebackType
from urllib.parse import urljoin
from ._collections import HTTPHeaderDict, RecentlyUsedContainer
from ._request_methods import RequestMethods
from .connection import ProxyConfig
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme
from .exceptions import (
LocationValueError,
MaxRetryError,
ProxySchemeUnknown,
URLSchemeUnknown,
)
from .response import BaseHTTPResponse
from .util.connection import _TYPE_SOCKET_OPTIONS
from .util.proxy import connection_requires_http_tunnel
from .util.retry import Retry
from .util.timeout import Timeout
from .util.url import Url, parse_url
if typing.TYPE_CHECKING:
import ssl
from typing import Literal
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]
log = logging.getLogger(__name__)
SSL_KEYWORDS = (
"key_file",
"cert_file",
"cert_reqs",
"ca_certs",
"ca_cert_data",
"ssl_version",
"ssl_minimum_version",
"ssl_maximum_version",
"ca_cert_dir",
"ssl_context",
"key_password",
"server_hostname",
)
# Default value for `blocksize` - a new parameter introduced to
# http.client.HTTPConnection & http.client.HTTPSConnection in Python 3.7
_DEFAULT_BLOCKSIZE = 16384
_SelfT = typing.TypeVar("_SelfT")
class PoolKey(typing.NamedTuple):
"""
All known keyword arguments that could be provided to the pool manager, its
pools, or the underlying connections.
All custom key schemes should include the fields in this key at a minimum.
"""
key_scheme: str
key_host: str
key_port: int | None
key_timeout: Timeout | float | int | None
key_retries: Retry | bool | int | None
key_block: bool | None
key_source_address: tuple[str, int] | None
key_key_file: str | None
key_key_password: str | None
key_cert_file: str | None
key_cert_reqs: str | None
key_ca_certs: str | None
key_ca_cert_data: str | bytes | None
key_ssl_version: int | str | None
key_ssl_minimum_version: ssl.TLSVersion | None
key_ssl_maximum_version: ssl.TLSVersion | None
key_ca_cert_dir: str | None
key_ssl_context: ssl.SSLContext | None
key_maxsize: int | None
key_headers: frozenset[tuple[str, str]] | None
key__proxy: Url | None
key__proxy_headers: frozenset[tuple[str, str]] | None
key__proxy_config: ProxyConfig | None
key_socket_options: _TYPE_SOCKET_OPTIONS | None
key__socks_options: frozenset[tuple[str, str]] | None
key_assert_hostname: bool | str | None
key_assert_fingerprint: str | None
key_server_hostname: str | None
key_blocksize: int | None
def _default_key_normalizer(
key_class: type[PoolKey], request_context: dict[str, typing.Any]
) -> PoolKey:
"""
Create a pool key out of a request context dictionary.
According to RFC 3986, both the scheme and host are case-insensitive.
Therefore, this function normalizes both before constructing the pool
key for an HTTPS request. If you wish to change this behaviour, provide
alternate callables to ``key_fn_by_scheme``.
:param key_class:
The class to use when constructing the key. This should be a namedtuple
with the ``scheme`` and ``host`` keys at a minimum.
:type key_class: namedtuple
:param request_context:
A dictionary-like object that contain the context for a request.
:type request_context: dict
:return: A namedtuple that can be used as a connection pool key.
:rtype: PoolKey
"""
# Since we mutate the dictionary, make a copy first
context = request_context.copy()
context["scheme"] = context["scheme"].lower()
context["host"] = context["host"].lower()
# These are both dictionaries and need to be transformed into frozensets
for key in ("headers", "_proxy_headers", "_socks_options"):
if key in context and context[key] is not None:
context[key] = frozenset(context[key].items())
# The socket_options key may be a list and needs to be transformed into a
# tuple.
socket_opts = context.get("socket_options")
if socket_opts is not None:
context["socket_options"] = tuple(socket_opts)
# Map the kwargs to the names in the namedtuple - this is necessary since
# namedtuples can't have fields starting with '_'.
for key in list(context.keys()):
context["key_" + key] = context.pop(key)
# Default to ``None`` for keys missing from the context
for field in key_class._fields:
if field not in context:
context[field] = None
# Default key_blocksize to _DEFAULT_BLOCKSIZE if missing from the context
if context.get("key_blocksize") is None:
context["key_blocksize"] = _DEFAULT_BLOCKSIZE
return key_class(**context)
#: A dictionary that maps a scheme to a callable that creates a pool key.
#: This can be used to alter the way pool keys are constructed, if desired.
#: Each PoolManager makes a copy of this dictionary so they can be configured
#: globally here, or individually on the instance.
key_fn_by_scheme = {
"http": functools.partial(_default_key_normalizer, PoolKey),
"https": functools.partial(_default_key_normalizer, PoolKey),
}
pool_classes_by_scheme = {"http": HTTPConnectionPool, "https": HTTPSConnectionPool}
class PoolManager(RequestMethods):
"""
Allows for arbitrary requests while transparently keeping track of
necessary connection pools for you.
:param num_pools:
Number of connection pools to cache before discarding the least
recently used pool.
:param headers:
Headers to include with all requests, unless other headers are given
explicitly.
:param \\**connection_pool_kw:
Additional parameters are used to create fresh
:class:`urllib3.connectionpool.ConnectionPool` instances.
Example:
.. code-block:: python
import urllib3
http = urllib3.PoolManager(num_pools=2)
resp1 = http.request("GET", "https://google.com/")
resp2 = http.request("GET", "https://google.com/mail")
resp3 = http.request("GET", "https://yahoo.com/")
print(len(http.pools))
# 2
"""
proxy: Url | None = None
proxy_config: ProxyConfig | None = None
def __init__(
self,
num_pools: int = 10,
headers: typing.Mapping[str, str] | None = None,
**connection_pool_kw: typing.Any,
) -> None:
super().__init__(headers)
self.connection_pool_kw = connection_pool_kw
self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPool]
self.pools = RecentlyUsedContainer(num_pools)
# Locally set the pool classes and keys so other PoolManagers can
# override them.
self.pool_classes_by_scheme = pool_classes_by_scheme
self.key_fn_by_scheme = key_fn_by_scheme.copy()
def __enter__(self: _SelfT) -> _SelfT:
return self
def __exit__(
self,
exc_type: type[BaseException] | None,
exc_val: BaseException | None,
exc_tb: TracebackType | None,
) -> Literal[False]:
self.clear()
# Return False to re-raise any potential exceptions
return False
def _new_pool(
self,
scheme: str,
host: str,
port: int,
request_context: dict[str, typing.Any] | None = None,
) -> HTTPConnectionPool:
"""
Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
any additional pool keyword arguments.
If ``request_context`` is provided, it is provided as keyword arguments
to the pool class used. This method is used to actually create the
connection pools handed out by :meth:`connection_from_url` and
companion methods. It is intended to be overridden for customization.
"""
pool_cls: type[HTTPConnectionPool] = self.pool_classes_by_scheme[scheme]
if request_context is None:
request_context = self.connection_pool_kw.copy()
# Default blocksize to _DEFAULT_BLOCKSIZE if missing or explicitly
# set to 'None' in the request_context.
if request_context.get("blocksize") is None:
request_context["blocksize"] = _DEFAULT_BLOCKSIZE
# Although the context has everything necessary to create the pool,
# this function has historically only used the scheme, host, and port
# in the positional args. When an API change is acceptable these can
# be removed.
for key in ("scheme", "host", "port"):
request_context.pop(key, None)
if scheme == "http":
for kw in SSL_KEYWORDS:
request_context.pop(kw, None)
return pool_cls(host, port, **request_context)
def clear(self) -> None:
"""
Empty our store of pools and direct them all to close.
This will not affect in-flight connections, but they will not be
re-used after completion.
"""
self.pools.clear()
def connection_from_host(
self,
host: str | None,
port: int | None = None,
scheme: str | None = "http",
pool_kwargs: dict[str, typing.Any] | None = None,
) -> HTTPConnectionPool:
"""
Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.
If ``port`` isn't given, it will be derived from the ``scheme`` using
``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
provided, it is merged with the instance's ``connection_pool_kw``
variable and used to create the new connection pool, if one is
needed.
"""
if not host:
raise LocationValueError("No host specified.")
request_context = self._merge_pool_kwargs(pool_kwargs)
request_context["scheme"] = scheme or "http"
if not port:
port = port_by_scheme.get(request_context["scheme"].lower(), 80)
request_context["port"] = port
request_context["host"] = host
return self.connection_from_context(request_context)
def connection_from_context(
self, request_context: dict[str, typing.Any]
) -> HTTPConnectionPool:
"""
Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.
``request_context`` must at least contain the ``scheme`` key and its
value must be a key in ``key_fn_by_scheme`` instance variable.
"""
if "strict" in request_context:
warnings.warn(
"The 'strict' parameter is no longer needed on Python 3+. "
"This will raise an error in urllib3 v2.1.0.",
DeprecationWarning,
)
request_context.pop("strict")
scheme = request_context["scheme"].lower()
pool_key_constructor = self.key_fn_by_scheme.get(scheme)
if not pool_key_constructor:
raise URLSchemeUnknown(scheme)
pool_key = pool_key_constructor(request_context)
return self.connection_from_pool_key(pool_key, request_context=request_context)
def connection_from_pool_key(
self, pool_key: PoolKey, request_context: dict[str, typing.Any]
) -> HTTPConnectionPool:
"""
Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.
``pool_key`` should be a namedtuple that only contains immutable
objects. At a minimum it must have the ``scheme``, ``host``, and
``port`` fields.
"""
with self.pools.lock:
# If the scheme, host, or port doesn't match existing open
# connections, open a new ConnectionPool.
pool = self.pools.get(pool_key)
if pool:
return pool
# Make a fresh ConnectionPool of the desired type
scheme = request_context["scheme"]
host = request_context["host"]
port = request_context["port"]
pool = self._new_pool(scheme, host, port, request_context=request_context)
self.pools[pool_key] = pool
return pool
def connection_from_url(
self, url: str, pool_kwargs: dict[str, typing.Any] | None = None
) -> HTTPConnectionPool:
"""
Similar to :func:`urllib3.connectionpool.connection_from_url`.
If ``pool_kwargs`` is not provided and a new pool needs to be
constructed, ``self.connection_pool_kw`` is used to initialize
the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
is provided, it is used instead. Note that if a new pool does not
need to be created for the request, the provided ``pool_kwargs`` are
not used.
"""
u = parse_url(url)
return self.connection_from_host(
u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
)
def _merge_pool_kwargs(
self, override: dict[str, typing.Any] | None
) -> dict[str, typing.Any]:
"""
Merge a dictionary of override values for self.connection_pool_kw.
This does not modify self.connection_pool_kw and returns a new dict.
Any keys in the override dictionary with a value of ``None`` are
removed from the merged dictionary.
"""
base_pool_kwargs = self.connection_pool_kw.copy()
if override:
for key, value in override.items():
if value is None:
try:
del base_pool_kwargs[key]
except KeyError:
pass
else:
base_pool_kwargs[key] = value
return base_pool_kwargs
def _proxy_requires_url_absolute_form(self, parsed_url: Url) -> bool:
"""
Indicates if the proxy requires the complete destination URL in the
request. Normally this is only needed when not using an HTTP CONNECT
tunnel.
"""
if self.proxy is None:
return False
return not connection_requires_http_tunnel(
self.proxy, self.proxy_config, parsed_url.scheme
)
def urlopen( # type: ignore[override]
self, method: str, url: str, redirect: bool = True, **kw: typing.Any
) -> BaseHTTPResponse:
"""
Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
with custom cross-host redirect logic and only sends the request-uri
portion of the ``url``.
The given ``url`` parameter must be absolute, such that an appropriate
:class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
"""
u = parse_url(url)
if u.scheme is None:
warnings.warn(
"URLs without a scheme (ie 'https://') are deprecated and will raise an error "
"in a future version of urllib3. To avoid this DeprecationWarning ensure all URLs "
"start with 'https://' or 'http://'. Read more in this issue: "
"https://github.com/urllib3/urllib3/issues/2920",
category=DeprecationWarning,
stacklevel=2,
)
conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)
kw["assert_same_host"] = False
kw["redirect"] = False
if "headers" not in kw:
kw["headers"] = self.headers
if self._proxy_requires_url_absolute_form(u):
response = conn.urlopen(method, url, **kw)
else:
response = conn.urlopen(method, u.request_uri, **kw)
redirect_location = redirect and response.get_redirect_location()
if not redirect_location:
return response
# Support relative URLs for redirecting.
redirect_location = urljoin(url, redirect_location)
if response.status == 303:
# Change the method according to RFC 9110, Section 15.4.4.
method = "GET"
# And lose the body not to transfer anything sensitive.
kw["body"] = None
kw["headers"] = HTTPHeaderDict(kw["headers"])._prepare_for_method_change()
retries = kw.get("retries")
if not isinstance(retries, Retry):
retries = Retry.from_int(retries, redirect=redirect)
# Strip headers marked as unsafe to forward to the redirected location.
# Check remove_headers_on_redirect to avoid a potential network call within
# conn.is_same_host() which may use socket.gethostbyname() in the future.
if retries.remove_headers_on_redirect and not conn.is_same_host(
redirect_location
):
new_headers = kw["headers"].copy()
for header in kw["headers"]:
if header.lower() in retries.remove_headers_on_redirect:
new_headers.pop(header, None)
kw["headers"] = new_headers
try:
retries = retries.increment(method, url, response=response, _pool=conn)
except MaxRetryError:
if retries.raise_on_redirect:
response.drain_conn()
raise
return response
kw["retries"] = retries
kw["redirect"] = redirect
log.info("Redirecting %s -> %s", url, redirect_location)
response.drain_conn()
return self.urlopen(method, redirect_location, **kw)
class ProxyManager(PoolManager):
"""
Behaves just like :class:`PoolManager`, but sends all requests through
the defined proxy, using the CONNECT method for HTTPS URLs.
:param proxy_url:
The URL of the proxy to be used.
:param proxy_headers:
A dictionary containing headers that will be sent to the proxy. In case
of HTTP they are being sent with each request, while in the
HTTPS/CONNECT case they are sent only once. Could be used for proxy
authentication.
:param proxy_ssl_context:
The proxy SSL context is used to establish the TLS connection to the
proxy when using HTTPS proxies.
:param use_forwarding_for_https:
(Defaults to False) If set to True will forward requests to the HTTPS
proxy to be made on behalf of the client instead of creating a TLS
tunnel via the CONNECT method. **Enabling this flag means that request
and response headers and content will be visible from the HTTPS proxy**
whereas tunneling keeps request and response headers and content
private. IP address, target hostname, SNI, and port are always visible
to an HTTPS proxy even when this flag is disabled.
:param proxy_assert_hostname:
The hostname of the certificate to verify against.
:param proxy_assert_fingerprint:
The fingerprint of the certificate to verify against.
Example:
.. code-block:: python
import urllib3
proxy = urllib3.ProxyManager("https://localhost:3128/")
resp1 = proxy.request("GET", "https://google.com/")
resp2 = proxy.request("GET", "https://httpbin.org/")
print(len(proxy.pools))
# 1
resp3 = proxy.request("GET", "https://httpbin.org/")
resp4 = proxy.request("GET", "https://twitter.com/")
print(len(proxy.pools))
# 3
"""
def __init__(
self,
proxy_url: str,
num_pools: int = 10,
headers: typing.Mapping[str, str] | None = None,
proxy_headers: typing.Mapping[str, str] | None = None,
proxy_ssl_context: ssl.SSLContext | None = None,
use_forwarding_for_https: bool = False,
proxy_assert_hostname: None | str | Literal[False] = None,
proxy_assert_fingerprint: str | None = None,
**connection_pool_kw: typing.Any,
) -> None:
if isinstance(proxy_url, HTTPConnectionPool):
str_proxy_url = f"{proxy_url.scheme}://{proxy_url.host}:{proxy_url.port}"
else:
str_proxy_url = proxy_url
proxy = parse_url(str_proxy_url)
if proxy.scheme not in ("http", "https"):
raise ProxySchemeUnknown(proxy.scheme)
if not proxy.port:
port = port_by_scheme.get(proxy.scheme, 80)
proxy = proxy._replace(port=port)
self.proxy = proxy
self.proxy_headers = proxy_headers or {}
self.proxy_ssl_context = proxy_ssl_context
self.proxy_config = ProxyConfig(
proxy_ssl_context,
use_forwarding_for_https,
proxy_assert_hostname,
proxy_assert_fingerprint,
)
connection_pool_kw["_proxy"] = self.proxy
connection_pool_kw["_proxy_headers"] = self.proxy_headers
connection_pool_kw["_proxy_config"] = self.proxy_config
super().__init__(num_pools, headers, **connection_pool_kw)
def connection_from_host(
self,
host: str | None,
port: int | None = None,
scheme: str | None = "http",
pool_kwargs: dict[str, typing.Any] | None = None,
) -> HTTPConnectionPool:
if scheme == "https":
return super().connection_from_host(
host, port, scheme, pool_kwargs=pool_kwargs
)
return super().connection_from_host(
self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs # type: ignore[union-attr]
)
def _set_proxy_headers(
self, url: str, headers: typing.Mapping[str, str] | None = None
) -> typing.Mapping[str, str]:
"""
Sets headers needed by proxies: specifically, the Accept and Host
headers. Only sets headers not provided by the user.
"""
headers_ = {"Accept": "*/*"}
netloc = parse_url(url).netloc
if netloc:
headers_["Host"] = netloc
if headers:
headers_.update(headers)
return headers_
def urlopen( # type: ignore[override]
self, method: str, url: str, redirect: bool = True, **kw: typing.Any
) -> BaseHTTPResponse:
"Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
u = parse_url(url)
if not connection_requires_http_tunnel(self.proxy, self.proxy_config, u.scheme):
# For connections using HTTP CONNECT, httplib sets the necessary
# headers on the CONNECT to the proxy. If we're not using CONNECT,
# we'll definitely need to set 'Host' at the very least.
headers = kw.get("headers", self.headers)
kw["headers"] = self._set_proxy_headers(url, headers)
return super().urlopen(method, url, redirect=redirect, **kw)
def proxy_from_url(url: str, **kw: typing.Any) -> ProxyManager:
return ProxyManager(proxy_url=url, **kw)

2
Modules/urllib3/py.typed Normal file
View File

@ -0,0 +1,2 @@
# Instruct type checkers to look for inline type annotations in this package.
# See PEP 561.

1130
Modules/urllib3/response.py Normal file

File diff suppressed because it is too large Load Diff