Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Reading CONNECT headers

I'm using a proxy service (proxymesh) that puts useful information into the headers sent in response to a CONNECT request. For whatever reason, Python's httplib doesn't parse them:

> CONNECT example.com:443 HTTP/1.1
> Host: example.com:443
>
< HTTP/1.1 200 Connection established
< X-Useful-Header: value  # completely ignored
<

The requests module uses httplib internally, so it ignores them as well. How do I extract headers from a CONNECT request?

like image 452
Blender Avatar asked Sep 19 '25 21:09

Blender


1 Answer

Python's httplib actually ignores these headers when creating the tunnel. It's hacky, but you can intercept them and merge the "header" lines with the actual HTTP response's headers:

import socket
import httplib
import requests

from requests.packages.urllib3.connection import HTTPSConnection
from requests.packages.urllib3.connectionpool import HTTPSConnectionPool
from requests.packages.urllib3.poolmanager import ProxyManager

from requests.adapters import HTTPAdapter


class ProxyHeaderHTTPSConnection(HTTPSConnection):
    """HTTPS connection that keeps the headers a proxy sends after CONNECT.

    The header lines of the proxy's reply to the CONNECT request are
    captured in ``_tunnel`` and appended to the header list of every
    response later returned by ``getresponse``.
    """

    def __init__(self, *args, **kwargs):
        super(ProxyHeaderHTTPSConnection, self).__init__(*args, **kwargs)
        # Raw header lines (terminator included) from the latest CONNECT reply.
        self._proxy_headers = []

    def _tunnel(self):
        """Send the CONNECT request and record the proxy's reply headers.

        Raises:
            socket.error: the proxy answered with HTTP/0.9 or with a status
                code other than 200; the connection is closed first.
            httplib.LineTooLong: a reply header line is longer than
                ``httplib._MAXLINE``.
        """
        self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host, self._tunnel_port))

        for header, value in self._tunnel_headers.iteritems():
            self.send("%s: %s\r\n" % (header, value))

        self.send("\r\n")

        response = self.response_class(self.sock, strict=self.strict, method=self._method)
        version, code, message = response._read_status()

        if version == "HTTP/0.9":
            # HTTP/0.9 doesn't support the CONNECT verb, so if httplib has
            # concluded HTTP/0.9 is being used something has gone wrong.
            self.close()
            raise socket.error("Invalid response from tunnel request")

        if code != 200:
            self.close()
            raise socket.error("Tunnel connection failed: %d %s" % (code, message.strip()))

        # Start from a clean slate so lines from an earlier tunnel are dropped.
        self._proxy_headers = []

        while True:
            # Read one byte past the limit so an over-long line is detectable.
            line = response.fp.readline(httplib._MAXLINE + 1)

            if len(line) > httplib._MAXLINE:
                # The exception class lives in httplib; the bare name
                # ``LineTooLong`` is not defined in this module.
                raise httplib.LineTooLong("header line")

            # EOF or the blank line ends the proxy's header section.
            if not line or line == '\r\n':
                break

            # The line is a header, save it
            if ':' in line:
                self._proxy_headers.append(line)

    def getresponse(self, buffering=False):
        """Return the parent's response with the proxy header lines appended.

        The saved CONNECT reply lines are added to ``response.msg.headers``,
        the message's list of raw header lines.
        """
        response = super(ProxyHeaderHTTPSConnection, self).getresponse(buffering)
        response.msg.headers.extend(self._proxy_headers)

        return response


class ProxyHeaderHTTPSConnectionPool(HTTPSConnectionPool):
    """HTTPS connection pool whose ``ConnectionCls`` is ProxyHeaderHTTPSConnection."""
    ConnectionCls = ProxyHeaderHTTPSConnection


class ProxyHeaderProxyManager(ProxyManager):
    """Proxy manager that builds ProxyHeaderHTTPSConnectionPool pools."""

    def _new_pool(self, scheme, host, port):
        """Create a pool for ``host:port``; only the 'https' scheme is accepted."""
        assert scheme == 'https'

        pool_kwargs = self.connection_pool_kw
        return ProxyHeaderHTTPSConnectionPool(host, port, **pool_kwargs)


class ProxyHeaderHTTPAdapter(HTTPAdapter):
    """HTTP adapter whose proxy managers are ProxyHeaderProxyManager instances."""

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        """Return the manager stored for ``proxy``, creating it on first use.

        A newly created manager is stored in ``self.proxy_manager`` under
        ``proxy`` before it is returned, so later calls reuse it.
        """
        # Fast path: a manager for this proxy URL already exists.
        if proxy in self.proxy_manager:
            return self.proxy_manager[proxy]

        headers_for_proxy = self.proxy_headers(proxy)
        created = ProxyHeaderProxyManager(
            proxy_url=proxy,
            proxy_headers=headers_for_proxy,
            num_pools=self._pool_connections,
            maxsize=self._pool_maxsize,
            block=self._pool_block,
            **proxy_kwargs)
        self.proxy_manager[proxy] = created

        return created

You can then install the adapter onto a session:

# Mount the adapter so this session's https:// requests go through it.
session = requests.Session()
session.mount('https://', ProxyHeaderHTTPAdapter())

# `{...}` stands in for the proxies mapping, which is elided here.
response = session.get('https://example.com', proxies={...})

The proxy's headers will be merged in with the response headers, so it should behave as if the proxy modified the response headers directly.

like image 151
Blender Avatar answered Sep 21 '25 13:09

Blender