[ENG-1405] Try generating HAR logs in a Python SDK #373

Open · wants to merge 1 commit into main
@@ -15,6 +15,8 @@
import multiprocessing
import sys
import urllib3
import tempfile
import os


import re
@@ -129,6 +131,11 @@ def __init__(self, host=None,
self.temp_folder_path = None
"""Temp file folder for downloading files
"""
self.har_log_file_path = os.path.join(os.path.expanduser("~"), '.python-pydantic', 'log.har')
print(self.har_log_file_path)
"""Temp file path for storing files in HAR format
Has a default value set to OS's temp folder path.
"""
# Authentication Settings
self.api_key = {}
if api_key:
@@ -16,6 +16,10 @@
from urllib.parse import urlencode
import typing
import aiohttp
import json
import os
from typing import Any, Dict, List
from urllib.parse import urlparse, parse_qsl

import certifi
import urllib3
@@ -41,6 +45,8 @@ def __init__(self, http_response: aiohttp.ClientResponse, round_trip_time: float
class RESTClientObject(object):

def __init__(self, configuration, pools_size=4, maxsize=None):
        self.configuration = configuration  # kept so request() can resolve har_log_file_path

# urllib3.PoolManager will pass all kw parameters to connectionpool
# https://github.com/shazow/urllib3/blob/f9409436f83aeb79fbaf090181cd81b784f1b8ce/urllib3/poolmanager.py#L75
# https://github.com/shazow/urllib3/blob/f9409436f83aeb79fbaf090181cd81b784f1b8ce/urllib3/connectionpool.py#L680
@@ -213,6 +219,77 @@ def request(

t2 = time.time()

# compute queryString property from url for HAR entry
query_string = compute_query_string(url)

        # compute postData property for HAR entry
        post_data = {}
        if method == 'POST':
            content_type = headers.get('Content-Type', '')
            if content_type in ('application/x-www-form-urlencoded', 'multipart/form-data'):
                # form fields are recorded as a list of name/value params
                post_data = [
                    {
                        'name': k,
                        'value': v
                    }
                    for k, v in fields
                ]
            else:
                post_data = body

base_url = url.split('?')[0]

har_entry = create_har_entry(
url=base_url,
method=method,
request_headers=headers,
response_headers=r.headers,
request_cookies=[],
response_cookies=[],
query_string=query_string,
post_data=post_data,
            response_body=r.data.decode('utf-8', errors='replace'),  # tolerate binary payloads
status=r.status,
status_text=r.reason,
started_date_time=time.strftime("%Y-%m-%dT%H:%M:%S.000Z", time.gmtime(t1)),
time_send=0,
time_wait=(t2 - t1) * 1000,
time_receive=0
)
print(har_entry)

# Append HAR entry to existing temporary file
har_file_path = self.configuration.har_log_file_path

# Check if the file exists and read the existing content
existing_har_entries = {}
if os.path.exists(har_file_path):
with open(har_file_path, 'r') as file:
try:
existing_har_entries = json.load(file)
except json.JSONDecodeError:
# Handle the case where the file is empty or contains invalid JSON
existing_har_entries = {}

# Merge existing entries with the new entry
merged_har = merge_har_entries([existing_har_entries, har_entry])

        # Create the directory for the HAR log if it doesn't exist;
        # the file itself is created by the write below
        os.makedirs(os.path.dirname(har_file_path), exist_ok=True)

# Write merged entries to the file
with open(har_file_path, 'w') as file:
json.dump(merged_har, file, indent=4)
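        # Note: the whole HAR document is rewritten on every request, so entries
        # accumulate in har_log_file_path across calls.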

return ResponseWrapper(r, t2 - t1)

def GET(self, url, headers=None, stream=False,
@@ -269,4 +346,181 @@ def PATCH(self, url, headers=None,
headers=headers,
stream=stream,
timeout=timeout,
body=body, fields=fields)


# compute queryString property from url for HAR entry
def compute_query_string(url: str) -> List[Dict[str, str]]:
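    """Parses the query portion of a URL into HAR queryString name/value pairs.

    Example (illustrative URL):
        compute_query_string("https://api.example.com/items?limit=5&sort=asc")
        # -> [{'name': 'limit', 'value': '5'}, {'name': 'sort', 'value': 'asc'}]
    """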
query_string = []
parsed_url = urlparse(url)
query_params = parse_qsl(parsed_url.query)

for k, v in query_params:
query_string.append({
'name': k,
'value': v
})

return query_string


def compute_size(items: Dict[str, str]) -> int:
"""
Computes the size of headers or POST data based on their content.
:param items: A dictionary of headers or POST data.
:return: The computed size in bytes.
"""
return sum(len(key) + len(value) + 4 for key, value in items.items()) # 4 bytes for ': ' and '\r\n'
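# For reference (illustrative values): compute_size({"Content-Type": "application/json"})
# returns len("Content-Type") + len("application/json") + 4 == 32.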


def create_har_entry(url: str,
method: str,
request_headers: Dict[str, str],
response_headers: Dict[str, str],
request_cookies: List[Dict[str, str]],
response_cookies: List[Dict[str, str]],
query_string: List[Dict[str, str]],
post_data: Dict[str, Any],
response_body: str,
status: int,
status_text: str,
started_date_time: str,
time_send: float,
time_wait: float,
time_receive: float) -> Dict[str, Any]:
"""
Creates a HAR entry with detailed information.

Parameters:
url (str): URL of the request.
method (str): HTTP method (e.g., 'GET', 'POST').
request_headers (Dict[str, str]): Dictionary of request headers.
response_headers (Dict[str, str]): Dictionary of response headers.
request_cookies (List[Dict[str, str]]): List of request cookies.
response_cookies (List[Dict[str, str]]): List of response cookies.
query_string (List[Dict[str, str]]): Query string parameters.
post_data (Dict[str, Any]): POST data in case of POST requests.
response_body (str): Response body as a string.
status (int): HTTP response status code.
status_text (str): HTTP response status text.
started_date_time (str): The date and time stamp for the beginning of the request.
time_send (float): Time spent sending the request in milliseconds.
time_wait (float): Time spent waiting for a response in milliseconds.
time_receive (float): Time spent receiving the response in milliseconds.

Returns:
Dict[str, Any]: HAR entry as a dictionary.

Example Usage:
har_json = create_har_entry(
url="https://www.example.com",
method="GET",
request_headers={"User-Agent": "MyBrowser"},
response_headers={"Content-Type": "text/html"},
request_cookies=[{"name": "session", "value": "12345"}],
response_cookies=[{"name": "session", "value": "12345"}],
query_string=[{"name": "param", "value": "value"}],
post_data={}, # Add POST data if method is POST
response_body="<html></html>",
status=200,
status_text="OK",
started_date_time=datetime.utcnow().isoformat() + "Z",
time_send=20.0,
time_wait=30.0,
time_receive=10.0
)

# Writing to a file
with open('har_file.json', 'w') as file:
json.dump(har_json, file, indent=4)
"""

    request_headers_size = compute_size(request_headers)
    response_headers_size = compute_size(response_headers)
    # post_data may be a dict, a list of name/value params, or a raw body,
    # so only size it when it is a dict that compute_size can handle
    post_data_size = compute_size(post_data) if method == "POST" and isinstance(post_data, dict) else 0

har_entry = {
"log": {
"version": "1.2",
"creator": {
"name": "Konfig",
"version": "1.0"
},
"entries": [{
"startedDateTime": started_date_time,
"time": time_send + time_wait + time_receive,
"request": {
"method": method,
"url": url,
"headers": [{"name": k, "value": v} for k, v in request_headers.items()],
"queryString": query_string,
"cookies": request_cookies,
"headersSize": request_headers_size,
"bodySize": post_data_size,
"postData": post_data if method == "POST" else None
},
"response": {
"status": status,
"statusText": status_text,
"headers": [{"name": k, "value": v} for k, v in response_headers.items()],
"cookies": response_cookies,
"content": {
"size": len(response_body),
"mimeType": response_headers.get("Content-Type", "text/plain"),
"text": response_body
},
"redirectURL": "",
"headersSize": response_headers_size,
"bodySize": len(response_body)
},
"cache": {},
"timings": {
"send": time_send,
"wait": time_wait,
"receive": time_receive
},
}]
}
}

return har_entry


def merge_har_entries(har_entries: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Merges multiple HAR entries into a single HAR log.

:param har_entries: A list of HAR entries.
:return: A HAR log containing all the provided entries.

Example Usage:
har_entry1 = create_har_entry(...)
har_entry2 = create_har_entry(...)
merged_har = merge_har_entries([har_entry1, har_entry2])

# Writing the merged HAR to a file
with open('merged_har_file.json', 'w') as file:
json.dump(merged_har, file, indent=4)
"""
if not har_entries:
return {"log": {"version": "1.2", "creator": {"name": "Konfig", "version": "1.0"}, "entries": []}}

# Find the first valid entry to use as a template for the merged HAR
template_entry = next((entry for entry in har_entries if "log" in entry and "entries" in entry["log"]), None)
if not template_entry:
# If no valid entry is found, return an empty HAR structure
return {"log": {"version": "1.2", "creator": {"name": "Konfig", "version": "1.0"}, "entries": []}}

merged_har = {
"log": {
"version": template_entry["log"]["version"],
"creator": template_entry["log"]["creator"],
"entries": []
}
}

for entry in har_entries:
if "log" in entry and "entries" in entry["log"]:
merged_har["log"]["entries"].extend(entry["log"]["entries"])

return merged_har
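
A minimal sketch (assumed usage, not part of this diff) of reading back the accumulated log; the path mirrors the default har_log_file_path set in the configuration above:

import json
import os

har_path = os.path.join(os.path.expanduser("~"), '.python-pydantic', 'log.har')
if os.path.exists(har_path):
    with open(har_path, 'r') as f:
        har = json.load(f)
    # each request made through RESTClientObject.request appends one entry
    for entry in har["log"]["entries"]:
        request = entry["request"]
        response = entry["response"]
        print("%s %s -> %d (%.1f ms)" % (
            request["method"], request["url"], response["status"], entry["time"]))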