288 lines
11 KiB
Plaintext
288 lines
11 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import seaborn as sns\n",
|
|
"import plotly.express as px\n",
|
|
"import plotly.graph_objects as pgo\n",
|
|
"import scipy as sp\n",
|
|
"from pymongo import MongoClient\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"client = MongoClient(\"mongodb://stats_user:%40z%5EVFhN7q%25vzit@192.168.86.120:27017/?authSource=statistics\")\n",
|
|
"db = client.statistics\n",
|
|
"\n",
|
|
"formatted_date = lambda date: {\n",
|
|
" \"unix\": {\"$toLong\": date},\n",
|
|
" \"iso\": {\"$toString\": date},\n",
|
|
"}\n",
|
|
"\n",
|
|
"pipeline = [\n",
|
|
" {\"$sort\": {\"timestamp\": 1}},\n",
|
|
" {\n",
|
|
" \"$group\": {\n",
|
|
" \"_id\": \"$tags.session\",\n",
|
|
" \"host\": {\"$first\": \"$tags.host\"},\n",
|
|
" \"firstTimestamp\": {\"$first\": \"$timestamp\"},\n",
|
|
" \"lastTimestamp\": {\"$last\": \"$timestamp\"},\n",
|
|
" \"firstTimestampWithPeers\": {\n",
|
|
" \"$min\": {\n",
|
|
" \"$cond\": {\n",
|
|
" \"if\": {\"$gt\": [{\"$size\": {\"$ifNull\": [\"$peers\", []]}}, 0]},\n",
|
|
" \"then\": \"$timestamp\",\n",
|
|
" \"else\": None,\n",
|
|
" }\n",
|
|
" }\n",
|
|
" },\n",
|
|
" \"maxNumberOfPeers\": {\n",
|
|
" \"$max\": {\"$size\": {\"$ifNull\": [\"$peers\", []]}},\n",
|
|
" },\n",
|
|
" \"minNumberOfPeers\": {\n",
|
|
" \"$min\": {\"$size\": {\"$ifNull\": [\"$peers\", []]}},\n",
|
|
" },\n",
|
|
" }\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"$lookup\": {\n",
|
|
" \"from\": \"peertube_ts\",\n",
|
|
" \"let\": {\"currentSession\": \"$_id\", \"ftp\": \"$firstTimestampWithPeers\", \"fst\": \"$firstTimestamp\"},\n",
|
|
" \"pipeline\": [\n",
|
|
" {\n",
|
|
" \"$match\": {\n",
|
|
" \"$expr\": {\n",
|
|
" \"$and\": [\n",
|
|
" {\"$ne\": [\"$tags.session\", \"$$currentSession\"]},\n",
|
|
" {\"$lt\": [\"$timestamp\", \"$$ftp\"]},\n",
|
|
" {\"$gte\": [\"$timestamp\", \"$$fst\"]},\n",
|
|
" {\"$gt\": [{\"$size\": {\"$ifNull\": [\"$peers\", []]}}, 0]},\n",
|
|
" ]\n",
|
|
" }\n",
|
|
" }\n",
|
|
" }\n",
|
|
" ],\n",
|
|
" \"as\": \"concurrentSessions\",\n",
|
|
" }\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"$addFields\": {\n",
|
|
" \"concurrentSessions\": {\"$gt\": [{\"$size\": \"$concurrentSessions\"}, 0]}\n",
|
|
" }\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"$group\": {\n",
|
|
" \"_id\": \"$host\",\n",
|
|
" \"sessions\": {\n",
|
|
" \"$push\": {\n",
|
|
" \"id\": \"$_id\",\n",
|
|
" \"startTime\": formatted_date(\"$firstTimestamp\"),\n",
|
|
" \"endTime\": formatted_date(\"$lastTimestamp\"),\n",
|
|
" \"duration\": {\n",
|
|
" \"$divide\": [\n",
|
|
" {\"$subtract\": [\"$lastTimestamp\", \"$firstTimestamp\"]},\n",
|
|
" 1000,\n",
|
|
" ]\n",
|
|
" },\n",
|
|
" \"firstPeerConnection\": {\n",
|
|
" \"$cond\": {\n",
|
|
" \"if\": {\"$eq\": [\"$firstTimestampWithPeers\", None]},\n",
|
|
" \"then\": None,\n",
|
|
" \"else\": {\n",
|
|
" \"time\": {\n",
|
|
" \"date\": formatted_date(\"$firstTimestampWithPeers\"),\n",
|
|
" \"elapsedFromStart\": {\n",
|
|
" \"$divide\": [\n",
|
|
" {\"$subtract\": [\"$firstTimestampWithPeers\", \"$firstTimestamp\"]},\n",
|
|
" 1000,\n",
|
|
" ]\n",
|
|
" }\n",
|
|
" },\n",
|
|
" \"concurrentSessions\": \"$concurrentSessions\",\n",
|
|
" }\n",
|
|
" }\n",
|
|
" },\n",
|
|
" \"maxPeers\": {\"$max\": \"$maxNumberOfPeers\"},\n",
|
|
" \"minPeers\": {\"$min\": \"$minNumberOfPeers\"},\n",
|
|
" }\n",
|
|
" }\n",
|
|
" }\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"$set\": {\n",
|
|
" \"sessions\": {\n",
|
|
" \"$sortArray\": {\n",
|
|
" \"input\": \"$sessions\",\n",
|
|
" \"sortBy\": {\"id\": 1},\n",
|
|
" }\n",
|
|
" }\n",
|
|
" }\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"$project\": {\n",
|
|
" \"_id\": 0,\n",
|
|
" \"host\": \"$_id\",\n",
|
|
" \"sessions\": \"$sessions\",\n",
|
|
" }\n",
|
|
" },\n",
|
|
" {\"$sort\": {\"host\": 1}},\n",
|
|
"]\n",
|
|
"\n",
|
|
"result = db.peertube_ts.aggregate(pipeline)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Extract data from the result cursor\n",
|
|
"data = []\n",
|
|
"for host in result:\n",
|
|
" for session in host['sessions']:\n",
|
|
" if session['firstPeerConnection'] and session['firstPeerConnection']['time']:\n",
|
|
" elapsed = session['firstPeerConnection']['time']['elapsedFromStart']\n",
|
|
" concurrent_sessions = session['firstPeerConnection']['concurrentSessions']\n",
|
|
" data.append((elapsed, concurrent_sessions))\n",
|
|
"\n",
|
|
"# Convert to a DataFrame for easier plotting\n",
|
|
"df = pd.DataFrame(data, columns=['Elapsed', 'ConcurrentSessions'])\n",
|
|
"\n",
|
|
"# Convert boolean column to integers\n",
|
|
"df['ConcurrentSessions'] = df['ConcurrentSessions'].astype(int)\n",
|
|
"\n",
|
|
"# Print some statistics\n",
|
|
"print(\"Mean time until first peer connection: {:.2f}s\".format(df['Elapsed'].mean()))\n",
|
|
"print(\"Median time until first peer connection: {:.2f}s\".format(df['Elapsed'].median()))\n",
|
|
"print(\"Number of sessions with concurrent sessions: {}\".format(df['ConcurrentSessions'].sum()))\n",
|
|
"print(\"Number of sessions without concurrent sessions: {}\".format(df['ConcurrentSessions'].count() - df['ConcurrentSessions'].sum()))\n",
|
|
"\n",
|
|
"# Revert concurrent sessions column to boolean for plotting\n",
|
|
"df['ConcurrentSessions'] = df['ConcurrentSessions'].astype(bool)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Plot the histogram of the elapsed time until first peer connection\n",
|
|
"# Color the bars based on the number of concurrent sessions and add a legend\n",
|
|
"fig = px.histogram(df, x='Elapsed', color='ConcurrentSessions', barmode='overlay', nbins=100)\n",
|
|
"fig.update_layout(\n",
|
|
" title='Elapsed time until first peer connection',\n",
|
|
" xaxis_title='Elapsed time (s)',\n",
|
|
" yaxis_title='Count',\n",
|
|
" legend_title='Had concurrent sessions',\n",
|
|
")\n",
|
|
"fig.show()\n",
|
|
"\n",
|
|
"# Plot the line chart of the elapsed time until first peer connection\n",
|
|
"fig = px.line(df, x=df.index, y='Elapsed', markers=True)\n",
|
|
"fig.update_layout(\n",
|
|
" title='Elapsed time until first peer connection',\n",
|
|
" xaxis_title='Session index',\n",
|
|
" yaxis_title='Elapsed time (s)',\n",
|
|
")\n",
|
|
"fig.show()\n",
|
|
"\n",
|
|
"# Plot the cumulative distribution of the elapsed time until first peer connection\n",
|
|
"# Color the lines based on the number of concurrent sessions and add a legend\n",
|
|
"fig = px.ecdf(df, x='Elapsed', color='ConcurrentSessions')\n",
|
|
"fig.update_layout(\n",
|
|
" title='Cumulative distribution of elapsed time until first peer connection',\n",
|
|
" xaxis_title='Elapsed time (s)',\n",
|
|
" yaxis_title='Cumulative probability',\n",
|
|
" legend_title='Had concurrent sessions',\n",
|
|
")\n",
|
|
"fig.show()\n",
|
|
"\n",
|
|
"# Plot the histogram of the number of concurrent sessions\n",
|
|
"fig = px.histogram(df, x='ConcurrentSessions', histnorm='percent')\n",
|
|
"fig.update_layout(\n",
|
|
" title='Number of concurrent sessions',\n",
|
|
" xaxis_title='Had concurrent sessions',\n",
|
|
" yaxis_title='Percentage',\n",
|
|
")\n",
|
|
"fig.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Plot the histogram of the elapsed time until first peer connection using seaborn\n",
|
|
"plt.figure(figsize=(10, 6))\n",
|
|
"sns.histplot(df, x='Elapsed', hue='ConcurrentSessions', multiple='stack', bins=100)\n",
|
|
"plt.title('Elapsed time until first peer connection')\n",
|
|
"plt.xlabel('Elapsed time (s)')\n",
|
|
"plt.ylabel('Count')\n",
|
|
"plt.legend(title='Had concurrent sessions', labels=['True', 'False'])\n",
|
|
"plt.show()\n",
|
|
"\n",
|
|
"# Plot the line chart of the elapsed time until first peer connection using seaborn\n",
|
|
"plt.figure(figsize=(10, 6))\n",
|
|
"sns.lineplot(data=df, x=df.index, y='Elapsed', marker='o')\n",
|
|
"plt.title('Elapsed time until first peer connection')\n",
|
|
"plt.xlabel('Session index')\n",
|
|
"plt.ylabel('Elapsed time (s)')\n",
|
|
"plt.show()\n",
|
|
"\n",
|
|
"# Plot the cumulative distribution of the elapsed time until first peer connection using seaborn\n",
|
|
"plt.figure(figsize=(10, 6))\n",
|
|
"sns.ecdfplot(df, x='Elapsed', hue='ConcurrentSessions')\n",
|
|
"plt.title('Cumulative distribution of elapsed time until first peer connection')\n",
|
|
"plt.xlabel('Elapsed time (s)')\n",
|
|
"plt.ylabel('Cumulative probability')\n",
|
|
"plt.legend(title='Had concurrent sessions', labels=['True', 'False'])\n",
|
|
"plt.show()\n",
|
|
"\n",
|
|
"# Plot the histogram of the number of concurrent sessions using seaborn\n",
|
|
"plt.figure(figsize=(10, 6))\n",
|
|
"sns.histplot(df, x='ConcurrentSessions', stat='percent', discrete=True)\n",
|
|
"plt.title('Number of concurrent sessions')\n",
|
|
"plt.xlabel('Had concurrent sessions')\n",
|
|
"plt.ylabel('Percentage')\n",
|
|
"plt.legend(title='Had concurrent sessions', labels=['True', 'False'])\n",
|
|
"plt.show()"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.1"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|