Files
Tesi/peertube/statnerd/plot.ipynb

288 lines
11 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.graph_objects as pgo\n",
"import scipy as sp\n",
"import seaborn as sns\n",
"from pymongo import MongoClient\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# The connection string is read from the environment so that credentials are never\n",
"# stored in the notebook. Expected shape:\n",
"#   mongodb://USER:URL_ENCODED_PASSWORD@HOST:PORT/?authSource=statistics\n",
"MONGO_URI_ENV = \"STATNERD_MONGO_URI\"\n",
"mongo_uri = os.environ.get(MONGO_URI_ENV)\n",
"if not mongo_uri:\n",
"    raise RuntimeError(\n",
"        \"Set the {} environment variable to the MongoDB connection string\".format(MONGO_URI_ENV)\n",
"    )\n",
"\n",
"client = MongoClient(mongo_uri)\n",
"db = client.statistics\n",
"\n",
"\n",
"def formatted_date(date):\n",
"    \"\"\"Return a sub-document exposing `date` via $toLong (`unix`) and $toString (`iso`).\"\"\"\n",
"    return {\n",
"        \"unix\": {\"$toLong\": date},\n",
"        \"iso\": {\"$toString\": date},\n",
"    }\n",
"\n",
"\n",
"def seconds_between(start, end):\n",
"    \"\"\"Return the aggregation expression (end - start) / 1000.\"\"\"\n",
"    return {\"$divide\": [{\"$subtract\": [end, start]}, 1000]}\n",
"\n",
"\n",
"# Number of entries in a sample's `peers` array; a missing or null array counts as 0.\n",
"peer_count = {\"$size\": {\"$ifNull\": [\"$peers\", []]}}\n",
"\n",
"pipeline = [\n",
"    # $first / $last in the $group below rely on chronological order.\n",
"    {\"$sort\": {\"timestamp\": 1}},\n",
"    # One document per session: time span, first sample with peers, peer-count range.\n",
"    {\n",
"        \"$group\": {\n",
"            \"_id\": \"$tags.session\",\n",
"            \"host\": {\"$first\": \"$tags.host\"},\n",
"            \"firstTimestamp\": {\"$first\": \"$timestamp\"},\n",
"            \"lastTimestamp\": {\"$last\": \"$timestamp\"},\n",
"            # Samples without peers contribute null to the $min.\n",
"            \"firstTimestampWithPeers\": {\n",
"                \"$min\": {\n",
"                    \"$cond\": {\n",
"                        \"if\": {\"$gt\": [peer_count, 0]},\n",
"                        \"then\": \"$timestamp\",\n",
"                        \"else\": None,\n",
"                    }\n",
"                }\n",
"            },\n",
"            \"maxNumberOfPeers\": {\"$max\": peer_count},\n",
"            \"minNumberOfPeers\": {\"$min\": peer_count},\n",
"        }\n",
"    },\n",
"    # Look for samples of *other* sessions that had peers between this session's start\n",
"    # and its first sample with peers.\n",
"    {\n",
"        \"$lookup\": {\n",
"            \"from\": \"peertube_ts\",\n",
"            \"let\": {\"currentSession\": \"$_id\", \"ftp\": \"$firstTimestampWithPeers\", \"fst\": \"$firstTimestamp\"},\n",
"            \"pipeline\": [\n",
"                {\n",
"                    \"$match\": {\n",
"                        \"$expr\": {\n",
"                            \"$and\": [\n",
"                                {\"$ne\": [\"$tags.session\", \"$$currentSession\"]},\n",
"                                {\"$lt\": [\"$timestamp\", \"$$ftp\"]},\n",
"                                {\"$gte\": [\"$timestamp\", \"$$fst\"]},\n",
"                                {\"$gt\": [peer_count, 0]},\n",
"                            ]\n",
"                        }\n",
"                    }\n",
"                },\n",
"                # Only the existence of a match is used afterwards, so keep a single minimal\n",
"                # document instead of embedding every matching sample in the session document.\n",
"                {\"$limit\": 1},\n",
"                {\"$project\": {\"_id\": 1}},\n",
"            ],\n",
"            \"as\": \"concurrentSessions\",\n",
"        }\n",
"    },\n",
"    # Collapse the lookup result into a boolean flag.\n",
"    {\n",
"        \"$addFields\": {\n",
"            \"concurrentSessions\": {\"$gt\": [{\"$size\": \"$concurrentSessions\"}, 0]}\n",
"        }\n",
"    },\n",
"    # Gather the sessions of each host into one array.\n",
"    {\n",
"        \"$group\": {\n",
"            \"_id\": \"$host\",\n",
"            \"sessions\": {\n",
"                \"$push\": {\n",
"                    \"id\": \"$_id\",\n",
"                    \"startTime\": formatted_date(\"$firstTimestamp\"),\n",
"                    \"endTime\": formatted_date(\"$lastTimestamp\"),\n",
"                    \"duration\": seconds_between(\"$firstTimestamp\", \"$lastTimestamp\"),\n",
"                    # null when the session never had a sample with peers.\n",
"                    \"firstPeerConnection\": {\n",
"                        \"$cond\": {\n",
"                            \"if\": {\"$eq\": [\"$firstTimestampWithPeers\", None]},\n",
"                            \"then\": None,\n",
"                            \"else\": {\n",
"                                \"time\": {\n",
"                                    \"date\": formatted_date(\"$firstTimestampWithPeers\"),\n",
"                                    \"elapsedFromStart\": seconds_between(\n",
"                                        \"$firstTimestamp\", \"$firstTimestampWithPeers\"\n",
"                                    ),\n",
"                                },\n",
"                                \"concurrentSessions\": \"$concurrentSessions\",\n",
"                            },\n",
"                        }\n",
"                    },\n",
"                    \"maxPeers\": {\"$max\": \"$maxNumberOfPeers\"},\n",
"                    \"minPeers\": {\"$min\": \"$minNumberOfPeers\"},\n",
"                }\n",
"            },\n",
"        }\n",
"    },\n",
"    # Order each host's sessions by session id.\n",
"    {\n",
"        \"$set\": {\n",
"            \"sessions\": {\n",
"                \"$sortArray\": {\n",
"                    \"input\": \"$sessions\",\n",
"                    \"sortBy\": {\"id\": 1},\n",
"                }\n",
"            }\n",
"        }\n",
"    },\n",
"    {\n",
"        \"$project\": {\n",
"            \"_id\": 0,\n",
"            \"host\": \"$_id\",\n",
"            \"sessions\": \"$sessions\",\n",
"        }\n",
"    },\n",
"    {\"$sort\": {\"host\": 1}},\n",
"]\n",
"\n",
"# Materialise the cursor: it can only be iterated once, so keeping the raw cursor would\n",
"# make any cell that loops over `result` come up empty when it is re-run.\n",
"result = list(db.peertube_ts.aggregate(pipeline))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Flatten the per-host result into (elapsed, had concurrent sessions) pairs, keeping only\n",
"# the sessions that have a first peer connection.\n",
"data = [\n",
"    (first_peer['time']['elapsedFromStart'], first_peer['concurrentSessions'])\n",
"    for host_doc in result\n",
"    for first_peer in (entry['firstPeerConnection'] for entry in host_doc['sessions'])\n",
"    if first_peer and first_peer['time']\n",
"]\n",
"\n",
"# One row per session\n",
"df = pd.DataFrame(data, columns=['Elapsed', 'ConcurrentSessions'])\n",
"\n",
"# The flag stays boolean for plotting; summing a boolean column counts its True rows\n",
"df['ConcurrentSessions'] = df['ConcurrentSessions'].astype(bool)\n",
"with_concurrent = df['ConcurrentSessions'].sum()\n",
"without_concurrent = df['ConcurrentSessions'].count() - with_concurrent\n",
"\n",
"# Summary statistics\n",
"print(\"Mean time until first peer connection: {:.2f}s\".format(df['Elapsed'].mean()))\n",
"print(\"Median time until first peer connection: {:.2f}s\".format(df['Elapsed'].median()))\n",
"print(\"Number of sessions with concurrent sessions: {}\".format(with_concurrent))\n",
"print(\"Number of sessions without concurrent sessions: {}\".format(without_concurrent))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def show_with_layout(figure, **layout):\n",
"    \"\"\"Apply the layout settings to a plotly figure, display it and return it.\"\"\"\n",
"    figure.update_layout(**layout)\n",
"    figure.show()\n",
"    return figure\n",
"\n",
"\n",
"# Overlaid histograms of the elapsed time, one colour per value of the concurrent-sessions flag\n",
"fig = show_with_layout(\n",
"    px.histogram(df, x='Elapsed', color='ConcurrentSessions', barmode='overlay', nbins=100),\n",
"    title='Elapsed time until first peer connection',\n",
"    xaxis_title='Elapsed time (s)',\n",
"    yaxis_title='Count',\n",
"    legend_title='Had concurrent sessions',\n",
")\n",
"\n",
"# Elapsed time of every session, in DataFrame row order\n",
"fig = show_with_layout(\n",
"    px.line(df, x=df.index, y='Elapsed', markers=True),\n",
"    title='Elapsed time until first peer connection',\n",
"    xaxis_title='Session index',\n",
"    yaxis_title='Elapsed time (s)',\n",
")\n",
"\n",
"# Empirical cumulative distribution of the elapsed time, one line per flag value\n",
"fig = show_with_layout(\n",
"    px.ecdf(df, x='Elapsed', color='ConcurrentSessions'),\n",
"    title='Cumulative distribution of elapsed time until first peer connection',\n",
"    xaxis_title='Elapsed time (s)',\n",
"    yaxis_title='Cumulative probability',\n",
"    legend_title='Had concurrent sessions',\n",
")\n",
"\n",
"# Percentage of sessions for each value of the concurrent-sessions flag\n",
"fig = show_with_layout(\n",
"    px.histogram(df, x='ConcurrentSessions', histnorm='percent'),\n",
"    title='Number of concurrent sessions',\n",
"    xaxis_title='Had concurrent sessions',\n",
"    yaxis_title='Percentage',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"HUE_LEGEND_TITLE = 'Had concurrent sessions'\n",
"\n",
"\n",
"def title_hue_legend(ax, title=HUE_LEGEND_TITLE):\n",
"    \"\"\"Set the title of the legend already attached to `ax`.\n",
"\n",
"    Only the title changes: the existing handles and labels are kept, so every colour\n",
"    stays paired with the hue value it was drawn for. Passing `labels=[...]` to\n",
"    `plt.legend` instead relabels artists by position and can mislabel the groups.\n",
"    Does nothing when the axes has no legend.\n",
"    \"\"\"\n",
"    legend = ax.get_legend()\n",
"    if legend is not None:\n",
"        legend.set_title(title)\n",
"\n",
"\n",
"# Stacked histogram of the elapsed time until first peer connection\n",
"plt.figure(figsize=(10, 6))\n",
"ax = sns.histplot(df, x='Elapsed', hue='ConcurrentSessions', multiple='stack', bins=100)\n",
"ax.set(title='Elapsed time until first peer connection', xlabel='Elapsed time (s)', ylabel='Count')\n",
"title_hue_legend(ax)\n",
"plt.show()\n",
"\n",
"# Elapsed time of every session, in DataFrame row order\n",
"plt.figure(figsize=(10, 6))\n",
"ax = sns.lineplot(data=df, x=df.index, y='Elapsed', marker='o')\n",
"ax.set(title='Elapsed time until first peer connection', xlabel='Session index', ylabel='Elapsed time (s)')\n",
"plt.show()\n",
"\n",
"# Empirical cumulative distribution of the elapsed time, one line per flag value\n",
"plt.figure(figsize=(10, 6))\n",
"ax = sns.ecdfplot(df, x='Elapsed', hue='ConcurrentSessions')\n",
"ax.set(\n",
"    title='Cumulative distribution of elapsed time until first peer connection',\n",
"    xlabel='Elapsed time (s)',\n",
"    ylabel='Cumulative probability',\n",
")\n",
"title_hue_legend(ax)\n",
"plt.show()\n",
"\n",
"# Percentage of sessions for each value of the concurrent-sessions flag.\n",
"# There is no hue here, so no legend is drawn; the x axis already names the groups.\n",
"plt.figure(figsize=(10, 6))\n",
"ax = sns.histplot(df, x='ConcurrentSessions', stat='percent', discrete=True)\n",
"ax.set(title='Number of concurrent sessions', xlabel='Had concurrent sessions', ylabel='Percentage')\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}