{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Time until first peer connection\n",
    "\n",
    "Summarises every session stored in the `peertube_ts` collection, measures the time between a session's first document and its first document with a non-empty `peers` array, and plots that delay split by whether another session already had peers during the waiting window."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from getpass import getpass\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as pgo\n",
    "import scipy as sp\n",
    "from pymongo import MongoClient\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Connect to MongoDB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Connection settings are read from the environment so that no secret is stored\n",
    "# in the notebook; the password is prompted for when STATS_MONGO_PASSWORD is unset.\n",
    "MONGO_HOST = os.environ.get(\"STATS_MONGO_HOST\", \"192.168.86.120\")\n",
    "MONGO_PORT = int(os.environ.get(\"STATS_MONGO_PORT\", \"27017\"))\n",
    "MONGO_USER = os.environ.get(\"STATS_MONGO_USER\", \"stats_user\")\n",
    "MONGO_PASSWORD = os.environ.get(\"STATS_MONGO_PASSWORD\") or getpass(\"MongoDB password: \")\n",
    "\n",
    "# Credentials given as keyword arguments are passed as-is, so they must not be\n",
    "# percent-escaped the way they would be inside a connection URI.\n",
    "client = MongoClient(\n",
    "    host=MONGO_HOST,\n",
    "    port=MONGO_PORT,\n",
    "    username=MONGO_USER,\n",
    "    password=MONGO_PASSWORD,\n",
    "    authSource=\"statistics\",\n",
    ")\n",
    "db = client.statistics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Aggregate sessions per host"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def formatted_date(date):\n",
    "    \"\"\"Return a sub-document exposing `date` through both `$toLong` and `$toString`.\"\"\"\n",
    "    return {\n",
    "        \"unix\": {\"$toLong\": date},\n",
    "        \"iso\": {\"$toString\": date},\n",
    "    }\n",
    "\n",
    "\n",
    "def seconds_between(start, end):\n",
    "    \"\"\"Return the expression `(end - start) / 1000`.\n",
    "\n",
    "    Subtracting two BSON dates yields milliseconds, so the result is in seconds\n",
    "    (assumes both operands are dates - confirm against the collection schema).\n",
    "    \"\"\"\n",
    "    return {\"$divide\": [{\"$subtract\": [end, start]}, 1000]}\n",
    "\n",
    "\n",
    "# Number of peers reported by a document; a missing `peers` field counts as zero.\n",
    "PEER_COUNT = {\"$size\": {\"$ifNull\": [\"$peers\", []]}}\n",
    "\n",
    "pipeline = [\n",
    "    {\"$sort\": {\"timestamp\": 1}},\n",
    "    # Collapse every session into a single summary document.\n",
    "    {\n",
    "        \"$group\": {\n",
    "            \"_id\": \"$tags.session\",\n",
    "            \"host\": {\"$first\": \"$tags.host\"},\n",
    "            \"firstTimestamp\": {\"$first\": \"$timestamp\"},\n",
    "            \"lastTimestamp\": {\"$last\": \"$timestamp\"},\n",
    "            # Earliest timestamp among documents that report at least one peer\n",
    "            # (documents without peers contribute null).\n",
    "            \"firstTimestampWithPeers\": {\n",
    "                \"$min\": {\n",
    "                    \"$cond\": {\n",
    "                        \"if\": {\"$gt\": [PEER_COUNT, 0]},\n",
    "                        \"then\": \"$timestamp\",\n",
    "                        \"else\": None,\n",
    "                    }\n",
    "                }\n",
    "            },\n",
    "            \"maxNumberOfPeers\": {\"$max\": PEER_COUNT},\n",
    "            \"minNumberOfPeers\": {\"$min\": PEER_COUNT},\n",
    "        }\n",
    "    },\n",
    "    # Find documents of *other* sessions that reported peers between this\n",
    "    # session's first document and its first document with peers.\n",
    "    {\n",
    "        \"$lookup\": {\n",
    "            \"from\": \"peertube_ts\",\n",
    "            \"let\": {\"currentSession\": \"$_id\", \"ftp\": \"$firstTimestampWithPeers\", \"fst\": \"$firstTimestamp\"},\n",
    "            \"pipeline\": [\n",
    "                {\n",
    "                    \"$match\": {\n",
    "                        \"$expr\": {\n",
    "                            \"$and\": [\n",
    "                                {\"$ne\": [\"$tags.session\", \"$$currentSession\"]},\n",
    "                                {\"$lt\": [\"$timestamp\", \"$$ftp\"]},\n",
    "                                {\"$gte\": [\"$timestamp\", \"$$fst\"]},\n",
    "                                {\"$gt\": [PEER_COUNT, 0]},\n",
    "                            ]\n",
    "                        }\n",
    "                    }\n",
    "                },\n",
    "                # Only the existence of a match is tested below, so one document is enough.\n",
    "                {\"$limit\": 1},\n",
    "            ],\n",
    "            \"as\": \"concurrentSessions\",\n",
    "        }\n",
    "    },\n",
    "    # Replace the looked-up array with a boolean flag.\n",
    "    {\n",
    "        \"$addFields\": {\n",
    "            \"concurrentSessions\": {\"$gt\": [{\"$size\": \"$concurrentSessions\"}, 0]}\n",
    "        }\n",
    "    },\n",
    "    # Gather the session summaries under their host.\n",
    "    {\n",
    "        \"$group\": {\n",
    "            \"_id\": \"$host\",\n",
    "            \"sessions\": {\n",
    "                \"$push\": {\n",
    "                    \"id\": \"$_id\",\n",
    "                    \"startTime\": formatted_date(\"$firstTimestamp\"),\n",
    "                    \"endTime\": formatted_date(\"$lastTimestamp\"),\n",
    "                    \"duration\": seconds_between(\"$firstTimestamp\", \"$lastTimestamp\"),\n",
    "                    # null when the session never reported a peer.\n",
    "                    \"firstPeerConnection\": {\n",
    "                        \"$cond\": {\n",
    "                            \"if\": {\"$eq\": [\"$firstTimestampWithPeers\", None]},\n",
    "                            \"then\": None,\n",
    "                            \"else\": {\n",
    "                                \"time\": {\n",
    "                                    \"date\": formatted_date(\"$firstTimestampWithPeers\"),\n",
    "                                    \"elapsedFromStart\": seconds_between(\"$firstTimestamp\", \"$firstTimestampWithPeers\"),\n",
    "                                },\n",
    "                                \"concurrentSessions\": \"$concurrentSessions\",\n",
    "                            }\n",
    "                        }\n",
    "                    },\n",
    "                    \"maxPeers\": {\"$max\": \"$maxNumberOfPeers\"},\n",
    "                    \"minPeers\": {\"$min\": \"$minNumberOfPeers\"},\n",
    "                }\n",
    "            }\n",
    "        }\n",
    "    },\n",
    "    {\n",
    "        \"$set\": {\n",
    "            \"sessions\": {\n",
    "                \"$sortArray\": {\n",
    "                    \"input\": \"$sessions\",\n",
    "                    \"sortBy\": {\"id\": 1},\n",
    "                }\n",
    "            }\n",
    "        }\n",
    "    },\n",
    "    {\n",
    "        \"$project\": {\n",
    "            \"_id\": 0,\n",
    "            \"host\": \"$_id\",\n",
    "            \"sessions\": \"$sessions\",\n",
    "        }\n",
    "    },\n",
    "    {\"$sort\": {\"host\": 1}},\n",
    "]\n",
    "\n",
    "# Materialise the cursor: a cursor can be iterated only once, which would make\n",
    "# the cells below return nothing when they are re-run.\n",
    "result = list(db.peertube_ts.aggregate(pipeline))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the per-session table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One (elapsed, had concurrent sessions) pair per session that eventually saw a peer\n",
    "data = [\n",
    "    (\n",
    "        session['firstPeerConnection']['time']['elapsedFromStart'],\n",
    "        session['firstPeerConnection']['concurrentSessions'],\n",
    "    )\n",
    "    for host in result\n",
    "    for session in host['sessions']\n",
    "    if session['firstPeerConnection'] and session['firstPeerConnection']['time']\n",
    "]\n",
    "\n",
    "# Explicit dtypes keep the statistics and plots well-defined even when `data` is empty\n",
    "df = pd.DataFrame(data, columns=['Elapsed', 'ConcurrentSessions']).astype(\n",
    "    {'Elapsed': float, 'ConcurrentSessions': bool}\n",
    ")\n",
    "\n",
    "# Summing a boolean column counts its True values\n",
    "sessions_with_concurrency = int(df['ConcurrentSessions'].sum())\n",
    "sessions_without_concurrency = len(df) - sessions_with_concurrency\n",
    "\n",
    "# Print some statistics\n",
    "print(\"Mean time until first peer connection: {:.2f}s\".format(df['Elapsed'].mean()))\n",
    "print(\"Median time until first peer connection: {:.2f}s\".format(df['Elapsed'].median()))\n",
    "print(\"Number of sessions with concurrent sessions: {}\".format(sessions_with_concurrency))\n",
    "print(\"Number of sessions without concurrent sessions: {}\".format(sessions_without_concurrency))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Interactive plots (plotly)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the histogram of the elapsed time until first peer connection\n",
    "# Color the bars by whether the session had concurrent sessions and add a legend\n",
    "fig = px.histogram(df, x='Elapsed', color='ConcurrentSessions', barmode='overlay', nbins=100)\n",
    "fig.update_layout(\n",
    "    title='Elapsed time until first peer connection',\n",
    "    xaxis_title='Elapsed time (s)',\n",
    "    yaxis_title='Count',\n",
    "    legend_title='Had concurrent sessions',\n",
    ")\n",
    "fig.show()\n",
    "\n",
    "# Plot the line chart of the elapsed time until first peer connection\n",
    "fig = px.line(df, x=df.index, y='Elapsed', markers=True)\n",
    "fig.update_layout(\n",
    "    title='Elapsed time until first peer connection',\n",
    "    xaxis_title='Session index',\n",
    "    yaxis_title='Elapsed time (s)',\n",
    ")\n",
    "fig.show()\n",
    "\n",
    "# Plot the cumulative distribution of the elapsed time until first peer connection\n",
    "# Color the lines by whether the session had concurrent sessions and add a legend\n",
    "fig = px.ecdf(df, x='Elapsed', color='ConcurrentSessions')\n",
    "fig.update_layout(\n",
    "    title='Cumulative distribution of elapsed time until first peer connection',\n",
    "    xaxis_title='Elapsed time (s)',\n",
    "    yaxis_title='Cumulative probability',\n",
    "    legend_title='Had concurrent sessions',\n",
    ")\n",
    "fig.show()\n",
    "\n",
    "# Plot the share of sessions with and without concurrent sessions\n",
    "fig = px.histogram(df, x='ConcurrentSessions', histnorm='percent')\n",
    "fig.update_layout(\n",
    "    title='Number of concurrent sessions',\n",
    "    xaxis_title='Had concurrent sessions',\n",
    "    yaxis_title='Percentage',\n",
    ")\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Static plots (seaborn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def title_hue_legend(ax, title='Had concurrent sessions'):\n",
    "    \"\"\"Set the title of the legend seaborn drew for the hue variable, if there is one.\n",
    "\n",
    "    The legend built by seaborn already pairs every hue level with its color.\n",
    "    Calling `plt.legend(labels=[...])` instead would rebuild the legend from the\n",
    "    first artists on the axes and label them by position only.\n",
    "    \"\"\"\n",
    "    legend = ax.get_legend()\n",
    "    if legend is not None:\n",
    "        legend.set_title(title)\n",
    "\n",
    "\n",
    "# Plot the histogram of the elapsed time until first peer connection using seaborn\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "sns.histplot(df, x='Elapsed', hue='ConcurrentSessions', multiple='stack', bins=100, ax=ax)\n",
    "ax.set_title('Elapsed time until first peer connection')\n",
    "ax.set_xlabel('Elapsed time (s)')\n",
    "ax.set_ylabel('Count')\n",
    "title_hue_legend(ax)\n",
    "plt.show()\n",
    "\n",
    "# Plot the line chart of the elapsed time until first peer connection using seaborn\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "sns.lineplot(data=df, x=df.index, y='Elapsed', marker='o', ax=ax)\n",
    "ax.set_title('Elapsed time until first peer connection')\n",
    "ax.set_xlabel('Session index')\n",
    "ax.set_ylabel('Elapsed time (s)')\n",
    "plt.show()\n",
    "\n",
    "# Plot the cumulative distribution of the elapsed time until first peer connection using seaborn\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "sns.ecdfplot(df, x='Elapsed', hue='ConcurrentSessions', ax=ax)\n",
    "ax.set_title('Cumulative distribution of elapsed time until first peer connection')\n",
    "ax.set_xlabel('Elapsed time (s)')\n",
    "ax.set_ylabel('Cumulative probability')\n",
    "title_hue_legend(ax)\n",
    "plt.show()\n",
    "\n",
    "# Plot the share of sessions with and without concurrent sessions using seaborn\n",
    "# There is no hue variable here, so the two bars are named on the x axis instead of in a legend\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "sns.histplot(df, x='ConcurrentSessions', stat='percent', discrete=True, ax=ax)\n",
    "ax.set_xticks([0, 1])\n",
    "ax.set_xticklabels(['False', 'True'])\n",
    "ax.set_title('Number of concurrent sessions')\n",
    "ax.set_xlabel('Had concurrent sessions')\n",
    "ax.set_ylabel('Percentage')\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}