feat: Dockerize collector / CI-CD for PDF release (#1)
All checks were successful
Build LaTeX Document / build_latex (push) Successful in 1m35s
All checks were successful
Build LaTeX Document / build_latex (push) Successful in 1m35s
Reviewed-on: #1 Co-authored-by: Mirko Milovanovic <mir_ko@me.com> Co-committed-by: Mirko Milovanovic <mir_ko@me.com>
This commit was merged in pull request #1.
This commit is contained in:
63
peertube/datavis/CRUD/mongodb-convert-to.ts.js
Normal file
63
peertube/datavis/CRUD/mongodb-convert-to.ts.js
Normal file
@@ -0,0 +1,63 @@
|
||||
/* global use, db */
|
||||
// MongoDB Playground
|
||||
// To disable this template go to Settings | MongoDB | Use Default Template For Playground.
|
||||
// Make sure you are connected to enable completions and to be able to run a playground.
|
||||
// Use Ctrl+Space inside a snippet or a string literal to trigger completions.
|
||||
// The result of the last command run in a playground is shown on the results panel.
|
||||
// By default the first 20 documents will be returned with a cursor.
|
||||
// Use 'console.log()' to print to the debug output.
|
||||
// For more documentation on playgrounds please refer to
|
||||
// https://www.mongodb.com/docs/mongodb-vscode/playgrounds/
|
||||
use("statistics");

/**
 * Body for the server-side `$function` operator: decodes a field that may
 * hold a JSON-encoded string.
 *
 * - Objects and arrays (values that are already decoded) are returned as-is.
 *   They are checked BEFORE calling JSON.parse because JSON.parse coerces its
 *   argument to a string first, so an already-decoded array such as ["123"]
 *   would otherwise be stringified to "123" and come back as the number 123.
 * - Anything else is handed to JSON.parse; if that throws, null is returned.
 *
 * The function is executed by the server, so it must stay self-contained and
 * must not reference variables from this playground.
 *
 * @param {*} str - Current value of the field being converted.
 * @returns {*} The decoded value, the original object, or null.
 */
const parseJsonField = function (str) {
  if (str !== null && typeof str === "object") {
    return str;
  }
  try {
    return JSON.parse(str);
  } catch (e) {
    return null;
  }
};

/**
 * Builds the `$function` expression that applies `parseJsonField` to a field.
 *
 * @param {string} fieldPath - Field path expression, e.g. "$player".
 * @returns {object} A `$function` aggregation expression.
 */
const jsonFieldExpr = (fieldPath) => ({
  $function: {
    lang: "js",
    args: [fieldPath],
    body: parseJsonField,
  },
});

db.peertube.aggregate([
  {
    // Replace the raw `player` and `peers` values with their decoded form.
    $set: {
      player: jsonFieldExpr("$player"),
      peers: jsonFieldExpr("$peers"),
    },
  },
  {
    // Write the converted documents into a time series collection.
    $out: {
      db: "statistics",
      coll: "peertube_ts",
      timeseries: {
        timeField: "timestamp",
        metaField: "tags",
        granularity: "seconds",
      },
    },
  },
]);
|
||||
141
peertube/datavis/CRUD/sessions-metrics.js
Normal file
141
peertube/datavis/CRUD/sessions-metrics.js
Normal file
@@ -0,0 +1,141 @@
|
||||
/* global use, db */
|
||||
// MongoDB Playground
|
||||
// To disable this template go to Settings | MongoDB | Use Default Template For Playground.
|
||||
// Make sure you are connected to enable completions and to be able to run a playground.
|
||||
// Use Ctrl+Space inside a snippet or a string literal to trigger completions.
|
||||
// The result of the last command run in a playground is shown on the results panel.
|
||||
// By default the first 20 documents will be returned with a cursor.
|
||||
// Use 'console.log()' to print to the debug output.
|
||||
// For more documentation on playgrounds please refer to
|
||||
// https://www.mongodb.com/docs/mongodb-vscode/playgrounds/
|
||||
use("statistics");
|
||||
|
||||
/**
 * Builds an aggregation sub-document exposing one date expression in two
 * representations: `unix` via `$toLong` and `iso` via `$toString`.
 *
 * @param {string|object} date - Aggregation expression that resolves to a
 *   date, e.g. the field path "$timestamp".
 * @returns {{unix: {$toLong: (string|object)}, iso: {$toString: (string|object)}}}
 *   Expression object to embed in a pipeline stage.
 */
const formattedDate = (date) => ({
  unix: { $toLong: date },
  iso: { $toString: date },
});
|
||||
|
||||
// Per-host session metrics: start/end time, duration, time until the first
// sample with peers, and whether another session already had peers in the
// window between this session's start and its first sample with peers.
db.getCollection("peertube_ts").aggregate([
  // Chronological order so that $first/$last below pick each session's bounds.
  {
    $sort: { timestamp: 1 },
  },
  // One document per session.
  {
    $group: {
      _id: "$tags.session",
      host: { $first: "$tags.host" },
      firstTimestamp: { $first: "$timestamp" },
      lastTimestamp: { $last: "$timestamp" },
      // Earliest sample whose `peers` array is non-empty; stays null when the
      // session never had peers ($min skips the null placeholders).
      firstTimestampWithPeers: {
        $min: {
          $cond: {
            if: { $gt: [{ $size: { $ifNull: ["$peers", []] } }, 0] },
            then: "$timestamp",
            else: null,
          },
        },
      },
      maxNumberOfPeers: {
        $max: {
          $size: { $ifNull: ["$peers", []] },
        },
      },
      minNumberOfPeers: {
        $min: {
          $size: { $ifNull: ["$peers", []] },
        },
      },
    },
  },
  // Lookup other sessions that may have peers
  {
    $lookup: {
      from: "peertube_ts",
      let: { currentSession: "$_id", ftp: "$firstTimestampWithPeers", fst: "$firstTimestamp" },
      pipeline: [
        {
          $match: {
            $expr: {
              $and: [
                { $ne: ["$tags.session", "$$currentSession"] },
                { $lt: ["$timestamp", "$$ftp"] },
                { $gte: ["$timestamp", "$$fst"] },
                { $gt: [{ $size: { $ifNull: ["$peers", []] } }, 0] },
              ],
            },
          },
        },
        // Only existence is tested in the next stage, so one slim match is
        // enough; this keeps the joined array far below the 16 MB BSON
        // document limit however many samples overlap.
        { $limit: 1 },
        { $project: { _id: 1 } },
      ],
      as: "concurrentSessions",
    },
  },
  // Mark whether peers existed in other sessions before starting
  {
    $addFields: {
      concurrentSessions: {
        $gt: [{ $size: "$concurrentSessions" }, 0],
      },
    },
  },
  // Collect the sessions of each host into one array.
  {
    $group: {
      _id: "$host",
      sessions: {
        $push: {
          id: "$_id",
          startTime: formattedDate("$firstTimestamp"),
          endTime: formattedDate("$lastTimestamp"),
          // Date subtraction yields milliseconds; convert to seconds.
          duration: {
            $divide: [
              {
                $subtract: ["$lastTimestamp", "$firstTimestamp"],
              },
              1000,
            ],
          },
          firstPeerConnection: {
            $cond: {
              if: { $eq: ["$firstTimestampWithPeers", null] },
              then: null,
              else: {
                time: {
                  date: formattedDate("$firstTimestampWithPeers"),
                  elapsedFromStart: {
                    $divide: [
                      {
                        $subtract: ["$firstTimestampWithPeers", "$firstTimestamp"],
                      },
                      1000,
                    ],
                  },
                },
                concurrentSessions: "$concurrentSessions",
              },
            },
          },
          maxPeers: { $max: "$maxNumberOfPeers" },
          minPeers: { $min: "$minNumberOfPeers" },
        },
      },
    },
  },
  // Deterministic order of sessions inside each host.
  {
    $set: {
      sessions: {
        $sortArray: {
          input: "$sessions",
          sortBy: { id: 1 },
        },
      },
    },
  },
  {
    $project: {
      _id: 0,
      host: "$_id",
      sessions: "$sessions",
    },
  },
  {
    $sort: { host: 1 },
  },
]);
|
||||
287
peertube/datavis/plot.ipynb
Normal file
287
peertube/datavis/plot.ipynb
Normal file
@@ -0,0 +1,287 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"import plotly.express as px\n",
|
||||
"import plotly.graph_objects as pgo\n",
|
||||
"import scipy as sp\n",
|
||||
"from pymongo import MongoClient\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
"import os\n",
"\n",
"# The connection string (including credentials) is read from the environment so that\n",
"# no password is stored in the notebook. Any password previously committed here should be rotated.\n",
"# Example: export MONGODB_URI='mongodb://<user>:<password>@<host>:27017/?authSource=statistics'\n",
"client = MongoClient(os.environ[\"MONGODB_URI\"])\n",
|
||||
"db = client.statistics\n",
|
||||
"\n",
|
||||
"formatted_date = lambda date: {\n",
|
||||
" \"unix\": {\"$toLong\": date},\n",
|
||||
" \"iso\": {\"$toString\": date},\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"pipeline = [\n",
|
||||
" {\"$sort\": {\"timestamp\": 1}},\n",
|
||||
" {\n",
|
||||
" \"$group\": {\n",
|
||||
" \"_id\": \"$tags.session\",\n",
|
||||
" \"host\": {\"$first\": \"$tags.host\"},\n",
|
||||
" \"firstTimestamp\": {\"$first\": \"$timestamp\"},\n",
|
||||
" \"lastTimestamp\": {\"$last\": \"$timestamp\"},\n",
|
||||
" \"firstTimestampWithPeers\": {\n",
|
||||
" \"$min\": {\n",
|
||||
" \"$cond\": {\n",
|
||||
" \"if\": {\"$gt\": [{\"$size\": {\"$ifNull\": [\"$peers\", []]}}, 0]},\n",
|
||||
" \"then\": \"$timestamp\",\n",
|
||||
" \"else\": None,\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"maxNumberOfPeers\": {\n",
|
||||
" \"$max\": {\"$size\": {\"$ifNull\": [\"$peers\", []]}},\n",
|
||||
" },\n",
|
||||
" \"minNumberOfPeers\": {\n",
|
||||
" \"$min\": {\"$size\": {\"$ifNull\": [\"$peers\", []]}},\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"$lookup\": {\n",
|
||||
" \"from\": \"peertube_ts\",\n",
|
||||
" \"let\": {\"currentSession\": \"$_id\", \"ftp\": \"$firstTimestampWithPeers\", \"fst\": \"$firstTimestamp\"},\n",
|
||||
" \"pipeline\": [\n",
|
||||
" {\n",
|
||||
" \"$match\": {\n",
|
||||
" \"$expr\": {\n",
|
||||
" \"$and\": [\n",
|
||||
" {\"$ne\": [\"$tags.session\", \"$$currentSession\"]},\n",
|
||||
" {\"$lt\": [\"$timestamp\", \"$$ftp\"]},\n",
|
||||
" {\"$gte\": [\"$timestamp\", \"$$fst\"]},\n",
|
||||
" {\"$gt\": [{\"$size\": {\"$ifNull\": [\"$peers\", []]}}, 0]},\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"as\": \"concurrentSessions\",\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"$addFields\": {\n",
|
||||
" \"concurrentSessions\": {\"$gt\": [{\"$size\": \"$concurrentSessions\"}, 0]}\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"$group\": {\n",
|
||||
" \"_id\": \"$host\",\n",
|
||||
" \"sessions\": {\n",
|
||||
" \"$push\": {\n",
|
||||
" \"id\": \"$_id\",\n",
|
||||
" \"startTime\": formatted_date(\"$firstTimestamp\"),\n",
|
||||
" \"endTime\": formatted_date(\"$lastTimestamp\"),\n",
|
||||
" \"duration\": {\n",
|
||||
" \"$divide\": [\n",
|
||||
" {\"$subtract\": [\"$lastTimestamp\", \"$firstTimestamp\"]},\n",
|
||||
" 1000,\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" \"firstPeerConnection\": {\n",
|
||||
" \"$cond\": {\n",
|
||||
" \"if\": {\"$eq\": [\"$firstTimestampWithPeers\", None]},\n",
|
||||
" \"then\": None,\n",
|
||||
" \"else\": {\n",
|
||||
" \"time\": {\n",
|
||||
" \"date\": formatted_date(\"$firstTimestampWithPeers\"),\n",
|
||||
" \"elapsedFromStart\": {\n",
|
||||
" \"$divide\": [\n",
|
||||
" {\"$subtract\": [\"$firstTimestampWithPeers\", \"$firstTimestamp\"]},\n",
|
||||
" 1000,\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"concurrentSessions\": \"$concurrentSessions\",\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"maxPeers\": {\"$max\": \"$maxNumberOfPeers\"},\n",
|
||||
" \"minPeers\": {\"$min\": \"$minNumberOfPeers\"},\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"$set\": {\n",
|
||||
" \"sessions\": {\n",
|
||||
" \"$sortArray\": {\n",
|
||||
" \"input\": \"$sessions\",\n",
|
||||
" \"sortBy\": {\"id\": 1},\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"$project\": {\n",
|
||||
" \"_id\": 0,\n",
|
||||
" \"host\": \"$_id\",\n",
|
||||
" \"sessions\": \"$sessions\",\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" {\"$sort\": {\"host\": 1}},\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"result = db.peertube_ts.aggregate(pipeline)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Extract data from the result cursor\n",
|
||||
"data = []\n",
|
||||
"for host in result:\n",
|
||||
" for session in host['sessions']:\n",
|
||||
" if session['firstPeerConnection'] and session['firstPeerConnection']['time']:\n",
|
||||
" elapsed = session['firstPeerConnection']['time']['elapsedFromStart']\n",
|
||||
" concurrent_sessions = session['firstPeerConnection']['concurrentSessions']\n",
|
||||
" data.append((elapsed, concurrent_sessions))\n",
|
||||
"\n",
|
||||
"# Convert to a DataFrame for easier plotting\n",
|
||||
"df = pd.DataFrame(data, columns=['Elapsed', 'ConcurrentSessions'])\n",
|
||||
"\n",
|
||||
"# Convert boolean column to integers\n",
|
||||
"df['ConcurrentSessions'] = df['ConcurrentSessions'].astype(int)\n",
|
||||
"\n",
|
||||
"# Print some statistics\n",
|
||||
"print(\"Mean time until first peer connection: {:.2f}s\".format(df['Elapsed'].mean()))\n",
|
||||
"print(\"Median time until first peer connection: {:.2f}s\".format(df['Elapsed'].median()))\n",
|
||||
"print(\"Number of sessions with concurrent sessions: {}\".format(df['ConcurrentSessions'].sum()))\n",
|
||||
"print(\"Number of sessions without concurrent sessions: {}\".format(df['ConcurrentSessions'].count() - df['ConcurrentSessions'].sum()))\n",
|
||||
"\n",
|
||||
"# Revert concurrent sessions column to boolean for plotting\n",
|
||||
"df['ConcurrentSessions'] = df['ConcurrentSessions'].astype(bool)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Plot the histogram of the elapsed time until first peer connection\n",
|
||||
"# Color the bars based on the number of concurrent sessions and add a legend\n",
|
||||
"fig = px.histogram(df, x='Elapsed', color='ConcurrentSessions', barmode='overlay', nbins=100)\n",
|
||||
"fig.update_layout(\n",
|
||||
" title='Elapsed time until first peer connection',\n",
|
||||
" xaxis_title='Elapsed time (s)',\n",
|
||||
" yaxis_title='Count',\n",
|
||||
" legend_title='Had concurrent sessions',\n",
|
||||
")\n",
|
||||
"fig.show()\n",
|
||||
"\n",
|
||||
"# Plot the line chart of the elapsed time until first peer connection\n",
|
||||
"fig = px.line(df, x=df.index, y='Elapsed', markers=True)\n",
|
||||
"fig.update_layout(\n",
|
||||
" title='Elapsed time until first peer connection',\n",
|
||||
" xaxis_title='Session index',\n",
|
||||
" yaxis_title='Elapsed time (s)',\n",
|
||||
")\n",
|
||||
"fig.show()\n",
|
||||
"\n",
|
||||
"# Plot the cumulative distribution of the elapsed time until first peer connection\n",
|
||||
"# Color the lines based on the number of concurrent sessions and add a legend\n",
|
||||
"fig = px.ecdf(df, x='Elapsed', color='ConcurrentSessions')\n",
|
||||
"fig.update_layout(\n",
|
||||
" title='Cumulative distribution of elapsed time until first peer connection',\n",
|
||||
" xaxis_title='Elapsed time (s)',\n",
|
||||
" yaxis_title='Cumulative probability',\n",
|
||||
" legend_title='Had concurrent sessions',\n",
|
||||
")\n",
|
||||
"fig.show()\n",
|
||||
"\n",
|
||||
"# Plot the histogram of the number of concurrent sessions\n",
|
||||
"fig = px.histogram(df, x='ConcurrentSessions', histnorm='percent')\n",
|
||||
"fig.update_layout(\n",
|
||||
" title='Number of concurrent sessions',\n",
|
||||
" xaxis_title='Had concurrent sessions',\n",
|
||||
" yaxis_title='Percentage',\n",
|
||||
")\n",
|
||||
"fig.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Plot the histogram of the elapsed time until first peer connection using seaborn\n",
|
||||
"plt.figure(figsize=(10, 6))\n",
|
||||
"sns.histplot(df, x='Elapsed', hue='ConcurrentSessions', multiple='stack', bins=100)\n",
|
||||
"plt.title('Elapsed time until first peer connection')\n",
|
||||
"plt.xlabel('Elapsed time (s)')\n",
|
||||
"plt.ylabel('Count')\n",
|
||||
"plt.legend(title='Had concurrent sessions', labels=['True', 'False'])\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"# Plot the line chart of the elapsed time until first peer connection using seaborn\n",
|
||||
"plt.figure(figsize=(10, 6))\n",
|
||||
"sns.lineplot(data=df, x=df.index, y='Elapsed', marker='o')\n",
|
||||
"plt.title('Elapsed time until first peer connection')\n",
|
||||
"plt.xlabel('Session index')\n",
|
||||
"plt.ylabel('Elapsed time (s)')\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"# Plot the cumulative distribution of the elapsed time until first peer connection using seaborn\n",
|
||||
"plt.figure(figsize=(10, 6))\n",
|
||||
"sns.ecdfplot(df, x='Elapsed', hue='ConcurrentSessions')\n",
|
||||
"plt.title('Cumulative distribution of elapsed time until first peer connection')\n",
|
||||
"plt.xlabel('Elapsed time (s)')\n",
|
||||
"plt.ylabel('Cumulative probability')\n",
|
||||
"plt.legend(title='Had concurrent sessions', labels=['True', 'False'])\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"# Plot the histogram of the number of concurrent sessions using seaborn\n",
|
||||
"plt.figure(figsize=(10, 6))\n",
|
||||
"sns.histplot(df, x='ConcurrentSessions', stat='percent', discrete=True)\n",
|
||||
"plt.title('Number of concurrent sessions')\n",
|
||||
"plt.xlabel('Had concurrent sessions')\n",
|
||||
"plt.ylabel('Percentage')\n",
|
||||
"plt.legend(title='Had concurrent sessions', labels=['True', 'False'])\n",
|
||||
"plt.show()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
9
peertube/datavis/requirements.txt
Normal file
9
peertube/datavis/requirements.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
pymongo
|
||||
pandas
|
||||
matplotlib
|
||||
seaborn
|
||||
numpy
|
||||
scipy
|
||||
plotly
|
||||
nbformat
|
||||
ipykernel
|
||||
Reference in New Issue
Block a user