mirror of
https://git.alemi.dev/fedicharter.git
synced 2024-11-23 00:44:48 +01:00
feat: basic peers collector functionality
This commit is contained in:
parent
341a0a77aa
commit
8c9c6a90d8
2 changed files with 168 additions and 41 deletions
141
src/main.rs
141
src/main.rs
|
@ -1,21 +1,50 @@
|
||||||
use std::net::SocketAddr;
|
use std::{net::SocketAddr, collections::HashSet};
|
||||||
|
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
|
||||||
use clap::Parser;
|
use clap::{Parser, Subcommand};
|
||||||
|
|
||||||
use axum::{routing::get, extract::Query, Json, Router};
|
use axum::{routing::get, extract::Query, Json, Router};
|
||||||
|
use set::{SetHandle, InstanceRelation, create_set_collector};
|
||||||
|
|
||||||
use crate::{model::{MapResult, MapHandle, create_map_collector}, cache::CACHE};
|
use crate::{model::{MapResult, MapHandle, create_map_collector}, cache::CACHE};
|
||||||
|
|
||||||
mod model;
|
mod model;
|
||||||
|
mod set;
|
||||||
mod cache;
|
mod cache;
|
||||||
|
mod nodeinfo;
|
||||||
|
|
||||||
|
mod entities;
|
||||||
|
|
||||||
#[derive(Debug, Parser)]
|
#[derive(Debug, Parser)]
|
||||||
/// an API crawling akkoma bubble instances network and creating a map
|
/// an API crawling akkoma bubble instances network and creating a map
|
||||||
struct CliArgs {
|
struct CliArgs {
|
||||||
/// start the server listening on this host
|
#[clap(subcommand)]
|
||||||
host: Option<String>,
|
/// action to perform
|
||||||
|
action: CliAction,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Subcommand)]
|
||||||
|
/// available actions which the process can do
|
||||||
|
enum CliAction {
|
||||||
|
/// Serve an API providing routes for all actions
|
||||||
|
Serve {
|
||||||
|
/// start the server listening on this host
|
||||||
|
host: Option<String>,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// Crawl local bubble domains and construct local bubble map
|
||||||
|
Bubble { },
|
||||||
|
|
||||||
|
/// Crawl known peers and build network set and unknowns map
|
||||||
|
Peers {
|
||||||
|
/// starting domain
|
||||||
|
domain: String,
|
||||||
|
|
||||||
|
/// maximum recursion depth, leave 0 for unbounded crawling
|
||||||
|
#[arg(long, short, default_value_t = 10)]
|
||||||
|
maxdepth: usize,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
|
@ -24,19 +53,37 @@ async fn main() {
|
||||||
|
|
||||||
let args = CliArgs::parse();
|
let args = CliArgs::parse();
|
||||||
|
|
||||||
let app = Router::new()
|
match args.action {
|
||||||
.route("/crawl", get(route_crawl_domain));
|
CliAction::Serve { host } => {
|
||||||
|
let app = Router::new()
|
||||||
|
.route("/crawl", get(route_crawl_domain));
|
||||||
|
|
||||||
let addr = match args.host {
|
let addr = match host {
|
||||||
Some(host) => host.parse().expect("could not parse provided host"),
|
Some(host) => host.parse().expect("could not parse provided host"),
|
||||||
None => SocketAddr::from(([127, 0, 0, 1], 18811)),
|
None => SocketAddr::from(([127, 0, 0, 1], 18811)),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
tracing::debug!("listening on {}", addr);
|
||||||
|
axum::Server::bind(&addr)
|
||||||
|
.serve(app.into_make_service())
|
||||||
|
.await
|
||||||
|
.expect("could not serve axum app");
|
||||||
|
},
|
||||||
|
|
||||||
|
CliAction::Peers { domain, maxdepth } => {
|
||||||
|
let (collector, handle) = create_set_collector();
|
||||||
|
peers_crawl_instance(&domain, handle, 0, maxdepth).await;
|
||||||
|
let results = collector.collect().await;
|
||||||
|
tracing::info!("discovered {} instances", results.whole.len());
|
||||||
|
// for instance in &results.instances {
|
||||||
|
// tracing::info!("instance {} doesn't know {} other instances", instance.domain, instance.relations.len());
|
||||||
|
// }
|
||||||
|
// println!("{}", serde_json::to_string(&results).expect("could not serialize set result"));
|
||||||
|
}
|
||||||
|
|
||||||
|
_ => {},
|
||||||
|
}
|
||||||
|
|
||||||
tracing::debug!("listening on {}", addr);
|
|
||||||
axum::Server::bind(&addr)
|
|
||||||
.serve(app.into_make_service())
|
|
||||||
.await
|
|
||||||
.expect("could not serve axum app");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
|
@ -47,44 +94,56 @@ struct Params {
|
||||||
async fn route_crawl_domain(Query(params): Query<Params>) -> Json<MapResult> {
|
async fn route_crawl_domain(Query(params): Query<Params>) -> Json<MapResult> {
|
||||||
tracing::info!("starting new crawl from {}", params.domain);
|
tracing::info!("starting new crawl from {}", params.domain);
|
||||||
let (collector, handle) = create_map_collector();
|
let (collector, handle) = create_map_collector();
|
||||||
scan_instance(¶ms.domain, handle).await;
|
bubble_crawl_instance(¶ms.domain, handle).await;
|
||||||
axum::Json(collector.collect().await)
|
axum::Json(collector.collect().await)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[async_recursion::async_recursion]
|
#[async_recursion::async_recursion]
|
||||||
async fn scan_instance(domain: &str, map: MapHandle) {
|
async fn peers_crawl_instance(domain: &str, set: SetHandle, depth: usize, maxdepth: usize) {
|
||||||
|
if depth >= maxdepth { return };
|
||||||
|
if set.already_scanned(domain).await { return };
|
||||||
|
|
||||||
|
match reqwest::get(format!("https://{}/api/v1/instance/peers", domain)).await {
|
||||||
|
Err(e) => return tracing::error!("could not fetch peer list for {} : {}", domain, e),
|
||||||
|
Ok(x) => {
|
||||||
|
let peers : Vec<String> = x.json().await.unwrap_or(vec![]);
|
||||||
|
set.add_instance(InstanceRelation {
|
||||||
|
domain: domain.to_string(),
|
||||||
|
// relations: HashSet::from_iter(peers.iter().cloned()),
|
||||||
|
relations: HashSet::new(),
|
||||||
|
});
|
||||||
|
tracing::info!("{} found: {} peers", domain, peers.len());
|
||||||
|
for peer in peers {
|
||||||
|
let _set = set.clone();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
peers_crawl_instance(&peer, _set, depth + 1, maxdepth).await;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_recursion::async_recursion]
|
||||||
|
async fn bubble_crawl_instance(domain: &str, map: MapHandle) {
|
||||||
if map.already_scanned(domain).await { return };
|
if map.already_scanned(domain).await { return };
|
||||||
|
|
||||||
tracing::debug!("scanning instance {}", domain);
|
tracing::debug!("scanning instance {}", domain);
|
||||||
let response = match CACHE.instance_metadata(domain).await {
|
let info = match CACHE.instance_metadata(domain).await {
|
||||||
Ok(Some(r)) => r,
|
Ok(r) => r,
|
||||||
Ok(None) => {
|
Err(e) => return tracing::warn!("could not fetch metadata for {}: {}", domain, e),
|
||||||
tracing::info!("instance {} doesn't provide nodeinfo api", domain);
|
|
||||||
return map.add_node(domain.to_string(), domain.to_string());
|
|
||||||
},
|
|
||||||
Err(e) => {
|
|
||||||
return tracing::warn!("could not fetch metadata for {}: {}", domain, e);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let metadata = match response.get("metadata") {
|
let node_name = info.metadata
|
||||||
Some(m) => m,
|
.get("nodeName")
|
||||||
None => {
|
.map(|x| x.as_str().expect("nodeName is not a string"))
|
||||||
tracing::info!("instance {} doesn't provide metadata", domain);
|
.unwrap_or(domain);
|
||||||
return map.add_node(domain.to_string(), domain.to_string());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let node_name = match metadata.get("nodeName") {
|
|
||||||
Some(v) => v.as_str().unwrap_or("").to_string(),
|
|
||||||
None => domain.to_string(),
|
|
||||||
};
|
|
||||||
|
|
||||||
tracing::info!("adding instance {} ({})", node_name, domain);
|
tracing::info!("adding instance {} ({})", node_name, domain);
|
||||||
|
|
||||||
map.add_node(domain.to_string(), node_name);
|
map.add_node(domain.to_string(), node_name.to_string());
|
||||||
|
|
||||||
let local_bubble = match metadata.get("localBubbleInstances") {
|
let local_bubble = match info.metadata.get("localBubbleInstances") {
|
||||||
None => return tracing::info!("instance {} doesn't provide local bubble data", domain),
|
None => return tracing::info!("instance {} doesn't provide local bubble data", domain),
|
||||||
Some(b) => match b.as_array() {
|
Some(b) => match b.as_array() {
|
||||||
None => return tracing::warn!("instance {} local bubble is not an array", domain),
|
None => return tracing::warn!("instance {} local bubble is not an array", domain),
|
||||||
|
@ -97,6 +156,6 @@ async fn scan_instance(domain: &str, map: MapHandle) {
|
||||||
for bubble_instance in local_bubble.iter().filter_map(|x| x.as_str().map(|x| x.to_string())) {
|
for bubble_instance in local_bubble.iter().filter_map(|x| x.as_str().map(|x| x.to_string())) {
|
||||||
let _map = map.clone();
|
let _map = map.clone();
|
||||||
map.add_vertex(domain.to_string(), bubble_instance.clone());
|
map.add_vertex(domain.to_string(), bubble_instance.clone());
|
||||||
tasks.push(tokio::spawn(async move { scan_instance(&bubble_instance, _map).await; }));
|
tasks.push(tokio::spawn(async move { bubble_crawl_instance(&bubble_instance, _map).await; }));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
68
src/set.rs
Normal file
68
src/set.rs
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
use std::{collections::{HashSet, HashMap}, sync::Arc};
|
||||||
|
|
||||||
|
use serde::Serialize;
|
||||||
|
use tokio::sync::{mpsc, RwLock};
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct SetCollector {
|
||||||
|
known_network: HashSet<String>,
|
||||||
|
instance_rx: mpsc::UnboundedReceiver<InstanceRelation>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn create_set_collector() -> (SetCollector, SetHandle) {
|
||||||
|
let (instance_tx, instance_rx) = mpsc::unbounded_channel();
|
||||||
|
let known_network = HashSet::new();
|
||||||
|
let scanned = Arc::new(RwLock::new(HashSet::new()));
|
||||||
|
(
|
||||||
|
SetCollector { known_network, instance_rx },
|
||||||
|
SetHandle { scanned, instance_tx },
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SetCollector {
|
||||||
|
pub async fn collect(mut self) -> SetResult {
|
||||||
|
let mut in_instances : Vec<InstanceRelation> = Vec::new();
|
||||||
|
let mut out_instances = Vec::new();
|
||||||
|
while let Some(instance) = self.instance_rx.recv().await {
|
||||||
|
self.known_network.insert(instance.domain.clone());
|
||||||
|
// in_instances.push(instance);
|
||||||
|
}
|
||||||
|
for instance in in_instances {
|
||||||
|
out_instances.push(InstanceRelation {
|
||||||
|
domain: instance.domain,
|
||||||
|
relations: self.known_network.difference(&instance.relations).map(|x| x.clone()).collect(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
SetResult { whole: self.known_network, instances: out_instances }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct SetHandle {
|
||||||
|
scanned: Arc<RwLock<HashSet<String>>>,
|
||||||
|
instance_tx: mpsc::UnboundedSender<InstanceRelation>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SetHandle {
|
||||||
|
pub async fn already_scanned(&self, domain: &str) -> bool {
|
||||||
|
if self.scanned.read().await.contains(domain) { return true; }
|
||||||
|
self.scanned.write().await.insert(domain.to_string());
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_instance(&self, instance: InstanceRelation) {
|
||||||
|
self.instance_tx.send(instance).expect("could not send instance to collector")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Clone, Debug)]
|
||||||
|
pub struct InstanceRelation {
|
||||||
|
pub domain: String,
|
||||||
|
pub relations: HashSet<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Clone, Debug)]
|
||||||
|
pub struct SetResult {
|
||||||
|
pub whole: HashSet<String>,
|
||||||
|
pub instances: Vec<InstanceRelation>,
|
||||||
|
}
|
Loading…
Reference in a new issue