1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
use log::debug;
use log_derive::logfn;
use serde::{Deserialize, Serialize};
use std::cmp::min;
/// A single crate entry as it appears in a crates.io crate-listing response.
///
/// Only the crate identifier is kept; any other keys in the JSON object are
/// not mapped to fields.
#[derive(Debug, Deserialize, Serialize)]
struct Crate {
/// The crate's name, read from (and written to) the JSON key `id`.
#[serde(rename = "id")]
name: String,
}
/// One page of a crate-listing response: the JSON object whose `crates` key
/// holds an array of crate entries.
#[derive(Debug, Deserialize)]
struct CratesList {
/// The crate entries contained in this page, in the order they were returned.
crates: Vec<Crate>,
}
/// Performs a blocking HTTP GET request against `url`.
///
/// A fresh client carrying this tool's user-agent string is built for every
/// call. Any error from building the client or sending the request is
/// returned to the caller; the response status is *not* inspected here.
fn get(url: &str) -> reqwest::Result<reqwest::blocking::Response> {
    let builder =
        reqwest::blocking::ClientBuilder::new().user_agent("Rust Corpus - Top Crates Scrapper");
    let client = builder.build()?;
    let request = client.get(url);
    request.send()
}
/// Returns the names of the `count` most-downloaded crates listed by crates.io.
///
/// The listing is fetched page by page (`PAGE_SIZE` entries per request,
/// sorted by downloads) and the crate names are collected in the order the
/// API returns them. Fewer than `count` names are returned if the registry
/// runs out of crates first. A `count` of zero performs no request at all.
///
/// # Panics
///
/// Panics if a request cannot be sent, if the server answers with a
/// non-success status, or if the response body is not the expected JSON.
#[logfn(Trace)]
pub fn top_crates_by_download_count(mut count: usize) -> Vec<String> {
    const PAGE_SIZE: usize = 100;
    // Ceiling division: request exactly as many pages as are needed to cover
    // `count` entries. (Written without `count + PAGE_SIZE - 1` so that very
    // large counts cannot overflow.)
    let page_count = count / PAGE_SIZE + usize::from(count % PAGE_SIZE != 0);
    let mut sources = Vec::new();
    debug!("page count: {}", page_count);
    // crates.io pages are 1-based.
    for page in 1..=page_count {
        debug!("page: {} page_size: {} count: {}", page, PAGE_SIZE, count);
        let url = format!(
            "https://crates.io/api/v1/crates?page={}&per_page={}&sort=downloads",
            page, PAGE_SIZE
        );
        let resp = get(&url).expect("Could not fetch top crates");
        assert!(
            resp.status().is_success(),
            "Response status: {}",
            resp.status()
        );
        let page_crates: CratesList = serde_json::from_reader(resp).expect("Invalid JSON");
        if page_crates.crates.is_empty() {
            // The registry has no further crates; later pages would be empty
            // as well, so stop instead of issuing pointless requests.
            break;
        }
        // The last page may hold more entries than are still wanted.
        sources.extend(page_crates.crates.into_iter().take(count).map(|c| c.name));
        count -= min(PAGE_SIZE, count);
    }
    sources
}