You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
93 lines
3.1 KiB
93 lines
3.1 KiB
|
|
use hyper::Body;
|
|
use hyper::Request;
|
|
use hyper::Client;
|
|
use hyper::Uri;
|
|
|
|
use chrono::naive::NaiveDate;
|
|
use chrono::Datelike;
|
|
|
|
use hyper_tls::HttpsConnector;
|
|
|
|
use scraper::Html;
|
|
use scraper::Selector;
|
|
use scraper::ElementRef;
|
|
use scraper::Node;
|
|
|
|
use ego_tree::NodeRef;
|
|
|
|
use tokio::runtime::Runtime;
|
|
|
|
/// A single quiz item scraped from a page.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Question {
    /// Question text; child elements of the source markup are rendered as `<em>…</em>`.
    question: String,
    /// Inner HTML of the element holding the answer.
    answer: String,
}
|
|
|
|
struct Anki {
|
|
uri: Uri,
|
|
deck_id: String,
|
|
}
|
|
|
|
fn main() {
|
|
//ensure datasets are loaded
|
|
// download pages
|
|
// parse questions
|
|
//ensure questions are in anki
|
|
println!("Hello, world!");
|
|
// TODO: cache downloaded pages
|
|
// TODO: have an overall cacheing strategy
|
|
let rt = Runtime::new().unwrap();
|
|
// the window of available data is like today - 15 days or something
|
|
let for_date = NaiveDate::from_ymd_opt(2023, 7, 27).unwrap(); // TODO: replace with today
|
|
let questions = rt.block_on(scrape_questions(for_date));
|
|
println!("Questions: {:?}", questions);
|
|
rt.block_on(upload_to_anki(questions));
|
|
}
|
|
|
|
async fn scrape_questions(date: NaiveDate) -> Option<Vec<Question>> {
|
|
let uri = Uri::builder()
|
|
.scheme("https")
|
|
.authority("mainichikanji.com")
|
|
.path_and_query(format!("/{}gatu{}.html", date.month(), date.day()))
|
|
.build()
|
|
.unwrap();
|
|
let client = Client::builder().build::<_, hyper::Body>(HttpsConnector::new());
|
|
let resp = client.get(uri).await.unwrap();
|
|
println!("Status: {:?}", resp.status());
|
|
let page_contents = hyper::body::to_bytes(resp).await.unwrap();
|
|
// We have page contents, now parse out the bits we care about
|
|
let document = Html::parse_document(std::str::from_utf8(&page_contents).unwrap());
|
|
let question_selector = Selector::parse(".question").unwrap();
|
|
let results = document.select(&question_selector)
|
|
.map(parse_question)
|
|
.map(Option::unwrap)
|
|
.collect::<Vec<_>>();
|
|
Some(results)
|
|
}
|
|
|
|
fn parse_question(element: ElementRef) -> Option<Question> {
|
|
//println!("Element: {}", element.text());
|
|
let question_end_selector = Selector::parse("div.answer-box").unwrap();
|
|
let answer_selector = Selector::parse(".answer").unwrap();
|
|
let question_end = element.select(&question_end_selector).next();
|
|
let question = element.children()
|
|
.take_while(|x| ElementRef::wrap(*x) != question_end)
|
|
.map(|node| match node.value() {
|
|
Node::Text(text) => String::from(text.text.trim()),
|
|
Node::Element(_element) => format!("<em>{}</em>", node.first_child().unwrap().value().as_text().unwrap().text.trim()),
|
|
_ => panic!("fresh peach heart shower"),
|
|
})
|
|
.collect::<Vec<_>>()
|
|
.join(""); //TODO: remove number prefix here
|
|
let answer = element.select(&answer_selector).next().unwrap().inner_html();
|
|
Some(Question{question, answer})
|
|
}
|
|
|
|
// TODO: upload the scraped questions to Anki.
// Unfinished sketch, kept for reference only:
//
// async fn upload_to_anki(client: &Client<hyper::Body>, questions: &dyn Iterator<Item = Question>) {
//     let request = Request::builder()
//         .uri(Uri::from_static("http://localhost:8765"))
//         .body(Body::from(""))
//         .build();
//     let resp = client.request(request).await.unwrap();
// }
|