day 17 @rust reqwestでスクレイピング少しできたx)
use reqwest; use std::convert::TryInto; use reqwest::Error; use tokio; extern crate regex; use regex::{Regex, Captures}; const VALID_CAR: [char; 16] = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F']; #[tokio::main] async fn main() -> Result<(), Error> { let mut body = reqwest::get("https://www.google.com/search?q=ヒカキン&num=30") .await? .text_with_charset("utf-8") .await?; let re = Regex::new(r"%[0-9A-Z]{2,2}%[0-9A-Z]{2,2}%[0-9A-Z]{2,2}").unwrap(); let result = re.replace_all(&body, |caps: &Captures| { // println!("catch {}", &caps[0]); let mut s: Vec<&str> = (&caps[0]).split('%').collect(); let mut s = String::from_utf8([ hex_str_to_bin(&s[1]), hex_str_to_bin(&s[2]), hex_str_to_bin(&s[3]), ].to_vec()); let res = match s { Ok(o) => o, Err(_) => String::from("x_x"), }; res }); let re = Regex::new(r"&#([0-9]+);").unwrap(); let result = re.replace_all(&result, |caps: &Captures| { // println!("catch {}", &caps[0]); let num: u32 = (&caps[1]).parse().unwrap(); let res = std::char::from_u32(num).unwrap(); res.to_string() }); println!("{}", result); Ok(()) } fn hex_to_bin(c: &char) -> u8 { let b = match *c { '0' => 0b0000, '1' => 0b0001, '2' => 0b0010, '3' => 0b0011, '4' => 0b0100, '5' => 0b0101, '6' => 0b0110, '7' => 0b0111, '8' => 0b1000, '9' => 0b1001, 'A' => 0b1010, 'B' => 0b1011, 'C' => 0b1100, 'D' => 0b1101, 'E' => 0b1110, 'F' => 0b1111, _ => 0b1111 }; b } fn hex_str_to_bin(s: &str) -> u8{ let mut result = 0; // println!("string {}", s); for (i, c) in s.chars().enumerate() { let keta = s.len() - i; result += hex_to_bin(&c) * pow(16, ((keta - 1)).try_into().unwrap()); } result } fn pow(a: u8, b: u8) -> u8{ if b == 0 { 1 } else { a * pow(a, b-1) } }
原始人のような事をしている。
簡単にデコードできるライブラリはないのか。
そもそもRubyやPythonなど、その方面で優秀なライブラリがある言語を使った方が良いのか...