use reqwest;
use std::convert::TryInto;
use reqwest::Error;
use tokio;
extern crate regex;
use regex::{Regex, Captures};
/// The sixteen uppercase hexadecimal digit characters, ordered by value
/// (`'0'` first, `'F'` last).
const VALID_CAR: [char; 16] = [
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
];
#[tokio::main]
async fn main() -> Result<(), Error> {
let mut body = reqwest::get("https://www.google.com/search?q=ヒカキン&num=30")
.await?
.text_with_charset("utf-8")
.await?;
let re = Regex::new(r"%[0-9A-Z]{2,2}%[0-9A-Z]{2,2}%[0-9A-Z]{2,2}").unwrap();
let result = re.replace_all(&body,
|caps: &Captures| {
let mut s: Vec<&str> = (&caps[0]).split('%').collect();
let mut s = String::from_utf8([
hex_str_to_bin(&s[1]),
hex_str_to_bin(&s[2]),
hex_str_to_bin(&s[3]),
].to_vec());
let res = match s {
Ok(o) => o,
Err(_) => String::from("x_x"),
};
res
});
println!("{}", result);
Ok(())
}
/// Converts one hexadecimal digit character into its 4-bit value.
///
/// Accepts `0`-`9`, `A`-`F` and `a`-`f`. Any character that is not a
/// hexadecimal digit yields `0b1111`, so callers that need to tell a real
/// `F` apart from invalid input must validate the character themselves.
fn hex_to_bin(c: &char) -> u8 {
    // `to_digit(16)` only ever returns 0..=15, so the narrowing cast is lossless.
    c.to_digit(16).map_or(0b1111, |digit| digit as u8)
}
fn hex_str_to_bin(s: &str) -> u8{
let mut result = 0;
for (i, c) in s.chars().enumerate() {
let keta = s.len() - i;
result += hex_to_bin(&c) * pow(16, ((keta - 1)).try_into().unwrap());
}
result
}
/// Raises `a` to the power `b` by repeated multiplication.
///
/// `pow(a, 0)` is `1` for every `a`. Ordinary `u8` multiplication is used, so
/// a result above 255 overflows.
fn pow(a: u8, b: u8) -> u8 {
    let mut product = 1;
    for _ in 0..b {
        product = a * product;
    }
    product
}
// Rustでスクレイピングしようとしたが、文字コードで詰まっている。
// 途中経過だけでも載せておくなり。