在 Linux 下查询单词一直不太方便,现在利用 Python 写一个简单爬虫,到必应(Bing)词典上去查询英文单词。
实现英翻汉。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import requests | |
import sys | |
from bs4 import BeautifulSoup | |
import functools | |
import re | |
from threading import Thread | |
# ANSI escape sequence that switches every text attribute back to default.
RESET_COLOR = "\033[0m"

# ANSI SGR escape sequences, keyed by the color names color_msg() accepts.
# The leading "1;" selects the bold/bright variant of each color.
COLOR_CODES = {
    "blue": "\033[1;34m",  # blue
    "green": "\033[1;32m",  # green
    "yellow": "\033[1;33m",  # yellow
    "red": "\033[1;31m",  # red
    "background_red": "\033[1;41m",  # background red
}


def color_msg(color, *messages):
    """Return *messages* concatenated, each one wrapped in ANSI color codes.

    Every message is individually prefixed with the escape sequence for
    *color* and followed by ``RESET_COLOR``, so the result can be embedded
    in a longer line without leaking the color into the text after it.

    Args:
        color: A key of ``COLOR_CODES`` (e.g. ``"red"``).
        *messages: The pieces to colorize. Non-string values are converted
            with ``str()`` so numbers and other objects can be passed
            directly.

    Returns:
        The colorized string, or ``''`` when no messages are given.

    Raises:
        KeyError: If *color* is not in ``COLOR_CODES`` and at least one
            message was supplied.
    """
    # With nothing to colorize the color is never looked up, so an unknown
    # color name is tolerated in that case.
    if not messages:
        return ''
    prefix = COLOR_CODES[color]
    return ''.join(prefix + str(msg) + RESET_COLOR for msg in messages)
def output(text):
    """Build a decorator that frames whatever the decorated function prints.

    Each call of the decorated function prints *text*, a 50-dash rule, then
    runs the function, then prints a closing 50-dash rule.

    Args:
        text: The header line printed before the first rule.

    Returns:
        A decorator. The wrapped function keeps its metadata (via
        ``functools.wraps``) and its return value is passed through to the
        caller.
    """
    rule = "-" * 50

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            print(text)
            print(rule)
            # Hand the result back so decorating a function does not turn
            # its return value into None.
            result = func(*args, **kwargs)
            print(rule)
            return result
        return wrapper
    return decorator
class Dict:
    """Look a word up on Bing's online dictionary and print what is found.

    Every method is a classmethod or staticmethod, so no instance is needed:
    call ``Dict.parse(word)`` or ``Dict.cn_parse(word)`` directly.
    """

    base_url = 'http://cn.bing.com/dict/search?'
    # Seconds to wait for Bing before giving up instead of hanging forever.
    timeout = 10

    @classmethod
    def search(cls, input_word):
        """Fetch the Bing dictionary result page for *input_word*.

        Args:
            input_word: The word to query; sent as the ``q`` URL parameter.

        Returns:
            The response body as text.

        Raises:
            requests.exceptions.RequestException: On connection problems or
                when no answer arrives within ``cls.timeout`` seconds.
        """
        payload = {'q': input_word}
        response = requests.get(cls.base_url, params=payload, timeout=cls.timeout)
        return response.text

    @staticmethod
    def _first_text(node):
        """Return the text of *node*'s first child, or None if there is none.

        A bare string node is returned as its own text; a first child that is
        itself a tag contributes its flattened text.
        """
        if node is None:
            return None
        if isinstance(node, str):  # NavigableString is a str subclass
            return str(node)
        if not node.contents:
            return None
        first = node.contents[0]
        return str(first) if isinstance(first, str) else first.get_text()

    @classmethod
    def _framed(cls, input_word, body):
        """Run *body* between the header for *input_word* and a closing rule.

        The header is built here, per call, from the word actually being
        looked up. Building it from ``sys.argv[1]`` in a decorator argument
        would evaluate it while the class body is executed, which fails with
        IndexError whenever the module is loaded without a command-line
        argument.
        """
        header = color_msg(
            'green', "[{0}] has following meaning: ".format(input_word))
        output(header)(body)()

    @classmethod
    def parse(cls, input_word, verbose=False):
        """Print pronunciations and part-of-speech/definition pairs.

        Reads ``div.hd_prUS`` and ``div.hd_pr`` for the pronunciations
        (presumably US and UK respectively, going by their class names) and
        pairs each ``span.pos`` with the ``span.def`` at the same position.
        Elements missing from the page are skipped instead of raising.

        Args:
            input_word: The word to look up.
            verbose: Accepted for interface compatibility; currently unused.
        """
        def report():
            # Name the parser explicitly so the result does not depend on
            # which optional parsers happen to be installed.
            soup = BeautifulSoup(cls.search(input_word), 'html.parser')

            pronunciations = [
                text
                for text in (
                    cls._first_text(soup.find('div', css_class))
                    for css_class in ('hd_prUS', 'hd_pr')
                )
                if text is not None
            ]
            for pronunciation in pronunciations:
                print(color_msg('red', pronunciation))
            if pronunciations:
                print('-' * 50)

            shown = False
            positions = soup.find_all('span', 'pos')
            definitions = soup.find_all('span', 'def')
            for pos, definition in zip(positions, definitions):
                inner = definition.find('span')
                pos_text = cls._first_text(pos)
                # Fall back to the definition span itself when it has no
                # nested span.
                def_text = cls._first_text(
                    definition if inner is None else inner)
                if pos_text is None or def_text is None:
                    continue
                print(color_msg('green', pos_text, ':\t', def_text))
                shown = True

            if not (pronunciations or shown):
                print(color_msg('yellow', 'No result found.'))

        cls._framed(input_word, report)

    @classmethod
    def cn_parse(cls, input_word, verbose=False):
        """Print the text of every entry inside each ``span.def`` element.

        For each direct child of a definition span the text of its first
        child is printed, except for blank entries and entries starting with
        ``;`` (the separators between alternatives).

        Args:
            input_word: The word to look up.
            verbose: Accepted for interface compatibility; currently unused.
        """
        def report():
            soup = BeautifulSoup(cls.search(input_word), 'html.parser')
            shown = False
            for definition in soup.find_all('span', 'def'):
                for child in definition.children:
                    # Children may be bare strings or empty tags, neither of
                    # which has a usable ``contents[0]``.
                    text = cls._first_text(child)
                    if text is None or not text.strip():
                        continue
                    if text.startswith(';'):
                        continue
                    print(text)
                    shown = True

            if not shown:
                print(color_msg('yellow', 'No result found.'))

        cls._framed(input_word, report)
def main():
    """Command-line entry point: look up the single word in ``sys.argv``.

    Exactly one argument is expected. With any other argument count the
    usage line is written to stderr and the process exits with a non-zero
    status, so shell callers can detect the misuse.

    Raises:
        SystemExit: When the argument count is wrong.
    """
    if len(sys.argv) != 2:
        # sys.exit() with a string prints it to stderr and exits with 1.
        sys.exit("Usage: {} word-to-query".format(__file__))

    word = sys.argv[1]

    # Words that start with a CJK ideograph go to Dict.parse, everything
    # else to Dict.cn_parse.
    # NOTE(review): the method names suggest the opposite mapping (cn_parse
    # for Chinese input) - confirm against Bing's markup before swapping.
    if re.match(r'[\u4e00-\u9fa5]+', word) is None:
        handler = Dict.cn_parse
    else:
        handler = Dict.parse

    # The worker is a regular (non-daemon) thread, so the interpreter waits
    # for the lookup to finish before exiting.
    worker = Thread(target=handler, args=(word,))
    worker.start()


if __name__ == '__main__':
    main()