python 翻译模块 翻译API使用(百度、有道、谷歌) 您所在的位置:网站首页 网页翻译接口 python 翻译模块 翻译API使用(百度、有道、谷歌)

python 翻译模块 翻译API使用(百度、有道、谷歌)

2023-10-04 12:24| 来源: 网络整理| 查看: 265

1、翻译模块、api使用分析

1、translate库:使用简单,但有调用次数限制,翻译准确性中等; 2、百度api(推荐使用):代码简单,有现成模块,但需要先注册并获取key值,翻译准确性中下; 3、chrome(谷歌)翻译api:代码复杂,有调用次数限制,但翻译准确性较高; 4、有道翻译:代码复杂,有调用次数限制,翻译准确性中等。

# 2. Usage -- translate library:
from translate import Translator

# Works between any two languages; this example goes from Chinese to English.
source_text = "床前明月光,疑是地上霜;举头望明月,低头思故乡"
translator = Translator(from_lang="chinese", to_lang="english")
print(translator.translate(source_text))

2、 百度api: 需要先到百度翻译开放平台注册,获取appid和secretKey,并记得在平台上填写允许访问的服务器IP。

# coding=utf-8
# Baidu general-purpose translation API. Dictionary, TTS speech synthesis and
# similar resources are not included; for those, contact [email protected]
import http.client
import hashlib
import urllib
import urllib.parse
import random
import json


def trans_lang(q, from_lang='en', to_lang='zh', timeout=10):
    """Translate ``q`` through the Baidu general translation HTTP API.

    The appid and secret key must be obtained by registering an account on
    the Baidu translation open platform and are filled in below.

    Args:
        q: Text to translate.
        from_lang: Source language code sent as ``from`` (default ``'en'``).
        to_lang: Target language code sent as ``to`` (default ``'zh'``).
        timeout: Socket timeout in seconds for the HTTP connection.

    Returns:
        The translated text, with one line per segment returned by the API.
        On any failure (network error, malformed reply, API error payload)
        the problem is printed and ``q`` is returned unchanged.
    """
    # Nothing to translate: skip the network round trip entirely.
    if not q or not q.strip():
        return q

    trans_result = q
    appid = 'xxx'  # fill in your appid
    secretKey = 'xxx'  # fill in your secret key
    httpClient = None
    salt = random.randint(32768, 65536)

    # The signature is the MD5 of appid + q + salt + secretKey, computed over
    # the raw (not URL-encoded) text.
    sign = hashlib.md5((appid + q + str(salt) + secretKey).encode()).hexdigest()

    # Let urlencode escape every parameter instead of concatenating by hand.
    query = urllib.parse.urlencode(
        {
            'appid': appid,
            'q': q,
            'from': from_lang,
            'to': to_lang,
            'salt': str(salt),
            'sign': sign,
        },
        quote_via=urllib.parse.quote,
    )
    myurl = '/api/trans/vip/translate?' + query  # general translation endpoint

    try:
        httpClient = http.client.HTTPConnection('api.fanyi.baidu.com',
                                                timeout=timeout)
        httpClient.request('GET', myurl)
        # response is an HTTPResponse object
        response = httpClient.getresponse()
        result = json.loads(response.read().decode("utf-8"))
        if isinstance(result, dict) and 'error_code' in result:
            # The API reports failures in the body rather than by HTTP status.
            print('Baidu translate error {}: {}'.format(
                result.get('error_code'), result.get('error_msg')))
        else:
            # Return every segment in the reply, not only the first one.
            segments = [item['dst'] for item in result['trans_result']]
            if segments:
                trans_result = '\n'.join(segments)
    except (OSError, http.client.HTTPException, ValueError,
            KeyError, IndexError, TypeError) as e:
        print(e)
    finally:
        if httpClient:
            httpClient.close()
    return trans_result


if __name__ == '__main__':
    a = 'Customer Not Available & Mobile not reachable Customer Not Available & Mobile not reachable by SR: ANIL KUMAR (170435) (117510), MobileNo: 9996366909'
    print(trans_lang(a))

3、chrome翻译api:

import requests
import re
import json
import time


class GoogleTranslator():
    """Client for the web translation endpoint on translate.google.cn.

    The home page is fetched to obtain session cookies and the ``tkk`` seed;
    each request is then signed with a ``tk`` token computed from the text
    and that seed.
    """

    _host = 'translate.google.cn'
    _headers = {
        'Host': _host,
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Mobile Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
        'Referer': 'https://' + _host,
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0'
    }
    # Language name -> language code accepted in the 'sl' / 'tl' parameters.
    _language = {
        'afrikaans': 'af',
        'arabic': 'ar',
        'belarusian': 'be',
        'bulgarian': 'bg',
        'catalan': 'ca',
        'czech': 'cs',
        'welsh': 'cy',
        'danish': 'da',
        'german': 'de',
        'greek': 'el',
        'english': 'en',
        'esperanto': 'eo',
        'spanish': 'es',
        'estonian': 'et',
        'persian': 'fa',
        'finnish': 'fi',
        'french': 'fr',
        'irish': 'ga',
        'galician': 'gl',
        'hindi': 'hi',
        'croatian': 'hr',
        'hungarian': 'hu',
        'indonesian': 'id',
        'icelandic': 'is',
        'italian': 'it',
        'hebrew': 'iw',
        'japanese': 'ja',
        'korean': 'ko',
        'latin': 'la',
        'lithuanian': 'lt',
        'latvian': 'lv',
        'macedonian': 'mk',
        'malay': 'ms',
        'maltese': 'mt',
        'dutch': 'nl',
        'norwegian': 'no',
        'polish': 'pl',
        'portuguese': 'pt',
        'romanian': 'ro',
        'russian': 'ru',
        'slovak': 'sk',
        'slovenian': 'sl',
        'albanian': 'sq',
        'serbian': 'sr',
        'swedish': 'sv',
        'swahili': 'sw',
        'thai': 'th',
        'filipino': 'tl',
        'turkish': 'tr',
        'ukrainian': 'uk',
        'vietnamese': 'vi',
        'yiddish': 'yi',
        'chinese_simplified': 'zh-CN',
        'chinese_traditional': 'zh-TW',
        'auto': 'auto'
    }
    _url = 'https://' + _host + '/translate_a/single'
    _params = {
        'client': 'webapp',
        'sl': 'en',
        'tl': 'zh-CN',
        'hl': 'zh-CN',
        # The dict literal used to repeat the 'dt' key ten times; a dict keeps
        # only the last value, so 't' is what has always been sent.
        'dt': 't',
        'otf': '1',
        'ssel': '0',
        'tsel': '0',
        'kc': '1'
    }
    __cookies = None
    __googleTokenKey = '376032.257956'
    __googleTokenKeyUpdataTime = 600.0
    __googleTokenKeyRetireTime = time.time() + 600.0

    def __init__(self, src = 'en', dest = 'zh-CN', tkkUpdataTime = 600.0):
        """Create a translator and fetch an initial token key.

        Args:
            src: Source language, as a name or code from ``_language``;
                anything unknown falls back to ``'auto'``.
            dest: Target language, validated the same way as ``src``.
            tkkUpdataTime: Seconds a fetched token key stays valid before
                ``translate`` refreshes it.
        """
        # Accept language names as well as codes, but always send the code.
        src = self._language.get(src, src)
        dest = self._language.get(dest, dest)
        if src not in self._language.values():
            src = 'auto'
        if dest not in self._language.values():
            dest = 'auto'
        # Work on a private copy so instances do not overwrite each other's
        # languages and tokens through the shared class-level dict.
        self._params = dict(self._params)
        self._params['sl'] = src
        self._params['tl'] = dest
        # Public attribute kept for backward compatibility; the private one is
        # what the refresh logic reads.
        self.googleTokenKeyUpdataTime = tkkUpdataTime
        self.__googleTokenKeyUpdataTime = tkkUpdataTime
        self.__updateGoogleTokenKey()

    def __updateGoogleTokenKey(self):
        """Refresh the token key and push its expiry time forward."""
        key = self.__getGoogleTokenKey()
        if key:
            # Keep the previous key when the fetch failed; an empty key would
            # make the token computation raise.
            self.__googleTokenKey = key
        self.__googleTokenKeyRetireTime = time.time() + self.__googleTokenKeyUpdataTime

    def __getGoogleTokenKey(self):
        """Get the Google TKK from https://translate.google.cn

        Returns the TKK string, or ``''`` when the page could not be fetched
        or contains no TKK.
        """
        # TKK example: '435075.3634891900'
        result = ''
        try:
            res = requests.get('https://' + self._host, timeout = 3)
            res.raise_for_status()
            self.__cookies = res.cookies
            match = re.search(r'tkk\:\'(\d+\.\d+)?\'', res.text)
            if match and match.group(1):
                result = match.group(1)
        except requests.exceptions.RequestException as ex:
            print('ERROR: ' + str(ex))
            time.sleep(1)
        return result

    def __getGoogleToken(self, a, TKK):
        """Calculate Google tk from TKK

        Args:
            a: The text to be translated.
            TKK: Token key of the form ``'<int>.<int>'``.

        Returns:
            The ``tk`` request parameter as a string ``'<int>.<int>'``.
        """
        # https://www.cnblogs.com/chicsky/p/7443830.html
        # if text = 'Tablet Developer' and TKK = '435102.3120524463', then tk = '315066.159012'

        def RL(a, b):
            # b is a sequence of three-character ops: combine ('+' add and
            # mask to 32 bits, otherwise xor), shift direction ('+' right,
            # otherwise left) and shift amount (digit, or letter with a = 10).
            for d in range(0, len(b)-2, 3):
                c = b[d + 2]
                c = ord(c[0]) - 87 if 'a' <= c else int(c)
                c = a >> c if '+' == b[d + 1] else a << c
                a = a + c & 4294967295 if '+' == b[d] else a ^ c
            return a

        # Encode the text as UTF-8 byte values, combining surrogate pairs.
        g = []
        f = 0
        while f < len(a):
            c = ord(a[f])
            if 128 > c:
                g.append(c)
            else:
                if 2048 > c:
                    g.append((c >> 6) | 192)
                else:
                    if (55296 == (c & 64512)) and (f + 1 < len(a)) and (56320 == (ord(a[f + 1]) & 64512)):
                        f += 1
                        c = 65536 + ((c & 1023) << 10) + (ord(a[f]) & 1023)
                        g.append((c >> 18) | 240)
                        g.append((c >> 12) & 63 | 128)
                    else:
                        g.append((c >> 12) | 224)
                    g.append((c >> 6) & 63 | 128)
                g.append((c & 63) | 128)
            f += 1

        e = TKK.split('.')
        h = int(e[0]) or 0
        t = h
        for item in g:
            t += item
            t = RL(t, '+-a^+6')
        t = RL(t, '+-3^+b+-f')
        t ^= int(e[1]) or 0
        if 0 > t:
            t = (t & 2147483647) + 2147483648
        result = t % 1000000
        return str(result) + '.' + str(result ^ h)

    def translate(self, text):
        """Translate ``text`` and return the result.

        Returns the concatenated translated segments, or ``''`` when the
        request or the parsing of its reply fails (the error is printed).
        """
        if time.time() > self.__googleTokenKeyRetireTime:
            self.__updateGoogleTokenKey()
        data = {'q': text}
        self._params['tk'] = self.__getGoogleToken(text, self.__googleTokenKey)
        try:
            res = requests.post(self._url,
                                headers = self._headers,
                                cookies = self.__cookies,
                                data = data,
                                params = self._params,
                                timeout = 6)
            res.raise_for_status()
            jsonText = res.text
            if not jsonText:
                return ''
            jsonResult = json.loads(jsonText)
            # Element 0 holds the segments; each segment's first field is the
            # translated text. Skip segments without one.
            return ''.join(item[0] for item in (jsonResult[0] or []) if item[0])
        except Exception as ex:
            print('ERROR: ' + str(ex))
            return ''


# Usage snippet; its import expects the class above to live in a module named
# GoogleTranslator.
import time
from GoogleTranslator import GoogleTranslator


def readFile(fileName):
    """Yield the paragraphs of a text file, one string per paragraph.

    Paragraphs are separated by empty lines. The lines of a paragraph are
    concatenated with their newline characters removed and nothing inserted
    between them.
    """
    with open(fileName, 'r') as f:
        parts = []
        for line in f:
            if line[0] != '\n':
                parts.append(line.strip('\n'))
                continue
            paragraph = ''.join(parts)
            if paragraph:
                yield paragraph
                parts = []
        paragraph = ''.join(parts)
        if paragraph:
            yield paragraph

4、有道翻译api:

from urllib import request, parse
import json
from faker import Faker


class trans(object):
    """Small client for the Youdao web translation endpoint."""

    def __init__(self):
        self.url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

    def tran(self, text):
        """Translate ``text`` and return the translated string.

        Everything from the first ``"http"`` onwards is cut off, then
        newlines, ``#``, ``RT `` and ``:`` are removed before sending.

        Returns:
            The translation, or ``"Translate failed"`` when the reply cannot
            be parsed or lacks the expected fields. Network errors raised by
            ``urlopen`` propagate to the caller.
        """
        # str.find returns -1 when there is no URL; slicing with -1 would
        # silently drop the last character, so only cut on a real match.
        index = text.find("http")
        if index != -1:
            text = text[:index]
        text = text.replace('\n', '').replace('#', '').replace('RT ', '').replace(':', '')
        ua = Faker().user_agent()  # random User-Agent for each request
        headers = {
            'User-Agent': ua,
            'Host': 'fanyi.youdao.com',
            'Origin': 'http://fanyi.youdao.com',
            'Referer': 'http://fanyi.youdao.com/',
        }
        # Form data
        from_data = {
            'i': text,
            'from': 'AUTO',
            'to': 'AUTO',
            'smartresult': 'dict',
            'client': 'fanyideskweb',
            'doctype': 'json',
            'version': '2.1',
            'keyfrom': 'fanyi.web',
            'action': 'FY_BY_REALTlME'
        }
        from_data = parse.urlencode(from_data).encode('utf-8')
        req = request.Request(self.url, from_data, headers)
        # Close the response deterministically and never hang forever.
        with request.urlopen(req, timeout=10) as resp:
            res = resp.read().decode("utf-8")
        try:
            target = json.loads(res)
            result = target['translateResult'][0][0]['tgt']
        except (ValueError, KeyError, IndexError, TypeError):
            result = "Translate failed"
        return result


if __name__ == '__main__':
    Obj = trans()
    for i in range(1000):
        print(i)
        text = "Egypt disinfecting streets in Cairo to combat the spread of Coronavirus.\n#COVID19Africa #CoronavirusPandemic\n https://t…"
        res = Obj.tran(text)
        print(res)


【本文地址】

公司简介

联系我们

今日新闻

    推荐新闻

    专题文章
      CopyRight 2018-2019 实验室设备网 版权所有