好记忆不如烂笔头,不如趁热记录下,给未来的自己
前言
大模型年代的到来,模模糊糊感觉到:大模型的呈现,提高了AI算法研讨的门槛,却降低了工程开发运用AI的难度。传统工程技术开发者的春天来了。
大模型社区,百家争鸣,争相变着法地降低大模型的运用难度,恨不得只需一句代码,就能跑一个大模型的推理服务。 最为代表性的 AI 社区是 huggingface (应该没有之一?),俨然已经成为全球开源大模型的“集散地”:
上面有很多好玩、有用、“国外一开源,国内遥遥领先” 的大模型,都可以拿回来做一些有意思的工作。
今天,我们使用 HF 上由 facebook 开源的翻译大模型,来搭建和部署一个简单的翻译 APP(基于 openxlab-apps 和 gradio)。
实现
模型获取
方法1 – 直接从 HF 上 git clone
git clone https://huggingface.co/facebook/m2m100_1.2B
注意:
- 需要先安装 git lfs,安装方法参考点我。
- 国内访问 HF 和下载模型比较慢/不稳定
方法2 – 从 OpenXLab-Models 下载
OpenXLab-Models 是国内的大模型HUB渠道,用户可以自己上传/下载模型,因为部署在国内,所以下载会很稳定,速度也很快。
下载方法有两种,一种直接通过url在浏览器或者wget等方法下载;另一种方法运用 openxlab 提供的 python sdk (装置 pip install openxlab
) 在代码里在线下载模型(这将会是本文将要运用的方法)
模型推理
直接使用 HF 的 transformers 包,即可完成模型加载和推理:
# Minimal M2M100 usage example: load the 1.2B multilingual translation model
# once, then translate by setting the tokenizer's source language and forcing
# the target language id as the first generated token.
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

hi_text = "जीवन एक चॉकलेट बॉक्स की तरह है।"
chinese_text = "日子就像一盒巧克力。"

# Downloads the weights from the Hugging Face Hub on first call.
model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")

# translate Hindi to French
tokenizer.src_lang = "hi"
encoded_hi = tokenizer(hi_text, return_tensors="pt")
# forced_bos_token_id selects the target language for generation.
generated_tokens = model.generate(**encoded_hi, forced_bos_token_id=tokenizer.get_lang_id("fr"))
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
# => "La vie est comme une boîte de chocolat."

# translate Chinese to English
tokenizer.src_lang = "zh"
encoded_zh = tokenizer(chinese_text, return_tensors="pt")
generated_tokens = model.generate(**encoded_zh, forced_bos_token_id=tokenizer.get_lang_id("en"))
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
# => "Life is like a box of chocolate."
封装到 gradio APP
源码已 开源 。
import torch
import gradio as gr
from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
from openxlab.model import download
import os
class Language:
    """A (display name, language code) pair for one translatable language."""

    def __init__(self, name, code):
        # `name` is the human-readable label shown in the UI dropdowns;
        # `code` is the ISO 639-1 code understood by the M2M100 tokenizer.
        self.name, self.code = name, code
# Languages offered by the app. The full list (~100 entries) lives in the
# repository; it is abbreviated here for the article.
# Source: https://github.com/fly2tomato/Translate-100_link
lang_id = [
    Language("Afrikaans", "af"),
    Language("Shqip", "sq"),     # Albanian
    Language("አማርኛ", "am"),      # Amharic
    Language("العربية", "ar"),   # Arabic — native name restored (was lost to encoding mangling)
    # 列表太长,缩略... 源码: https://github.com/fly2tomato/Translate-100_link
]

# Default source language. Index 21 is only valid against the full list, so
# fall back to the first entry when running with the abbreviated list above.
d_lang = lang_id[21] if len(lang_id) > 21 else lang_id[0]
def trans_page(input, trg):
    """Translate `input` from the default source language into `trg`.

    Parameters
    ----------
    input : str
        Text to translate.
    trg : str
        Display name of the target language (must match a `Language.name`).

    Returns
    -------
    str
        The translation, or `input` unchanged when the target equals the
        source language or `trg` does not match any known language name.
    """
    src_lang = d_lang.code
    # Resolve the display name to a language code. Default to the source
    # code so an unknown name degrades to a no-op instead of raising
    # UnboundLocalError as the original implementation did.
    trg_lang = src_lang
    for lang in lang_id:
        if lang.name == trg:
            trg_lang = lang.code
            break
    if trg_lang == src_lang:
        return input
    tokenizer.src_lang = src_lang
    with torch.no_grad():
        encoded_input = tokenizer(input, return_tensors="pt").to(device)
        generated_tokens = model.generate(
            **encoded_input, forced_bos_token_id=tokenizer.get_lang_id(trg_lang)
        )
        translated_text = tokenizer.batch_decode(
            generated_tokens, skip_special_tokens=True
        )[0]
    return translated_text
def trans_to(input, src, trg):
    """Translate `input` from language `src` to `trg` (both display names).

    Parameters
    ----------
    input : str
        Text to translate; empty/falsy input returns "".
    src, trg : str
        Display names matching `Language.name` entries in `lang_id`.

    Returns
    -------
    str
        The translation, or `input` unchanged when src == trg or either
        name is unknown.
    """
    print(f"input={input}, src={src}, target={trg}")
    if not input:
        return ""
    # Resolve display names to codes. `next` with a default avoids the
    # original's UnboundLocalError when a name is not in the list, and
    # stops scanning at the first match.
    trg_lang = next((lang.code for lang in lang_id if lang.name == trg), None)
    src_lang = next((lang.code for lang in lang_id if lang.name == src), None)
    if trg_lang is None or src_lang is None or trg_lang == src_lang:
        return input
    tokenizer.src_lang = src_lang
    with torch.no_grad():
        encoded_input = tokenizer(input, return_tensors="pt").to(device)
        generated_tokens = model.generate(
            **encoded_input,
            forced_bos_token_id=tokenizer.get_lang_id(trg_lang),
            # Effectively "no limit" on output length; original value kept.
            max_length=1024000,
        )
        translated_text = tokenizer.batch_decode(
            generated_tokens, skip_special_tokens=True
        )[0]
    return translated_text
def download_models(models_path: str):
    """Download the m2m100 weights from OpenXLab into `models_path`.

    Skips the download when `models_path` already exists — this matches the
    "skip if present" behaviour the call site advertises, which the original
    implementation did not actually provide.
    """
    if os.path.exists(models_path):
        print(f"models already present at {models_path}, skip downloading")
        return
    # download models from openxlab-models by openxlab sdk
    print("start to download models from xlab-models")
    download(model_repo='xj/facebook_m2m100_1.2billion', output=models_path)
    print("end to download models from xlab-models")
# Support inference on both GPU and CPU: use the first CUDA device when
# available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Download the model weights into ./model/12b under the working directory.
# NOTE(review): the original comment claimed "skipped if already present",
# but download_models as written always downloads — confirm intended behaviour.
pwd = os.getcwd()
models_path = pwd+"/model/12b"
download_models(models_path)

# Inference pipeline: load tokenizer and model once at import time and move
# the model to the selected device in eval mode.
tokenizer = M2M100Tokenizer.from_pretrained(models_path)
model = M2M100ForConditionalGeneration.from_pretrained(models_path).to(device)
model.eval()
# Gradio UI: a header, a source column (language dropdown + input textbox),
# a target column (language dropdown + read-only output textbox), and a
# submit button wired to trans_to.
with gr.Blocks(title="百语翻译-运用中心-OpenXLab", theme="soft") as transbot:
    md = gr.HTML("""<h1><center>百语翻译</center></h1><h4><center>根据Facebook开源模型: <a href="https://openxlab.org.cn/models/detail/xj/facebook_100-Translate_1.2billion">m2m100_1.2B</a></center></h4>""")
    with gr.Row():
        with gr.Column():
            # Source-language selector; defaults to English.
            lang_from = gr.Dropdown(show_label=False, choices=[l.name for l in lang_id],value="English")
            message = gr.Textbox(label="原文", placeholder="请输入原文", lines=4)
        with gr.Column():
            # Target-language selector; defaults to Chinese.
            lang_to = gr.Dropdown(show_label=False, choices=[l.name for l in lang_id],value="中文")
            translated = gr.Textbox(label="翻译", lines=4, interactive=False)
        with gr.Column():
            submit = gr.Button(value="翻译", variant="primary")
    # Clicking the button runs trans_to(message, lang_from, lang_to) and
    # writes the result into the output textbox.
    submit.click(trans_to, inputs=[message,lang_from,lang_to], outputs=[translated])
def launch_app():
    """Serve the Gradio app on all interfaces at port 7860."""
    transbot.launch(server_name="0.0.0.0", server_port=7860)

if __name__ == "__main__":
    launch_app()
大概就长这样:
以上。