---
license: apache-2.0
language:
- multilingual
- ar
- bg
- ca
- cs
- da
- de
- el
- en
- es
- et
- fa
- fi
- fr
- gl
- gu
- he
- hi
- hr
- hu
- hy
- id
- it
- ja
- ka
- ko
- ku
- lt
- lv
- mk
- mn
- mr
- ms
- my
- nb
- nl
- pl
- pt
- ro
- ru
- sk
- sl
- sq
- sr
- sv
- th
- tr
- uk
- ur
- vi
---

# paraphrase-multilingual-MiniLM-L12-v2.gguf

A GGUF conversion of `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2` for generating multilingual sentence embeddings with llama.cpp / llama-cpp-python. The snippet below embeds the same sentence with the original model and with the F16 GGUF file, then prints the cosine distance between the two embeddings.

```py
import torch
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine

# Original PyTorch model, loaded in float16 to match the F16 GGUF weights.
model = SentenceTransformer(
    "paraphrase-multilingual-MiniLM-L12-v2",
    model_kwargs={"torch_dtype": torch.float16},
)

# GGUF conversion, loaded with llama-cpp-python in embedding mode.
llm = Llama.from_pretrained(
    "mykor/paraphrase-multilingual-MiniLM-L12-v2.gguf",
    filename="paraphrase-multilingual-MiniLM-L12-118M-v2-F16.gguf",
    embedding=True,
    verbose=False,
)

# Embed the same sample sentence with both models.
text = "์์ธ ๋ฌ๋ ์ด๊นจ๋ฅผ ๋ฐ๋ผ์ ๋ค์ ์ ๋ฌผ์ด๊ฐ๋ ์ค๋์ ๋ ๋ฐค์ด ์กฐ์ฉํ ๋๋ฅผ ์์ผ๋ฉด ๋ฌด๋์ ธ๊ฐ๋ ๋ ์์ด๋ฒ๋ฆด ์ ์์ด"
embed1 = model.encode(text)
embed2 = llm.embed(text)

# Cosine distance between the two embeddings (0.0 means identical direction).
print(cosine(embed1, embed2))
```

```sh
0.005355771841081269
```
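
Note that `scipy.spatial.distance.cosine` returns the cosine *distance*, so a value around 0.005 means the F16 GGUF embeddings are nearly identical to the original model's (a cosine similarity of roughly 0.995). In typical downstream use you compare sentences to each other rather than models to each other. The sketch below shows that with only the GGUF model and llama-cpp-python; the `cosine_similarity` helper, the query, and the candidate sentences are illustrative and not part of this model card.

```py
import numpy as np
from llama_cpp import Llama

llm = Llama.from_pretrained(
    "mykor/paraphrase-multilingual-MiniLM-L12-v2.gguf",
    filename="paraphrase-multilingual-MiniLM-L12-118M-v2-F16.gguf",
    embedding=True,
    verbose=False,
)

def cosine_similarity(a, b):
    # Plain cosine similarity: 1.0 means the vectors point in the same direction.
    a, b = np.asarray(a), np.asarray(b)
    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))

# Hypothetical query and candidates; any of the supported languages can be mixed.
query = "How do I reset my password?"
candidates = [
    "Steps to change your account password",
    "Comment réinitialiser mon mot de passe ?",
    "Today's weather forecast for Berlin",
]

query_vec = llm.embed(query)
for sentence in candidates:
    score = cosine_similarity(query_vec, llm.embed(sentence))
    print(f"{score:.3f}  {sentence}")
```

Paraphrases of the query, including the French one, should score noticeably higher than the unrelated sentence.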