---
license: apache-2.0
language:
- multilingual
- ar
- bg
- ca
- cs
- da
- de
- el
- en
- es
- et
- fa
- fi
- fr
- gl
- gu
- he
- hi
- hr
- hu
- hy
- id
- it
- ja
- ka
- ko
- ku
- lt
- lv
- mk
- mn
- mr
- ms
- my
- nb
- nl
- pl
- pt
- ro
- ru
- sk
- sl
- sq
- sr
- sv
- th
- tr
- uk
- ur
- vi
---

# paraphrase-multilingual-MiniLM-L12-v2.gguf

GGUF conversion of [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) for generating sentence embeddings with llama.cpp. The snippet below sanity-checks the conversion by comparing its embedding of a test sentence against the original model's (requires `llama-cpp-python`, `sentence-transformers`, `torch`, and `scipy`):
```python
import torch
from llama_cpp import Llama
from scipy.spatial.distance import cosine
from sentence_transformers import SentenceTransformer

# Reference model, loaded in float16 to match the F16 GGUF weights
model = SentenceTransformer(
    "paraphrase-multilingual-MiniLM-L12-v2",
    model_kwargs={"torch_dtype": torch.float16},
)

# GGUF conversion of the same model, served by llama.cpp in embedding mode
llm = Llama.from_pretrained(
    "mykor/paraphrase-multilingual-MiniLM-L12-v2.gguf",
    filename="paraphrase-multilingual-MiniLM-L12-118M-v2-F16.gguf",
    embedding=True,
    verbose=False,
)

# Korean test sentence (song-style lyrics), roughly: "Gazing at the Seoul
# moon's shoulder, as today fades away again, if the night quietly embraces
# me, I can forget my crumbling self"
text = "서울 달님 어깨를 바라서 다시 저물어가는 오늘은 또 밤이 조용히 나를 안으면 무너져가는 날 잊어버릴 수 있어"

embed1 = model.encode(text)
embed2 = llm.embed(text)

# scipy's `cosine` is the cosine *distance* (1 - similarity), so a value
# near zero means the GGUF conversion reproduces the original embedding
print(cosine(embed1, embed2))
# Example output: 0.005355771841081269
```
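
Beyond the parity check, the same embeddings can be used directly for cross-lingual similarity. A minimal sketch under assumed inputs; the sentences and the expected ranking below are illustrative, not from this card:

```python
import numpy as np
from llama_cpp import Llama

llm = Llama.from_pretrained(
    "mykor/paraphrase-multilingual-MiniLM-L12-v2.gguf",
    filename="paraphrase-multilingual-MiniLM-L12-118M-v2-F16.gguf",
    embedding=True,
    verbose=False,
)

def cosine_similarity(a, b):
    # Cosine *similarity* (not distance): 1.0 means identical direction
    a, b = np.asarray(a), np.asarray(b)
    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))

# Hypothetical examples: a cross-lingual paraphrase plus an unrelated sentence
query = "The weather is really nice today."
candidates = [
    "오늘은 날씨가 정말 좋다.",        # Korean paraphrase of the query
    "Das Buch liegt auf dem Tisch.",  # German, unrelated ("The book is on the table")
]

q = llm.embed(query)
for sentence in candidates:
    print(f"{cosine_similarity(q, llm.embed(sentence)):.3f}  {sentence}")
# The paraphrase should score clearly higher than the unrelated sentence.
```

Since the base model was trained for paraphrase detection, ranking candidates by cosine similarity is its intended use; any absolute threshold would need to be tuned per task.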