Bingsu's picture
Update README.md
41d7b14 verified
metadata
license: apache-2.0
language:
  - multilingual
  - ar
  - bg
  - ca
  - cs
  - da
  - de
  - el
  - en
  - es
  - et
  - fa
  - fi
  - fr
  - gl
  - gu
  - he
  - hi
  - hr
  - hu
  - hy
  - id
  - it
  - ja
  - ka
  - ko
  - ku
  - lt
  - lv
  - mk
  - mn
  - mr
  - ms
  - my
  - nb
  - nl
  - pl
  - pt
  - ro
  - ru
  - sk
  - sl
  - sq
  - sr
  - sv
  - th
  - tr
  - uk
  - ur
  - vi

paraphrase-multilingual-MiniLM-L12-v2.gguf

import torch
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine

# Sanity check: embed the same sentence with the original sentence-transformers
# checkpoint and with the GGUF conversion, then compare the two vectors.

# Reference model, loaded with float16 weights to mirror the F16 GGUF file below.
model = SentenceTransformer(
    "paraphrase-multilingual-MiniLM-L12-v2",
    model_kwargs={"torch_dtype": torch.float16}
)

# GGUF conversion fetched from the Hub; embedding=True is required so that
# llm.embed() can be called, verbose=False silences the loader's log output.
llm = Llama.from_pretrained(
    "mykor/paraphrase-multilingual-MiniLM-L12-v2.gguf",
    filename="paraphrase-multilingual-MiniLM-L12-118M-v2-F16.gguf",
    embedding=True,
    verbose=False,
)

# Korean sample sentence (restored from a mis-decoded UTF-8 rendering; keep this
# file saved as UTF-8 so the literal survives round-trips).
text = "움츠러든 어깨를 따라서 다시 저물어가는 오늘의 끝 밤이 조용히 나를 안으면 무너져가는 날 잊어버릴 수 있어"
embed1 = model.encode(text)
embed2 = llm.embed(text)

# scipy's `cosine` is a distance (1 - cosine similarity): a value close to 0
# means the two implementations produce nearly the same embedding.
print(cosine(embed1, embed2))
0.005355771841081269