You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# import
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from transformers import AutoTokenizer, AutoModel
from silly import no_ssl_verification
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
# Build a Chroma vector store from a Turkish essay and run one similarity
# query against it. Everything runs inside `no_ssl_verification()` (a
# project-local helper), presumably so the embedding model can be downloaded
# where certificate checks fail -- confirm this is acceptable outside a
# trusted network.
with no_ssl_verification():
    # Load the document from disk.
    loader = TextLoader("paul_graham/paul_graham_essay_tr.txt")
    documents = loader.load()

    # Split it into chunks.
    # NOTE(review): 2000-character chunks may exceed the embedding model's
    # maximum sequence length and be truncated -- consider a smaller size.
    text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
    chunks = text_splitter.split_documents(documents)

    # Create the Turkish embedding function. The checkpoint name is passed
    # straight to the sentence-transformers wrapper, so no separate
    # AutoTokenizer/AutoModel objects are needed here.
    # NOTE(review): this is a plain BERT checkpoint rather than a dedicated
    # sentence-embedding model; retrieval quality should be verified.
    embedding_function = SentenceTransformerEmbeddings(
        model_name="dbmdz/bert-base-turkish-cased"
    )

    # Index the chunks in Chroma.
    db = Chroma.from_documents(chunks, embedding_function)

    # Query the store.
    query = "Yazarın üniversiteden önce üzerinde çalıştığı iki ana şey neydi?"
    docs = db.similarity_search(query)

    # Print the best match; an empty result would otherwise raise IndexError.
    if docs:
        print(docs[0].page_content)
    else:
        print("No matching documents found.")
How can I fix my code to do QA retrieval with LangChain using Turkish BERT embeddings? Please help me.
The text was updated successfully, but these errors were encountered:
I have the following code:
How can I fix my code to do QA retrieval with LangChain using Turkish BERT embeddings? Please help me.
The text was updated successfully, but these errors were encountered: