import os
import re
import numpy as np
from gensim.models import Word2Vec
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, GRU, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Layer, MultiHeadAttention, LayerNormalization
# --- Word2Vec settings (passed to gensim's Word2Vec by the training script) ---
# os.cpu_count() may return None when the count cannot be determined;
# fall back to a single worker so a valid integer is always passed on.
max_worker = os.cpu_count() or 1
vector_size = 1000
window_size = 500
min_count = 1

# --- Sequence settings ---
# Every tokenised sequence is padded to this length before reaching the model.
context_length = 4096
# Number of entries (seed included) in a generated sentence.
sentence_length = 5

# --- Transformer encoder settings ---
# NOTE(review): apart from `epsilon`, these are not referenced by the code
# visible in this file; presumably meant for TransformerEncoder -- confirm.
embed_dim = 128
num_heads = 80
feed_forward_dim = 512
dropout_rate_transformer = 0.1
# Epsilon handed to the LayerNormalization layers of TransformerEncoder.
epsilon = 1e-6

# --- Recurrent stack settings: one LSTM/GRU layer is created per entry ---
lstm_units = [16000, 16000, 16000, 16000, 8000, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]
gru_units = [2048, 2048, 2048, 2048, 1024, 1024, 512, 512, 512, 512, 512, 512, 512, 512, 256, 256, 256, 256, 128, 64, 32, 16, 8, 4, 2, 1]
dropout_rate_rnn = 0.2
return_sequences = True

# --- Embedding / output layer settings ---
input_dim = 10000
output_dim = 1000
input_length = context_length
# The softmax layer emits one probability per embedding row.
dense_units = input_dim
activation = "softmax"

# --- Compile / fit settings ---
loss = "sparse_categorical_crossentropy"
optimizer = "adam"
metrics = ["accuracy"]
epochs = 60
batch_size = 64
class TransformerEncoder(Layer):
    """Transformer encoder block: self-attention plus a two-layer feed-forward net.

    Each sub-block adds its (dropout-regularised) output back onto its input
    and then applies layer normalisation.

    Args:
        embed_dim: Passed as ``key_dim`` to the attention layer and used as the
            width of the second feed-forward Dense layer.
        num_heads: Number of attention heads.
        feed_forward_dim: Width of the first (ReLU) feed-forward Dense layer.
        dropout_rate: Rate for both dropout layers.
        **kwargs: Standard Keras ``Layer`` arguments such as ``name`` or
            ``dtype``; forwarded to the base class.
    """

    def __init__(self, embed_dim, num_heads, feed_forward_dim, dropout_rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Remember the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.feed_forward_dim = feed_forward_dim
        self.dropout_rate = dropout_rate

        self.attention = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.dropout1 = Dropout(dropout_rate)
        # `epsilon` is the module-level constant defined with the other settings.
        self.norm1 = LayerNormalization(epsilon=epsilon)
        self.dense1 = Dense(feed_forward_dim, activation="relu")
        self.dense2 = Dense(embed_dim)
        self.dropout2 = Dropout(dropout_rate)
        self.norm2 = LayerNormalization(epsilon=epsilon)

    def call(self, inputs, training=None):
        """Apply self-attention and the feed-forward net to ``inputs``.

        Args:
            inputs: Tensor used as both query and value of the attention layer.
            training: Forwarded to the dropout layers.

        Returns:
            The normalised output of the feed-forward residual branch.
        """
        attention_output = self.attention(inputs, inputs)
        attention_output = self.dropout1(attention_output, training=training)
        out1 = self.norm1(inputs + attention_output)
        dense_output = self.dense1(out1)
        dense_output = self.dense2(dense_output)
        dense_output = self.dropout2(dense_output, training=training)
        return self.norm2(out1 + dense_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "feed_forward_dim": self.feed_forward_dim,
            "dropout_rate": self.dropout_rate,
        })
        return config
def train_model(X, y, tokenizer):
    """Build, compile and fit the stacked LSTM/GRU model.

    Args:
        X: Padded integer sequences used as model input.
        y: Integer class label for each row of ``X``.
        tokenizer: Fitted tokenizer; its ``word_index`` size is used to make
            sure the embedding table and the softmax layer cover every token
            id the tokenizer can emit.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    # Token ids run from 1 to len(word_index) (0 is the padding id), so the
    # model needs at least len(word_index) + 1 rows/classes. The configured
    # sizes are kept as a lower bound so small corpora behave as before.
    word_index = getattr(tokenizer, "word_index", None) or {}
    required_size = len(word_index) + 1
    embedding_rows = max(input_dim, required_size)
    output_classes = max(dense_units, required_size)

    # Every recurrent layer except the final GRU emits the full sequence so
    # that the next recurrent layer can consume it.
    recurrent_layers = [
        LSTM(units, return_sequences=True, dropout=dropout_rate_rnn)
        for units in lstm_units
    ]
    recurrent_layers.extend(
        GRU(units, return_sequences=True, dropout=dropout_rate_rnn)
        for units in gru_units[:-1]
    )
    recurrent_layers.append(
        GRU(gru_units[-1], return_sequences=False, dropout=dropout_rate_rnn)
    )

    nn_model = Sequential([
        Embedding(input_dim=embedding_rows, output_dim=output_dim, input_length=input_length),
        *recurrent_layers,
        Dense(output_classes, activation=activation),
    ])
    nn_model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    print("Model eğitiliyor...")
    nn_model.fit(X, y, epochs=epochs, batch_size=batch_size)
    return nn_model
def generate_sentence(model, tokenizer, start_word, sentence_length, temperature=1.0):
    """Generate a sentence by repeatedly sampling the next word from ``model``.

    Args:
        model: Object whose ``predict`` returns one probability vector per
            input row.
        tokenizer: Tokenizer providing ``texts_to_sequences`` and
            ``index_word``.
        start_word: Text that seeds the sentence.
        sentence_length: Maximum number of entries in the sentence, seed
            included.
        temperature: Divisor applied to the log-probabilities before they are
            re-normalised; must not be zero. Values below 1 sharpen the
            distribution, values above 1 flatten it.

    Returns:
        The seed and the generated words joined by single spaces. Generation
        stops early when the sampled id has no entry in
        ``tokenizer.index_word``.
    """
    sentence = [start_word]
    for _ in range(sentence_length - 1):
        sequence = tokenizer.texts_to_sequences([' '.join(sentence)])
        sequence = pad_sequences(sequence, maxlen=context_length, padding='post')
        predicted_probs = np.asarray(model.predict(sequence)[0]).astype('float64')

        # Temperature scaling in log space. Subtracting the maximum leaves the
        # normalised result unchanged but keeps exp() from underflowing to an
        # all-zero vector (0/0) at small temperatures.
        scaled_logits = np.log(predicted_probs + 1e-10) / temperature
        scaled_logits -= np.max(scaled_logits)
        weights = np.exp(scaled_logits)
        predicted_probs = weights / np.sum(weights)

        predicted_index = np.random.choice(len(predicted_probs), p=predicted_probs)
        next_word = tokenizer.index_word.get(predicted_index, '')
        if not next_word:
            # The sampled id has no word attached, so there is nothing to add.
            break
        sentence.append(next_word)
    return ' '.join(sentence)
# --- Load the corpus: one training sentence per line of the file ---
file_path = input("Veri setinin dosya yolunu giriniz: ")
try:
    with open(file_path, "r", encoding="utf-8") as f:
        dataset = f.readlines()
except FileNotFoundError:
    print("Dosya bulunamadı!")
    # Nothing below can run without the corpus, so stop here instead of
    # failing later with a NameError on `dataset`.
    raise SystemExit(1)

# Lower-case each line and split it into word tokens.
tokenized_sentences = [re.findall(r'\b\w+\b', sentence.lower()) for sentence in dataset]
word2vec_model = Word2Vec(tokenized_sentences, vector_size=vector_size, window=window_size, min_count=min_count, workers=max_worker)

# Map words to integer ids; the joined text is built once and reused.
joined_sentences = [' '.join(sentence) for sentence in tokenized_sentences]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(joined_sentences)
sequences = tokenizer.texts_to_sequences(joined_sentences)

# The label is the last token of each sentence, so the input must be the
# tokens *before* it; feeding the full sentence would hand the model its
# own target.
X = pad_sequences([seq[:-1] for seq in sequences], maxlen=context_length, padding='post')
y = np.array([seq[-1] if len(seq) > 0 else 0 for seq in sequences])

model = train_model(X, y, tokenizer)
start_word = input("Başlangıç kelimesi giriniz: ")
print("\nÜretilen Cümle:", generate_sentence(model, tokenizer, start_word, sentence_length, temperature=1.0))
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, Dataset
# Toy sentiment corpus of (sentence, label) pairs; label 1 is treated as
# positive and 0 as negative by the classifier further down.
data = [
    ("Bu harika bir gün", 1),
    ("Bugün çok mutluyum", 1),
    ("Kötü bir deneyim yaşadım", 0),
    ("Berbat bir gündü", 0),
    ("Güzel bir film izledim", 1),
    ("Bugün hiç iyi hissetmiyorum", 0),
]

# Vocabulary: ids 0 and 1 are reserved for padding and unknown words; every
# other word receives the next free id in order of first appearance.
word_to_index = {"<PAD>": 0, "<UNK>": 1}
for sentence, _ in data:
    for word in sentence.split():
        word_to_index.setdefault(word, len(word_to_index))
VOCAB_SIZE = len(word_to_index)
def tokenize(sentence):
    """Convert a whitespace-separated sentence into a list of vocabulary ids.

    Words that are not in ``word_to_index`` are mapped to the ``<UNK>`` id.
    """
    token_ids = []
    for token in sentence.split():
        token_ids.append(word_to_index.get(token, word_to_index["<UNK>"]))
    return token_ids
class TextDataset(Dataset):
    """Dataset of tokenised sentences paired with their labels.

    Each sentence is converted to a ``torch.long`` id tensor once, at
    construction time; labels are stored as given and converted to a
    ``torch.float`` tensor when an item is fetched.
    """

    def __init__(self, data):
        """Tokenise every ``(sentence, label)`` pair in ``data``."""
        encoded_samples = []
        for sentence, label in data:
            token_ids = torch.tensor(tokenize(sentence), dtype=torch.long)
            encoded_samples.append((token_ids, label))
        self.data = encoded_samples

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(token_id_tensor, float_label_tensor)`` for sample ``idx``."""
        sample = self.data[idx]
        return sample[0], torch.tensor(sample[1], dtype=torch.float)
def _collate_batch(batch):
    """Pad the token tensors of ``batch`` to a common length and gather the labels."""
    token_tensors = [sample[0] for sample in batch]
    label_values = [sample[1] for sample in batch]
    padded_inputs = torch.nn.utils.rnn.pad_sequence(token_tensors, batch_first=True)
    return padded_inputs, torch.tensor(label_values)


# Wrap the toy corpus and serve it in shuffled batches of two samples.
dataset = TextDataset(data)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=_collate_batch)
class DusunceMotoru(nn.Module):
    """Sequence classifier: embedding -> Transformer encoder -> LSTM -> GRU -> linear.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Embedding width; also the Transformer model dimension and
            the LSTM input size.
        num_heads: Number of attention heads per Transformer encoder layer.
        num_layers: Number of stacked layers used for the Transformer encoder,
            the LSTM and the GRU alike.
        hidden_dim: Hidden size of the LSTM and the GRU.
        output_dim: Number of output units of the final linear layer.
    """

    def __init__(self, vocab_size, embed_dim, num_heads, num_layers, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        # batch_first=True so the encoder reads (batch, seq, feature) input,
        # matching the LSTM/GRU below and the x[:, -1, :] indexing in forward().
        # Without it the first axis is treated as the sequence and attention
        # would mix information across unrelated samples of the batch.
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, batch_first=True),
            num_layers=num_layers,
        )
        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers, batch_first=True)
        self.gru = nn.GRU(hidden_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return sigmoid scores for a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids, indexed as (batch, sequence).

        Returns:
            Tensor of shape (batch, output_dim) with values in (0, 1).
        """
        x = self.embedding(x)
        x = self.transformer(x)
        x, _ = self.lstm(x)
        x, _ = self.gru(x)
        # Only the output at the last sequence position feeds the classifier.
        x = self.fc(x[:, -1, :])
        return torch.sigmoid(x)
# `model` is rebound to the PyTorch classifier below, but the final generation
# step needs the Keras text generator, so keep a handle on it first.
text_model = model

model = DusunceMotoru(VOCAB_SIZE, embed_dim=128, num_heads=8, num_layers=2, hidden_dim=256, output_dim=1)
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.BCELoss()
EPOCHS = 100

model.train()
for epoch in range(EPOCHS):
    total_loss = 0
    for inputs, labels in dataloader:
        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()
        # squeeze(-1) drops only the output_dim axis, so a batch holding a
        # single sample keeps its batch axis and still matches `labels`.
        outputs = model(inputs).squeeze(-1)
        loss = loss_fn(outputs, labels)
        # Back-propagate and update the weights; without these two calls the
        # loop only measures the loss and the model never learns.
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {total_loss:.4f}")


def tahmin_yap(sentence):
    """Classify ``sentence`` with the trained model.

    Args:
        sentence: Whitespace-separated text to classify.

    Returns:
        ``"Pozitif"`` when the model score exceeds 0.5, otherwise ``"Negatif"``.
    """
    tokens = tokenize(sentence)
    input_tensor = torch.tensor([tokens], dtype=torch.long)
    # Evaluation mode disables dropout so the prediction is deterministic.
    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
    return "Pozitif" if output.item() > 0.5 else "Negatif"


# Classify the user's seed text, then use the formatted verdict as the seed
# for the text generator.
test_sentence = start_word
düşünme_sonucu = f"Cümle: {test_sentence} -> Tahmin: {tahmin_yap(test_sentence)}"
print("\nÜretilen Cümle:", generate_sentence(text_model, tokenizer, düşünme_sonucu, sentence_length, temperature=1.0))