Spaces:

ierhon
/

neural-chatbot-constructor

Sleeping

ierhon committed on Aug 25, 2023

Commit

76b74a3

1 Parent(s): ba1fa51

Improving generalization on small datasets

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ from keras_self_attention import SeqSelfAttention, SeqWeightedAttention
 emb_size = 128
 inp_len = 16
 def train(data: str, message: str):
     if "→" not in data or "\n" not in data:
@@ -34,11 +35,12 @@ def train(data: str, message: str):
     y = []
     for key in dset:
-        tokens = tokenizer.texts_to_sequences([key,])[0]
-        X.append(np.array((list(tokens)+[0,]*inp_len)[:inp_len]))
-        output_array = np.zeros(resps_len)
-        output_array[dset[key]] = 1
-        y.append(output_array)
     X = np.array(X)
     y = np.array(y)

 emb_size = 128
 inp_len = 16
+maxshift = 4
 def train(data: str, message: str):
     if "→" not in data or "\n" not in data:
     y = []
     for key in dset:
+        for p in range(maxshift):
+            tokens = tokenizer.texts_to_sequences([key,])[0]
+            X.append(np.array(([0,]*p+list(tokens)+[0,]*inp_len)[:inp_len]))
+            output_array = np.zeros(resps_len)
+            output_array[dset[key]] = 1
+            y.append(output_array)
     X = np.array(X)
     y = np.array(y)