Spaces:

ssalb
/

story_generator

Paused

App Files Files Community

ssalb committed on Jan 3

Commit

f75feb2

1 Parent(s): d2e0b39

Update space with latest code and dependencies on Fri Jan 3 18:00:11 UTC 2025

Browse files

Files changed (6) hide show

LICENSE +1 -1
app.py +11 -7
requirements.txt +2 -2
story_beam_search/beam_search.py +44 -21
story_beam_search/scoring.py +172 -93
story_beam_search/stories_generator.py +2 -2

LICENSE CHANGED Viewed

@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2023 Salvador Salazar
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

 MIT License
+Copyright (c) 2025 Salvador Salazar
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

app.py CHANGED Viewed

@@ -1,8 +1,6 @@
 import gradio as gr
-from typing import Literal
-from pydantic import BaseModel, Field, constr
 from story_beam_search.stories_generator import StoryGenerationSystem
-from typing import Tuple, List
 genre_choices = [
     "children",
@@ -29,10 +27,10 @@ def create_story_generation_interface() -> gr.Interface:
     # Initialize the story generation system
     system = StoryGenerationSystem()
     system.initialize()
     def generate_stories(
         prompt: str, genre: str, num_stories: int, temperature: float, max_length: int
-    ) -> Tuple[str, List[str]]:
         """
         Generate and evaluate stories based on user input.
         Returns a tuple of (detailed_scores, story_texts).
@@ -96,7 +94,7 @@ def create_story_generation_interface() -> gr.Interface:
     )
     max_length_input = gr.Slider(
-        minimum=30, maximum=200, value=60, step=30, label="Maximum Length"
     )
     # Output components
@@ -122,7 +120,13 @@ def create_story_generation_interface() -> gr.Interface:
         fluency, and genre alignment.
         """,
         examples=[
-            ["Once upon a time in a magical forest,", "fantasy", 3, 1.8, 150],
             [
                 "The detective knelt beside the bloodstained carpet, her gaze sharp as she traced the faint outline of a shoeprint.",
                 "mystery",

 import gradio as gr
+from pydantic import BaseModel, Field
 from story_beam_search.stories_generator import StoryGenerationSystem
 genre_choices = [
     "children",
     # Initialize the story generation system
     system = StoryGenerationSystem()
     system.initialize()
     def generate_stories(
         prompt: str, genre: str, num_stories: int, temperature: float, max_length: int
+    ) -> tuple[str, list[str]]:
         """
         Generate and evaluate stories based on user input.
         Returns a tuple of (detailed_scores, story_texts).
     )
     max_length_input = gr.Slider(
+        minimum=40, maximum=200, value=60, step=20, label="Maximum Length"
     )
     # Output components
         fluency, and genre alignment.
         """,
         examples=[
+            [
+                "Once upon a time in a magical forest, the trees whispered secrets, and moonlight revealed hidden paths to a realm where time stood still.",
+                "fantasy",
+                3,
+                1.8,
+                150,
+            ],
             [
                 "The detective knelt beside the bloodstained carpet, her gaze sharp as she traced the faint outline of a shoeprint.",
                 "mystery",

requirements.txt CHANGED Viewed

@@ -47,7 +47,7 @@ ruff==0.8.5 ; sys_platform != "emscripten" and python_full_version == "3.10.13"
 safehttpx==0.1.6 ; python_full_version == "3.10.13"
 safetensors==0.5.0 ; python_full_version == "3.10.13"
 scikit-learn==1.6.0 ; python_full_version == "3.10.13"
-scipy==1.14.1 ; python_full_version == "3.10.13"
 semantic-version==2.10.0 ; python_full_version == "3.10.13"
 shellingham==1.5.4 ; sys_platform != "emscripten" and python_full_version == "3.10.13"
 six==1.17.0 ; python_full_version == "3.10.13"
@@ -59,7 +59,7 @@ tokenizers==0.21.0 ; python_full_version == "3.10.13"
 tomlkit==0.13.2 ; python_full_version == "3.10.13"
 torch==2.4.0 ; python_full_version == "3.10.13"
 tqdm==4.67.1 ; python_full_version == "3.10.13"
-transformers @ git+https://github.com/huggingface/transformers.git@42865860ec6dc135972d9555753cb7ee17f51fb4 ; python_full_version == "3.10.13"
 triton==3.0.0 ; platform_system == "Linux" and platform_machine == "x86_64" and python_full_version == "3.10.13"
 typer==0.15.1 ; sys_platform != "emscripten" and python_full_version == "3.10.13"
 typing-extensions==4.12.2 ; python_full_version == "3.10.13"

 safehttpx==0.1.6 ; python_full_version == "3.10.13"
 safetensors==0.5.0 ; python_full_version == "3.10.13"
 scikit-learn==1.6.0 ; python_full_version == "3.10.13"
+scipy==1.15.0 ; python_full_version == "3.10.13"
 semantic-version==2.10.0 ; python_full_version == "3.10.13"
 shellingham==1.5.4 ; sys_platform != "emscripten" and python_full_version == "3.10.13"
 six==1.17.0 ; python_full_version == "3.10.13"
 tomlkit==0.13.2 ; python_full_version == "3.10.13"
 torch==2.4.0 ; python_full_version == "3.10.13"
 tqdm==4.67.1 ; python_full_version == "3.10.13"
+transformers @ git+https://github.com/huggingface/transformers.git@e5fd865ebae062b7cf03a81b8c6affeb39f30bec ; python_full_version == "3.10.13"
 triton==3.0.0 ; platform_system == "Linux" and platform_machine == "x86_64" and python_full_version == "3.10.13"
 typer==0.15.1 ; sys_platform != "emscripten" and python_full_version == "3.10.13"
 typing-extensions==4.12.2 ; python_full_version == "3.10.13"

story_beam_search/beam_search.py CHANGED Viewed

@@ -36,31 +36,27 @@ class BeamSearchGenerator:
         self, prompt: str, genre: str, evaluator: StoryScorer
     ) -> list[str]:
         """
-        Generate story continuations using multiple iterations of beam search.
         """
-        # Adding some instructions to the prompt. These are removed in the end
         instructions = (
             f"Continue the following story in the {genre} genre, "
             "ensuring coherence with the tone, characters, and narrative established so far:\n"
         )
         instructions_len = len(instructions)
-        stories = self._generate_single_iteration(instructions + prompt)
         ranked_stories = evaluator.evaluate_multiple(
             [story[instructions_len:] for story in stories]
         )
         stories = [story for story, _ in ranked_stories[: self.config.num_beams]]
         if stories:
             for _ in range(self.config.num_iterations):
-                all_stories = []
-                for story in stories:
-                    continuations = self._generate_single_iteration(
-                        instructions + story
-                    )
-                    all_stories.extend(continuations)
                 ranked_stories = evaluator.evaluate_multiple(
                     [story[instructions_len:] for story in all_stories]
                 )
@@ -70,23 +66,50 @@ class BeamSearchGenerator:
         return stories
-    def _generate_single_iteration(self, prompt: str) -> list[str]:
         """
-        Generate multiple continuations for a single iteration using beam search.
         """
-        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
-        input_ids = inputs["input_ids"]
-        attention_mask = inputs["attention_mask"]
-        self.config.continuation_length = (
-            len(input_ids[0]) + self.config.max_length // self.config.num_iterations
         )
         with torch.no_grad():
             outputs = self.model.generate(
-                input_ids=input_ids,
-                attention_mask=attention_mask,
-                max_length=self.config.continuation_length,
                 num_beams=self.config.num_beams,
                 num_return_sequences=self.config.num_return_sequences,
                 early_stopping=True,

         self, prompt: str, genre: str, evaluator: StoryScorer
     ) -> list[str]:
         """
+        Generate story continuations using parallel beam search iterations.
         """
         instructions = (
             f"Continue the following story in the {genre} genre, "
             "ensuring coherence with the tone, characters, and narrative established so far:\n"
         )
         instructions_len = len(instructions)
+        stories = self._generate_batch([instructions + prompt])
         ranked_stories = evaluator.evaluate_multiple(
             [story[instructions_len:] for story in stories]
         )
         stories = [story for story, _ in ranked_stories[: self.config.num_beams]]
         if stories:
             for _ in range(self.config.num_iterations):
+                # Prepare all prompts for batch processing
+                all_prompts = [instructions + story for story in stories]
+                # Generate all continuations in one batch
+                all_stories = self._generate_batch(all_prompts)
                 ranked_stories = evaluator.evaluate_multiple(
                     [story[instructions_len:] for story in all_stories]
                 )
         return stories
+    def _generate_batch(self, prompts: list[str]) -> list[str]:
         """
+        Generate multiple continuations for multiple prompts in a single batch.
         """
+        # Tokenize all prompts
+        tokenized = [self.tokenizer(prompt, return_tensors="pt") for prompt in prompts]
+        # Pad input_ids and attention_masks to same length
+        max_length = max(inputs["input_ids"].size(1) for inputs in tokenized)
+        padded_input_ids = []
+        padded_attention_masks = []
+        for inputs in tokenized:
+            input_ids = inputs["input_ids"][0]
+            attention_mask = inputs["attention_mask"][0]
+            # Pad to max_length
+            padding_length = max_length - input_ids.size(0)
+            if padding_length > 0:
+                input_ids = torch.cat(
+                    [input_ids, torch.zeros(padding_length, dtype=torch.long)]
+                )
+                attention_mask = torch.cat(
+                    [attention_mask, torch.zeros(padding_length, dtype=torch.long)]
+                )
+            padded_input_ids.append(input_ids)
+            padded_attention_masks.append(attention_mask)
+        # Stack into batches
+        input_ids_batch = torch.stack(padded_input_ids).to(self.device)
+        attention_mask_batch = torch.stack(padded_attention_masks).to(self.device)
+        # Calculate continuation length
+        continuation_length = (
+            max_length + self.config.max_length // self.config.num_iterations
         )
+        # Generate all continuations in one pass
         with torch.no_grad():
             outputs = self.model.generate(
+                input_ids=input_ids_batch,
+                attention_mask=attention_mask_batch,
+                max_length=continuation_length,
                 num_beams=self.config.num_beams,
                 num_return_sequences=self.config.num_return_sequences,
                 early_stopping=True,

story_beam_search/scoring.py CHANGED Viewed

@@ -9,7 +9,7 @@ from sklearn.metrics.pairwise import cosine_similarity
 class StoryScorer(Protocol):
     """Protocol defining the interface for story scoring components."""
-    def score(self, story: str) -> float:
         """Return a score between 0 and 1."""
         ...
@@ -28,40 +28,72 @@ class CoherenceScorer(StoryScorer):
         model: PreTrainedModel,
         tokenizer: PreTrainedTokenizer,
         device: torch.device,
-        max_pairs: int = 3,
     ):
         self.model = model
         self.tokenizer = tokenizer
         self.device = device
-        self.max_pairs = max_pairs
-    def score(self, story: str) -> float:
         """Calculate coherence score based on sentences cosine similarity."""
-        sentences = [s.strip() for s in story.split(".") if s.strip()]
-        embeddings = []
-        # Generate embeddings for each sentence
-        for sentence in sentences:
-            inputs = self.tokenizer(sentence, return_tensors="pt").to(self.device)
             with torch.no_grad():
-                outputs = self.model(**inputs)
-                last_hidden_state = outputs.hidden_states[-1]
-                emb = last_hidden_state[:, 0, :]
-                embeddings.append(emb.cpu().numpy())
-        # Calculate cosine similarity between adjacent embeddings
-        coherence_scores = []
-        for i in range(len(embeddings) - 1):
-            sim = cosine_similarity(embeddings[i], embeddings[i + 1])[0][0]
-            coherence_scores.append(sim)
-        # Average coherence score
-        avg_coherence = (
-            sum(coherence_scores) / len(coherence_scores) if coherence_scores else 0.0
-        )
-        return avg_coherence
 class FluencyScorer(StoryScorer):
@@ -70,61 +102,119 @@ class FluencyScorer(StoryScorer):
         model: PreTrainedModel,
         tokenizer: PreTrainedTokenizer,
         device: torch.device,
     ):
         self.model = model
         self.tokenizer = tokenizer
         self.device = device
-    def score(self, story: str) -> float:
-        # Mask each token in the story and calculate the probability of the original token
-        # Fluency is measured by the average probability of each token in the story
-        inputs = self.tokenizer(story, return_tensors="pt").to(self.device)
-        input_ids = inputs.input_ids
         mask_token_id = self.tokenizer.mask_token_id
-        if mask_token_id is None:
-            self.tokenizer.mask_token = "[MASK]"
-            mask_token_id = self.tokenizer.encode(self.tokenizer.mask_token)[0]
-        fluency_scores = []
-        for i in range(1, input_ids.size(1) - 1):
-            masked_input_ids = input_ids.clone()
-            masked_input_ids[0, i] = mask_token_id
-            with torch.no_grad():
-                outputs = self.model(input_ids=masked_input_ids)
-                logits = outputs.logits
-            original_token_id = input_ids[0, i]
-            token_probability = logits[0, i].softmax(dim=-1)[original_token_id].item()
-            fluency_scores.append(token_probability)
-        avg_fluency = (
-            sum(fluency_scores) / len(fluency_scores) if fluency_scores else 0.0
-        )
-        return avg_fluency
 class GenreAlignmentScorer(StoryScorer):
-    def __init__(self, pipeline: Pipeline, genre: str):
         self.pipeline = pipeline
         self.genre = genre
-    def score(self, story: str) -> float:
         if not self.genre:
-            return 0.5
-        # Evaluate by sentence to check whether the genre is maintained throughout
-        sentences = [s.strip() for s in story.split(".") if s.strip()]
-        results = []
-        for sentence in sentences:
-            result = self.pipeline(
-                sentence, candidate_labels=[self.genre], multi_label=True
             )
-            results.append(result["scores"][0])
-        avg_core = sum(results) / len(results) if results else 0.0
-        return avg_core
 class StoryEvaluator:
@@ -140,43 +230,32 @@ class StoryEvaluator:
         self.genre_scorer = genre_scorer
         self.weights = weights
-    def evaluate(self, story: str, max_scores: list[float]) -> CombinedScore:
-        coherence = self.coherence_scorer.score(story)
-        fluency = self.fluency_scorer.score(story)
-        genre_alignment = self.genre_scorer.score(story)
-        max_scores[0] = np.max([max_scores[0], coherence])
-        max_scores[1] = np.max([max_scores[1], fluency])
-        max_scores[2] = np.max([max_scores[2], genre_alignment])
-        return CombinedScore(
-            coherence=coherence,
-            fluency=fluency,
-            genre_alignment=genre_alignment,
-            total=0,
-        )
     def evaluate_multiple(self, stories: list[str]) -> list[tuple[str, CombinedScore]]:
-        """Evaluate multiple stories and return them sorted by total score."""
-        # Scores are normalized by the max scores on every evaluation
-        # This is to ensure that the scores are comparable between each other, as they are originally on different scales
-        # Reset max scores
-        max_scores = [0.0, 0.0, 0.0]
-        scored_stories = [
-            (story, self.evaluate(story, max_scores)) for story in stories
-        ]
-        # Normalize scores
-        for _, scores in scored_stories:
-            scores.coherence, scores.fluency, scores.genre_alignment = np.divide(
-                [scores.coherence, scores.fluency, scores.genre_alignment],
-                max_scores,
             )
             scores.total = np.dot(
                 [scores.coherence, scores.fluency, scores.genre_alignment], self.weights
             )
         return sorted(scored_stories, key=lambda x: x[1].total, reverse=True)

 class StoryScorer(Protocol):
     """Protocol defining the interface for story scoring components."""

     def score(self, stories: list[str]) -> list[float]:
         """Score a batch of stories.

         Args:
             stories: Story texts to score.

         Returns:
             One score per story, in the same order as ``stories``. Each
             score is intended to lie between 0 and 1.
         """
         ...
         model: PreTrainedModel,
         tokenizer: PreTrainedTokenizer,
         device: torch.device,
+        batch_size: int = 32,
     ):
         self.model = model
         self.tokenizer = tokenizer
         self.device = device
+        self.batch_size = batch_size
+    def score(self, stories: list[str]) -> list[float]:
         """Calculate coherence score based on sentences cosine similarity."""
+        all_scores = []
+        # Split stories into sentences for coherence scoring
+        sentences_list = [
+            [s.strip() for s in story.split(".") if s.strip()] for story in stories
+        ]
+        # Collect all sentence pairs that need embedding
+        all_sentence_pairs = []
+        story_boundaries = []  # Track where each story's sentences end
+        current_position = 0
+        for sentences in sentences_list:
+            pairs_count = len(sentences) - 1
+            all_sentence_pairs.extend(zip(sentences[:-1], sentences[1:]))
+            story_boundaries.append(current_position + pairs_count)
+            current_position += pairs_count
+        # Process sentence pairs in batches
+        all_embeddings = []
+        for i in range(0, len(all_sentence_pairs), self.batch_size):
+            batch_pairs = all_sentence_pairs[i : i + self.batch_size]
+            # Flatten pairs for batch processing
+            batch_sentences = [sent for pair in batch_pairs for sent in pair]
+            # Tokenize batch
+            inputs = self.tokenizer(
+                batch_sentences, padding=True, truncation=True, return_tensors="pt"
+            ).to(self.device)
             with torch.no_grad():
+                outputs = self.model(**inputs, output_hidden_states=True)
+                batch_embeddings = outputs.hidden_states[-1][
+                    :, 0, :
+                ]  # Get CLS token embeddings
+                all_embeddings.extend(batch_embeddings.cpu().numpy())
+        # Calculate coherence scores for each story
+        current_idx = 0
+        for boundary in story_boundaries:
+            story_pairs_count = boundary - current_idx
+            story_scores = []
+            for i in range(story_pairs_count):
+                idx = current_idx + i
+                first_emb = all_embeddings[idx * 2].reshape(1, -1)
+                second_emb = all_embeddings[idx * 2 + 1].reshape(1, -1)
+                sim = cosine_similarity(first_emb, second_emb)[0][0]
+                story_scores.append(sim)
+            avg_coherence = (
+                sum(story_scores) / len(story_scores) if story_scores else 0.0
+            )
+            all_scores.append(avg_coherence)
+            current_idx = boundary
+        return all_scores
 class FluencyScorer(StoryScorer):
         model: PreTrainedModel,
         tokenizer: PreTrainedTokenizer,
         device: torch.device,
+        batch_size: int = 32,
     ):
         self.model = model
         self.tokenizer = tokenizer
         self.device = device
+        self.batch_size = batch_size
+        # Set up padding token if it doesn't exist
+        if self.tokenizer.pad_token is None:
+            if self.tokenizer.eos_token is not None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+            else:
+                # Add padding token to tokenizer only
+                self.tokenizer.add_special_tokens({"pad_token": "[PAD]"})
+        # Set up mask token if it doesn't exist
+        if self.tokenizer.mask_token is None:
+            self.tokenizer.add_special_tokens({"mask_token": "[MASK]"})
+    def score(self, stories: list[str]) -> list[float]:
+        all_scores = []
         mask_token_id = self.tokenizer.mask_token_id
+        # Process stories in batches
+        for i in range(0, len(stories), self.batch_size):
+            batch_stories = stories[i : i + self.batch_size]
+            batch_inputs = self.tokenizer(
+                batch_stories, padding=True, truncation=True, return_tensors="pt"
+            ).to(self.device)
+            batch_scores = []
+            # For each story in the batch
+            for j in range(len(batch_stories)):
+                story_scores = []
+                input_ids = batch_inputs.input_ids[j : j + 1]  # Keep batch dimension
+                attention_mask = batch_inputs.attention_mask[
+                    j : j + 1
+                ]  # Get attention mask
+                # Only process tokens that aren't padding
+                valid_tokens = attention_mask[0].sum().item()
+                # For each token in the story (excluding padding)
+                for k in range(1, valid_tokens - 1):
+                    masked_input_ids = input_ids.clone()
+                    masked_input_ids[0, k] = mask_token_id
+                    with torch.no_grad():
+                        outputs = self.model(
+                            input_ids=masked_input_ids, attention_mask=attention_mask
+                        )
+                        logits = outputs.logits
+                    original_token_id = input_ids[0, k]
+                    token_probability = (
+                        logits[0, k].softmax(dim=-1)[original_token_id].item()
+                    )
+                    story_scores.append(token_probability)
+                avg_fluency = (
+                    sum(story_scores) / len(story_scores) if story_scores else 0.0
+                )
+                batch_scores.append(avg_fluency)
+            all_scores.extend(batch_scores)
+        return all_scores
 class GenreAlignmentScorer(StoryScorer):
     """Scores how well each story matches a target genre, sentence by sentence."""

     def __init__(self, pipeline: Pipeline, genre: str, batch_size: int = 32):
         """Store the classification pipeline, the target genre and the batch size."""
         self.pipeline = pipeline
         self.genre = genre
         self.batch_size = batch_size

     def score(self, stories: list[str]) -> list[float]:
         """Return the mean per-sentence genre score of every story.

         Stories are split on ``"."``; the sentences of all stories are
         classified together in chunks of ``self.batch_size`` with the genre
         as the only candidate label, then averaged back per story.

         Args:
             stories: Story texts to score.

         Returns:
             One score per story, in input order. Every story scores ``0.5``
             when no genre is set, and a story without sentences scores
             ``0.0``.
         """
         if not self.genre:
             return [0.5] * len(stories)
         per_story_sentences = [
             [part.strip() for part in story.split(".") if part.strip()]
             for story in stories
         ]
         flat_sentences = [
             sentence for sentences in per_story_sentences for sentence in sentences
         ]
         # Classify the flattened sentences chunk by chunk
         sentence_scores = []
         for start in range(0, len(flat_sentences), self.batch_size):
             chunk = flat_sentences[start : start + self.batch_size]
             results = self.pipeline(
                 chunk,
                 candidate_labels=[self.genre],
                 multi_label=True,
                 batch_size=self.batch_size,
             )
             sentence_scores.extend([r["scores"][0] for r in results])
         # Walk the flat score list with a cursor to rebuild per-story averages
         averages = []
         cursor = 0
         for sentences in per_story_sentences:
             end = cursor + len(sentences)
             window = sentence_scores[cursor:end]
             averages.append(sum(window) / len(window) if window else 0.0)
             cursor = end
         return averages
 class StoryEvaluator:
         self.genre_scorer = genre_scorer
         self.weights = weights
     def evaluate_multiple(self, stories: list[str]) -> list[tuple[str, CombinedScore]]:
         """Evaluate multiple stories in batches and return them sorted by total score.

         Each scorer is called once with the full list of stories. Every
         metric is then divided by its maximum over this batch so the three
         metrics are comparable, and the total is the dot product of the
         normalized metrics with ``self.weights``.

         Args:
             stories: Story texts to evaluate.

         Returns:
             ``(story, scores)`` tuples ordered by ``scores.total``, highest
             first. An empty input yields an empty list.
         """
         # Nothing to rank; also avoids max() raising ValueError on empty lists
         if not stories:
             return []
         # Each scorer handles the whole batch in one call
         coherence_scores = self.coherence_scorer.score(stories)
         fluency_scores = self.fluency_scorer.score(stories)
         genre_scores = self.genre_scorer.score(stories)
         # Find max scores for normalization
         max_scores = [max(coherence_scores), max(fluency_scores), max(genre_scores)]
         # Create scored stories
         scored_stories = []
         for i, story in enumerate(stories):
             # A metric whose batch maximum is 0 is reported as 0 for every
             # story instead of dividing by zero
             scores = CombinedScore(
                 coherence=(
                     coherence_scores[i] / max_scores[0] if max_scores[0] != 0 else 0
                 ),
                 fluency=fluency_scores[i] / max_scores[1] if max_scores[1] != 0 else 0,
                 genre_alignment=(
                     genre_scores[i] / max_scores[2] if max_scores[2] != 0 else 0
                 ),
             )
             scores.total = np.dot(
                 [scores.coherence, scores.fluency, scores.genre_alignment], self.weights
             )
             scored_stories.append((story, scores))
         return sorted(scored_stories, key=lambda x: x[1].total, reverse=True)

story_beam_search/stories_generator.py CHANGED Viewed

@@ -160,10 +160,10 @@ class StoryGenerationSystem:
         prompt_segments = re.split(r"[^a-zA-Z0-9 ]+", prompt)
         prompt_segments = list(set(prompt_segments))
-        storyness_score = self.storyness.score(prompt)
         for segment in prompt_segments:
             if segment.strip():
-                injection_score = self.injection_guard.score(segment)
                 if storyness_score < 0.2 or injection_score > 0.2:
                     print("Potential prompt injection detected.")
                     print(f"storyness_score: {storyness_score}")

         prompt_segments = re.split(r"[^a-zA-Z0-9 ]+", prompt)
         prompt_segments = list(set(prompt_segments))
+        storyness_score = self.storyness.score([prompt])[0]
         for segment in prompt_segments:
             if segment.strip():
+                injection_score = self.injection_guard.score([segment])[0]
                 if storyness_score < 0.2 or injection_score > 0.2:
                     print("Potential prompt injection detected.")
                     print(f"storyness_score: {storyness_score}")