jbilcke-hf HF staff committed on
Commit
9bcdb59
·
1 Parent(s): fc2842d

work on a new approach: generate small chunks of a story instead of one big one

Browse files
src/app/main.tsx CHANGED
@@ -11,6 +11,7 @@ import { getStory } from "./queries/getStory"
11
  import { BottomBar } from "./interface/bottom-bar"
12
  import { Page } from "./interface/page"
13
  import { LLMResponse } from "@/types"
 
14
 
15
  export default function Main() {
16
  const [_isPending, startTransition] = useTransition()
@@ -50,14 +51,7 @@ export default function Main() {
50
  try {
51
  llmResponse = await getStory({
52
  preset,
53
- prompt: [
54
- `${userStoryPrompt}`,
55
-
56
- // not necessary + it confuses the LLM if we use custom
57
- // + the LLM may reject some of the styles
58
- // stylePrompt ? `in the following context: ${stylePrompt}` : ''
59
-
60
- ].map(x => x.trim()).filter(x => x).join(", "),
61
  nbTotalPanels
62
  })
63
  console.log("LLM responded:", llmResponse)
@@ -70,11 +64,11 @@ export default function Main() {
70
  for (let p = 0; p < nbTotalPanels; p++) {
71
  llmResponse.push({
72
  panel: p,
73
- instructions: [
74
  stylePrompt,
75
  userStoryPrompt,
76
  `${".".repeat(p)}`,
77
- ].map(x => x.trim()).filter(x => x).join(", "),
78
  caption: "(Sorry, LLM generation failed: using degraded mode)"
79
  })
80
  }
@@ -89,15 +83,15 @@ export default function Main() {
89
  }
90
 
91
  // new experimental prompt: let's drop the user prompt, and only use the style
92
- const lightPanelPromptPrefix = preset.imagePrompt(limitedStylePrompt).map(x => x.trim()).filter(x => x).join(", ")
93
 
94
  // this prompt will be used if the LLM generation failed
95
- const degradedPanelPromptPrefix = [
96
  ...preset.imagePrompt(limitedStylePrompt),
97
 
98
  // we re-inject the story, then
99
- userStoryPrompt.trim(),
100
- ].filter(x => x).join(", ")
101
 
102
  const newPanels: string[] = []
103
  const newCaptions: string[] = []
@@ -105,7 +99,7 @@ export default function Main() {
105
  console.log("Panel prompts for SDXL:")
106
  for (let p = 0; p < nbTotalPanels; p++) {
107
  newCaptions.push(llmResponse[p]?.caption.trim() || "...")
108
- const newPanel = [
109
 
110
  // what we do here is that ideally we give full control to the LLM for prompting,
111
  // unless there was a catastrophic failure, in that case we preserve the original prompt
@@ -113,8 +107,8 @@ export default function Main() {
113
  ? lightPanelPromptPrefix
114
  : degradedPanelPromptPrefix,
115
 
116
- llmResponse[p]?.instructions || ""
117
- ].map(x => x.trim()).filter(x => x).join(", ")
118
  newPanels.push(newPanel)
119
  console.log(newPanel)
120
  }
 
11
  import { BottomBar } from "./interface/bottom-bar"
12
  import { Page } from "./interface/page"
13
  import { LLMResponse } from "@/types"
14
+ import { joinWords } from "@/lib/joinWords"
15
 
16
  export default function Main() {
17
  const [_isPending, startTransition] = useTransition()
 
51
  try {
52
  llmResponse = await getStory({
53
  preset,
54
+ prompt: joinWords([ userStoryPrompt ]),
 
 
 
 
 
 
 
55
  nbTotalPanels
56
  })
57
  console.log("LLM responded:", llmResponse)
 
64
  for (let p = 0; p < nbTotalPanels; p++) {
65
  llmResponse.push({
66
  panel: p,
67
+ instructions: joinWords([
68
  stylePrompt,
69
  userStoryPrompt,
70
  `${".".repeat(p)}`,
71
+ ]),
72
  caption: "(Sorry, LLM generation failed: using degraded mode)"
73
  })
74
  }
 
83
  }
84
 
85
  // new experimental prompt: let's drop the user prompt, and only use the style
86
+ const lightPanelPromptPrefix = joinWords(preset.imagePrompt(limitedStylePrompt))
87
 
88
  // this prompt will be used if the LLM generation failed
89
+ const degradedPanelPromptPrefix = joinWords([
90
  ...preset.imagePrompt(limitedStylePrompt),
91
 
92
  // we re-inject the story, then
93
+ userStoryPrompt
94
+ ])
95
 
96
  const newPanels: string[] = []
97
  const newCaptions: string[] = []
 
99
  console.log("Panel prompts for SDXL:")
100
  for (let p = 0; p < nbTotalPanels; p++) {
101
  newCaptions.push(llmResponse[p]?.caption.trim() || "...")
102
+ const newPanel = joinWords([
103
 
104
  // what we do here is that ideally we give full control to the LLM for prompting,
105
  // unless there was a catastrophic failure, in that case we preserve the original prompt
 
107
  ? lightPanelPromptPrefix
108
  : degradedPanelPromptPrefix,
109
 
110
+ llmResponse[p]?.instructions
111
+ ])
112
  newPanels.push(newPanel)
113
  console.log(newPanel)
114
  }
src/app/queries/getStory.ts CHANGED
@@ -37,7 +37,7 @@ export const getStory = async ({
37
  role: "user",
38
  content: `The story is: ${prompt}`,
39
  }
40
- ]) + "[{"
41
 
42
 
43
  let result = ""
 
37
  role: "user",
38
  content: `The story is: ${prompt}`,
39
  }
40
+ ]) + "\n[{"
41
 
42
 
43
  let result = ""
src/app/queries/getStoryContinuation.ts ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import { dirtyLLMJsonParser } from "@/lib/dirtyLLMJsonParser"
3
+ import { dirtyCaptionCleaner } from "@/lib/dirtyCaptionCleaner"
4
+
5
+ import { predict } from "./predict"
6
+ import { Preset } from "../engine/presets"
7
+ import { LLMResponse } from "@/types"
8
+ import { cleanJson } from "@/lib/cleanJson"
9
+ import { createZephyrPrompt } from "@/lib/createZephyrPrompt"
10
+
11
/**
 * Asks the LLM to write the next panels of a story that is already under way.
 *
 * A Zephyr-formatted prompt is built from the preset and the user story, and
 * (when available) the captions of the previous panels are appended so the
 * model can stay consistent with what was already told. The raw completion is
 * cleaned, parsed as JSON, and every entry is passed through
 * `dirtyCaptionCleaner` before being returned.
 *
 * @param preset - Preset whose `llmPrompt` describes the writer's speciality.
 * @param prompt - What the story is about (defaults to an empty string).
 * @param nbTotalPanels - Number of panels to request (defaults to 2).
 * @param previousCaptions - Captions of the panels generated so far.
 * @returns One `{ panel, instructions, caption }` entry per parsed panel.
 * @throws Error when both prediction attempts fail or return an empty string.
 */
export const getStoryContinuation = async ({
  preset,
  prompt = "",
  nbTotalPanels = 2,
  previousCaptions = [],
}: {
  preset: Preset;
  // every field except `preset` has a default, so callers may omit them
  prompt?: string;
  nbTotalPanels?: number;
  previousCaptions?: string[];
}): Promise<LLMResponse> => {
  // throw new Error("Planned maintenance")

  // In case you need to quickly debug the RENDERING engine you can uncomment this:
  // return mockLLMResponse

  const previousCaptionsTemplate = previousCaptions.length
    ? `To help you, here are the previous panels and their captions (note: if you see an anomaly here eg. no caption or the same description repeated multiple times, do not hesitate to fix the story): ${JSON.stringify(previousCaptions, null, 2)}`
    : ''

  const query = createZephyrPrompt([
    {
      role: "system",
      content: [
        `You are a writer specialized in ${preset.llmPrompt}`,
        `Please write detailed drawing instructions and a short (2-3 sentences long) speech caption for the next ${nbTotalPanels} panels of a new story, but keep it open-ended (it will be continued and expanded later). Please make sure each of those ${nbTotalPanels} panels include info about character gender, age, origin, clothes, colors, location, lights, etc.`,
        `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; caption: string}>\`.`,
        // `Give your response as Markdown bullet points.`,
        `Be brief in your ${nbTotalPanels} instructions and narrative captions, don't add your own comments. The captions must be captivating, smart, entertaining. Be straight to the point, and never reply things like "Sure, I can.." etc. Reply using valid JSON.`
      ].filter(item => item).join("\n")
    },
    {
      role: "user",
      // the two sentences are joined with a space so that the previous
      // captions (when present) are not glued to the end of the story prompt
      content: [
        `The story is about: ${prompt}.`,
        previousCaptionsTemplate,
      ].filter(item => item).join(" "),
    }
  // the prompt ends with the opening of a JSON array, presumably to nudge the
  // model into continuing with JSON right away (TODO confirm with cleanJson)
  ]) + "\n[{"

  // Runs a single prediction and rejects blank completions, so that an empty
  // answer is handled exactly like a failed request.
  const predictOnce = async (input: string): Promise<string> => {
    // console.log(`calling predict(${input}, ${nbTotalPanels})`)
    const output = `${(await predict(input, nbTotalPanels)) || ""}`.trim()
    if (!output.length) {
      throw new Error("empty result!")
    }
    return output
  }

  let result = ""

  try {
    result = await predictOnce(query)
  } catch {
    // console.log(`prediction of the story failed, trying again..`)
    // the first failure is deliberately not reported: we retry exactly once,
    // with a slightly different prompt (a trailing dot)
    try {
      result = await predictOnce(query + ".")
    } catch (err) {
      console.error(`prediction of the story failed again 💩`)
      throw new Error(`failed to generate the story ${err}`)
    }
  }

  // console.log("Raw response from LLM:", result)
  const tmp = cleanJson(result)

  let llmResponse: LLMResponse = []

  try {
    llmResponse = dirtyLLMJsonParser(tmp)
  } catch (err) {
    // console.log(`failed to read LLM response: ${err}`)
    // console.log(`original response was:`, result)

    // in case of failure here, it might be because the LLM hallucinated a completely different response,
    // such as markdown. There is no real solution.. but we can try a fallback:
    // treat the text as "*"-separated bullet points, one panel per bullet.
    // Blank fragments (e.g. the text before the first "*") are dropped before
    // numbering, otherwise they would turn into empty panels.
    llmResponse = tmp
      .split("*")
      .map(item => item.trim())
      .filter(item => item.length > 0)
      .map((cap, i) => ({
        panel: i,
        caption: cap,
        instructions: cap,
      }))
  }

  return llmResponse.map(res => dirtyCaptionCleaner(res))
}
src/lib/createZephyrPrompt.ts CHANGED
@@ -7,7 +7,6 @@
7
  /**
8
  * Formats the messages for the chat with the LLM model in the style of a pirate.
9
  * @param messages - Array of message objects with role and content.
10
- * @param systemMessage - The system's initial instructions, defaulted to a friendly pirate-style chatbot.
11
  * @returns The formatted chat prompt.
12
  */
13
  export function createZephyrPrompt(messages: Message[]): string {
 
7
  /**
8
  * Formats the messages for the chat with the LLM model in the style of a pirate.
9
  * @param messages - Array of message objects with role and content.
 
10
  * @returns The formatted chat prompt.
11
  */
12
  export function createZephyrPrompt(messages: Message[]): string {
src/lib/joinWords.ts ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
/**
 * Cleans a list of words into one string of separator-divided
 * (by default comma-separated) words.
 *
 * Every entry is converted to a string and trimmed. Entries that are null,
 * undefined, `false`, NaN, or blank once trimmed are removed. The number `0`
 * is a legitimate value and is kept (as "0").
 *
 * @param inputs - The words to join; a missing or null list yields "".
 * @param separator - Text inserted between the remaining words.
 * @returns The cleaned words joined by `separator`.
 */
export function joinWords(
  inputs: ReadonlyArray<unknown> | null = [],
  separator = ", "
): string {
  // `false` is treated as "nothing" so that `condition && "word"` keeps working
  const isNothing = (word: unknown): boolean =>
    word == null ||
    word === false ||
    (typeof word === "number" && Number.isNaN(word))

  return (inputs ?? [])
    .map(word => (isNothing(word) ? "" : String(word).trim()))
    .filter(word => word.length > 0)
    .join(separator)
}