jbilcke-hf HF staff committed on
Commit
95a4e14
·
1 Parent(s): b194c9d

working on an experimental speech bubble display

Browse files
package-lock.json CHANGED
@@ -12,6 +12,7 @@
12
  "@anthropic-ai/sdk": "^0.19.1",
13
  "@huggingface/hub": "^0.15.1",
14
  "@huggingface/inference": "^2.6.1",
 
15
  "@radix-ui/react-accordion": "^1.1.2",
16
  "@radix-ui/react-avatar": "^1.0.3",
17
  "@radix-ui/react-checkbox": "^1.0.4",
@@ -828,6 +829,11 @@
828
  "@jridgewell/sourcemap-codec": "^1.4.14"
829
  }
830
  },
 
 
 
 
 
831
  "node_modules/@next/env": {
832
  "version": "14.2.3",
833
  "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.3.tgz",
 
12
  "@anthropic-ai/sdk": "^0.19.1",
13
  "@huggingface/hub": "^0.15.1",
14
  "@huggingface/inference": "^2.6.1",
15
+ "@mediapipe/tasks-vision": "^0.10.14",
16
  "@radix-ui/react-accordion": "^1.1.2",
17
  "@radix-ui/react-avatar": "^1.0.3",
18
  "@radix-ui/react-checkbox": "^1.0.4",
 
829
  "@jridgewell/sourcemap-codec": "^1.4.14"
830
  }
831
  },
832
+ "node_modules/@mediapipe/tasks-vision": {
833
+ "version": "0.10.14",
834
+ "resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.14.tgz",
835
+ "integrity": "sha512-vOifgZhkndgybdvoRITzRkIueWWSiCKuEUXXK6Q4FaJsFvRJuwgg++vqFUMlL0Uox62U5aEXFhHxlhV7Ja5e3Q=="
836
+ },
837
  "node_modules/@next/env": {
838
  "version": "14.2.3",
839
  "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.3.tgz",
package.json CHANGED
@@ -13,6 +13,7 @@
13
  "@anthropic-ai/sdk": "^0.19.1",
14
  "@huggingface/hub": "^0.15.1",
15
  "@huggingface/inference": "^2.6.1",
 
16
  "@radix-ui/react-accordion": "^1.1.2",
17
  "@radix-ui/react-avatar": "^1.0.3",
18
  "@radix-ui/react-checkbox": "^1.0.4",
 
13
  "@anthropic-ai/sdk": "^0.19.1",
14
  "@huggingface/hub": "^0.15.1",
15
  "@huggingface/inference": "^2.6.1",
16
+ "@mediapipe/tasks-vision": "^0.10.14",
17
  "@radix-ui/react-accordion": "^1.1.2",
18
  "@radix-ui/react-avatar": "^1.0.3",
19
  "@radix-ui/react-checkbox": "^1.0.4",
src/app/interface/panel/bubble/index.tsx CHANGED
@@ -15,6 +15,7 @@ export function Bubble({ children, onChange }: {
15
 
16
  const ref = useRef<HTMLDivElement>(null)
17
  const zoomLevel = useStore(s => s.zoomLevel)
 
18
  const showCaptions = useStore(s => s.showCaptions)
19
 
20
  const text = useRef(`${children || ''}`)
 
15
 
16
  const ref = useRef<HTMLDivElement>(null)
17
  const zoomLevel = useStore(s => s.zoomLevel)
18
+ const showSpeeches = useStore(s => s.showSpeeches)
19
  const showCaptions = useStore(s => s.showCaptions)
20
 
21
  const text = useRef(`${children || ''}`)
src/app/interface/panel/index.tsx CHANGED
@@ -2,22 +2,23 @@
2
 
3
  import { useEffect, useRef, useState, useTransition } from "react"
4
  import { RxReload, RxPencil2 } from "react-icons/rx"
 
5
 
6
  import { RenderedScene, RenderingModelVendor } from "@/types"
7
-
8
  import { getRender, newRender } from "@/app/engine/render"
9
  import { useStore } from "@/app/store"
10
-
11
  import { cn } from "@/lib/utils"
12
  import { getInitialRenderedScene } from "@/lib/getInitialRenderedScene"
13
  import { Progress } from "@/app/interface/progress"
 
14
  import { EditModal } from "../edit-modal"
15
- import { Bubble } from "./bubble"
16
  import { getSettings } from "../settings-dialog/getSettings"
17
- import { useLocalStorage } from "usehooks-ts"
18
  import { localStorageKeys } from "../settings-dialog/localStorageKeys"
19
  import { defaultSettings } from "../settings-dialog/defaultSettings"
20
 
 
 
21
  export function Panel({
22
  page,
23
  nbPanels,
@@ -35,22 +36,18 @@ export function Panel({
35
  // panel id, between 0 and (nbPanels - 1)
36
  panel: number
37
 
38
-
39
  className?: string
40
  width?: number
41
  height?: number
42
  }) {
43
-
44
  // index of the panel in the whole app
45
  const panelIndex = page * nbPanels + panel
46
 
47
-
48
  // the panel Id must be unique across all pages
49
  const panelId = `${panelIndex}`
50
 
51
  // console.log(`panel/index.tsx: <Panel panelId=${panelId}> rendered again!`)
52
 
53
-
54
  const [mouseOver, setMouseOver] = useState(false)
55
  const ref = useRef<HTMLImageElement>(null)
56
  const font = useStore(s => s.font)
@@ -63,6 +60,10 @@ export function Panel({
63
 
64
  const setPanelPrompt = useStore(s => s.setPanelPrompt)
65
 
 
 
 
 
66
  const captions = useStore(s => s.captions)
67
  const caption = captions[panelIndex] || ""
68
  const setPanelCaption = useStore(s => s.setPanelCaption)
@@ -95,6 +96,28 @@ export function Panel({
95
 
96
  let delay = enableRateLimiter ? (1000 + (500 * panelIndex)) : 1000
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  /*
99
  console.log("panel/index.tsx: DEBUG: " + JSON.stringify({
100
  page,
@@ -204,6 +227,7 @@ export function Panel({
204
  if (newRendered.status === "completed") {
205
  setGeneratingImages(panelId, false)
206
  addToUpscaleQueue(panelId, newRendered)
 
207
  } else if (!newRendered.status || newRendered.status === "error") {
208
  setGeneratingImages(panelId, false)
209
  } else {
@@ -274,6 +298,7 @@ export function Panel({
274
  console.log("panel finished!")
275
  setGeneratingImages(panelId, false)
276
  addToUpscaleQueue(panelId, newRendered)
 
277
 
278
  }
279
  } catch (err) {
 
2
 
3
  import { useEffect, useRef, useState, useTransition } from "react"
4
  import { RxReload, RxPencil2 } from "react-icons/rx"
5
+ import { useLocalStorage } from "usehooks-ts"
6
 
7
  import { RenderedScene, RenderingModelVendor } from "@/types"
 
8
  import { getRender, newRender } from "@/app/engine/render"
9
  import { useStore } from "@/app/store"
10
+ import { injectSpeechBubbleInTheBackground } from "@/lib/bubble/injectSpeechBubbleInTheBackground"
11
  import { cn } from "@/lib/utils"
12
  import { getInitialRenderedScene } from "@/lib/getInitialRenderedScene"
13
  import { Progress } from "@/app/interface/progress"
14
+
15
  import { EditModal } from "../edit-modal"
 
16
  import { getSettings } from "../settings-dialog/getSettings"
 
17
  import { localStorageKeys } from "../settings-dialog/localStorageKeys"
18
  import { defaultSettings } from "../settings-dialog/defaultSettings"
19
 
20
+ import { Bubble } from "./bubble"
21
+
22
  export function Panel({
23
  page,
24
  nbPanels,
 
36
  // panel id, between 0 and (nbPanels - 1)
37
  panel: number
38
 
 
39
  className?: string
40
  width?: number
41
  height?: number
42
  }) {
 
43
  // index of the panel in the whole app
44
  const panelIndex = page * nbPanels + panel
45
 
 
46
  // the panel Id must be unique across all pages
47
  const panelId = `${panelIndex}`
48
 
49
  // console.log(`panel/index.tsx: <Panel panelId=${panelId}> rendered again!`)
50
 
 
51
  const [mouseOver, setMouseOver] = useState(false)
52
  const ref = useRef<HTMLImageElement>(null)
53
  const font = useStore(s => s.font)
 
60
 
61
  const setPanelPrompt = useStore(s => s.setPanelPrompt)
62
 
63
+ const speeches = useStore(s => s.speeches)
64
+ const speech = speeches[panelIndex] || ""
65
+ const setPanelSpeech = useStore(s => s.setPanelSpeech)
66
+
67
  const captions = useStore(s => s.captions)
68
  const caption = captions[panelIndex] || ""
69
  const setPanelCaption = useStore(s => s.setPanelCaption)
 
96
 
97
  let delay = enableRateLimiter ? (1000 + (500 * panelIndex)) : 1000
98
 
99
+ const addSpeechBubble = async () => {
100
+ if (!renderedRef.current) { return }
101
+
102
+ // story generation failed
103
+ if (speech.trim() === "...") { return }
104
+
105
+ console.log('Generating speech bubble...')
106
+ try {
107
+ const result = await injectSpeechBubbleInTheBackground({
108
+ inputImageInBase64: renderedRef.current.assetUrl,
109
+ text: speech,
110
+ shape: "oval",
111
+ line: "straight", // "straight", "bubble", "chaotic"
112
+ // font?: string;
113
+ // debug: true,
114
+ })
115
+ renderedRef.current.assetUrl = result
116
+ setRendered(panelId, renderedRef.current)
117
+ } catch (err) {
118
+ console.log(`error: failed to inject the speech bubble: ${err}`)
119
+ }
120
+ }
121
  /*
122
  console.log("panel/index.tsx: DEBUG: " + JSON.stringify({
123
  page,
 
227
  if (newRendered.status === "completed") {
228
  setGeneratingImages(panelId, false)
229
  addToUpscaleQueue(panelId, newRendered)
230
+ addSpeechBubble()
231
  } else if (!newRendered.status || newRendered.status === "error") {
232
  setGeneratingImages(panelId, false)
233
  } else {
 
298
  console.log("panel finished!")
299
  setGeneratingImages(panelId, false)
300
  addToUpscaleQueue(panelId, newRendered)
301
+ addSpeechBubble()
302
 
303
  }
304
  } catch (err) {
src/app/interface/top-menu/index.tsx CHANGED
@@ -45,6 +45,9 @@ export function TopMenu() {
45
  const layout = useStore(s => s.layout)
46
  const setLayout = useStore(s => s.setLayout)
47
 
 
 
 
48
  const setShowCaptions = useStore(s => s.setShowCaptions)
49
  const showCaptions = useStore(s => s.showCaptions)
50
 
@@ -170,6 +173,16 @@ export function TopMenu() {
170
  <span className="inline md:hidden">Cap.</span>
171
  </Label>
172
  </div>
 
 
 
 
 
 
 
 
 
 
173
  {/*
174
  <div className={cn(
175
  `transition-all duration-200 ease-in-out`,
 
45
  const layout = useStore(s => s.layout)
46
  const setLayout = useStore(s => s.setLayout)
47
 
48
+ const setShowSpeeches = useStore(s => s.setShowSpeeches)
49
+ const showSpeeches = useStore(s => s.showSpeeches)
50
+
51
  const setShowCaptions = useStore(s => s.setShowCaptions)
52
  const showCaptions = useStore(s => s.showCaptions)
53
 
 
173
  <span className="inline md:hidden">Cap.</span>
174
  </Label>
175
  </div>
176
+ <div className="flex flex-row items-center space-x-3">
177
+ <Switch
178
+ checked={showSpeeches}
179
+ onCheckedChange={setShowSpeeches}
180
+ />
181
+ <Label className="text-gray-200 dark:text-gray-200">
182
+ <span className="hidden md:inline">Bubbles</span>
183
+ <span className="inline md:hidden">Bub.</span>
184
+ </Label>
185
+ </div>
186
  {/*
187
  <div className={cn(
188
  `transition-all duration-200 ease-in-out`,
src/app/main.tsx CHANGED
@@ -49,8 +49,11 @@ export default function Main() {
49
 
50
  // do we need those?
51
  const renderedScenes = useStore(s => s.renderedScenes)
52
- const captions = useStore(s => s.captions)
53
 
 
 
 
 
54
  const setCaptions = useStore(s => s.setCaptions)
55
 
56
  const zoomLevel = useStore(s => s.zoomLevel)
@@ -101,6 +104,7 @@ export default function Main() {
101
  const ref = useRef({
102
  existingPanels: [] as GeneratedPanel[],
103
  newPanelsPrompts: [] as string[],
 
104
  newCaptions: [] as string[],
105
  prompt: "",
106
  preset: "",
@@ -142,6 +146,7 @@ export default function Main() {
142
  ref.current = {
143
  existingPanels: [],
144
  newPanelsPrompts: [],
 
145
  newCaptions: [],
146
  prompt,
147
  preset: preset?.label || "",
@@ -214,6 +219,7 @@ export default function Main() {
214
  const endAt = currentPanel + nbPanelsToGenerate
215
  for (let p = startAt; p < endAt; p++) {
216
  ref.current.newCaptions.push(ref.current.existingPanels[p]?.caption.trim() || "...")
 
217
  const newPanel = joinWords([
218
 
219
  // what we do here is that ideally we give full control to the LLM for prompting,
@@ -231,6 +237,7 @@ export default function Main() {
231
 
232
  // update the frontend
233
  // console.log("updating the frontend..")
 
234
  setCaptions(ref.current.newCaptions)
235
  setPanels(ref.current.newPanelsPrompts)
236
  setGeneratingStory(false)
 
49
 
50
  // do we need those?
51
  const renderedScenes = useStore(s => s.renderedScenes)
 
52
 
53
+ const speeches = useStore(s => s.speeches)
54
+ const setSpeeches = useStore(s => s.setSpeeches)
55
+
56
+ const captions = useStore(s => s.captions)
57
  const setCaptions = useStore(s => s.setCaptions)
58
 
59
  const zoomLevel = useStore(s => s.zoomLevel)
 
104
  const ref = useRef({
105
  existingPanels: [] as GeneratedPanel[],
106
  newPanelsPrompts: [] as string[],
107
+ newSpeeches: [] as string[],
108
  newCaptions: [] as string[],
109
  prompt: "",
110
  preset: "",
 
146
  ref.current = {
147
  existingPanels: [],
148
  newPanelsPrompts: [],
149
+ newSpeeches: [],
150
  newCaptions: [],
151
  prompt,
152
  preset: preset?.label || "",
 
219
  const endAt = currentPanel + nbPanelsToGenerate
220
  for (let p = startAt; p < endAt; p++) {
221
  ref.current.newCaptions.push(ref.current.existingPanels[p]?.caption.trim() || "...")
222
+ ref.current.newSpeeches.push(ref.current.existingPanels[p]?.speech.trim() || "...")
223
  const newPanel = joinWords([
224
 
225
  // what we do here is that ideally we give full control to the LLM for prompting,
 
237
 
238
  // update the frontend
239
  // console.log("updating the frontend..")
240
+ setSpeeches(ref.current.newSpeeches)
241
  setCaptions(ref.current.newCaptions)
242
  setPanels(ref.current.newPanelsPrompts)
243
  setGeneratingStory(false)
src/app/queries/getStoryContinuation.ts CHANGED
@@ -48,6 +48,7 @@ export const getStoryContinuation = async ({
48
  panels.push({
49
  panel: startAt + i,
50
  instructions: `${panelCandidates[i]?.instructions || ""}`,
 
51
  caption: `${panelCandidates[i]?.caption || ""}`,
52
  })
53
  }
@@ -64,6 +65,7 @@ export const getStoryContinuation = async ({
64
  userStoryPrompt,
65
  `${".".repeat(p)}`,
66
  ]),
 
67
  caption: "(Sorry, LLM generation failed: using degraded mode)"
68
  })
69
  }
 
48
  panels.push({
49
  panel: startAt + i,
50
  instructions: `${panelCandidates[i]?.instructions || ""}`,
51
+ speech: `${panelCandidates[i]?.speech || ""}`,
52
  caption: `${panelCandidates[i]?.caption || ""}`,
53
  })
54
  }
 
65
  userStoryPrompt,
66
  `${".".repeat(p)}`,
67
  ]),
68
+ speech: "...",
69
  caption: "(Sorry, LLM generation failed: using degraded mode)"
70
  })
71
  }
src/app/queries/getSystemPrompt.ts CHANGED
@@ -19,9 +19,9 @@ export function getSystemPrompt({
19
  }) {
20
  return [
21
  `You are a writer specialized in ${preset.llmPrompt}`,
22
- `Please write detailed drawing instructions and short (2-3 sentences long) speech captions for the ${firstNextOrLast} ${nbPanelsToGenerate} panels (out of ${maxNbPanels} in total) of a new story, but keep it open-ended (it will be continued and expanded later). Please make sure each of those ${nbPanelsToGenerate} panels include info about character gender, age, origin, clothes, colors, location, lights, etc. Only generate those ${nbPanelsToGenerate} panels, but take into account the fact the panels are part of a longer story (${maxNbPanels} panels long).`,
23
- `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; caption: string; }>\`.`,
24
  // `Give your response as Markdown bullet points.`,
25
- `Be brief in the instructions and narrative captions of those ${nbPanelsToGenerate} panels, don't add your own comments. The captions must be captivating, smart, entertaining. Be straight to the point, and never reply things like "Sure, I can.." etc. Reply using valid JSON!! Important: Write valid JSON!`
26
  ].filter(item => item).join("\n")
27
  }
 
19
  }) {
20
  return [
21
  `You are a writer specialized in ${preset.llmPrompt}`,
22
+ `Please write detailed drawing instructions and short (2-3 sentences long) speeches and narrator captions for the ${firstNextOrLast} ${nbPanelsToGenerate} panels (out of ${maxNbPanels} in total) of a new story, but keep it open-ended (it will be continued and expanded later). Please make sure each of those ${nbPanelsToGenerate} panels include info about character gender, age, origin, clothes, colors, location, lights, etc. Only generate those ${nbPanelsToGenerate} panels, but take into account the fact the panels are part of a longer story (${maxNbPanels} panels long).`,
23
+ `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; speech: string; caption: string; }>\`.`,
24
  // `Give your response as Markdown bullet points.`,
25
+ `Be brief in the instructions, the speeches and the narrative captions of those ${nbPanelsToGenerate} panels, don't add your own comments. The speech must be captivating, smart, entertaining, usually a sentence or two. Be straight to the point, return JSON and never reply things like "Sure, I can.." etc. Reply using valid JSON!! Important: Write valid JSON!`
26
  ].filter(item => item).join("\n")
27
  }
src/app/queries/mockLLMResponse.ts CHANGED
@@ -3,41 +3,49 @@ import { GeneratedPanels } from "@/types"
3
  export const mockGeneratedPanels: GeneratedPanels = [{
4
  "panel": 1,
5
  "instructions": "wide shot of detective walking towards a UFO crash site",
 
6
  "caption": "Detective Jameson investigates a UFO crash in the desert"
7
  },
8
  {
9
  "panel": 2,
10
  "instructions": "close-up of detective's face, determined expression",
 
11
  "caption": "He's been tracking this case for weeks"
12
  },
13
  {
14
  "panel": 3,
15
  "instructions": "medium shot of detective examining UFO debris",
 
16
  "caption": "The evidence is scattered all over the desert"
17
  },
18
  {
19
  "panel": 4,
20
  "instructions": "close-up of strange symbol on UFO debris",
21
- "caption": "But what does this symbol mean?"
 
22
  },
23
  {
24
  "panel": 5,
25
  "instructions": "wide shot of detective walking towards a strange rock formation",
 
26
  "caption": "Jameson follows a trail that leads him deeper into the desert"
27
  },
28
  {
29
  "panel": 6,
30
  "instructions": "medium shot of detective discovering an alien body",
31
- "caption": "He's not alone in the desert"
 
32
  },
33
  {
34
  "panel": 7,
35
  "instructions": "close-up of alien's face, eyes closed, peaceful expression",
 
36
  "caption": "An alien life form, deceased"
37
  },
38
  {
39
  "panel": 8,
40
  "instructions": "wide shot of detective standing over the alien body, looking up at the sky",
41
- "caption": "Jameson wonders, what other secrets lie beyond the stars?"
 
42
  }
43
  ]
 
3
  export const mockGeneratedPanels: GeneratedPanels = [{
4
  "panel": 1,
5
  "instructions": "wide shot of detective walking towards a UFO crash site",
6
+ "speech": "Hmm.. interesting.",
7
  "caption": "Detective Jameson investigates a UFO crash in the desert"
8
  },
9
  {
10
  "panel": 2,
11
  "instructions": "close-up of detective's face, determined expression",
12
+ "speech": "I've been tracking this case for weeks",
13
  "caption": "He's been tracking this case for weeks"
14
  },
15
  {
16
  "panel": 3,
17
  "instructions": "medium shot of detective examining UFO debris",
18
+ "speech": "...",
19
  "caption": "The evidence is scattered all over the desert"
20
  },
21
  {
22
  "panel": 4,
23
  "instructions": "close-up of strange symbol on UFO debris",
24
+ "speech": " what does this symbol mean?",
25
+ "caption": "strange symbols"
26
  },
27
  {
28
  "panel": 5,
29
  "instructions": "wide shot of detective walking towards a strange rock formation",
30
+ "speech": "I've been tracking this case for weeks",
31
  "caption": "Jameson follows a trail that leads him deeper into the desert"
32
  },
33
  {
34
  "panel": 6,
35
  "instructions": "medium shot of detective discovering an alien body",
36
+ "speech": "I'm not alone in the desert",
37
+ "caption": "He's not alone"
38
  },
39
  {
40
  "panel": 7,
41
  "instructions": "close-up of alien's face, eyes closed, peaceful expression",
42
+ "speech": "...?",
43
  "caption": "An alien life form, deceased"
44
  },
45
  {
46
  "panel": 8,
47
  "instructions": "wide shot of detective standing over the alien body, looking up at the sky",
48
+ "speech": "what other secrets lie beyond the stars?",
49
+ "caption": "Jameson wonders"
50
  }
51
  ]
src/app/queries/predictNextPanels.ts CHANGED
@@ -31,7 +31,7 @@ export const predictNextPanels = async ({
31
  // return mockGeneratedPanels
32
 
33
  const existingPanelsTemplate = existingPanels.length
34
- ? ` To help you, here are the previous panels and their captions (note: if you see an anomaly here eg. no caption or the same description repeated multiple times, do not hesitate to fix the story): ${JSON.stringify(existingPanels, null, 2)}`
35
  : ''
36
 
37
  const firstNextOrLast =
@@ -55,9 +55,9 @@ export const predictNextPanels = async ({
55
 
56
  let result = ""
57
 
58
- // we don't require a lot of token for our task
59
- // but to be safe, let's count ~130 tokens per panel
60
- const nbTokensPerPanel = 130
61
 
62
  const nbMaxNewTokens = nbPanelsToGenerate * nbTokensPerPanel
63
 
@@ -115,6 +115,7 @@ export const predictNextPanels = async ({
115
  .map((cap, i) => ({
116
  panel: i,
117
  caption: cap,
 
118
  instructions: cap,
119
  }))
120
  )
 
31
  // return mockGeneratedPanels
32
 
33
  const existingPanelsTemplate = existingPanels.length
34
+ ? ` To help you, here are the previous panels, their speeches and captions (note: if you see an anomaly here eg. no speech, no caption or the same description repeated multiple times, do not hesitate to fix the story): ${JSON.stringify(existingPanels, null, 2)}`
35
  : ''
36
 
37
  const firstNextOrLast =
 
55
 
56
  let result = ""
57
 
58
+ // we don't require a lot of tokens for our task,
59
+ // but to be safe, let's count ~200 tokens per panel
60
+ const nbTokensPerPanel = 200
61
 
62
  const nbMaxNewTokens = nbPanelsToGenerate * nbTokensPerPanel
63
 
 
115
  .map((cap, i) => ({
116
  panel: i,
117
  caption: cap,
118
+ speech: cap,
119
  instructions: cap,
120
  }))
121
  )
src/app/store/index.ts CHANGED
@@ -26,8 +26,10 @@ export const useStore = create<{
26
  currentNbPanels: number
27
  maxNbPanels: number
28
  panels: string[]
 
29
  captions: string[]
30
  upscaleQueue: Record<string, RenderedScene>
 
31
  showCaptions: boolean
32
  renderedScenes: Record<string, RenderedScene>
33
  layout: LayoutName
@@ -55,9 +57,12 @@ export const useStore = create<{
55
  setPreset: (preset: Preset) => void
56
  setPanels: (panels: string[]) => void
57
  setPanelPrompt: (newPrompt: string, index: number) => void
58
- setShowCaptions: (showCaptions: boolean) => void
59
  setLayout: (layout: LayoutName, index?: number) => void
60
  setLayouts: (layouts: LayoutName[]) => void
 
 
 
 
61
  setCaptions: (captions: string[]) => void
62
  setPanelCaption: (newCaption: string, index: number) => void
63
  setZoomLevel: (zoomLevel: number) => void
@@ -85,6 +90,7 @@ export const useStore = create<{
85
  stylePrompt: string
86
  panels: string[]
87
  renderedScenes: Record<string, RenderedScene>
 
88
  captions: string[]
89
  }>
90
  loadClap: (blob: Blob) => Promise<void>
@@ -107,9 +113,11 @@ export const useStore = create<{
107
  maxNbPanels: 4,
108
 
109
  panels: [],
 
110
  captions: [],
111
  upscaleQueue: {} as Record<string, RenderedScene>,
112
  renderedScenes: {} as Record<string, RenderedScene>,
 
113
  showCaptions: getParam("showCaptions", false),
114
 
115
  // deprecated?
@@ -284,6 +292,24 @@ export const useStore = create<{
284
  ))
285
  })
286
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  setCaptions: (captions: string[]) => {
288
  set({
289
  captions,
@@ -324,6 +350,7 @@ export const useStore = create<{
324
  currentNbPages: 1,
325
  currentNbPanels: currentNbPanelsPerPage,
326
  panels: [],
 
327
  captions: [],
328
  upscaleQueue: {},
329
  renderedScenes: {},
@@ -408,6 +435,7 @@ export const useStore = create<{
408
  currentNbPages: 1,
409
  currentNbPanels: currentNbPanelsPerPage,
410
  panels: [],
 
411
  captions: [],
412
  upscaleQueue: {},
413
  renderedScenes: {},
@@ -431,6 +459,7 @@ export const useStore = create<{
431
  prompt,
432
  panels,
433
  renderedScenes,
 
434
  captions
435
  } = get()
436
 
@@ -459,7 +488,7 @@ export const useStore = create<{
459
  for (let i = 0; i < panels.length; i++) {
460
 
461
  const panel = panels[i]
462
-
463
  const caption = captions[i]
464
 
465
  const renderedScene = renderedScenes[`${i}`]
@@ -492,7 +521,7 @@ export const useStore = create<{
492
  startTimeInMs: currentElapsedTimeInMs,
493
  assetDurationInMs: defaultSegmentDurationInMs,
494
  category: ClapSegmentCategory.DIALOGUE,
495
- prompt: caption,
496
  outputType: ClapOutputType.AUDIO,
497
  status: ClapSegmentStatus.TO_GENERATE,
498
  }))
@@ -525,6 +554,7 @@ export const useStore = create<{
525
  stylePrompt: string
526
  panels: string[]
527
  renderedScenes: Record<string, RenderedScene>
 
528
  captions: string[]
529
  }> => {
530
 
@@ -534,6 +564,7 @@ export const useStore = create<{
534
  const panels: string[] = []
535
  const renderedScenes: Record<string, RenderedScene> = {}
536
  const captions: string[] = []
 
537
 
538
  const panelGenerationStatus: Record<number, boolean> = {}
539
 
@@ -552,14 +583,21 @@ export const useStore = create<{
552
  cameraShot,
553
  clap.segments,
554
  ClapSegmentCategory.INTERFACE,
 
 
 
 
 
 
555
  ).at(0) as (ClapSegment | undefined)
556
  })).filter(item => item.storyboard && item.ui) as {
557
  camera: ClapSegment
558
  storyboard: ClapSegment
559
  ui: ClapSegment
 
560
  }[]
561
 
562
- shots.forEach(({ camera, storyboard, ui }, id) => {
563
 
564
  panels.push(storyboard.prompt)
565
 
@@ -582,6 +620,8 @@ export const useStore = create<{
582
 
583
  panelGenerationStatus[id] = false
584
 
 
 
585
  captions.push(ui?.prompt || "")
586
  })
587
 
@@ -595,6 +635,7 @@ export const useStore = create<{
595
  stylePrompt,
596
  panels,
597
  renderedScenes,
 
598
  captions,
599
 
600
  }
@@ -614,6 +655,7 @@ export const useStore = create<{
614
  stylePrompt,
615
  panels,
616
  renderedScenes,
 
617
  captions,
618
  } = await convertClapToComic(currentClap)
619
 
@@ -629,6 +671,7 @@ export const useStore = create<{
629
  // layout,
630
  panels,
631
  renderedScenes,
 
632
  captions,
633
  currentNbPages: Math.round(currentNbPanels / currentNbPanelsPerPage),
634
  upscaleQueue: {},
 
26
  currentNbPanels: number
27
  maxNbPanels: number
28
  panels: string[]
29
+ speeches: string[]
30
  captions: string[]
31
  upscaleQueue: Record<string, RenderedScene>
32
+ showSpeeches: boolean
33
  showCaptions: boolean
34
  renderedScenes: Record<string, RenderedScene>
35
  layout: LayoutName
 
57
  setPreset: (preset: Preset) => void
58
  setPanels: (panels: string[]) => void
59
  setPanelPrompt: (newPrompt: string, index: number) => void
 
60
  setLayout: (layout: LayoutName, index?: number) => void
61
  setLayouts: (layouts: LayoutName[]) => void
62
+ setShowSpeeches: (showSpeeches: boolean) => void
63
+ setSpeeches: (speeches: string[]) => void
64
+ setPanelSpeech: (newSpeech: string, index: number) => void
65
+ setShowCaptions: (showCaptions: boolean) => void
66
  setCaptions: (captions: string[]) => void
67
  setPanelCaption: (newCaption: string, index: number) => void
68
  setZoomLevel: (zoomLevel: number) => void
 
90
  stylePrompt: string
91
  panels: string[]
92
  renderedScenes: Record<string, RenderedScene>
93
+ speeches: string[]
94
  captions: string[]
95
  }>
96
  loadClap: (blob: Blob) => Promise<void>
 
113
  maxNbPanels: 4,
114
 
115
  panels: [],
116
+ speeches: [],
117
  captions: [],
118
  upscaleQueue: {} as Record<string, RenderedScene>,
119
  renderedScenes: {} as Record<string, RenderedScene>,
120
+ showSpeeches: getParam("showSpeeches", false),
121
  showCaptions: getParam("showCaptions", false),
122
 
123
  // deprecated?
 
292
  ))
293
  })
294
  },
295
+ setSpeeches: (speeches: string[]) => {
296
+ set({
297
+ speeches,
298
+ })
299
+ },
300
+ setShowSpeeches: (showSpeeches: boolean) => {
301
+ set({
302
+ showSpeeches,
303
+ })
304
+ },
305
+ setPanelSpeech: (newSpeech, index) => {
306
+ const { speeches } = get()
307
+ set({
308
+ speeches: speeches.map((c, i) => (
309
+ index === i ? newSpeech : c
310
+ ))
311
+ })
312
+ },
313
  setCaptions: (captions: string[]) => {
314
  set({
315
  captions,
 
350
  currentNbPages: 1,
351
  currentNbPanels: currentNbPanelsPerPage,
352
  panels: [],
353
+ speeches: [],
354
  captions: [],
355
  upscaleQueue: {},
356
  renderedScenes: {},
 
435
  currentNbPages: 1,
436
  currentNbPanels: currentNbPanelsPerPage,
437
  panels: [],
438
+ speeches: [],
439
  captions: [],
440
  upscaleQueue: {},
441
  renderedScenes: {},
 
459
  prompt,
460
  panels,
461
  renderedScenes,
462
+ speeches,
463
  captions
464
  } = get()
465
 
 
488
  for (let i = 0; i < panels.length; i++) {
489
 
490
  const panel = panels[i]
491
+ const speech = speeches[i]
492
  const caption = captions[i]
493
 
494
  const renderedScene = renderedScenes[`${i}`]
 
521
  startTimeInMs: currentElapsedTimeInMs,
522
  assetDurationInMs: defaultSegmentDurationInMs,
523
  category: ClapSegmentCategory.DIALOGUE,
524
+ prompt: speech,
525
  outputType: ClapOutputType.AUDIO,
526
  status: ClapSegmentStatus.TO_GENERATE,
527
  }))
 
554
  stylePrompt: string
555
  panels: string[]
556
  renderedScenes: Record<string, RenderedScene>
557
+ speeches: string[]
558
  captions: string[]
559
  }> => {
560
 
 
564
  const panels: string[] = []
565
  const renderedScenes: Record<string, RenderedScene> = {}
566
  const captions: string[] = []
567
+ const speeches: string[] = []
568
 
569
  const panelGenerationStatus: Record<number, boolean> = {}
570
 
 
583
  cameraShot,
584
  clap.segments,
585
  ClapSegmentCategory.INTERFACE,
586
+ ).at(0) as (ClapSegment | undefined),
587
+ dialogue: filterSegments(
588
+ ClapSegmentFilteringMode.START,
589
+ cameraShot,
590
+ clap.segments,
591
+ ClapSegmentCategory.DIALOGUE,
592
  ).at(0) as (ClapSegment | undefined)
593
  })).filter(item => item.storyboard && item.ui) as {
594
  camera: ClapSegment
595
  storyboard: ClapSegment
596
  ui: ClapSegment
597
+ dialogue: ClapSegment
598
  }[]
599
 
600
+ shots.forEach(({ camera, storyboard, ui, dialogue }, id) => {
601
 
602
  panels.push(storyboard.prompt)
603
 
 
620
 
621
  panelGenerationStatus[id] = false
622
 
623
+ speeches.push(dialogue?.prompt || "")
624
+
625
  captions.push(ui?.prompt || "")
626
  })
627
 
 
635
  stylePrompt,
636
  panels,
637
  renderedScenes,
638
+ speeches,
639
  captions,
640
 
641
  }
 
655
  stylePrompt,
656
  panels,
657
  renderedScenes,
658
+ speeches,
659
  captions,
660
  } = await convertClapToComic(currentClap)
661
 
 
671
  // layout,
672
  panels,
673
  renderedScenes,
674
+ speeches,
675
  captions,
676
  currentNbPages: Math.round(currentNbPanels / currentNbPanelsPerPage),
677
  upscaleQueue: {},
src/lib/bubble/injectSpeechBubbleInTheBackground.ts ADDED
@@ -0,0 +1,419 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { ImageSegmenter, FilesetResolver } from "@mediapipe/tasks-vision"
2
+
3
/**
 * Draws a speech bubble containing `text` on top of an image and returns the
 * result as a PNG data URL.
 *
 * The image is segmented with a MediaPipe `ImageSegmenter`; when any
 * non-background category is found, the bubble is anchored above the bounding
 * box of those pixels, otherwise it is anchored at the centre of the image.
 *
 * @param params.inputImageInBase64 source image, in a form accepted by `HTMLImageElement.src`
 * @param params.text bubble text; when empty or missing the input is returned untouched
 * @param params.shape bubble outline shape (default `"oval"`)
 * @param params.line outline line style (default `"handdrawn"`)
 * @param params.font CSS font family used for the text (default `"Arial"`)
 * @param params.debug when true, logs the detected categories and paints the segmentation mask
 * @returns a `data:image/png` URL of the composited image
 * @throws Error when a 2D canvas context cannot be obtained
 */
export async function injectSpeechBubbleInTheBackground(params: {
  inputImageInBase64: string;
  text?: string;
  shape?: "oval" | "rectangular" | "cloud" | "thought";
  line?: "handdrawn" | "straight" | "bubble" | "chaotic";
  font?: string;
  debug?: boolean;
}): Promise<string> {
  const {
    inputImageInBase64,
    text,
    shape = "oval",
    line = "handdrawn",
    font = "Arial",
    debug = false,
  } = params;

  // Nothing to draw: hand the original image back unchanged.
  if (!text) {
    return inputImageInBase64;
  }

  const image = await loadImage(inputImageInBase64);

  // Off-screen canvas with the same pixel size as the source image.
  const canvas = document.createElement('canvas');
  canvas.width = image.width;
  canvas.height = image.height;
  const ctx = canvas.getContext('2d');
  if (!ctx) {
    // getContext may return null; fail with a clear message instead of a TypeError later.
    throw new Error("Could not acquire a 2D canvas context");
  }
  ctx.drawImage(image, 0, 0);

  // NOTE(review): the wasm bundle is fetched at "@latest" while the npm package is
  // pinned in package.json; consider pinning this URL to the installed version.
  const vision = await FilesetResolver.forVisionTasks(
    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
  );
  const imageSegmenter = await ImageSegmenter.createFromOptions(vision, {
    baseOptions: {
      modelAssetPath: "https://storage.googleapis.com/mediapipe-models/image_segmenter/deeplab_v3/float32/1/deeplab_v3.tflite",
      delegate: "GPU"
    },
    outputCategoryMask: true,
    outputConfidenceMasks: false
  });

  let characterBoundingBox: { top: number, left: number, width: number, height: number } | null = null;

  try {
    const segmentationResult = imageSegmenter.segment(image);
    const categoryMask = segmentationResult.categoryMask;

    if (categoryMask) {
      try {
        const mask = categoryMask.getAsUint8Array();
        const detectedItems = analyzeSegmentationMask(mask, image.width, image.height);
        if (debug) {
          console.log("Detected items:", detectedItems);
        }

        if (detectedItems.length > 0) {
          characterBoundingBox = findCharacterBoundingBox(mask, image.width, image.height);
        }

        if (debug) {
          drawSegmentationMask(ctx, mask, image.width, image.height);
        }
      } finally {
        // The mask owns resources that are only released explicitly.
        categoryMask.close();
      }
    }
  } finally {
    // A segmenter is created per call, so it must also be released per call.
    imageSegmenter.close();
  }

  // Anchor at the top-centre of the detected subject, or the image centre as a fallback.
  const bubbleLocation = characterBoundingBox
    ? { x: characterBoundingBox.left + characterBoundingBox.width / 2, y: characterBoundingBox.top }
    : { x: image.width / 2, y: image.height / 2 };

  drawSpeechBubble(ctx, bubbleLocation, text, shape, line, font, !!characterBoundingBox, image.width, image.height, characterBoundingBox);

  return canvas.toDataURL('image/png');
}
73
/**
 * Decodes an image source string into an `HTMLImageElement`.
 *
 * @param base64 value assigned to the element's `src`
 * @returns a promise resolved with the element once `onload` fires, and
 *          rejected with whatever `onerror` receives
 */
function loadImage(base64: string): Promise<HTMLImageElement> {
  return new Promise<HTMLImageElement>((onLoaded, onFailed) => {
    const element = new Image();
    element.onload = () => {
      onLoaded(element);
    };
    element.onerror = onFailed;
    // Assigning src last guarantees both handlers are attached before loading starts.
    element.src = base64;
  });
}
81
+
82
+
83
+
84
/**
 * Lists the distinct non-zero category ids found in a segmentation mask.
 *
 * @param mask one category id per pixel; 0 is skipped
 * @param width unused
 * @param height unused
 * @returns one `unknown-<id>` label per distinct id, in order of first appearance
 */
function analyzeSegmentationMask(mask: Uint8Array, width: number, height: number): string[] {
  const seen = new Set<number>();
  mask.forEach((categoryId) => {
    if (categoryId > 0) {
      seen.add(categoryId);
    }
  });

  const labels: string[] = [];
  seen.forEach((categoryId) => {
    labels.push(`unknown-${categoryId}`);
  });
  return labels;
}
93
+
94
/**
 * Computes the centroid of all non-zero pixels of a row-major mask.
 *
 * @param mask one category id per pixel, laid out row by row
 * @param width number of pixels per row
 * @param height number of rows
 * @returns the mean x/y of the non-zero pixels, or the centre of the
 *          `width` x `height` area when there are none
 */
function findMainCharacterLocation(mask: Uint8Array, width: number, height: number): { x: number, y: number } {
  let totalX = 0;
  let totalY = 0;
  let hits = 0;

  const pixelCount = width * height;
  for (let index = 0; index < pixelCount; index++) {
    if (mask[index] > 0) {
      // Recover the 2D coordinates from the flat row-major index.
      totalX += index % width;
      totalY += Math.floor(index / width);
      hits += 1;
    }
  }

  if (hits === 0) {
    return { x: width / 2, y: height / 2 };
  }
  return { x: totalX / hits, y: totalY / hits };
}
108
+
109
+
110
/**
 * Debug helper: tints every pixel that belongs to a non-zero segmentation
 * category with that category's colour, at 50% strength.
 *
 * `putImageData` writes pixels verbatim and does not composite, so lowering the
 * alpha channel would replace the picture with a semi-transparent colour.
 * The overlay is therefore blended into the RGB channels by hand and the
 * existing alpha is left untouched.
 *
 * @param ctx context holding the image to annotate
 * @param mask one category id per pixel, row-major
 * @param width width of the region read from and written back to the canvas
 * @param height height of that region
 */
function drawSegmentationMask(ctx: CanvasRenderingContext2D, mask: Uint8Array, width: number, height: number) {
  const imageData = ctx.getImageData(0, 0, width, height);
  const data = imageData.data;

  // Never index past either buffer if the mask and the canvas region disagree in size.
  const pixelCount = Math.min(mask.length, data.length / 4);

  for (let i = 0; i < pixelCount; i++) {
    const category = mask[i];
    if (category > 0) {
      // Use a different color for each category
      const [r, g, b] = getCategoryColor(category);
      const offset = i * 4;
      // 50/50 mix between the original pixel and the category colour.
      data[offset] = (data[offset] + r) / 2;
      data[offset + 1] = (data[offset + 1] + g) / 2;
      data[offset + 2] = (data[offset + 2] + b) / 2;
    }
  }
  ctx.putImageData(imageData, 0, 0);
}
126
+
127
/**
 * Maps a category id to a deterministic, fully saturated RGB colour.
 *
 * @param category segmentation category id
 * @returns `[r, g, b]` as produced by `hslToRgb` for the derived hue
 */
function getCategoryColor(category: number): [number, number, number] {
  // Stepping the hue by 137 degrees per id spreads consecutive ids around the colour wheel.
  const hueInDegrees = (category * 137) % 360;
  const normalizedHue = hueInDegrees / 360;
  return hslToRgb(normalizedHue, 1, 0.5);
}
132
+
133
/**
 * Converts an HSL colour to 8-bit RGB.
 *
 * @param h hue as a fraction of a full turn
 * @param s saturation (0 yields a grey)
 * @param l lightness
 * @returns `[r, g, b]`, each channel being the 0..1 value times 255, rounded
 */
function hslToRgb(h: number, s: number, l: number): [number, number, number] {
  const toByte = (value: number): number => Math.round(value * 255);

  // Zero saturation: every channel equals the lightness.
  if (s === 0) {
    const grey = toByte(l);
    return [grey, grey, grey];
  }

  // Piecewise-linear ramp giving one channel's intensity for a (wrapped) hue offset.
  const channel = (p: number, q: number, t: number): number => {
    if (t < 0) t += 1;
    if (t > 1) t -= 1;
    if (t < 1/6) return p + (q - p) * 6 * t;
    if (t < 1/2) return q;
    if (t < 2/3) return p + (q - p) * (2/3 - t) * 6;
    return p;
  };

  const q = l < 0.5 ? l * (1 + s) : l + s - l * s;
  const p = 2 * l - q;

  const red = channel(p, q, h + 1/3);
  const green = channel(p, q, h);
  const blue = channel(p, q, h - 1/3);
  return [toByte(red), toByte(green), toByte(blue)];
}
154
+
155
/**
 * Lays out and paints a complete speech bubble: outline, tail and wrapped text.
 *
 * The bubble is sized from the image dimensions, grown if the wrapped text
 * needs more room, placed above `location` and clamped inside the image.
 * The requested line style is applied before the outline is stroked so that
 * it affects both the bubble and its tail, and all canvas state changed here
 * is restored before returning.
 *
 * @param ctx context to draw on
 * @param location anchor point the bubble is placed above
 * @param text text to render inside the bubble
 * @param shape bubble outline shape
 * @param line outline line style
 * @param font CSS font family
 * @param characterDetected currently unused; kept for signature compatibility
 * @param imageWidth width of the image, used for sizing and clamping
 * @param imageHeight height of the image, used for sizing and clamping
 * @param characterBoundingBox subject bounds; when present the tail points into it
 */
function drawSpeechBubble(
  ctx: CanvasRenderingContext2D,
  location: { x: number, y: number },
  text: string,
  shape: "oval" | "rectangular" | "cloud" | "thought",
  line: "handdrawn" | "straight" | "bubble" | "chaotic",
  font: string,
  characterDetected: boolean,
  imageWidth: number,
  imageHeight: number,
  characterBoundingBox: { top: number, left: number, width: number, height: number } | null
) {
  const bubbleWidth = Math.min(300, imageWidth * 0.4);
  const bubbleHeight = Math.min(150, imageHeight * 0.3);
  const padding = 20;

  // Font, fill/stroke, dash pattern and alignment are all changed below.
  ctx.save();

  // Longer texts get a smaller font, bounded to the 15..30 px range.
  const fontSize = Math.max(15, Math.min(30, 500 / text.length));
  ctx.font = `${fontSize}px ${font}`;

  const wrappedText = wrapText(ctx, text, bubbleWidth - padding * 2);
  const textDimensions = measureTextDimensions(ctx, wrappedText);

  // Grow the bubble when the text does not fit the default size.
  const finalWidth = Math.max(bubbleWidth, textDimensions.width + padding * 2);
  const finalHeight = Math.max(bubbleHeight, textDimensions.height + padding * 2);

  // Centre of the bubble: above the anchor, clamped so the bubble stays inside the image.
  const bubbleLocation = {
    x: Math.max(finalWidth / 2, Math.min(imageWidth - finalWidth / 2, location.x)),
    y: Math.max(finalHeight / 2, Math.min(imageHeight - finalHeight / 2, location.y - finalHeight))
  };

  ctx.fillStyle = 'white';
  ctx.strokeStyle = 'black';
  ctx.lineWidth = 2;

  // Must happen before the first stroke(), otherwise only the tail gets the style.
  applyLineStyle(ctx, line);

  drawBubbleShape(ctx, shape, bubbleLocation, finalWidth, finalHeight, location);
  ctx.fill();
  ctx.stroke();

  // Aim the tail slightly inside the top of the subject when one was found.
  const tailTarget = characterBoundingBox
    ? { x: characterBoundingBox.left + characterBoundingBox.width / 2, y: characterBoundingBox.top + characterBoundingBox.height * 0.2 }
    : location;

  drawTail(ctx, bubbleLocation, finalWidth, finalHeight, tailTarget, shape);

  ctx.fillStyle = 'black';
  ctx.textAlign = 'center';
  ctx.textBaseline = 'middle';
  drawFormattedText(ctx, wrappedText, bubbleLocation.x, bubbleLocation.y, finalWidth - padding * 2, fontSize);

  ctx.restore();
}
207
+
208
/**
 * Builds the outline path for the requested bubble shape by delegating to the
 * matching shape helper. An unrecognised shape adds nothing to the path.
 *
 * @param ctx context whose current path is replaced
 * @param shape which outline to build
 * @param bubbleLocation centre of the bubble
 * @param width bubble width
 * @param height bubble height
 * @param tailTarget unused here
 */
function drawBubbleShape(
  ctx: CanvasRenderingContext2D,
  shape: "oval" | "rectangular" | "cloud" | "thought",
  bubbleLocation: { x: number, y: number },
  width: number,
  height: number,
  tailTarget: { x: number, y: number }
) {
  if (shape === "oval") {
    drawOvalBubble(ctx, bubbleLocation, width, height);
    return;
  }
  if (shape === "rectangular") {
    drawRectangularBubble(ctx, bubbleLocation, width, height);
    return;
  }
  if (shape === "cloud") {
    drawCloudBubble(ctx, bubbleLocation, width, height);
    return;
  }
  if (shape === "thought") {
    drawThoughtBubble(ctx, bubbleLocation, width, height);
  }
}
231
+
232
/**
 * Starts a new path made of one full ellipse centred on `location`.
 * Only the path is built; filling and stroking are left to the caller.
 *
 * @param ctx context whose current path is replaced
 * @param location centre of the ellipse
 * @param width full width of the ellipse
 * @param height full height of the ellipse
 */
function drawOvalBubble(ctx: CanvasRenderingContext2D, location: { x: number, y: number }, width: number, height: number) {
  const { x: centerX, y: centerY } = location;
  const radiusX = width / 2;
  const radiusY = height / 2;
  const fullTurn = 2 * Math.PI;

  ctx.beginPath();
  ctx.ellipse(centerX, centerY, radiusX, radiusY, 0, 0, fullTurn);
  ctx.closePath();
}
237
+
238
/**
 * Starts a new path describing a rectangle centred on `location` whose four
 * corners are rounded with quadratic curves of a fixed 20px radius.
 * Only the path is built; filling and stroking are left to the caller.
 *
 * @param ctx context whose current path is replaced
 * @param location centre of the rectangle
 * @param width full width
 * @param height full height
 */
function drawRectangularBubble(ctx: CanvasRenderingContext2D, location: { x: number, y: number }, width: number, height: number) {
  const radius = 20;
  const left = location.x - width / 2;
  const right = location.x + width / 2;
  const top = location.y - height / 2;
  const bottom = location.y + height / 2;

  ctx.beginPath();
  // Top edge, then clockwise: each corner uses the true corner point as its control point.
  ctx.moveTo(left + radius, top);
  ctx.lineTo(right - radius, top);
  ctx.quadraticCurveTo(right, top, right, top + radius);
  ctx.lineTo(right, bottom - radius);
  ctx.quadraticCurveTo(right, bottom, right - radius, bottom);
  ctx.lineTo(left + radius, bottom);
  ctx.quadraticCurveTo(left, bottom, left, bottom - radius);
  ctx.lineTo(left, top + radius);
  ctx.quadraticCurveTo(left, top, left + radius, top);
  ctx.closePath();
}
252
+
253
/**
 * Starts a new path describing a cloud-like outline: a series of quadratic
 * "bumps" running along the top, right, bottom and left sides of the box
 * centred on `location`. Only the path is built; filling and stroking are
 * left to the caller.
 *
 * @param ctx context whose current path is replaced
 * @param location centre of the bubble
 * @param width full width of the bubble
 * @param height full height of the bubble
 */
function drawCloudBubble(ctx: CanvasRenderingContext2D, location: { x: number, y: number }, width: number, height: number) {
  // At least one bump: for widths below 40 the count would be 0, the radius
  // would divide by zero and every coordinate would become NaN/Infinity.
  const numBumps = Math.max(1, Math.floor(width / 40));
  const bumpRadius = width / (numBumps * 2);

  ctx.beginPath();
  ctx.moveTo(location.x - width / 2 + bumpRadius, location.y);

  // Top
  for (let i = 0; i < numBumps; i++) {
    const x = location.x - width / 2 + (i * 2 + 1) * bumpRadius;
    const y = location.y - height / 2;
    ctx.quadraticCurveTo(x, y - bumpRadius / 2, x + bumpRadius, y);
  }

  // Right
  for (let i = 0; i < numBumps / 2; i++) {
    const x = location.x + width / 2;
    const y = location.y - height / 2 + (i * 2 + 1) * bumpRadius * 2;
    ctx.quadraticCurveTo(x + bumpRadius / 2, y, x, y + bumpRadius * 2);
  }

  // Bottom
  for (let i = numBumps; i > 0; i--) {
    const x = location.x - width / 2 + (i * 2 - 1) * bumpRadius;
    const y = location.y + height / 2;
    ctx.quadraticCurveTo(x, y + bumpRadius / 2, x - bumpRadius, y);
  }

  // Left
  for (let i = numBumps / 2; i > 0; i--) {
    const x = location.x - width / 2;
    const y = location.y - height / 2 + (i * 2 - 1) * bumpRadius * 2;
    ctx.quadraticCurveTo(x - bumpRadius / 2, y, x, y - bumpRadius * 2);
  }
  ctx.closePath();
}
289
+
290
/**
 * Builds the outline of a thought bubble, which is the same path as a cloud
 * bubble. The trail of small circles that distinguishes a thought bubble is
 * not drawn here; `drawTail` takes care of it.
 *
 * @param ctx context whose current path is replaced
 * @param location centre of the bubble
 * @param width full width of the bubble
 * @param height full height of the bubble
 */
function drawThoughtBubble(ctx: CanvasRenderingContext2D, location: { x: number, y: number }, width: number, height: number) {
  return drawCloudBubble(ctx, location, width, height);
}
294
+
295
/**
 * Draws the bubble's tail, from the bottom edge of the bubble towards `tailTarget`,
 * then fills and strokes it with the context's current styles.
 *
 * - For `"thought"` bubbles the tail is a trail of three shrinking circles.
 *   Each circle is its own sub-path, so no stray line connects the bubble
 *   to the circles or the circles to each other.
 * - For every other shape the tail is a curved wedge ending at the target.
 *
 * @param ctx context to draw on
 * @param bubbleLocation centre of the bubble
 * @param width bubble width
 * @param height bubble height
 * @param tailTarget point the tail aims at
 * @param shape bubble shape; only `"thought"` is treated specially
 */
function drawTail(
  ctx: CanvasRenderingContext2D,
  bubbleLocation: { x: number, y: number },
  width: number,
  height: number,
  tailTarget: { x: number, y: number },
  shape: string
) {
  // The tail leaves the bottom edge, a quarter-width towards the side the target is on.
  const targetIsRight = tailTarget.x > bubbleLocation.x;
  const startX = bubbleLocation.x + (targetIsRight ? width / 4 : -width / 4);
  const startY = bubbleLocation.y + height / 2;

  ctx.beginPath();

  if (shape === "thought") {
    const bubbleCount = 3;
    for (let i = 0; i < bubbleCount; i++) {
      // Evenly spaced between the bubble and the target, shrinking along the way.
      const t = (i + 1) / (bubbleCount + 1);
      const x = startX + (tailTarget.x - startX) * t;
      const y = startY + (tailTarget.y - startY) * t;
      const radius = 5 * (1 - t);
      // arc() starts at angle 0, i.e. at (x + radius, y); moving there first
      // opens a fresh sub-path instead of drawing a connecting segment.
      ctx.moveTo(x + radius, y);
      ctx.arc(x, y, radius, 0, Math.PI * 2);
    }
  } else {
    ctx.moveTo(startX, startY);
    const controlX = (startX + tailTarget.x) / 2;
    const controlY = (startY + tailTarget.y + 20) / 2;
    // Out to the tip, then back to a point 10px beside the start to give the wedge some width.
    ctx.quadraticCurveTo(controlX, controlY, tailTarget.x, tailTarget.y);
    ctx.quadraticCurveTo(controlX, controlY, startX + (targetIsRight ? -10 : 10), startY);
  }
  ctx.closePath();
  ctx.fill();
  ctx.stroke();
}
330
+
331
/**
 * Computes the axis-aligned bounding box of all non-zero pixels of a
 * row-major mask.
 *
 * @param mask one category id per pixel, laid out row by row
 * @param width number of pixels per row
 * @param height number of rows
 * @returns `top`/`left` of the first matching row/column and `width`/`height`
 *          as the distance between the extreme matching pixels. When no pixel
 *          is set, an all-zero box is returned instead of one with negative
 *          dimensions.
 */
function findCharacterBoundingBox(mask: Uint8Array, width: number, height: number): { top: number, left: number, width: number, height: number } {
  let minX = width, minY = height, maxX = 0, maxY = 0;
  let found = false;

  for (let y = 0; y < height; y++) {
    const rowStart = y * width;
    for (let x = 0; x < width; x++) {
      if (mask[rowStart + x] > 0) {
        found = true;
        if (x < minX) minX = x;
        if (x > maxX) maxX = x;
        if (y < minY) minY = y;
        if (y > maxY) maxY = y;
      }
    }
  }

  // Without this guard min > max and the box would have negative width and height.
  if (!found) {
    return { top: 0, left: 0, width: 0, height: 0 };
  }

  return {
    top: minY,
    left: minX,
    width: maxX - minX,
    height: maxY - minY
  };
}
351
+
352
/**
 * Configures the context's dash pattern (and, for `"bubble"`, its line cap)
 * for a named line style. Unknown style names leave the context untouched.
 *
 * @param ctx context to configure
 * @param style one of `"handdrawn"`, `"straight"`, `"bubble"`, `"chaotic"`
 */
function applyLineStyle(ctx: CanvasRenderingContext2D, style: string) {
  if (style === "handdrawn") {
    ctx.setLineDash([5, 5]);
  } else if (style === "straight") {
    // An empty pattern means a solid line.
    ctx.setLineDash([]);
  } else if (style === "bubble") {
    // Zero-length dashes with round caps render as a row of dots.
    ctx.setLineDash([0, 10]);
    ctx.lineCap = "round";
  } else if (style === "chaotic") {
    ctx.setLineDash([10, 5, 2, 5]);
  }
}
369
+
370
/**
 * Splits `text` into lines for rendering inside a bubble.
 *
 * A line is ended when adding the next word would exceed `maxWidth`
 * (measured with the context's current font), and also right after a word
 * that ends with `.` or `,`, so that each clause sits on its own line.
 * Empty lines are never produced; a single word wider than `maxWidth`
 * is kept whole on its own line.
 *
 * @param ctx context used to measure text
 * @param text text to wrap; runs of whitespace separate words
 * @param maxWidth maximum line width, in the units returned by `measureText`
 * @returns the wrapped lines, in order
 */
function wrapText(ctx: CanvasRenderingContext2D, text: string, maxWidth: number): string[] {
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  const lines: string[] = [];
  let currentLine = '';

  for (const word of words) {
    const testLine = currentLine ? `${currentLine} ${word}` : word;

    // Only wrap when there is something to push; an over-long first word stays on its line.
    if (currentLine && ctx.measureText(testLine).width > maxWidth) {
      lines.push(currentLine);
      currentLine = word;
    } else {
      currentLine = testLine;
    }

    // Break AFTER the punctuated word so it stays attached to its own clause.
    if (word.endsWith('.') || word.endsWith(',')) {
      lines.push(currentLine);
      currentLine = '';
    }
  }

  if (currentLine) {
    lines.push(currentLine);
  }

  return lines;
}
393
+
394
+
395
/**
 * Measures a block of already-wrapped lines with the context's current font.
 *
 * @param ctx context used to measure text
 * @param lines lines to measure
 * @returns `width`: the widest line; `height`: the number of lines times a
 *          line height estimated as 1.2x the measured width of the letter "M"
 */
function measureTextDimensions(ctx: CanvasRenderingContext2D, lines: string[]): { width: number, height: number } {
  // The width of "M" stands in for the font's height here.
  const estimatedLineHeight = ctx.measureText('M').width * 1.2;
  const height = estimatedLineHeight * lines.length;

  const width = lines.reduce(
    (widest, line) => Math.max(widest, ctx.measureText(line).width),
    0,
  );

  return { width, height };
}
407
+
408
/**
 * Renders wrapped lines as a block vertically centred on `y`, each line drawn at `x`.
 *
 * Lines are spaced 1.2x the font size apart. Each line is passed to `fillText`
 * with a width limit that shrinks the further the line is from the middle of
 * the block.
 *
 * @param ctx context to draw on
 * @param lines lines to draw, top to bottom
 * @param x horizontal position passed to `fillText`
 * @param y vertical centre of the whole block
 * @param maxWidth width limit for a line at the middle of the block
 * @param fontSize font size the line spacing is derived from
 */
function drawFormattedText(ctx: CanvasRenderingContext2D, lines: string[], x: number, y: number, maxWidth: number, fontSize: number) {
  const lineHeight = fontSize * 1.2;
  const blockHeight = lineHeight * lines.length;
  const firstLineY = y - blockHeight / 2 + lineHeight / 2;
  const middleIndex = (lines.length - 1) / 2;

  lines.forEach((content, i) => {
    const lineY = firstLineY + i * lineHeight;
    // Outer lines get a narrower limit than central ones.
    const taper = 1 - Math.abs(i - middleIndex) / lines.length;
    const widthLimit = Math.min(maxWidth, maxWidth * taper);
    ctx.fillText(content, x, lineY, widthLimit);
  });
}
src/lib/createLlamaPrompt.ts CHANGED
@@ -3,7 +3,7 @@ export function createLlamaPrompt(messages: Array<{ role: string, content: strin
3
  const B_INST = "[INST]", E_INST = "[/INST]";
4
  const B_SYS = "<<SYS>>\n", E_SYS = "\n<</SYS>>\n\n";
5
  const BOS = "<s>", EOS = "</s>";
6
- const DEFAULT_SYSTEM_PROMPT = "You are a helpful, respectful and honest storywriting assistant. Always answer in a creative and entertaining way, while being safe. Please ensure that your stories and captions are socially unbiased and positive in nature. If a request does not make any sense, go on anyway, as we are writing a fantasy story.";
7
 
8
  if (messages[0].role != "system"){
9
  messages = [
 
3
  const B_INST = "[INST]", E_INST = "[/INST]";
4
  const B_SYS = "<<SYS>>\n", E_SYS = "\n<</SYS>>\n\n";
5
  const BOS = "<s>", EOS = "</s>";
6
+ const DEFAULT_SYSTEM_PROMPT = "You are a helpful, respectful and honest storywriting assistant. Always answer in a creative and entertaining way, while being safe. Please ensure that your stories, speeches and captions are socially unbiased and positive in nature. If a request does not make any sense, go on anyway, as we are writing a fantasy story.";
7
 
8
  if (messages[0].role != "system"){
9
  messages = [
src/lib/dirtyGeneratedPanelCleaner.ts CHANGED
@@ -3,8 +3,10 @@ import { GeneratedPanel } from "@/types"
3
  export function dirtyGeneratedPanelCleaner({
4
  panel,
5
  instructions,
 
6
  caption
7
  }: GeneratedPanel): GeneratedPanel {
 
8
  let newCaption = `${caption || ""}`.split(":").pop()?.trim() || ""
9
  let newInstructions = (
10
  // need to remove from LLM garbage here, too
@@ -34,6 +36,7 @@ export function dirtyGeneratedPanelCleaner({
34
  return {
35
  panel,
36
  instructions: newInstructions,
 
37
  caption: newCaption,
38
  }
39
  }
 
3
  export function dirtyGeneratedPanelCleaner({
4
  panel,
5
  instructions,
6
+ speech,
7
  caption
8
  }: GeneratedPanel): GeneratedPanel {
9
+ let newSpeech = `${speech || ""}`.split(":").pop()?.trim() || ""
10
  let newCaption = `${caption || ""}`.split(":").pop()?.trim() || ""
11
  let newInstructions = (
12
  // need to remove from LLM garbage here, too
 
36
  return {
37
  panel,
38
  instructions: newInstructions,
39
+ speech: newSpeech,
40
  caption: newCaption,
41
  }
42
  }
src/lib/dirtyGeneratedPanelsParser.ts CHANGED
@@ -14,15 +14,18 @@ export function dirtyGeneratedPanelsParser(input: string): GeneratedPanel[] {
14
 
15
  const results = jsonData.map((item, i) => {
16
  let panel = i
 
17
  let caption = item.caption ? item.caption.trim() : ''
18
  let instructions = item.instructions ? item.instructions.trim() : ''
19
- if (!instructions && caption) {
 
 
20
  instructions = caption
21
  }
22
  if (!caption && instructions) {
23
  caption = instructions
24
  }
25
- return { panel, caption, instructions }
26
  })
27
 
28
  return results
 
14
 
15
  const results = jsonData.map((item, i) => {
16
  let panel = i
17
+ let speech = item.speech ? item.speech.trim() : ''
18
  let caption = item.caption ? item.caption.trim() : ''
19
  let instructions = item.instructions ? item.instructions.trim() : ''
20
+ if (!instructions && !caption && speech) {
21
+ instructions = speech
22
+ } else if (!instructions && caption) {
23
  instructions = caption
24
  }
25
  if (!caption && instructions) {
26
  caption = instructions
27
  }
28
+ return { panel, speech, caption, instructions }
29
  })
30
 
31
  return results
src/lib/parseBadJSON.ts CHANGED
@@ -5,7 +5,7 @@ export function parseBadJSON(jsonLikeString: string): GeneratedPanels {
5
  try {
6
  return JSON.parse(jsonLikeString) as GeneratedPanels
7
  } catch (err) {
8
- var regex = /\{\s*"panel":\s*(\d+),\s*"instructions"\s*:\s*"([^"]+)",\s*"caption":\s*"([^"]*)"\s*\}/gs;
9
 
10
  let results = [];
11
  let match;
@@ -14,7 +14,8 @@ export function parseBadJSON(jsonLikeString: string): GeneratedPanels {
14
  let json = {
15
  panel: Number(match[1]),
16
  instructions: match[2],
17
- caption: match[3]
 
18
  };
19
  results.push(json);
20
  }
 
5
  try {
6
  return JSON.parse(jsonLikeString) as GeneratedPanels
7
  } catch (err) {
8
+ var regex = /\{\s*"panel":\s*(\d+),\s*"instructions"\s*:\s*"([^"]+)",\s*"speech"\s*:\s*"([^"]+)",\s*"caption":\s*"([^"]*)"\s*\}/gs;
9
 
10
  let results = [];
11
  let match;
 
14
  let json = {
15
  panel: Number(match[1]),
16
  instructions: match[2],
17
+ speech: match[3],
18
+ caption: match[4]
19
  };
20
  results.push(json);
21
  }
src/types.ts CHANGED
@@ -89,6 +89,7 @@ export interface ImageAnalysisResponse {
89
  export type GeneratedPanel = {
90
  panel: number
91
  instructions: string
 
92
  caption: string
93
  }
94
 
 
89
  export type GeneratedPanel = {
90
  panel: number
91
  instructions: string
92
+ speech: string
93
  caption: string
94
  }
95