jbilcke-hf HF staff committed on
Commit
b843e59
·
1 Parent(s): 2e12a66

improve speech bubbles

Browse files
src/app/queries/getSystemPrompt.ts CHANGED
@@ -19,7 +19,7 @@ export function getSystemPrompt({
19
  }) {
20
  return [
21
  `You are a writer specialized in ${preset.llmPrompt}`,
22
- `Please write detailed drawing instructions and short (2-3 sentences long) speeches and narrator captions for the ${firstNextOrLast} ${nbPanelsToGenerate} panels (out of ${maxNbPanels} in total) of a new story, but keep it open-ended (it will be continued and expanded later). Please make sure each of those ${nbPanelsToGenerate} panels include info about character gender, age, origin, clothes, colors, location, lights, etc. Speeches are the dialogues, so they MUST be written in 1st person style. Only generate those ${nbPanelsToGenerate} panels, but take into account the fact the panels are part of a longer story (${maxNbPanels} panels long).`,
23
  `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; speech: string; caption: string; }>\`.`,
24
  // `Give your response as Markdown bullet points.`,
25
  `Be brief in the instructions, the speeches and the narrative captions of those ${nbPanelsToGenerate} panels, don't add your own comments. Write speeces in 1st person style, with intensity, humor etc. The speech must be captivating, smart, entertaining, usually a sentence or two. Be straight to the point, return JSON and never reply things like "Sure, I can.." etc. Reply using valid JSON!! Important: Write valid JSON!`
 
19
  }) {
20
  return [
21
  `You are a writer specialized in ${preset.llmPrompt}`,
22
+ `Please write detailed drawing instructions and short (2-3 sentences long) speeches and narrator captions for the ${firstNextOrLast} ${nbPanelsToGenerate} panels (out of ${maxNbPanels} in total) of a new story, but keep it open-ended (it will be continued and expanded later). Please make sure each of those ${nbPanelsToGenerate} panels include info about character gender, age, origin, clothes, colors, location, lights, etc. Speeches are the dialogues, so they MUST be written in 1st person style, and be short, eg a couple of short sentences. Only generate those ${nbPanelsToGenerate} panels, but take into account the fact the panels are part of a longer story (${maxNbPanels} panels long).`,
23
  `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; speech: string; caption: string; }>\`.`,
24
  // `Give your response as Markdown bullet points.`,
25
  `Be brief in the instructions, the speeches and the narrative captions of those ${nbPanelsToGenerate} panels, don't add your own comments. Write speeces in 1st person style, with intensity, humor etc. The speech must be captivating, smart, entertaining, usually a sentence or two. Be straight to the point, return JSON and never reply things like "Sure, I can.." etc. Reply using valid JSON!! Important: Write valid JSON!`
src/lib/bubble/injectSpeechBubbleInTheBackground.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { ImageSegmenter, FilesetResolver } from "@mediapipe/tasks-vision"
2
  import { actionman } from "../fonts";
3
 
4
  interface BoundingBox {
@@ -53,7 +53,7 @@ export async function injectSpeechBubbleInTheBackground(params: {
53
  outputConfidenceMasks: false
54
  });
55
 
56
- const segmentationResult = imageSegmenter.segment(image);
57
  let characterBoundingBox: BoundingBox | null = null;
58
 
59
  if (segmentationResult.categoryMask) {
@@ -85,24 +85,61 @@ function loadImage(base64: string): Promise<HTMLImageElement> {
85
  });
86
  }
87
 
88
- function findCharacterBoundingBox(mask: Uint8Array, width: number, height: number): BoundingBox {
89
- let minX = width, minY = height, maxX = 0, maxY = 0;
 
 
90
  for (let y = 0; y < height; y++) {
91
  for (let x = 0; x < width; x++) {
92
  const index = y * width + x;
93
- if (mask[index] > 0) {
94
- minX = Math.min(minX, x);
95
- minY = Math.min(minY, y);
96
- maxX = Math.max(maxX, x);
97
- maxY = Math.max(maxY, y);
98
  }
99
  }
100
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  return {
102
- top: minY,
103
  left: minX,
104
- width: maxX - minX,
105
- height: maxY - minY
 
106
  };
107
  }
108
 
@@ -134,13 +171,21 @@ function calculateBubbleLocations(
134
  const padding = 50;
135
  const availableWidth = imageWidth - padding * 2;
136
  const availableHeight = imageHeight - padding * 2;
137
- const maxAttempts = 50;
138
 
139
  for (let i = 0; i < bubbleCount; i++) {
140
  let x, y;
141
  let attempts = 0;
142
  do {
143
- x = Math.random() * availableWidth + padding;
 
 
 
 
 
 
 
 
144
  y = (i / bubbleCount) * availableHeight + padding;
145
  attempts++;
146
 
@@ -224,8 +269,8 @@ function drawSpeechBubble(
224
  const fontSize = 20;
225
  ctx.font = `${fontSize}px ${font}`;
226
 
227
- // Adjust maximum width to account for border padding
228
- const maxBubbleWidth = imageWidth - 2 * borderPadding;
229
  const wrappedText = wrapText(ctx, text, maxBubbleWidth - padding * 2, fontSize);
230
  const textDimensions = measureTextDimensions(ctx, wrappedText, fontSize);
231
 
@@ -347,13 +392,12 @@ function adjustBubbleLocation(
347
 
348
  // Ensure the bubble doesn't overlap with the character
349
  if (characterBoundingBox) {
350
- if (
351
- adjustedX > characterBoundingBox.left &&
352
- adjustedX < characterBoundingBox.left + characterBoundingBox.width
353
- ) {
354
- adjustedX = characterBoundingBox.left > imageWidth / 2
355
- ? characterBoundingBox.left - width / 2 - 10
356
- : characterBoundingBox.left + characterBoundingBox.width + width / 2 + 10;
357
  }
358
  }
359
 
 
1
+ import { ImageSegmenter, FilesetResolver, ImageSegmenterResult } from "@mediapipe/tasks-vision"
2
  import { actionman } from "../fonts";
3
 
4
  interface BoundingBox {
 
53
  outputConfidenceMasks: false
54
  });
55
 
56
+ const segmentationResult: ImageSegmenterResult = imageSegmenter.segment(image);
57
  let characterBoundingBox: BoundingBox | null = null;
58
 
59
  if (segmentationResult.categoryMask) {
 
85
  });
86
  }
87
 
88
+ function findCharacterBoundingBox(mask: Uint8Array, width: number, height: number): BoundingBox | null {
89
+ let shapes: BoundingBox[] = [];
90
+ let visited = new Set<number>();
91
+
92
  for (let y = 0; y < height; y++) {
93
  for (let x = 0; x < width; x++) {
94
  const index = y * width + x;
95
+ if (mask[index] > 0 && !visited.has(index)) {
96
+ let shape = floodFill(mask, width, height, x, y, visited);
97
+ shapes.push(shape);
 
 
98
  }
99
  }
100
  }
101
+
102
+ // Sort shapes by area (descending) and filter out small shapes
103
+ shapes = shapes
104
+ .filter(shape => (shape.width * shape.height) > (width * height * 0.01))
105
+ .sort((a, b) => (b.width * b.height) - (a.width * a.height));
106
+
107
+ // Find the most vertically rectangular shape
108
+ let mostVerticalShape = shapes.reduce((prev, current) => {
109
+ let prevRatio = prev.height / prev.width;
110
+ let currentRatio = current.height / current.width;
111
+ return currentRatio > prevRatio ? current : prev;
112
+ });
113
+
114
+ return mostVerticalShape || null;
115
+ }
116
+
117
+ function floodFill(mask: Uint8Array, width: number, height: number, startX: number, startY: number, visited: Set<number>): BoundingBox {
118
+ let queue = [[startX, startY]];
119
+ let minX = startX, maxX = startX, minY = startY, maxY = startY;
120
+
121
+ while (queue.length > 0) {
122
+ let [x, y] = queue.pop()!;
123
+ let index = y * width + x;
124
+
125
+ if (x < 0 || x >= width || y < 0 || y >= height || mask[index] === 0 || visited.has(index)) {
126
+ continue;
127
+ }
128
+
129
+ visited.add(index);
130
+ minX = Math.min(minX, x);
131
+ maxX = Math.max(maxX, x);
132
+ minY = Math.min(minY, y);
133
+ maxY = Math.max(maxY, y);
134
+
135
+ queue.push([x+1, y], [x-1, y], [x, y+1], [x, y-1]);
136
+ }
137
+
138
  return {
 
139
  left: minX,
140
+ top: minY,
141
+ width: maxX - minX + 1,
142
+ height: maxY - minY + 1
143
  };
144
  }
145
 
 
171
  const padding = 50;
172
  const availableWidth = imageWidth - padding * 2;
173
  const availableHeight = imageHeight - padding * 2;
174
+ const maxAttempts = 100;
175
 
176
  for (let i = 0; i < bubbleCount; i++) {
177
  let x, y;
178
  let attempts = 0;
179
  do {
180
+ // Adjust x to avoid the middle of the character
181
+ if (characterBoundingBox) {
182
+ const characterMiddle = characterBoundingBox.left + characterBoundingBox.width / 2;
183
+ const leftSide = Math.random() * (characterMiddle - padding - padding);
184
+ const rightSide = characterMiddle + Math.random() * (imageWidth - characterMiddle - padding - padding);
185
+ x = Math.random() < 0.5 ? leftSide : rightSide;
186
+ } else {
187
+ x = Math.random() * availableWidth + padding;
188
+ }
189
  y = (i / bubbleCount) * availableHeight + padding;
190
  attempts++;
191
 
 
269
  const fontSize = 20;
270
  ctx.font = `${fontSize}px ${font}`;
271
 
272
+ // Adjust maximum width to account for border padding and limit to 33% of image width
273
+ const maxBubbleWidth = Math.min(imageWidth - 2 * borderPadding, imageWidth * 0.33);
274
  const wrappedText = wrapText(ctx, text, maxBubbleWidth - padding * 2, fontSize);
275
  const textDimensions = measureTextDimensions(ctx, wrappedText, fontSize);
276
 
 
392
 
393
  // Ensure the bubble doesn't overlap with the character
394
  if (characterBoundingBox) {
395
+ const characterMiddle = characterBoundingBox.left + characterBoundingBox.width / 2;
396
+ if (Math.abs(adjustedX - characterMiddle) < width / 2) {
397
+ // If the bubble is in the middle of the character, move it to the side
398
+ adjustedX = adjustedX < characterMiddle
399
+ ? Math.max(width / 2 + borderPadding, characterBoundingBox.left - width / 2 - 10)
400
+ : Math.min(imageWidth - width / 2 - borderPadding, characterBoundingBox.left + characterBoundingBox.width + width / 2 + 10);
 
401
  }
402
  }
403