// vision-agent / app/page.tsx
// MingruiZhang's picture
// feat: tips and new example (#119)
// 2fc64b7 unverified
'use client';
import { useRouter } from 'next/navigation';
import { useRef } from 'react';
import Composer, { ComposerRef } from '@/components/chat/Composer';
import { dbPostCreateChat } from '@/lib/db/functions';
import { nanoid } from '@/lib/utils';
import Chip from '@/components/ui/Chip';
import {
IconArrowDown,
IconArrowUpRight,
IconCaretDown,
IconSpark,
} from '@/components/ui/Icons';
import { EXAMPLES } from '@/lib/constants';
import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert';
import { ThumbsDown, ThumbsUp } from 'lucide-react';
import {
Collapsible,
CollapsibleContent,
CollapsibleTrigger,
} from '@/components/ui/collapsible';
import { CodeBlock } from '@/components/ui/CodeBlock';
// Class lists shared by every sample-prompt row inside the tips panel.
const TIP_ROW_CLASS =
  'flex flex-row space-x-2 justify-start items-center w-full';
const TIP_MARKER_CLASS = 'w-1/8';
const TIP_TEXT_CLASS = 'italic w-7/8';

/**
 * Home page of the app.
 *
 * Renders, top to bottom:
 * - the "Vision Agent" heading with a BETA chip and a tagline;
 * - a `Composer` whose submit handler creates a chat through
 *   `dbPostCreateChat` and, when that call resolves to a truthy value,
 *   navigates to `/chat/<id>`;
 * - one chip per entry of `EXAMPLES`; clicking a chip pushes that entry's
 *   prompt and media URL into the composer through its ref;
 * - a collapsible panel with prompting tips.
 */
export default function Page() {
  const router = useRouter();
  const composerRef = useRef<ComposerRef>(null);

  // Copies an example's prompt and media URL into the composer. Does nothing
  // while the ref is still unset (optional chaining on `current`).
  const fillComposer = (sample: (typeof EXAMPLES)[number]) => {
    composerRef.current?.setInput(sample.prompt);
    composerRef.current?.setMediaUrl(sample.mediaUrl);
  };

  return (
    <div className="h-screen w-screen homepage">
      <div className="mx-auto w-[42rem] max-w-full px-4 mt-24">
        <h1 className="mb-4 text-center relative">
          Vision Agent
          <Chip className="absolute bg-green-100 text-green-500">BETA</Chip>
        </h1>
        <h4 className="text-center">
          Generate code to solve your vision problem with simple prompts.
        </h4>

        {/* Prompt composer: creates the chat, then routes to it. */}
        <div className="my-8">
          <Composer
            ref={composerRef}
            onSubmit={async ({ input, mediaUrl }) => {
              // The same generated id is used for the chat record, its
              // title, and the route that is opened afterwards.
              const chatId = nanoid();
              const created = await dbPostCreateChat({
                id: chatId,
                title: `conversation-${chatId}`,
                mediaUrl,
                message: { prompt: input, mediaUrl },
              });
              // Stay on this page when the create call returns a falsy value.
              if (!created) return;
              router.push(`/chat/${chatId}`);
            }}
          />
        </div>

        {/* Example chips that pre-fill the composer. */}
        {EXAMPLES.map((sample, position) => (
          <Chip
            key={position}
            className="bg-transparent border border-zinc-500 cursor-pointer px-2 py-0.5"
            onClick={() => fillComposer(sample)}
          >
            <div className="flex flex-row items-center space-x-2">
              <p className="text-primary text-sm">{sample.title}</p>
              <IconArrowUpRight className="text-primary" />
            </div>
          </Chip>
        ))}

        {/* Collapsible prompting tips. */}
        <Collapsible className="mt-8 bg-zinc-800 relative w-full rounded-lg border px-4 py-3 text-sm [&>svg+div]:translate-y-[-3px] [&>svg]:absolute [&>svg]:left-4 [&>svg]:top-4 [&>svg]:text-foreground [&>svg~*]:pl-7">
          <CollapsibleTrigger className="flex flex-row items-center space-x-4 w-full">
            <IconSpark />
            <h4 className="font-bold grow text-left">
              Vision Agent prompting tips
            </h4>
            <IconCaretDown />
          </CollapsibleTrigger>
          <CollapsibleContent className="mt-4 CollapsibleContent text-sm space-y-2 [&_p]:leading-relaxed">
            {/* Tip 1: a good and a bad prompt, marked with thumbs icons. */}
            <p>
              <span className="font-bold">Be specific:</span> Give concrete
              instructions to what you desire as output, avoid vague questions.
            </p>
            <div className={TIP_ROW_CLASS}>
              <div className={TIP_MARKER_CLASS}>
                <ThumbsUp className="text-green-500 size-5" />
              </div>
              <p className={TIP_TEXT_CLASS}>
                Detect people wearing helmet by detecting people, then detecting
                helmets, a person is wearing a helmet if the helmet is detected
                near the person.
              </p>
            </div>
            <div className={TIP_ROW_CLASS}>
              <div className={TIP_MARKER_CLASS}>
                <ThumbsDown className="text-red-500 size-5" />
              </div>
              <p className={TIP_TEXT_CLASS}>Detect people wearing helmet</p>
            </div>

            {/* Tip 2: two simple starter prompts, marked with dashes. */}
            <p>
              <span className="font-bold">Start simple:</span> Start with simple
              prompt to understand underlying tool performance first
            </p>
            <div className={TIP_ROW_CLASS}>
              <div className={TIP_MARKER_CLASS}> - </div>
              <p className={TIP_TEXT_CLASS}>
                Can you run OCR on this image and plot the detected text?
              </p>
            </div>
            <div className={TIP_ROW_CLASS}>
              <div className={TIP_MARKER_CLASS}> - </div>
              <p className={TIP_TEXT_CLASS}>
                Can you detect the people in this image and visualize the
                result?
              </p>
            </div>

            {/* Tips 3 and 4: text only. */}
            <p>
              <span className="font-bold">Focus on single problem:</span> Each
              conversation should focus on solving single problem, start new
              conversations when switching task
            </p>
            <p>
              <span className="font-bold">Ask for visualization:</span> You can
              simply add{' '}
              <span className="font-bold font-sans">Visualize the result</span>{' '}
              at the end of your prompt to visualize the result
            </p>
          </CollapsibleContent>
        </Collapsible>
      </div>
    </div>
  );
}