Spaces:

LandingAI
/

vision-agent

Sleeping

File size: 5,504 Bytes

'use client';

import { useRouter } from 'next/navigation';

import { useRef } from 'react';
import Composer, { ComposerRef } from '@/components/chat/Composer';
import { dbPostCreateChat } from '@/lib/db/functions';
import { nanoid } from '@/lib/utils';
import Chip from '@/components/ui/Chip';
import {
  IconArrowDown,
  IconArrowUpRight,
  IconCaretDown,
  IconSpark,
} from '@/components/ui/Icons';
import { EXAMPLES } from '@/lib/constants';
import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert';
import { ThumbsDown, ThumbsUp } from 'lucide-react';
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from '@/components/ui/collapsible';
import { CodeBlock } from '@/components/ui/CodeBlock';

/**
 * Landing page for Vision Agent.
 *
 * Renders a prompt composer, one chip per entry in `EXAMPLES` that pre-fills
 * the composer through its ref, and a collapsible panel of prompting tips.
 * Submitting the composer calls `dbPostCreateChat` with a freshly generated
 * id and, when that call resolves to a truthy value, navigates to
 * `/chat/<id>`.
 */
export default function Page() {
  const router = useRouter();
  const composerRef = useRef<ComposerRef>(null);

  // Class lists shared by every example row inside the tips panel.
  const tipRowClass =
    'flex flex-row space-x-2 justify-start items-center w-full';
  const tipMarkerClass = 'w-1/8';
  const tipTextClass = 'italic w-7/8';

  // "Be specific" illustration: a detailed prompt next to a vague one.
  const specificityExamples = [
    {
      id: 'specific',
      marker: <ThumbsUp className="text-green-500 size-5" />,
      text: 'Detect people wearing helmet by detecting people, then detecting helmets, a person is wearing a helmet if the helmet is detected near the person.',
    },
    {
      id: 'vague',
      marker: <ThumbsDown className="text-red-500 size-5" />,
      text: 'Detect people wearing helmet',
    },
  ];

  // "Start simple" illustration: short single-task prompts.
  const starterPrompts = [
    'Can you run OCR on this image and plot the detected text?',
    'Can you detect the people in this image and visualize the result?',
  ];

  return (
    <div className="h-screen w-screen homepage">
      <div className="mx-auto w-[42rem] max-w-full px-4 mt-24">
        <h1 className="mb-4 text-center relative">
          Vision Agent
          <Chip className="absolute bg-green-100 text-green-500">BETA</Chip>
        </h1>
        <h4 className="text-center">
          Generate code to solve your vision problem with simple prompts.
        </h4>

        <div className="my-8">
          <Composer
            ref={composerRef}
            onSubmit={async submission => {
              const { input: prompt, mediaUrl } = submission;
              const chatId = nanoid();
              const created = await dbPostCreateChat({
                id: chatId,
                title: `conversation-${chatId}`,
                mediaUrl,
                message: { prompt, mediaUrl },
              });
              // Stay on the landing page unless the chat was created.
              if (!created) {
                return;
              }
              router.push(`/chat/${chatId}`);
            }}
          />
        </div>

        {/* One chip per canned example; clicking it loads the example into the composer. */}
        {EXAMPLES.map(({ title, prompt, mediaUrl }, position) => (
          <Chip
            key={position}
            className="bg-transparent border border-zinc-500 cursor-pointer px-2 py-0.5"
            onClick={() => {
              const composer = composerRef.current;
              composer?.setInput(prompt);
              composer?.setMediaUrl(mediaUrl);
            }}
          >
            <div className="flex flex-row items-center space-x-2">
              <p className="text-primary text-sm">{title}</p>
              <IconArrowUpRight className="text-primary" />
            </div>
          </Chip>
        ))}

        <Collapsible className="mt-8 bg-zinc-800 relative w-full rounded-lg border px-4 py-3 text-sm [&>svg+div]:translate-y-[-3px] [&>svg]:absolute [&>svg]:left-4 [&>svg]:top-4 [&>svg]:text-foreground [&>svg~*]:pl-7">
          <CollapsibleTrigger className="flex flex-row items-center space-x-4 w-full">
            <IconSpark />
            <h4 className="font-bold grow text-left">Vision Agent prompting tips</h4>
            <IconCaretDown />
          </CollapsibleTrigger>
          <CollapsibleContent className="mt-4 CollapsibleContent text-sm space-y-2 [&_p]:leading-relaxed">
            <p>
              <span className="font-bold">Be specific:</span> Give concrete instructions
              to what you desire as output, avoid vague questions.
            </p>
            {specificityExamples.map(({ id, marker, text }) => (
              <div key={id} className={tipRowClass}>
                <div className={tipMarkerClass}>{marker}</div>
                <p className={tipTextClass}>{text}</p>
              </div>
            ))}
            <p>
              <span className="font-bold">Start simple:</span> Start with simple prompt
              to understand underlying tool performance first
            </p>
            {starterPrompts.map(starter => (
              <div key={starter} className={tipRowClass}>
                <div className={tipMarkerClass}> - </div>
                <p className={tipTextClass}>{starter}</p>
              </div>
            ))}
            <p>
              <span className="font-bold">Focus on single problem:</span> Each conversation
              should focus on solving single problem, start new conversations when
              switching task
            </p>
            <p>
              <span className="font-bold">Ask for visualization:</span> You can simply
              add{' '}
              <span className="font-bold font-sans">Visualize the result</span>{' '}
              at the end of your prompt to visualize the result
            </p>
          </CollapsibleContent>
        </Collapsible>
      </div>
    </div>
  );
}