Spaces · Runtime error
jwkirchenbauer committed · a7d76f1 · Parent: a230391

Added a welcome, paper tldr tab

Files changed:
- app.py (+1, -1)
- demo_watermark.py (+90, -45)
app.py
CHANGED
@@ -35,7 +35,7 @@ arg_dict = {
     'sampling_temp': 0.7,
     'use_gpu': True,
     'seeding_scheme': 'simple_1',
-    'gamma': 0.
+    'gamma': 0.5,
     'delta': 2.0,
     'normalizers': '',
     'ignore_repeated_bigrams': False,
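For context, `gamma` is the fraction of the vocabulary placed on the pseudorandom "greenlist" at each generation step, so this change biases generation toward half the vocabulary per position. A minimal sketch of how `gamma` sizes the greenlist under the `simple_1` one-token seeding scheme; the helper and hash key below are illustrative assumptions, not the repo's exact code:

import torch

def greenlist_for_step(prev_token_id: int, vocab_size: int, gamma: float,
                       hash_key: int = 15485863):
    # 'simple_1' seeding: the PRNG is seeded from the single previous token,
    # then the first gamma*|V| ids of a random permutation form the greenlist.
    # Hypothetical helper for illustration only.
    rng = torch.Generator().manual_seed(hash_key * prev_token_id)
    perm = torch.randperm(vocab_size, generator=rng)
    return perm[: int(vocab_size * gamma)]

# With gamma=0.5, half of a ~50k-token vocabulary is "green" at every step.
print(greenlist_for_step(prev_token_id=42, vocab_size=50257, gamma=0.5).shape)  # torch.Size([25128])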
demo_watermark.py
CHANGED
@@ -343,49 +343,63 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
     [![](https://badgen.net/badge/icon/GitHub?icon=github&label)](https://github.com/jwkirchenbauer/lm-watermarking)
     """
     )
-
-    # pass
-    # ![visitor badge](https://visitor-badge.glitch.me/badge?page_id=tomg-group-umd_lm-watermarking) # buggy
-
-    with gr.Accordion("Understanding the output metrics",open=False):
-        gr.Markdown(
-        """
-        - `z-score threshold` : The cutoff for the hypothesis test
-        - `Tokens Counted (T)` : The number of tokens in the output that were counted by the detection algorithm.
-            The first token is omitted in the simple, single token seeding scheme since there is no way to generate
-            a greenlist for it as it has no prefix token(s). Under the "Ignore Bigram Repeats" detection algorithm,
-            described in the bottom panel, this can be much less than the total number of tokens generated if there is a lot of repetition.
-        - `# Tokens in Greenlist` : The number of tokens that were observed to fall in their respective greenlist
-        - `Fraction of T in Greenlist` : The `# Tokens in Greenlist` / `T`. This is expected to be approximately `gamma` for human/unwatermarked text.
-        - `z-score` : The test statistic for the detection hypothesis test. If larger than the `z-score threshold`
-            we "reject the null hypothesis" that the text is human/unwatermarked, and conclude it is watermarked
-        - `p value` : The likelihood of observing the computed `z-score` under the null hypothesis. This is the likelihood of
-            observing the `Fraction of T in Greenlist` given that the text was generated without knowledge of the watermark procedure/greenlists.
-            If this is extremely _small_ we are confident that this many green tokens was not chosen by random chance.
-        - `prediction` : The outcome of the hypothesis test - whether the observed `z-score` was higher than the `z-score threshold`
-        - `confidence` : If we reject the null hypothesis, and the `prediction` is "Watermarked", then we report 1-`p value` to represent
-            the confidence of the detection based on the unlikeliness of this `z-score` observation.
-        """
-        )
-
-    with gr.Accordion("A note on model capability",open=True):
-        gr.Markdown(
-        """
-        This demo uses open-source language models that fit on a single GPU. These models are less powerful than proprietary commercial tools like ChatGPT, Claude, or Bard.
-
-        Importantly, we use a language model that is designed to "complete" your prompt, and not a model that is fine-tuned to follow instructions.
-        For best results, prompt the model with a few sentences that form the beginning of a paragraph, and then allow it to "continue" your paragraph.
-        Some examples include the opening paragraph of a Wikipedia article, or the first few sentences of a story.
-        Longer prompts that end mid-sentence will result in more fluent generations.
-        """
-        )
-    gr.Markdown(f"Language model: {args.model_name_or_path} {'(float16 mode)' if args.load_fp16 else ''}")
+    gr.Markdown(f"Language model: {args.model_name_or_path} {'(float16 mode)' if args.load_fp16 else ''}")
 
     # Construct state for parameters, define updates and toggles
     default_prompt = args.__dict__.pop("default_prompt")
     session_args = gr.State(value=args)
 
-    with gr.Tab("
+    with gr.Tab("Welcome"):
+        with gr.Row():
+            with gr.Column(scale=2):
+                gr.Markdown(
+                """
+                Potential harms of large language models can be mitigated by *watermarking* a model's output.
+                *Watermarks* are embedded signals in the generated text that are invisible to humans but algorithmically
+                detectable, allowing *anyone* to later check whether a given span of text
+                was likely to have been generated by a model that uses the watermark.
+
+                This space showcases a watermarking approach that can be applied to _any_ generative language model.
+                For demonstration purposes, the space serves a "small" multi-billion parameter model (see the following note for caveats due to small size).
+                """
+                )
+                with gr.Accordion("A note on model generation quality",open=False):
+                    gr.Markdown(
+                    """
+                    This demo uses open-source language models that fit on a single GPU. These models are less powerful than proprietary commercial tools like ChatGPT, Claude, or Bard.
+
+                    Importantly, we use a language model that is designed to "complete" your prompt, and not a model that is fine-tuned to follow instructions.
+                    For best results, prompt the model with a few sentences that form the beginning of a paragraph, and then allow it to "continue" your paragraph.
+                    Some examples include the opening paragraph of a Wikipedia article, or the first few sentences of a story.
+                    Longer prompts that end mid-sentence will result in more fluent generations.
+                    """
+                    )
+                gr.Markdown(
+                """
+                **[Generate & Detect]**: The first tab shows that the watermark can be embedded with
+                negligible impact on text quality. You can try any prompt and compare the quality of
+                normal text (*Output Without Watermark*) to the watermarked text (*Output With Watermark*) below it.
+                Metrics on the right show that the watermark can be reliably detected.
+                Detection is very efficient and does not use the language model or its parameters.
+
+                **[Detector Only]**: You can also copy-paste the watermarked text (or any other text)
+                into the second tab. This can be used to see how many sentences you could remove and still detect the watermark.
+                You can also verify here that the detection has, by design, a low false-positive rate;
+                this means that human-generated text that you copy into this detector will not be marked as machine-generated.
+
+                You can find more details on how this watermark functions in our [ArXiv preprint](https://arxiv.org/abs/2301.10226).
+                """
+                )
+
+            with gr.Column(scale=1):
+                gr.Markdown(
+                """
+                ![](https://drive.google.com/uc?export=view&id=1yVLPcjm-xvaCjQyc3FGLsWIU84v1QRoC)
+                """
+                )
+
+
+    with gr.Tab("Generate & Detect"):
 
         with gr.Row():
             prompt = gr.Textbox(label=f"Prompt", interactive=True,lines=10,max_lines=10, value=default_prompt)
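The Welcome tab's claim that the approach applies to _any_ generative language model comes down to a logits processor that adds a `delta` bias to greenlist tokens during sampling. A sketch of generation-side usage, with class and argument names as shown in the project README; the model id is a placeholder, not necessarily what this Space serves:

from transformers import AutoModelForCausalLM, AutoTokenizer, LogitsProcessorList
from watermark_processor import WatermarkLogitsProcessor  # from this repo

model_id = "facebook/opt-1.3b"  # placeholder checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

watermark_processor = WatermarkLogitsProcessor(
    vocab=list(tokenizer.get_vocab().values()),
    gamma=0.5,               # greenlist fraction, matching the app.py change above
    delta=2.0,               # logit bias added to greenlist tokens
    seeding_scheme="simple_1")

inputs = tokenizer("The diamondback terrapin or simply terrapin is", return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=0.7,
                        logits_processor=LogitsProcessorList([watermark_processor]))
print(tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))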
@@ -463,7 +477,8 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
                 with gr.Column(scale=1):
                     select_green_tokens = gr.Checkbox(label="Select 'greenlist' from partition", value=args.select_green_tokens)
 
-
+
+            with gr.Accordion("What do the settings do?",open=False):
                 gr.Markdown(
                 """
                 #### Generation Parameters:
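`select_green_tokens` controls which end of the permuted vocabulary partition is treated as the greenlist; the unchecked, legacy behavior sizes a redlist first and treats the remainder as green. A toy illustration of the two conventions, an assumption based on the flag's wording rather than the repo's verified code:

import torch

vocab_size, gamma = 50257, 0.5
rng = torch.Generator().manual_seed(42)            # stand-in for the per-step seed
perm = torch.randperm(vocab_size, generator=rng)
greenlist_size = int(vocab_size * gamma)

green_direct = perm[:greenlist_size]               # select_green_tokens=True: greenlist taken directly
green_legacy = perm[vocab_size - greenlist_size:]  # False: legacy mode selects the redlist first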
@@ -515,6 +530,27 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
             """
             )
 
+        with gr.Accordion("What do the output metrics mean?",open=False):
+            gr.Markdown(
+            """
+            - `z-score threshold` : The cutoff for the hypothesis test
+            - `Tokens Counted (T)` : The number of tokens in the output that were counted by the detection algorithm.
+                The first token is omitted in the simple, single token seeding scheme since there is no way to generate
+                a greenlist for it as it has no prefix token(s). Under the "Ignore Bigram Repeats" detection algorithm,
+                described in the bottom panel, this can be much less than the total number of tokens generated if there is a lot of repetition.
+            - `# Tokens in Greenlist` : The number of tokens that were observed to fall in their respective greenlist
+            - `Fraction of T in Greenlist` : The `# Tokens in Greenlist` / `T`. This is expected to be approximately `gamma` for human/unwatermarked text.
+            - `z-score` : The test statistic for the detection hypothesis test. If larger than the `z-score threshold`
+                we "reject the null hypothesis" that the text is human/unwatermarked, and conclude it is watermarked
+            - `p value` : The likelihood of observing the computed `z-score` under the null hypothesis. This is the likelihood of
+                observing the `Fraction of T in Greenlist` given that the text was generated without knowledge of the watermark procedure/greenlists.
+                If this is extremely _small_ we are confident that this many green tokens was not chosen by random chance.
+            - `prediction` : The outcome of the hypothesis test - whether the observed `z-score` was higher than the `z-score threshold`
+            - `confidence` : If we reject the null hypothesis, and the `prediction` is "Watermarked", then we report 1-`p value` to represent
+                the confidence of the detection based on the unlikeliness of this `z-score` observation.
+            """
+            )
+
     gr.HTML("""
             <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
             Follow the github link at the top and host the demo on your own GPU hardware to test out larger models.
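The metrics in this accordion come from a one-proportion z-test on the green-token count: under the null hypothesis each counted token lands in its greenlist with probability `gamma`. A self-contained sketch of the arithmetic (the standard formula from the paper; the 4.0 threshold matches the demo's default, but this is not the repo's exact code):

import math

def watermark_z_score(green_count: int, T: int, gamma: float) -> float:
    # Under the null (no watermark), green_count ~ Binomial(T, gamma),
    # so we standardize against mean gamma*T and variance T*gamma*(1-gamma).
    expected = gamma * T
    std = math.sqrt(T * gamma * (1 - gamma))
    return (green_count - expected) / std

def p_value(z: float) -> float:
    # One-sided tail probability of the standard normal.
    return 0.5 * math.erfc(z / math.sqrt(2))

z = watermark_z_score(green_count=160, T=200, gamma=0.5)
print(f"z = {z:.2f}, p = {p_value(z):.2e}, prediction = {z > 4.0}")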
@@ -532,7 +568,8 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
     output_without_watermark.change(fn=detect_partial, inputs=[output_without_watermark,session_args], outputs=[without_watermark_detection_result,session_args])
     output_with_watermark.change(fn=detect_partial, inputs=[output_with_watermark,session_args], outputs=[with_watermark_detection_result,session_args])
     # Register main detection tab click
-    detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result, session_args])
+    # detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result, session_args])
+    detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result, session_args], api_name="detection")
 
     # State management logic
     # update callbacks that change the state dict
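The new `api_name` argument registers detection as a named endpoint on the Space, so it can be called programmatically. A hedged example with `gradio_client`; the Space id is assumed from the badge URL above, the exact payload depends on the Gradio version the Space runs, and the session-state input is normally supplied automatically:

from gradio_client import Client

# Assumed Space id; adjust to the actual deployment.
client = Client("tomg-group-umd/lm-watermarking")
result = client.predict(
    "Paste any text here to score it for the watermark.",  # detection_input
    api_name="/detection",
)
print(result)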
@@ -624,10 +661,15 @@ def main(args):
         model, tokenizer, device = load_model(args)
     else:
         model, tokenizer, device = None, None, None
+        tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
+        if args.use_gpu:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+        else:
+            device = "cpu"
 
-
-
-
+
+    # terrapin example
+    input_text = (
         "The diamondback terrapin or simply terrapin (Malaclemys terrapin) is a "
         "species of turtle native to the brackish coastal tidal marshes of the "
         "Northeastern and southern United States, and in Bermuda.[6] It belongs "
@@ -648,9 +690,12 @@ def main(args):
         "or white. All have a unique pattern of wiggly, black markings or spots "
         "on their body and head. The diamondback terrapin has large webbed "
         "feet.[9] The species is"
-
+    )
 
-
+    args.default_prompt = input_text
+
+    # Generate and detect, report to stdout
+    if not args.skip_model_load:
 
         term_width = 80
         print("#"*term_width)
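The point of setting up the tokenizer and device even when `--skip_model_load` is passed is that detection never touches the language model itself. A sketch of detector-only usage; the constructor arguments mirror the demo's `arg_dict`, but verify the names against `watermark_processor.py` in your checkout:

from transformers import AutoTokenizer
from watermark_processor import WatermarkDetector  # from this repo

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-1.3b")  # tokenizer only, no model weights

detector = WatermarkDetector(
    vocab=list(tokenizer.get_vocab().values()),
    gamma=0.5,                   # must match the value used at generation time
    seeding_scheme="simple_1",
    device="cpu",                # detection is cheap; no GPU required
    tokenizer=tokenizer,
    z_threshold=4.0,
    normalizers=[],
    ignore_repeated_bigrams=False)

score_dict = detector.detect("some text to score for the watermark ...")
print(score_dict["z_score"], score_dict["prediction"])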
|