Spaces · Runtime error
jwkirchenbauer committed · a7d76f1 · Parent: a230391

Added a welcome, paper tldr tab

Files changed:
- app.py (+1, -1)
- demo_watermark.py (+90, -45)
app.py
CHANGED
@@ -35,7 +35,7 @@ arg_dict = {
     'sampling_temp': 0.7,
     'use_gpu': True,
     'seeding_scheme': 'simple_1',
-    'gamma': 0.
+    'gamma': 0.5,
     'delta': 2.0,
     'normalizers': '',
     'ignore_repeated_bigrams': False,
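For context, `gamma` is the fraction of the vocabulary placed on the pseudorandom "greenlist" at each generation step, so this change biases generation toward half the vocabulary per position. A minimal sketch of how `gamma` sizes the greenlist under the `simple_1` one-token seeding scheme; the helper and hash key below are illustrative assumptions, not the repo's exact code:

import torch

def greenlist_for_step(prev_token_id: int, vocab_size: int, gamma: float,
                       hash_key: int = 15485863):
    # 'simple_1' seeding: the PRNG is seeded from the single previous token,
    # then the first gamma*|V| ids of a random permutation form the greenlist.
    # Hypothetical helper for illustration only.
    rng = torch.Generator().manual_seed(hash_key * prev_token_id)
    perm = torch.randperm(vocab_size, generator=rng)
    return perm[: int(vocab_size * gamma)]

# With gamma=0.5, half of a ~50k-token vocabulary is "green" at every step.
print(greenlist_for_step(prev_token_id=42, vocab_size=50257, gamma=0.5).shape)  # torch.Size([25128])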
demo_watermark.py
CHANGED
@@ -343,49 +343,63 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
     [![](https://badgen.net/badge/icon/GitHub?icon=github&label)](https://github.com/jwkirchenbauer/lm-watermarking)
     """
     )
-
-    # pass
-    # ![visitor badge](https://visitor-badge.glitch.me/badge?page_id=tomg-group-umd_lm-watermarking) # buggy
-
-    with gr.Accordion("Understanding the output metrics",open=False):
-        gr.Markdown(
-        """
-        - `z-score threshold` : The cutoff for the hypothesis test
-        - `Tokens Counted (T)` : The number of tokens in the output that were counted by the detection algorithm.
-            The first token is omitted in the simple, single token seeding scheme since there is no way to generate
-            a greenlist for it as it has no prefix token(s). Under the "Ignore Bigram Repeats" detection algorithm,
-            described in the bottom panel, this can be much less than the total number of tokens generated if there is a lot of repetition.
-        - `# Tokens in Greenlist` : The number of tokens that were observed to fall in their respective greenlist
-        - `Fraction of T in Greenlist` : The `# Tokens in Greenlist` / `T`. This is expected to be approximately `gamma` for human/unwatermarked text.
-        - `z-score` : The test statistic for the detection hypothesis test. If larger than the `z-score threshold`
-            we "reject the null hypothesis" that the text is human/unwatermarked, and conclude it is watermarked
-        - `p value` : The likelihood of observing the computed `z-score` under the null hypothesis. This is the likelihood of
-            observing the `Fraction of T in Greenlist` given that the text was generated without knowledge of the watermark procedure/greenlists.
-            If this is extremely _small_ we are confident that this many green tokens was not chosen by random chance.
-        - `prediction` : The outcome of the hypothesis test - whether the observed `z-score` was higher than the `z-score threshold`
-        - `confidence` : If we reject the null hypothesis, and the `prediction` is "Watermarked", then we report 1-`p value` to represent
-            the confidence of the detection based on the unlikeliness of this `z-score` observation.
-        """
-        )
-
-    with gr.Accordion("A note on model capability",open=True):
-        gr.Markdown(
-        """
-        This demo uses open-source language models that fit on a single GPU. These models are less powerful than proprietary commercial tools like ChatGPT, Claude, or Bard.
-
-        Importantly, we use a language model that is designed to "complete" your prompt, and not a model that is fine-tuned to follow instructions.
-        For best results, prompt the model with a few sentences that form the beginning of a paragraph, and then allow it to "continue" your paragraph.
-        Some examples include the opening paragraph of a Wikipedia article, or the first few sentences of a story.
-        Longer prompts that end mid-sentence will result in more fluent generations.
-        """
-        )
-    gr.Markdown(f"Language model: {args.model_name_or_path} {'(float16 mode)' if args.load_fp16 else ''}")
+    gr.Markdown(f"Language model: {args.model_name_or_path} {'(float16 mode)' if args.load_fp16 else ''}")
 
     # Construct state for parameters, define updates and toggles
     default_prompt = args.__dict__.pop("default_prompt")
     session_args = gr.State(value=args)
 
-    with gr.Tab("
+    with gr.Tab("Welcome"):
+        with gr.Row():
+            with gr.Column(scale=2):
+                gr.Markdown(
+                """
+                Potential harms of large language models can be mitigated by *watermarking* a model's output.
+                *Watermarks* are embedded signals in the generated text that are invisible to humans but algorithmically
+                detectable, allowing *anyone* to later check whether a given span of text
+                was likely to have been generated by a model that uses the watermark.
+
+                This space showcases a watermarking approach that can be applied to _any_ generative language model.
+                For demonstration purposes, the space serves a "small" multi-billion parameter model (see the following note for caveats due to small size).
+                """
+                )
+                with gr.Accordion("A note on model generation quality",open=False):
+                    gr.Markdown(
+                    """
+                    This demo uses open-source language models that fit on a single GPU. These models are less powerful than proprietary commercial tools like ChatGPT, Claude, or Bard.
+
+                    Importantly, we use a language model that is designed to "complete" your prompt, and not a model that is fine-tuned to follow instructions.
+                    For best results, prompt the model with a few sentences that form the beginning of a paragraph, and then allow it to "continue" your paragraph.
+                    Some examples include the opening paragraph of a Wikipedia article, or the first few sentences of a story.
+                    Longer prompts that end mid-sentence will result in more fluent generations.
+                    """
+                    )
+                gr.Markdown(
+                """
+                **[Generate & Detect]**: The first tab shows that the watermark can be embedded with
+                negligible impact on text quality. You can try any prompt and compare the quality of
+                normal text (*Output Without Watermark*) to the watermarked text (*Output With Watermark*) below it.
+                Metrics on the right show that the watermark can be reliably detected.
+                Detection is very efficient and does not use the language model or its parameters.
+
+                **[Detector Only]**: You can also copy-paste the watermarked text (or any other text)
+                into the second tab. This can be used to see how many sentences you could remove and still detect the watermark.
+                You can also verify here that the detection has, by design, a low false-positive rate;
+                this means that human-generated text that you copy into this detector will not be marked as machine-generated.
+
+                You can find more details on how this watermark functions in our [ArXiv preprint](https://arxiv.org/abs/2301.10226).
+                """
+                )
+
+            with gr.Column(scale=1):
+                gr.Markdown(
+                """
+                ![](https://drive.google.com/uc?export=view&id=1yVLPcjm-xvaCjQyc3FGLsWIU84v1QRoC)
+                """
+                )
+
+
+    with gr.Tab("Generate & Detect"):
 
         with gr.Row():
             prompt = gr.Textbox(label=f"Prompt", interactive=True,lines=10,max_lines=10, value=default_prompt)
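The Welcome tab's claim that the approach applies to _any_ generative language model comes down to a logits processor that adds a `delta` bias to greenlist tokens during sampling. A sketch of generation-side usage, with class and argument names as shown in the project README; the model id is a placeholder, not necessarily what this Space serves:

from transformers import AutoModelForCausalLM, AutoTokenizer, LogitsProcessorList
from watermark_processor import WatermarkLogitsProcessor  # from this repo

model_id = "facebook/opt-1.3b"  # placeholder checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

watermark_processor = WatermarkLogitsProcessor(
    vocab=list(tokenizer.get_vocab().values()),
    gamma=0.5,               # greenlist fraction, matching the app.py change above
    delta=2.0,               # logit bias added to greenlist tokens
    seeding_scheme="simple_1")

inputs = tokenizer("The diamondback terrapin or simply terrapin is", return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=0.7,
                        logits_processor=LogitsProcessorList([watermark_processor]))
print(tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))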
@@ -463,7 +477,8 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
                 with gr.Column(scale=1):
                     select_green_tokens = gr.Checkbox(label="Select 'greenlist' from partition", value=args.select_green_tokens)
 
-
+
+            with gr.Accordion("What do the settings do?",open=False):
                 gr.Markdown(
                 """
                 #### Generation Parameters:
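`select_green_tokens` controls which end of the permuted vocabulary partition is treated as the greenlist; the unchecked, legacy behavior sizes a redlist first and treats the remainder as green. A toy illustration of the two conventions, an assumption based on the flag's wording rather than the repo's verified code:

import torch

vocab_size, gamma = 50257, 0.5
rng = torch.Generator().manual_seed(42)            # stand-in for the per-step seed
perm = torch.randperm(vocab_size, generator=rng)
greenlist_size = int(vocab_size * gamma)

green_direct = perm[:greenlist_size]               # select_green_tokens=True: greenlist taken directly
green_legacy = perm[vocab_size - greenlist_size:]  # False: legacy mode selects the redlist first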
@@ -515,6 +530,27 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
             """
             )
 
+        with gr.Accordion("What do the output metrics mean?",open=False):
+            gr.Markdown(
+            """
+            - `z-score threshold` : The cutoff for the hypothesis test
+            - `Tokens Counted (T)` : The number of tokens in the output that were counted by the detection algorithm.
+                The first token is omitted in the simple, single token seeding scheme since there is no way to generate
+                a greenlist for it as it has no prefix token(s). Under the "Ignore Bigram Repeats" detection algorithm,
+                described in the bottom panel, this can be much less than the total number of tokens generated if there is a lot of repetition.
+            - `# Tokens in Greenlist` : The number of tokens that were observed to fall in their respective greenlist
+            - `Fraction of T in Greenlist` : The `# Tokens in Greenlist` / `T`. This is expected to be approximately `gamma` for human/unwatermarked text.
+            - `z-score` : The test statistic for the detection hypothesis test. If larger than the `z-score threshold`
+                we "reject the null hypothesis" that the text is human/unwatermarked, and conclude it is watermarked
+            - `p value` : The likelihood of observing the computed `z-score` under the null hypothesis. This is the likelihood of
+                observing the `Fraction of T in Greenlist` given that the text was generated without knowledge of the watermark procedure/greenlists.
+                If this is extremely _small_ we are confident that this many green tokens was not chosen by random chance.
+            - `prediction` : The outcome of the hypothesis test - whether the observed `z-score` was higher than the `z-score threshold`
+            - `confidence` : If we reject the null hypothesis, and the `prediction` is "Watermarked", then we report 1-`p value` to represent
+                the confidence of the detection based on the unlikeliness of this `z-score` observation.
+            """
+            )
+
     gr.HTML("""
             <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
             Follow the github link at the top and host the demo on your own GPU hardware to test out larger models.
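The metrics in this accordion come from a one-proportion z-test on the green-token count: under the null hypothesis each counted token lands in its greenlist with probability `gamma`. A self-contained sketch of the arithmetic (the standard formula from the paper; the 4.0 threshold matches the demo's default, but this is not the repo's exact code):

import math

def watermark_z_score(green_count: int, T: int, gamma: float) -> float:
    # Under the null (no watermark), green_count ~ Binomial(T, gamma),
    # so we standardize against mean gamma*T and variance T*gamma*(1-gamma).
    expected = gamma * T
    std = math.sqrt(T * gamma * (1 - gamma))
    return (green_count - expected) / std

def p_value(z: float) -> float:
    # One-sided tail probability of the standard normal.
    return 0.5 * math.erfc(z / math.sqrt(2))

z = watermark_z_score(green_count=160, T=200, gamma=0.5)
print(f"z = {z:.2f}, p = {p_value(z):.2e}, prediction = {z > 4.0}")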
@@ -532,7 +568,8 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
     output_without_watermark.change(fn=detect_partial, inputs=[output_without_watermark,session_args], outputs=[without_watermark_detection_result,session_args])
     output_with_watermark.change(fn=detect_partial, inputs=[output_with_watermark,session_args], outputs=[with_watermark_detection_result,session_args])
     # Register main detection tab click
-    detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result, session_args])
+    # detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result, session_args])
+    detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result, session_args], api_name="detection")
 
     # State management logic
     # update callbacks that change the state dict
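The new `api_name` argument registers detection as a named endpoint on the Space, so it can be called programmatically. A hedged example with `gradio_client`; the Space id is assumed from the badge URL above, the exact payload depends on the Gradio version the Space runs, and the session-state input is normally supplied automatically:

from gradio_client import Client

# Assumed Space id; adjust to the actual deployment.
client = Client("tomg-group-umd/lm-watermarking")
result = client.predict(
    "Paste any text here to score it for the watermark.",  # detection_input
    api_name="/detection",
)
print(result)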
@@ -624,10 +661,15 @@ def main(args):
         model, tokenizer, device = load_model(args)
     else:
         model, tokenizer, device = None, None, None
+        tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
+        if args.use_gpu:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+        else:
+            device = "cpu"
 
-
-
-
+
+    # terrapin example
+    input_text = (
         "The diamondback terrapin or simply terrapin (Malaclemys terrapin) is a "
         "species of turtle native to the brackish coastal tidal marshes of the "
         "Northeastern and southern United States, and in Bermuda.[6] It belongs "
@@ -648,9 +690,12 @@ def main(args):
         "or white. All have a unique pattern of wiggly, black markings or spots "
         "on their body and head. The diamondback terrapin has large webbed "
         "feet.[9] The species is"
-
+    )
 
-
+    args.default_prompt = input_text
+
+    # Generate and detect, report to stdout
+    if not args.skip_model_load:
 
         term_width = 80
         print("#"*term_width)
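The point of setting up the tokenizer and device even when `--skip_model_load` is passed is that detection never touches the language model itself. A sketch of detector-only usage; the constructor arguments mirror the demo's `arg_dict`, but verify the names against `watermark_processor.py` in your checkout:

from transformers import AutoTokenizer
from watermark_processor import WatermarkDetector  # from this repo

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-1.3b")  # tokenizer only, no model weights

detector = WatermarkDetector(
    vocab=list(tokenizer.get_vocab().values()),
    gamma=0.5,                   # must match the value used at generation time
    seeding_scheme="simple_1",
    device="cpu",                # detection is cheap; no GPU required
    tokenizer=tokenizer,
    z_threshold=4.0,
    normalizers=[],
    ignore_repeated_bigrams=False)

score_dict = detector.detect("some text to score for the watermark ...")
print(score_dict["z_score"], score_dict["prediction"])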
|