{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{\n",
" \"createdAt\": \"2024-06-19T09:44:35.578Z\",\n",
" \"id\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"metadata\": {},\n",
" \"name\": \"TEST\",\n",
" \"participant\": {},\n",
" \"steps\": [\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:44:48.221\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"1ab30fe6-99cb-4327-bd7d-e3128b2c1a62\",\n",
" \"input\": {\n",
" \"question\": \"What is inductive bias in CNNs?\"\n",
" },\n",
" \"metadata\": {},\n",
" \"name\": \"RAG\",\n",
" \"output\": {\n",
" \"answer\": \"Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.\",\n",
" \"chat_history\": [],\n",
" \"original_question\": \"What is inductive bias in CNNs?\",\n",
" \"question\": \"What is inductive bias in CNNs?\",\n",
" \"rephrased_question\": \"What is inductive bias in CNNs?\",\n",
" \"source_documents\": [\n",
" \"page_content='Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 39 A function \\ud835\\udc39[\\u22c5] with parameters \\ud835\\udf19\\\" that takes the node embeddings and adjacency matrix and outputs new node embeddings ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 38, 'date': '04/16/2024\\\\n Tuesday', 'tldr': 'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.4147438882496012}\",\n",
" \"page_content='https://poloclub.github.io/cnn-explainer/ 92 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 91, 'date': '02/27/2024\\\\n Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.30807206912986296}\",\n",
" \"page_content='Convolutional networks \\u2022 Networks for images \\u2022 Invariance and equivariance \\u2022 1D convolution \\u2022 Convolutional layers \\u2022 Channels \\u2022 Receptive fields \\u2022 Convolutional network for MNIST 1D 44 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 43, 'date': '02/27/2024\\\\n Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.26420832659802906}\"\n",
" ]\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:44:46.84\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"retrieval\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:44:48.223\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"18af26e1-e738-4304-b484-1fd7bd5a26e6\",\n",
" \"input\": null,\n",
" \"metadata\": {},\n",
" \"name\": \"User\",\n",
" \"output\": {\n",
" \"content\": \"What is inductive bias in CNNs?\"\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:44:48.223\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"user_message\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:44:48.225\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"067c2783-38ff-43e4-866a-abf957415359\",\n",
" \"input\": null,\n",
" \"metadata\": {},\n",
" \"name\": \"AI_Tutor\",\n",
" \"output\": {\n",
" \"content\": \"Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.\"\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:44:48.225\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"assistant_message\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:44:58.148\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"d2b74775-41fc-4ebc-bc40-d8d7cf67015d\",\n",
" \"input\": {\n",
" \"question\": \"Do transformers have this?\"\n",
" },\n",
" \"metadata\": {},\n",
" \"name\": \"RAG\",\n",
" \"output\": {\n",
" \"answer\": \"I'm not sure, I don't have that information.\",\n",
" \"chat_history\": [\n",
" \"content='What is inductive bias in CNNs?'\",\n",
" \"content='Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.'\"\n",
" ],\n",
" \"original_question\": \"Do transformers have this?\",\n",
" \"question\": \"Do transformers have this?\",\n",
" \"rephrased_question\": \"Do transformers exhibit a similar inductive bias as CNNs?\",\n",
" \"source_documents\": [\n",
" \"page_content='Transformers \\u2022 Multi-headed Self Attention is just one component of the transformer architecture \\u2022 Let\\u2019s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2451855994037282}\",\n",
" \"page_content='Transformers \\u2022 Motivation \\u2022 Dot-product self-attention \\u2022 Applying Self-Attention \\u2022 The Transformer Architecture \\u2022 Three Types of NLP Transformer Models \\u2022 Encoder \\u2022 Decoder \\u2022 Encoder-Decoder 55 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 54, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.24314965373956088}\",\n",
" \"page_content='Transformers \\u2022 Motivation \\u2022 Dot-product self-attention \\u2022 Applying Self-Attention \\u2022 The Transformer Architecture \\u2022 Three Types of NLP Transformer Models \\u2022 Encoder \\u2022 Decoder \\u2022 Encoder-Decoder 10 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 9, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2367633946657962}\"\n",
" ]\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:44:56.486\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"retrieval\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:44:58.15\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"2ec55d74-cce5-42d4-827e-375d597cf91c\",\n",
" \"input\": null,\n",
" \"metadata\": {},\n",
" \"name\": \"User\",\n",
" \"output\": {\n",
" \"content\": \"Do transformers have this?\"\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:44:58.15\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"user_message\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:44:58.151\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"703534b4-981a-43a3-8571-773367be2fa5\",\n",
" \"input\": null,\n",
" \"metadata\": {},\n",
" \"name\": \"AI_Tutor\",\n",
" \"output\": {\n",
" \"content\": \"I'm not sure, I don't have that information.\"\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:44:58.151\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"assistant_message\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:45:13.1\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"fd58e85c-dbaa-4d06-8a75-a3839e37856d\",\n",
" \"input\": {\n",
" \"question\": \"cool, suggest a project with transformers to use as my final project\"\n",
" },\n",
" \"metadata\": {},\n",
" \"name\": \"RAG\",\n",
" \"output\": {\n",
" \"answer\": \"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\",\n",
" \"chat_history\": [\n",
" \"content='What is inductive bias in CNNs?'\",\n",
" \"content='Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.'\",\n",
" \"content='Do transformers have this?'\",\n",
" \"content=\\\"I'm not sure, I don't have that information.\\\"\"\n",
" ],\n",
" \"original_question\": \"cool, suggest a project with transformers to use as my final project\",\n",
" \"question\": \"cool, suggest a project with transformers to use as my final project\",\n",
" \"rephrased_question\": \"What is the concept of inductive bias in transformers, and how does it relate to the prioritization of information from neighboring nodes like in CNNs?\",\n",
" \"source_documents\": [\n",
" \"page_content='Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 39 A function \\ud835\\udc39[\\u22c5] with parameters \\ud835\\udf19\\\" that takes the node embeddings and adjacency matrix and outputs new node embeddings ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 38, 'date': '04/16/2024\\\\n Tuesday', 'tldr': 'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.3613770850497736}\",\n",
" \"page_content='Transformers \\u2022 Multi-headed Self Attention is just one component of the transformer architecture \\u2022 Let\\u2019s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2733153333305661}\",\n",
" \"page_content='Transformers \\u2022 Multi-headed Self Attention is just one component of the transformer architecture 48 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 47, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.22982265165302218}\"\n",
" ]\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:45:10.667\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"retrieval\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:45:13.102\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"c86489d2-cbf3-475e-867e-d1b20addedf0\",\n",
" \"input\": null,\n",
" \"metadata\": {},\n",
" \"name\": \"User\",\n",
" \"output\": {\n",
" \"content\": \"cool, suggest a project with transformers to use as my final project\"\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:45:13.102\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"user_message\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:45:13.103\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"75eb694e-4b1b-4161-96ea-9ecec4092f53\",\n",
" \"input\": null,\n",
" \"metadata\": {},\n",
" \"name\": \"AI_Tutor\",\n",
" \"output\": {\n",
" \"content\": \"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\"\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:45:13.103\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"assistant_message\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:45:26.404\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"4d02c5ad-1bd9-4767-beab-4c724062093e\",\n",
" \"input\": {\n",
" \"question\": \"Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?\"\n",
" },\n",
" \"metadata\": {},\n",
" \"name\": \"RAG\",\n",
" \"output\": {\n",
" \"answer\": \"Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!\",\n",
" \"chat_history\": [\n",
" \"content='What is inductive bias in CNNs?'\",\n",
" \"content='Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.'\",\n",
" \"content='Do transformers have this?'\",\n",
" \"content=\\\"I'm not sure, I don't have that information.\\\"\",\n",
" \"content='cool, suggest a project with transformers to use as my final project'\",\n",
" \"content=\\\"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\\\"\"\n",
" ],\n",
" \"original_question\": \"Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?\",\n",
" \"question\": \"Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?\",\n",
" \"rephrased_question\": \"What is image captioning and how are transformers beneficial for this task?\",\n",
" \"source_documents\": [\n",
" \"page_content='Image captioning 24 UDL \\u2022 E.g. CNN-RNN, LSTM, Transformers ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf', 'page': 23, 'date': '01/18/2024\\\\n Thursday', 'tldr': \\\"We will introduce the topic of deep learning, a bit about it's history, and what impact it has had. Then we'll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\\\", 'title': '01 - Intro to Deep Learning and Course Logistics', 'lecture_recording': None, 'suggested_readings': 'UDL Chapter 1', 'score': 0.552598095386973}\",\n",
" \"page_content='Next Time \\u2022 Tokenization and Learned Embeddings \\u2022 Training and Fine-Tuning Transformers \\u2022 Image Transformers \\u2022 Multimodal Transformers \\u2022 \\u2026 76 Link Feedback ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 75, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.34481147697680925}\",\n",
" \"page_content='Encoder-Decoder Model \\u2022 Used for machine translation, which is a sequence-to-sequence task 22 https://jalammar.github.io/illustrated-transformer/ ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 21, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.33002046196039614}\"\n",
" ]\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:45:23.305\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"retrieval\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:45:26.405\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"3cce51a5-4bcc-42f0-b186-063cb713856a\",\n",
" \"input\": null,\n",
" \"metadata\": {},\n",
" \"name\": \"User\",\n",
" \"output\": {\n",
" \"content\": \"Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?\"\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:45:26.405\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"user_message\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:45:26.407\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"36ae0e9d-540f-45e9-9adf-a596d0886304\",\n",
" \"input\": null,\n",
" \"metadata\": {},\n",
" \"name\": \"AI_Tutor\",\n",
" \"output\": {\n",
" \"content\": \"Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!\"\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:45:26.407\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"assistant_message\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:45:48.729\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"9e061c1e-ad50-400f-983f-813587508223\",\n",
" \"input\": {\n",
" \"question\": \"How do I learn the math behind transformers, what are the most important bits?\"\n",
" },\n",
" \"metadata\": {},\n",
" \"name\": \"RAG\",\n",
" \"output\": {\n",
" \"answer\": \"You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!\",\n",
" \"chat_history\": [\n",
" \"content='Do transformers have this?'\",\n",
" \"content=\\\"I'm not sure, I don't have that information.\\\"\",\n",
" \"content='cool, suggest a project with transformers to use as my final project'\",\n",
" \"content=\\\"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\\\"\",\n",
" \"content='Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?'\",\n",
" \"content='Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!'\"\n",
" ],\n",
" \"original_question\": \"How do I learn the math behind transformers, what are the most important bits?\",\n",
" \"question\": \"How do I learn the math behind transformers, what are the most important bits?\",\n",
" \"rephrased_question\": \"What are the key components I should focus on when learning the mathematics behind transformers?\",\n",
" \"source_documents\": [\n",
" \"page_content='After the break \\u2022 Image Transformers \\u2022 Multimodal Transformers \\u2022 RAG pattern \\u2022 Training and Fine-Tuning Transformers \\u2022 \\u2026 68 Feedback ChatGPT ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 67, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.3366359998900601}\",\n",
" \"page_content='UDL Chapter 11 Lecture 02/29/2024 Thursday 11a - Recurrent Neural Networks [slides] [lecture recording] Suggested Readings: UDL Chapter 11 Lecture 03/05/2024 Tuesday 12 - Transformers [slides] [lecture recording] Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer Discussion 03/06/2024 Wednesday Discussion_05 - Deep Dive 2: Deep Learning Modules in Pytorch (CNN, RNN/LSTM, Transformer) Github Link: disc5 Lecture' metadata={'source': 'https://dl4ds.github.io/sp2024/schedule/', 'page': 0, 'score': 0.3100015518546917}\",\n",
" \"page_content=\\\"Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer 14 -- Vision & Multimodal Transformers tl;dr: In this lecture we'll cover vision and multimodal transformers as a survey of three papers. [slides] [lecture recording] Suggested Readings: See slides for references 15 -- Improving LLM Perf tl;dr: In this lecture we talk about ways to improve LLM performance short of retraining or finetuning. We cover more sophisticated prompt strategies, retrieval augmentation and cognitive architectures building systems and agents based on LLMs. [slides] [lecture recording] Suggested Readings: See slides for references\\\" metadata={'source': 'https://dl4ds.github.io/sp2024/lectures/', 'page': 0, 'score': 0.3078678255019791}\"\n",
" ]\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:45:46.877\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"retrieval\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:45:48.731\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"9cd0e108-9097-4372-97d5-bb2624a03548\",\n",
" \"input\": null,\n",
" \"metadata\": {},\n",
" \"name\": \"User\",\n",
" \"output\": {\n",
" \"content\": \"How do I learn the math behind transformers, what are the most important bits?\"\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:45:48.731\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"user_message\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:45:48.733\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"8c9cf79f-767d-4184-9eeb-baf6e867653e\",\n",
" \"input\": null,\n",
" \"metadata\": {},\n",
" \"name\": \"AI_Tutor\",\n",
" \"output\": {\n",
" \"content\": \"You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!\"\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:45:48.733\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"assistant_message\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:46:02.781\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"74ed6d18-6c08-4d65-b47a-4d8efbcdfc20\",\n",
" \"input\": {\n",
" \"question\": \"What is attention?\"\n",
" },\n",
" \"metadata\": {},\n",
" \"name\": \"RAG\",\n",
" \"output\": {\n",
" \"answer\": \"Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. Good luck with your studies!\",\n",
" \"chat_history\": [\n",
" \"content='cool, suggest a project with transformers to use as my final project'\",\n",
" \"content=\\\"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\\\"\",\n",
" \"content='Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?'\",\n",
" \"content='Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!'\",\n",
" \"content='How do I learn the math behind transformers, what are the most important bits?'\",\n",
" \"content='You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!'\"\n",
" ],\n",
" \"original_question\": \"What is attention?\",\n",
" \"question\": \"What is attention?\",\n",
" \"rephrased_question\": \"What is attention?\",\n",
" \"source_documents\": [\n",
" \"page_content='Transformers, GPTs and Assistants A. Vaswani et al., \\u201cAttention is All you Need,\\u201d presented at the 31st Conference on Neural Information Processing Systems (NIPS 2017), Long Beach, CA, USA, 2017, p. 11. [Online]. Available: https://arxiv.org/abs/1706.03762 State of GPT, Andrej Karpathy, MS Build Keynote 38 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf', 'page': 37, 'date': '01/18/2024\\\\n Thursday', 'tldr': \\\"We will introduce the topic of deep learning, a bit about it's history, and what impact it has had. Then we'll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\\\", 'title': '01 - Intro to Deep Learning and Course Logistics', 'lecture_recording': None, 'suggested_readings': 'UDL Chapter 1', 'score': 0.40361429640387503}\",\n",
" \"page_content='Motivation \\u2022 Need to efficiently process large strings of text \\u2022 Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2435349602286475}\",\n",
" \"page_content='Graph Attention 59 Regular graph convolution Graph attention ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 58, 'date': '04/16/2024\\\\n Tuesday', 'tldr': 'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.22901789972755515}\"\n",
" ]\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:46:00.606\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"retrieval\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:46:02.783\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"718cdca2-b453-4b3a-9e0b-fbcdd38c54c7\",\n",
" \"input\": null,\n",
" \"metadata\": {},\n",
" \"name\": \"User\",\n",
" \"output\": {\n",
" \"content\": \"What is attention?\"\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:46:02.783\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"user_message\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:46:02.785\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"ae559dc2-c069-419e-b0e5-f117861292f8\",\n",
" \"input\": null,\n",
" \"metadata\": {},\n",
" \"name\": \"AI_Tutor\",\n",
" \"output\": {\n",
" \"content\": \"Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. Good luck with your studies!\"\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:46:02.785\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"assistant_message\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:46:10.547\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"4ac119fb-86f5-4528-9edc-44cc616de390\",\n",
" \"input\": {\n",
" \"question\": \"cool thanks, see ya\"\n",
" },\n",
" \"metadata\": {},\n",
" \"name\": \"RAG\",\n",
" \"output\": {\n",
" \"answer\": \"You're welcome! If you have any more questions in the future, feel free to ask. Goodbye!\",\n",
" \"chat_history\": [\n",
" \"content='Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?'\",\n",
" \"content='Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!'\",\n",
" \"content='How do I learn the math behind transformers, what are the most important bits?'\",\n",
" \"content='You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!'\",\n",
" \"content='What is attention?'\",\n",
" \"content='Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. Good luck with your studies!'\"\n",
" ],\n",
" \"original_question\": \"cool thanks, see ya\",\n",
" \"question\": \"cool thanks, see ya\",\n",
" \"rephrased_question\": \"No rephrasing needed.\",\n",
" \"source_documents\": [\n",
" \"page_content='Motivation \\u2022 Need to efficiently process large strings of text \\u2022 Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': -0.015666472239410423}\",\n",
" \"page_content='Example 1: univariate regression 61 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf', 'page': 59, 'date': '02/01/2024\\\\n Thursday', 'tldr': 'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. This is a key topic to aid you in applying deep learning models to new types of data.', 'title': '05 - Loss Functions', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_k2t4xjkd', 'suggested_readings': 'UDL Chapter 5', 'score': -0.03567729015020071}\",\n",
" \"page_content='Example 1: univariate regression <latexit sha1_ba se64=\\\"VrHufIDtMmytgQ3rO/QqNtGUe l4=\\\">AXBXiclZjZbtw2FEDHXdN0c1 rELwVaoUaApHAM20iXlwKJHWezU0/iN bEcg9JQGsYUJUuUPRN1not+TN+KvY7 +hX9hV5K8jC6l37oAPEw9xul5TEUZB JUeilpX9m3n3vfc/+PDKR1c/uTz 6fvfbFXpGWech3w1Sm+UHACi6F4rta MkPspyzJB8PzhZM3z/jOeFSNWOHmf8 KGxEpEImYbQ8ezv/fzm+Fc/CdJRFU0 O/SAaLcCfbCiOFvxCxAl7tXL+9nzo5 yF1fKk8ovTXFcrfiZaXK1MJhOfjzJf8 kgf3m7Mm+PbzkZv1T7Ut5X9XMRDfXQ8 O7+0uFR/PFpYbgvzvfbTP752feAP0rB MuNKhZEVxuLyU6aOK5VqEk+u+mXBMx aesJgfQlGxhBdHVZ2ziXcDIgMvSnP4p 7RXR9+uUbGkKMZJAGbC9LDAzARd7LDU 0U9HlVBZqbkKm46iUno69cwCeAOR81D LMRYmAsYqxcOGeRMwzJd9RU/D9MkYW pQ+avrzyDhAY+FqvhpWS/ZNJ1muHQ /EyY/XxzrQVoXki3nDSK2YRi4ReDyp Kr4YL2IgOACxyAlIFS+gTZOfIPKWEYU tKgFXzR6BveE9n5CmleYx5KSjvSQaFD LJRx1rjViwlElH2QbF8254BnCdwyrAU OGLozXYzpiaXNTfKTzpCpMDPeQMxXz uguYcsikmVHXUKWUDXsWL9g6zlTJ23 i0qweam4iyNrJu47OaV7UoOvUEWTBJo y7Vh1BloQbyoAlDLclo9hwolnIm5VK KwKsjH7eRp0+85MBO/NUQbXS9dbr0j6 zxjKiAnA1We+BVMh7+pr6dT2LpJzVvu mwEfeEBarW4XlcTOti05gVm1sQs06V8 ik2YJQnp53TMah8oz0Z2gCeCLrsyFi t7SFuoSbFkT9hdgqnkp+eHtxe/56Kha MpeN+UOyCQ0VZeZqyIT/R0MDeITh/QU RvHipRIsHgXrxUgn3d7R0LMcb20TqtY OCUEwKPUaXv4hVt04dwYNEzRWCJh24 ZsJhRY5irqyCRgZvuFh7NhAIZpk2Mwx lGlR5pzc/NB+hkitm9tiLszDqntDlUb o3je4nNaCMjwczvgl1QOU0aDJZ5CWas BylMyRWdLRK7/QcIm5rv56yZui04r56 UbH4wLVqcMQ356vIHXIyYWdSRqC04/ zrYksRz9QVvT7fr2yKqNV9+RrR07XLc pSbvtKN2w71kBPx0zHaTeIRizoStd WOkHrEcvQHbnzuOmahcN1m5K0e5FHp +1wpyba/tHOkGtmjkmpHJhjXyr9JoRF TUXtFNOEx0hsQlhMyq4F/8fKtjksd60 mhMV+IbqaCWBpwCWeQhPCYnMJd802ht VNh7rpVpnMhshsQlh8yBI86yaExZiKs VM8YVmGxCZE8jEeRzSPGZYylwSXpHM sSJkS7k2VD5Mu5IJYGmEehs5OoMRyFS hDtsglgu68wrnzlNoFyu6i3dHe9e0r FmqETwNIWucY8f8t5kQU4xXDMciU5E 8jKaAL72OlT5+L0F0QVOckF0djSMaXn lp5Tum/pPqW5peQXQRA9t5T8OgmiM0v PKN2zdI/S0tKS0l1LdymNLI0ofWDpA0 pDS0NK1yxdo1RbSk6k8ESwdIfSoaVDS g8sPaD0haUvKH1k6SNKX1r6ktI3lr6h 9J6l9yhljJK1y1dp5RbSl4dBNGqpau UBpaS35wrVnapzSzNKP0vqX3KR1YSn 4Vw/PMUnK8gQejpZLSx5Y+plRYSn6/B dFTS59SmliaUPrE0ieUvrb0NaUPLX1I aWwpeTcApxNLtym1b4GqgtJnlj6j9NT 
SU/d7AT5dxsC1MbdsA1uUpamlG5YSn 4pwFHC0hNynoxUe1e7eNtE7muRmnIHa zN+UZvkPFJT7mDt3emiNrk/RWrKh2To 63vTFymQUrjTH8/OL+O3sLSwt7K4/MP inWd35u+utm9or/S+6n3bu9lb7v3Yu9 t71Ov3dnth79+Z6zNfz3wz9vcH3N/z v3VqO/MtHW+7HU+c3/B3l+BUI=</la texit> Pr(y|f[x, \\u03c6], \\u03c32) = 1 p 2\\u21e1\\u03c32 exp \\\\uf8ff \\u2212(y \\u2212f[x, \\u03c6])2 2\\u03c32 \\\" <latexit sha1_ba se64=\\\"sL5nmFJHMlFiWTdFxaDUKnbUr Jg=\\\">AW4XiclZjLbtw2FECVPtP05b SoN90INQIkRWLYRvrYFEjsOC879Th+J pZjUBpKw5iZImyZ6LOB3RXdNtP6hf0 M7ptN73UaIbRvfSiAzjD3HPExyUpcRT mUpR6aemvK+8+97H3x49aNrH3/y6W efz13/Yr/MqiLie1Ems+IwZCWXQvE9L bTkh3nBWRpKfhCerhl+cM6LUmRqV49y fpyRIlYRExD6GSO94qbo1+CtLodlCJ J2auVW/5PfhAXLKqXx3VQnhW6Xgly0e J6ZTweB3yYB5LH+ujOxLw5ugNV3Go2 FYNCpEM9PHJ3MLS4lLz8WlhuS0seO2n d3L9q37Qz6Iq5UpHkpXl0fJSro9rVmg RST6+FlQlz1l0yhJ+BEXFUl4e10+xv 4NiPT9OCvgT2m/ib59Rc3SshylIZgp0 4MSMxN0saNKxz8e10LleYqmjQUV9LX mW+S6/dFwSMtR1BgUSGgr340YJAhDVN wLVD8IsrSlKl+Hayub0N6Q54IVfOzqp mO8bjrDcOh+JlxuqT3VktQvNUvOGk kYxlVwi8GRc13wxWcRAcABikROQKV5C nSY/YewvIwrLTwIGHmZD6FzsPx+TqpX mCeSko70kGhRyYcda41YMJVpR9kBxf dv+AZwXcAsQFfhi6M52MmZGk+v03yoi 7QuTQy3UDCV8KYJGHLEpBlR1CVlHBp 1LF+xtZzpk7bxGV509XCRJC1W3QdXdC 8qH7XaSLIgkWYdK0mgiwJN4s+SxlkuS 2fwIBT30TcqlBYFWRh9os7Ladmwhem 8Mc9kvXW69J+s8ZyogJwO4z34KpiHf1 tWxm+9PknDe+KfChP4DJ6l7CimQyrGk jMKo2NqZmkytk0mxBqMguqbpjUPlue gO0ATwpqsKoeK3tNtNCZasCQe3YahFJ fnRncXv+PC4XjLbxvxDsgkVlVXuqsiE /0dFfXg84fUFETx5mUSTB4Fm8jIJ93c 0dazAC9tEmrmDglBMCj1C218kqntNE8 GdzVLUVwiYeuGbCYUmOY67sgkYGb7hQ etYQBEaZDQZYySzsio4ufmh9QyRje3 xUKYh1X3hiqN0L1vcDm7CsrwcDjnl1w eoyGk3yGWaX6rEDJHJopHb4KSg1bzL X7mymfFJ1Wws82vagXzA7VRTxs5MNP B8JsagjUV1wsnHWJYnlaA/qmi3Xt3tW b7z6liztxOG6TUnqbXvpth3uJT3gZ5u O3m4Sj1jUkaiutofUI5ajPajLncdN1y gcrtuUpN5pHp2w52ZaPnHuwOumTkmZ bJvjn2ZDCYhLGoqaqeYpTxB4iSExbTq WvB/rOyYw3LXmoSw2CtFVzMBLPW5xEO YhLA42cJds41hdOhbrpVJvMBMichLD 5iKR71JITFhIqJUzxleY7ESYjkcYDzO KB5zLGUuyQ8I7ljRsiSci2oYpB1JRPA 0hC1NnQ0Bj2QmUINtkEsl3Tlc6Vp9A qVnQV7ka3rukYc1QhSaApS2yx/xgy7 nJQpxiOGa5kpwLZOU0gT3s9KgzPf2Fc 
U1OcmE8snRE6YWlF5QeWHpAaWEp+UQ xs8tJb9Owvjc0nNK9y3dp7SytKJ0z9I 9SmNLY0ofWvqQ0sjSiNI1S9co1ZaSEy k8ESzdpXRg6YDSQ0sPKX1h6QtKH1v6m NKXlr6k9I2lbyi9b+l9SpmljNJ1S9cp 5ZaSVwdhvGrpKqWhpeS3H+w1S3uU5pb mlD6w9AGlfUvJr2J4nlKjfwYLRUv rE0ieUCkvJ7cwfmbpM0pTS1NKn1r6l NLXlr6m9JGljyhNLCXvBuB0YukOpfYt UF1Sum3pNqVnlp653wvw2TSGroW5ZSv YojSzNKN0w1LySwGOEpaekvNkrNq72v RtE7mvxWrGHazN+PRqkvNYzbiDtXen6 dXk/hSrGR+Qrq/vz16kQErhTn8yt7CM 38LSwv7K4vL3i3e37y7cW23f0F71va +8W56y94P3j3vsdfz9rzI+9P72/vH+3 c+mv91/rf53yfqO1fa70Op/5P/4D+ G/4mg=</latexit> Pr(y|\\u00b5, \\u03c32) = 1 p 2\\u21e1\\u03c32 exp \\\\uf8ff \\u2212(y \\u2212\\u00b5)2 2\\u03c32 \\\" In this case, just the mean Just learn the mean, \\ud835\\udf07, and assume the variance is fixed,. 63 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf', 'page': 61, 'date': '02/01/2024\\\\n Thursday', 'tldr': 'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. This is a key topic to aid you in applying deep learning models to new types of data.', 'title': '05 - Loss Functions', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_k2t4xjkd', 'suggested_readings': 'UDL Chapter 5', 'score': -0.038714982109774976}\"\n",
" ]\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:46:08.896\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"retrieval\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:46:10.549\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"4425cdbd-0da9-4d90-80e2-d909d4390189\",\n",
" \"input\": null,\n",
" \"metadata\": {},\n",
" \"name\": \"User\",\n",
" \"output\": {\n",
" \"content\": \"cool thanks, see ya\"\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:46:10.549\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"user_message\"\n",
" },\n",
" {\n",
" \"attachments\": [],\n",
" \"endTime\": \"2024-06-19T09:46:10.55\",\n",
" \"error\": null,\n",
" \"generation\": null,\n",
" \"id\": \"ae5047bf-cdbd-4490-a5fb-beb9cf2f27e5\",\n",
" \"input\": null,\n",
" \"metadata\": {},\n",
" \"name\": \"AI_Tutor\",\n",
" \"output\": {\n",
" \"content\": \"You're welcome! If you have any more questions in the future, feel free to ask. Goodbye!\"\n",
" },\n",
" \"parentId\": null,\n",
" \"scores\": [],\n",
" \"startTime\": \"2024-06-19T09:46:10.55\",\n",
" \"tags\": null,\n",
" \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
" \"type\": \"assistant_message\"\n",
" }\n",
" ],\n",
" \"tags\": [\n",
" \"FAISS\",\n",
" \"gpt-3.5-turbo-1106\"\n",
" ]\n",
"}]\n",
"{'step_id': ['1ab30fe6-99cb-4327-bd7d-e3128b2c1a62', 'd2b74775-41fc-4ebc-bc40-d8d7cf67015d', 'fd58e85c-dbaa-4d06-8a75-a3839e37856d', '4d02c5ad-1bd9-4767-beab-4c724062093e', '9e061c1e-ad50-400f-983f-813587508223', '74ed6d18-6c08-4d65-b47a-4d8efbcdfc20', '4ac119fb-86f5-4528-9edc-44cc616de390'], 'user_input': ['What is inductive bias in CNNs?', 'Do transformers have this?', 'cool, suggest a project with transformers to use as my final project', 'Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?', 'How do I learn the math behind transformers, what are the most important bits?', 'What is attention?', 'cool thanks, see ya'], 'output': ['Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.', \"I'm not sure, I don't have that information.\", \"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\", 'Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. 
Good luck with your project!', 'You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!', 'Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. Good luck with your studies!', \"You're welcome! If you have any more questions in the future, feel free to ask. Goodbye!\"], 'context': [['page_content=\\'Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 
39 A function 𝐹[⋅] with parameters 𝜙\" that takes the node embeddings and adjacency matrix and outputs new node embeddings \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf\\', \\'page\\': 38, \\'date\\': \\'04/16/2024\\\\n Tuesday\\', \\'tldr\\': \\'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.\\', \\'title\\': \\'20 -- Graph Neural Networks\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_bt7qtymk\\', \\'suggested_readings\\': \\'UDL Chapter 13\\', \\'score\\': 0.4147438882496012}', \"page_content='https://poloclub.github.io/cnn-explainer/ 92 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 91, 'date': '02/27/2024\\\\n Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.30807206912986296}\", \"page_content='Convolutional networks • Networks for images • Invariance and equivariance • 1D convolution • Convolutional layers • Channels • Receptive fields • Convolutional network for MNIST 1D 44 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 43, 'date': '02/27/2024\\\\n Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.26420832659802906}\"], [\"page_content='Transformers • Multi-headed Self Attention is just one component of the transformer architecture • Let’s 
look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2451855994037282}\", \"page_content='Transformers • Motivation • Dot-product self-attention • Applying Self-Attention • The Transformer Architecture • Three Types of NLP Transformer Models • Encoder • Decoder • Encoder-Decoder 55 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 54, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.24314965373956088}\", \"page_content='Transformers • Motivation • Dot-product self-attention • Applying Self-Attention • The Transformer Architecture • Three Types of NLP Transformer Models • Encoder • Decoder • Encoder-Decoder 10 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 9, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to 
review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2367633946657962}\"], ['page_content=\\'Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 39 A function 𝐹[⋅] with parameters 𝜙\" that takes the node embeddings and adjacency matrix and outputs new node embeddings \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf\\', \\'page\\': 38, \\'date\\': \\'04/16/2024\\\\n Tuesday\\', \\'tldr\\': \\'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.\\', \\'title\\': \\'20 -- Graph Neural Networks\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_bt7qtymk\\', \\'suggested_readings\\': \\'UDL Chapter 13\\', \\'score\\': 0.3613770850497736}', \"page_content='Transformers • Multi-headed Self Attention is just one component of the transformer architecture • Let’s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and 
encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2733153333305661}\", \"page_content='Transformers • Multi-headed Self Attention is just one component of the transformer architecture 48 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 47, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.22982265165302218}\"], ['page_content=\\'Image captioning 24 UDL • E.g. CNN-RNN, LSTM, Transformers \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf\\', \\'page\\': 23, \\'date\\': \\'01/18/2024\\\\n Thursday\\', \\'tldr\\': \"We will introduce the topic of deep learning, a bit about it\\'s history, and what impact it has had. 
Then we\\'ll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\", \\'title\\': \\'01 - Intro to Deep Learning and Course Logistics\\', \\'lecture_recording\\': None, \\'suggested_readings\\': \\'UDL Chapter 1\\', \\'score\\': 0.552598095386973}', \"page_content='Next Time • Tokenization and Learned Embeddings • Training and Fine-Tuning Transformers • Image Transformers • Multimodal Transformers • … 76 Link Feedback ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 75, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.34481147697680925}\", \"page_content='Encoder-Decoder Model • Used for machine translation, which is a sequence-to-sequence task 22 https://jalammar.github.io/illustrated-transformer/ ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 21, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. 
We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.33002046196039614}\"], [\"page_content='After the break • Image Transformers • Multimodal Transformers • RAG pattern • Training and Fine-Tuning Transformers • … 68 Feedback ChatGPT ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 67, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.3366359998900601}\", \"page_content='UDL Chapter 11 Lecture 02/29/2024 Thursday 11a - Recurrent Neural Networks [slides] [lecture recording] Suggested Readings: UDL Chapter 11 Lecture 03/05/2024 Tuesday 12 - Transformers [slides] [lecture recording] Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer Discussion 03/06/2024 Wednesday Discussion_05 - Deep Dive 2: Deep Learning Modules in Pytorch (CNN, RNN/LSTM, Transformer) Github Link: disc5 Lecture' metadata={'source': 'https://dl4ds.github.io/sp2024/schedule/', 'page': 0, 'score': 0.3100015518546917}\", 'page_content=\"Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer 14 -- Vision & Multimodal Transformers tl;dr: In this lecture we\\'ll cover vision and multimodal transformers as a survey of three papers. 
[slides] [lecture recording] Suggested Readings: See slides for references 15 -- Improving LLM Perf tl;dr: In this lecture we talk about ways to improve LLM performance short of retraining or finetuning. We cover more sophisticated prompt strategies, retrieval augmentation and cognitive architectures building systems and agents based on LLMs. [slides] [lecture recording] Suggested Readings: See slides for references\" metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/lectures/\\', \\'page\\': 0, \\'score\\': 0.3078678255019791}'], ['page_content=\\'Transformers, GPTs and Assistants A. Vaswani et al., “Attention is All you Need,” presented at the 31st Conference on Neural Information Processing Systems (NIPS 2017), Long Beach, CA, USA, 2017, p. 11. [Online]. Available: https://arxiv.org/abs/1706.03762 State of GPT, Andrej Karpathy, MS Build Keynote 38 \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf\\', \\'page\\': 37, \\'date\\': \\'01/18/2024\\\\n Thursday\\', \\'tldr\\': \"We will introduce the topic of deep learning, a bit about it\\'s history, and what impact it has had. 
Then we\\'ll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\", \\'title\\': \\'01 - Intro to Deep Learning and Course Logistics\\', \\'lecture_recording\\': None, \\'suggested_readings\\': \\'UDL Chapter 1\\', \\'score\\': 0.40361429640387503}', \"page_content='Motivation • Need to efficiently process large strings of text • Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2435349602286475}\", \"page_content='Graph Attention 59 Regular graph convolution Graph attention ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 58, 'date': '04/16/2024\\\\n Tuesday', 'tldr': 'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.22901789972755515}\"], [\"page_content='Motivation • Need to efficiently process large strings of text • Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 
'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': -0.015666472239410423}\", \"page_content='Example 1: univariate regression 61 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf', 'page': 59, 'date': '02/01/2024\\\\n Thursday', 'tldr': 'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. 
This is a key topic to aid you in applying deep learning models to new types of data.', 'title': '05 - Loss Functions', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_k2t4xjkd', 'suggested_readings': 'UDL Chapter 5', 'score': -0.03567729015020071}\", 'page_content=\\'Example 1: univariate regression <latexit sha1_ba se64=\"VrHufIDtMmytgQ3rO/QqNtGUe l4=\">AXBXiclZjZbtw2FEDHXdN0c1 rELwVaoUaApHAM20iXlwKJHWezU0/iN bEcg9JQGsYUJUuUPRN1not+TN+KvY7 +hX9hV5K8jC6l37oAPEw9xul5TEUZB JUeilpX9m3n3vfc/+PDKR1c/uTz 6fvfbFXpGWech3w1Sm+UHACi6F4rta MkPspyzJB8PzhZM3z/jOeFSNWOHmf8 KGxEpEImYbQ8ezv/fzm+Fc/CdJRFU0 O/SAaLcCfbCiOFvxCxAl7tXL+9nzo5 yF1fKk8ovTXFcrfiZaXK1MJhOfjzJf8 kgf3m7Mm+PbzkZv1T7Ut5X9XMRDfXQ8 O7+0uFR/PFpYbgvzvfbTP752feAP0rB MuNKhZEVxuLyU6aOK5VqEk+u+mXBMx aesJgfQlGxhBdHVZ2ziXcDIgMvSnP4p 7RXR9+uUbGkKMZJAGbC9LDAzARd7LDU 0U9HlVBZqbkKm46iUno69cwCeAOR81D LMRYmAsYqxcOGeRMwzJd9RU/D9MkYW pQ+avrzyDhAY+FqvhpWS/ZNJ1muHQ /EyY/XxzrQVoXki3nDSK2YRi4ReDyp Kr4YL2IgOACxyAlIFS+gTZOfIPKWEYU tKgFXzR6BveE9n5CmleYx5KSjvSQaFD LJRx1rjViwlElH2QbF8254BnCdwyrAU OGLozXYzpiaXNTfKTzpCpMDPeQMxXz uguYcsikmVHXUKWUDXsWL9g6zlTJ23 i0qweam4iyNrJu47OaV7UoOvUEWTBJo y7Vh1BloQbyoAlDLclo9hwolnIm5VK KwKsjH7eRp0+85MBO/NUQbXS9dbr0j6 zxjKiAnA1We+BVMh7+pr6dT2LpJzVvu mwEfeEBarW4XlcTOti05gVm1sQs06V8 ik2YJQnp53TMah8oz0Z2gCeCLrsyFi t7SFuoSbFkT9hdgqnkp+eHtxe/56Kha MpeN+UOyCQ0VZeZqyIT/R0MDeITh/QU RvHipRIsHgXrxUgn3d7R0LMcb20TqtY OCUEwKPUaXv4hVt04dwYNEzRWCJh24 ZsJhRY5irqyCRgZvuFh7NhAIZpk2Mwx lGlR5pzc/NB+hkitm9tiLszDqntDlUb o3je4nNaCMjwczvgl1QOU0aDJZ5CWas BylMyRWdLRK7/QcIm5rv56yZui04r56 UbH4wLVqcMQ356vIHXIyYWdSRqC04/ zrYksRz9QVvT7fr2yKqNV9+RrR07XLc pSbvtKN2w71kBPx0zHaTeIRizoStd WOkHrEcvQHbnzuOmahcN1m5K0e5FHp +1wpyba/tHOkGtmjkmpHJhjXyr9JoRF TUXtFNOEx0hsQlhMyq4F/8fKtjksd60 mhMV+IbqaCWBpwCWeQhPCYnMJd802ht VNh7rpVpnMhshsQlh8yBI86yaExZiKs VM8YVmGxCZE8jEeRzSPGZYylwSXpHM sSJkS7k2VD5Mu5IJYGmEehs5OoMRyFS hDtsglgu68wrnzlNoFyu6i3dHe9e0r FmqETwNIWucY8f8t5kQU4xXDMciU5E 8jKaAL72OlT5+L0F0QVOckF0djSMaXn lp5Tum/pPqW5peQXQRA9t5T8OgmiM0v 
PKN2zdI/S0tKS0l1LdymNLI0ofWDpA0 pDS0NK1yxdo1RbSk6k8ESwdIfSoaVDS g8sPaD0haUvKH1k6SNKX1r6ktI3lr6h 9J6l9yhljJK1y1dp5RbSl4dBNGqpau UBpaS35wrVnapzSzNKP0vqX3KR1YSn 4Vw/PMUnK8gQejpZLSx5Y+plRYSn6/B dFTS59SmliaUPrE0ieUvrb0NaUPLX1I aWwpeTcApxNLtym1b4GqgtJnlj6j9NT SU/d7AT5dxsC1MbdsA1uUpamlG5YSn 4pwFHC0hNynoxUe1e7eNtE7muRmnIHa zN+UZvkPFJT7mDt3emiNrk/RWrKh2To 63vTFymQUrjTH8/OL+O3sLSwt7K4/MP inWd35u+utm9or/S+6n3bu9lb7v3Yu9 t71Ov3dnth79+Z6zNfz3wz9vcH3N/z v3VqO/MtHW+7HU+c3/B3l+BUI=</la texit> Pr(y|f[x, φ], σ2) = 1 p 2⇡σ2 exp \\\\uf8ff −(y −f[x, φ])2 2σ2 \" <latexit sha1_ba se64=\"sL5nmFJHMlFiWTdFxaDUKnbUr Jg=\">AW4XiclZjLbtw2FECVPtP05b SoN90INQIkRWLYRvrYFEjsOC879Th+J pZjUBpKw5iZImyZ6LOB3RXdNtP6hf0 M7ptN73UaIbRvfSiAzjD3HPExyUpcRT mUpR6aemvK+8+97H3x49aNrH3/y6W efz13/Yr/MqiLie1Ems+IwZCWXQvE9L bTkh3nBWRpKfhCerhl+cM6LUmRqV49y fpyRIlYRExD6GSO94qbo1+CtLodlCJ J2auVW/5PfhAXLKqXx3VQnhW6Xgly0e J6ZTweB3yYB5LH+ujOxLw5ugNV3Go2 FYNCpEM9PHJ3MLS4lLz8WlhuS0seO2n d3L9q37Qz6Iq5UpHkpXl0fJSro9rVmg RST6+FlQlz1l0yhJ+BEXFUl4e10+xv 4NiPT9OCvgT2m/ib59Rc3SshylIZgp0 4MSMxN0saNKxz8e10LleYqmjQUV9LX mW+S6/dFwSMtR1BgUSGgr340YJAhDVN wLVD8IsrSlKl+Hayub0N6Q54IVfOzqp mO8bjrDcOh+JlxuqT3VktQvNUvOGk kYxlVwi8GRc13wxWcRAcABikROQKV5C nSY/YewvIwrLTwIGHmZD6FzsPx+TqpX mCeSko70kGhRyYcda41YMJVpR9kBxf dv+AZwXcAsQFfhi6M52MmZGk+v03yoi 7QuTQy3UDCV8KYJGHLEpBlR1CVlHBp 1LF+xtZzpk7bxGV509XCRJC1W3QdXdC 8qH7XaSLIgkWYdK0mgiwJN4s+SxlkuS 2fwIBT30TcqlBYFWRh9os7Ladmwhem 8Mc9kvXW69J+s8ZyogJwO4z34KpiHf1 tWxm+9PknDe+KfChP4DJ6l7CimQyrGk jMKo2NqZmkytk0mxBqMguqbpjUPlue gO0ATwpqsKoeK3tNtNCZasCQe3YahFJ fnRncXv+PC4XjLbxvxDsgkVlVXuqsiE /0dFfXg84fUFETx5mUSTB4Fm8jIJ93c 0dazAC9tEmrmDglBMCj1C218kqntNE8 GdzVLUVwiYeuGbCYUmOY67sgkYGb7hQ etYQBEaZDQZYySzsio4ufmh9QyRje3 xUKYh1X3hiqN0L1vcDm7CsrwcDjnl1w eoyGk3yGWaX6rEDJHJopHb4KSg1bzL X7mymfFJ1Wws82vagXzA7VRTxs5MNP B8JsagjUV1wsnHWJYnlaA/qmi3Xt3tW b7z6liztxOG6TUnqbXvpth3uJT3gZ5u O3m4Sj1jUkaiutofUI5ajPajLncdN1y gcrtuUpN5pHp2w52ZaPnHuwOumTkmZ bJvjn2ZDCYhLGoqaqeYpTxB4iSExbTq WvB/rOyYw3LXmoSw2CtFVzMBLPW5xEO 
YhLA42cJds41hdOhbrpVJvMBMichLD 5iKR71JITFhIqJUzxleY7ESYjkcYDzO KB5zLGUuyQ8I7ljRsiSci2oYpB1JRPA 0hC1NnQ0Bj2QmUINtkEsl3Tlc6Vp9A qVnQV7ka3rukYc1QhSaApS2yx/xgy7 nJQpxiOGa5kpwLZOU0gT3s9KgzPf2Fc U1OcmE8snRE6YWlF5QeWHpAaWEp+UQ xs8tJb9Owvjc0nNK9y3dp7SytKJ0z9I 9SmNLY0ofWvqQ0sjSiNI1S9co1ZaSEy k8ESzdpXRg6YDSQ0sPKX1h6QtKH1v6m NKXlr6k9I2lbyi9b+l9SpmljNJ1S9cp 5ZaSVwdhvGrpKqWhpeS3H+w1S3uU5pb mlD6w9AGlfUvJr2J4nlKjfwYLRUv rE0ieUCkvJ7cwfmbpM0pTS1NKn1r6l NLXlr6m9JGljyhNLCXvBuB0YukOpfYt UF1Sum3pNqVnlp653wvw2TSGroW5ZSv YojSzNKN0w1LySwGOEpaekvNkrNq72v RtE7mvxWrGHazN+PRqkvNYzbiDtXen6 dXk/hSrGR+Qrq/vz16kQErhTn8yt7CM 38LSwv7K4vL3i3e37y7cW23f0F71va +8W56y94P3j3vsdfz9rzI+9P72/vH+3 c+mv91/rf53yfqO1fa70Op/5P/4D+ G/4mg=</latexit> Pr(y|µ, σ2) = 1 p 2⇡σ2 exp \\\\uf8ff −(y −µ)2 2σ2 \" In this case, just the mean Just learn the mean, 𝜇, and assume the variance is fixed,. 63 \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf\\', \\'page\\': 61, \\'date\\': \\'02/01/2024\\\\n Thursday\\', \\'tldr\\': \\'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. This is a key topic to aid you in applying deep learning models to new types of data.\\', \\'title\\': \\'05 - Loss Functions\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_k2t4xjkd\\', \\'suggested_readings\\': \\'UDL Chapter 5\\', \\'score\\': -0.038714982109774976}']]}\n",
"Step ID: 1ab30fe6-99cb-4327-bd7d-e3128b2c1a62\n",
"User Input: What is inductive bias in CNNs?\n",
"Output: Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.\n",
"Context: ['page_content=\\'Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 39 A function 𝐹[⋅] with parameters 𝜙\" that takes the node embeddings and adjacency matrix and outputs new node embeddings \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf\\', \\'page\\': 38, \\'date\\': \\'04/16/2024\\\\n Tuesday\\', \\'tldr\\': \\'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.\\', \\'title\\': \\'20 -- Graph Neural Networks\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_bt7qtymk\\', \\'suggested_readings\\': \\'UDL Chapter 13\\', \\'score\\': 0.4147438882496012}', \"page_content='https://poloclub.github.io/cnn-explainer/ 92 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 91, 'date': '02/27/2024\\\\n Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.30807206912986296}\", \"page_content='Convolutional networks • Networks for images • Invariance and equivariance • 1D convolution • Convolutional layers • Channels • Receptive fields • Convolutional network for MNIST 1D 44 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 43, 'date': '02/27/2024\\\\n Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional 
Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.26420832659802906}\"]\n",
"Step ID: d2b74775-41fc-4ebc-bc40-d8d7cf67015d\n",
"User Input: Do transformers have this?\n",
"Output: I'm not sure, I don't have that information.\n",
"Context: [\"page_content='Transformers • Multi-headed Self Attention is just one component of the transformer architecture • Let’s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2451855994037282}\", \"page_content='Transformers • Motivation • Dot-product self-attention • Applying Self-Attention • The Transformer Architecture • Three Types of NLP Transformer Models • Encoder • Decoder • Encoder-Decoder 55 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 54, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.24314965373956088}\", \"page_content='Transformers • Motivation • Dot-product self-attention • Applying Self-Attention • The Transformer Architecture • Three Types of NLP Transformer Models • Encoder • Decoder • Encoder-Decoder 10 ' metadata={'source': 
'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 9, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2367633946657962}\"]\n",
"Step ID: fd58e85c-dbaa-4d06-8a75-a3839e37856d\n",
"User Input: cool, suggest a project with transformers to use as my final project\n",
"Output: You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\n",
"Context: ['page_content=\\'Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 39 A function 𝐹[⋅] with parameters 𝜙\" that takes the node embeddings and adjacency matrix and outputs new node embeddings \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf\\', \\'page\\': 38, \\'date\\': \\'04/16/2024\\\\n Tuesday\\', \\'tldr\\': \\'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.\\', \\'title\\': \\'20 -- Graph Neural Networks\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_bt7qtymk\\', \\'suggested_readings\\': \\'UDL Chapter 13\\', \\'score\\': 0.3613770850497736}', \"page_content='Transformers • Multi-headed Self Attention is just one component of the transformer architecture • Let’s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2733153333305661}\", \"page_content='Transformers • Multi-headed Self Attention is just one component of the transformer architecture 48 Encoder Decoder ' metadata={'source': 
'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 47, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.22982265165302218}\"]\n",
"Step ID: 4d02c5ad-1bd9-4767-beab-4c724062093e\n",
"User Input: Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?\n",
"Output: Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!\n",
"Context: ['page_content=\\'Image captioning 24 UDL • E.g. CNN-RNN, LSTM, Transformers \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf\\', \\'page\\': 23, \\'date\\': \\'01/18/2024\\\\n Thursday\\', \\'tldr\\': \"We will introduce the topic of deep learning, a bit about it\\'s history, and what impact it has had. Then we\\'ll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\", \\'title\\': \\'01 - Intro to Deep Learning and Course Logistics\\', \\'lecture_recording\\': None, \\'suggested_readings\\': \\'UDL Chapter 1\\', \\'score\\': 0.552598095386973}', \"page_content='Next Time • Tokenization and Learned Embeddings • Training and Fine-Tuning Transformers • Image Transformers • Multimodal Transformers • … 76 Link Feedback ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 75, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.34481147697680925}\", \"page_content='Encoder-Decoder Model • Used for machine translation, which is a sequence-to-sequence task 22 https://jalammar.github.io/illustrated-transformer/ ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 21, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. 
We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.33002046196039614}\"]\n",
"Step ID: 9e061c1e-ad50-400f-983f-813587508223\n",
"User Input: How do I learn the math behind transformers, what are the most important bits?\n",
"Output: You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!\n",
"Context: [\"page_content='After the break • Image Transformers • Multimodal Transformers • RAG pattern • Training and Fine-Tuning Transformers • … 68 Feedback ChatGPT ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 67, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.3366359998900601}\", \"page_content='UDL Chapter 11 Lecture 02/29/2024 Thursday 11a - Recurrent Neural Networks [slides] [lecture recording] Suggested Readings: UDL Chapter 11 Lecture 03/05/2024 Tuesday 12 - Transformers [slides] [lecture recording] Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer Discussion 03/06/2024 Wednesday Discussion_05 - Deep Dive 2: Deep Learning Modules in Pytorch (CNN, RNN/LSTM, Transformer) Github Link: disc5 Lecture' metadata={'source': 'https://dl4ds.github.io/sp2024/schedule/', 'page': 0, 'score': 0.3100015518546917}\", 'page_content=\"Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer 14 -- Vision & Multimodal Transformers tl;dr: In this lecture we\\'ll cover vision and multimodal transformers as a survey of three papers. [slides] [lecture recording] Suggested Readings: See slides for references 15 -- Improving LLM Perf tl;dr: In this lecture we talk about ways to improve LLM performance short of retraining or finetuning. We cover more sophisticated prompt strategies, retrieval augmentation and cognitive architectures building systems and agents based on LLMs. 
[slides] [lecture recording] Suggested Readings: See slides for references\" metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/lectures/\\', \\'page\\': 0, \\'score\\': 0.3078678255019791}']\n",
"Step ID: 74ed6d18-6c08-4d65-b47a-4d8efbcdfc20\n",
"User Input: What is attention?\n",
"Output: Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. Good luck with your studies!\n",
"Context: ['page_content=\\'Transformers, GPTs and Assistants A. Vaswani et al., “Attention is All you Need,” presented at the 31st Conference on Neural Information Processing Systems (NIPS 2017), Long Beach, CA, USA, 2017, p. 11. [Online]. Available: https://arxiv.org/abs/1706.03762 State of GPT, Andrej Karpathy, MS Build Keynote 38 \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf\\', \\'page\\': 37, \\'date\\': \\'01/18/2024\\\\n Thursday\\', \\'tldr\\': \"We will introduce the topic of deep learning, a bit about it\\'s history, and what impact it has had. Then we\\'ll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\", \\'title\\': \\'01 - Intro to Deep Learning and Course Logistics\\', \\'lecture_recording\\': None, \\'suggested_readings\\': \\'UDL Chapter 1\\', \\'score\\': 0.40361429640387503}', \"page_content='Motivation • Need to efficiently process large strings of text • Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2435349602286475}\", \"page_content='Graph Attention 59 Regular graph convolution Graph attention ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 58, 'date': '04/16/2024\\\\n Tuesday', 'tldr': 'In 
this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.22901789972755515}\"]\n",
"Step ID: 4ac119fb-86f5-4528-9edc-44cc616de390\n",
"User Input: cool thanks, see ya\n",
"Output: You're welcome! If you have any more questions in the future, feel free to ask. Goodbye!\n",
"Context: [\"page_content='Motivation • Need to efficiently process large strings of text • Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': -0.015666472239410423}\", \"page_content='Example 1: univariate regression 61 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf', 'page': 59, 'date': '02/01/2024\\\\n Thursday', 'tldr': 'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. 
This is a key topic to aid you in applying deep learning models to new types of data.', 'title': '05 - Loss Functions', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_k2t4xjkd', 'suggested_readings': 'UDL Chapter 5', 'score': -0.03567729015020071}\", 'page_content=\\'Example 1: univariate regression <latexit sha1_ba se64=\"VrHufIDtMmytgQ3rO/QqNtGUe l4=\">AXBXiclZjZbtw2FEDHXdN0c1 rELwVaoUaApHAM20iXlwKJHWezU0/iN bEcg9JQGsYUJUuUPRN1not+TN+KvY7 +hX9hV5K8jC6l37oAPEw9xul5TEUZB JUeilpX9m3n3vfc/+PDKR1c/uTz 6fvfbFXpGWech3w1Sm+UHACi6F4rta MkPspyzJB8PzhZM3z/jOeFSNWOHmf8 KGxEpEImYbQ8ezv/fzm+Fc/CdJRFU0 O/SAaLcCfbCiOFvxCxAl7tXL+9nzo5 yF1fKk8ovTXFcrfiZaXK1MJhOfjzJf8 kgf3m7Mm+PbzkZv1T7Ut5X9XMRDfXQ8 O7+0uFR/PFpYbgvzvfbTP752feAP0rB MuNKhZEVxuLyU6aOK5VqEk+u+mXBMx aesJgfQlGxhBdHVZ2ziXcDIgMvSnP4p 7RXR9+uUbGkKMZJAGbC9LDAzARd7LDU 0U9HlVBZqbkKm46iUno69cwCeAOR81D LMRYmAsYqxcOGeRMwzJd9RU/D9MkYW pQ+avrzyDhAY+FqvhpWS/ZNJ1muHQ /EyY/XxzrQVoXki3nDSK2YRi4ReDyp Kr4YL2IgOACxyAlIFS+gTZOfIPKWEYU tKgFXzR6BveE9n5CmleYx5KSjvSQaFD LJRx1rjViwlElH2QbF8254BnCdwyrAU OGLozXYzpiaXNTfKTzpCpMDPeQMxXz uguYcsikmVHXUKWUDXsWL9g6zlTJ23 i0qweam4iyNrJu47OaV7UoOvUEWTBJo y7Vh1BloQbyoAlDLclo9hwolnIm5VK KwKsjH7eRp0+85MBO/NUQbXS9dbr0j6 zxjKiAnA1We+BVMh7+pr6dT2LpJzVvu mwEfeEBarW4XlcTOti05gVm1sQs06V8 ik2YJQnp53TMah8oz0Z2gCeCLrsyFi t7SFuoSbFkT9hdgqnkp+eHtxe/56Kha MpeN+UOyCQ0VZeZqyIT/R0MDeITh/QU RvHipRIsHgXrxUgn3d7R0LMcb20TqtY OCUEwKPUaXv4hVt04dwYNEzRWCJh24 ZsJhRY5irqyCRgZvuFh7NhAIZpk2Mwx lGlR5pzc/NB+hkitm9tiLszDqntDlUb o3je4nNaCMjwczvgl1QOU0aDJZ5CWas BylMyRWdLRK7/QcIm5rv56yZui04r56 UbH4wLVqcMQ356vIHXIyYWdSRqC04/ zrYksRz9QVvT7fr2yKqNV9+RrR07XLc pSbvtKN2w71kBPx0zHaTeIRizoStd WOkHrEcvQHbnzuOmahcN1m5K0e5FHp +1wpyba/tHOkGtmjkmpHJhjXyr9JoRF TUXtFNOEx0hsQlhMyq4F/8fKtjksd60 mhMV+IbqaCWBpwCWeQhPCYnMJd802ht VNh7rpVpnMhshsQlh8yBI86yaExZiKs VM8YVmGxCZE8jEeRzSPGZYylwSXpHM sSJkS7k2VD5Mu5IJYGmEehs5OoMRyFS hDtsglgu68wrnzlNoFyu6i3dHe9e0r FmqETwNIWucY8f8t5kQU4xXDMciU5E 8jKaAL72OlT5+L0F0QVOckF0djSMaXn lp5Tum/pPqW5peQXQRA9t5T8OgmiM0v 
PKN2zdI/S0tKS0l1LdymNLI0ofWDpA0 pDS0NK1yxdo1RbSk6k8ESwdIfSoaVDS g8sPaD0haUvKH1k6SNKX1r6ktI3lr6h 9J6l9yhljJK1y1dp5RbSl4dBNGqpau UBpaS35wrVnapzSzNKP0vqX3KR1YSn 4Vw/PMUnK8gQejpZLSx5Y+plRYSn6/B dFTS59SmliaUPrE0ieUvrb0NaUPLX1I aWwpeTcApxNLtym1b4GqgtJnlj6j9NT SU/d7AT5dxsC1MbdsA1uUpamlG5YSn 4pwFHC0hNynoxUe1e7eNtE7muRmnIHa zN+UZvkPFJT7mDt3emiNrk/RWrKh2To 63vTFymQUrjTH8/OL+O3sLSwt7K4/MP inWd35u+utm9or/S+6n3bu9lb7v3Yu9 t71Ov3dnth79+Z6zNfz3wz9vcH3N/z v3VqO/MtHW+7HU+c3/B3l+BUI=</la texit> Pr(y|f[x, φ], σ2) = 1 p 2⇡σ2 exp \\\\uf8ff −(y −f[x, φ])2 2σ2 \" <latexit sha1_ba se64=\"sL5nmFJHMlFiWTdFxaDUKnbUr Jg=\">AW4XiclZjLbtw2FECVPtP05b SoN90INQIkRWLYRvrYFEjsOC879Th+J pZjUBpKw5iZImyZ6LOB3RXdNtP6hf0 M7ptN73UaIbRvfSiAzjD3HPExyUpcRT mUpR6aemvK+8+97H3x49aNrH3/y6W efz13/Yr/MqiLie1Ems+IwZCWXQvE9L bTkh3nBWRpKfhCerhl+cM6LUmRqV49y fpyRIlYRExD6GSO94qbo1+CtLodlCJ J2auVW/5PfhAXLKqXx3VQnhW6Xgly0e J6ZTweB3yYB5LH+ujOxLw5ugNV3Go2 FYNCpEM9PHJ3MLS4lLz8WlhuS0seO2n d3L9q37Qz6Iq5UpHkpXl0fJSro9rVmg RST6+FlQlz1l0yhJ+BEXFUl4e10+xv 4NiPT9OCvgT2m/ib59Rc3SshylIZgp0 4MSMxN0saNKxz8e10LleYqmjQUV9LX mW+S6/dFwSMtR1BgUSGgr340YJAhDVN wLVD8IsrSlKl+Hayub0N6Q54IVfOzqp mO8bjrDcOh+JlxuqT3VktQvNUvOGk kYxlVwi8GRc13wxWcRAcABikROQKV5C nSY/YewvIwrLTwIGHmZD6FzsPx+TqpX mCeSko70kGhRyYcda41YMJVpR9kBxf dv+AZwXcAsQFfhi6M52MmZGk+v03yoi 7QuTQy3UDCV8KYJGHLEpBlR1CVlHBp 1LF+xtZzpk7bxGV509XCRJC1W3QdXdC 8qH7XaSLIgkWYdK0mgiwJN4s+SxlkuS 2fwIBT30TcqlBYFWRh9os7Ladmwhem 8Mc9kvXW69J+s8ZyogJwO4z34KpiHf1 tWxm+9PknDe+KfChP4DJ6l7CimQyrGk jMKo2NqZmkytk0mxBqMguqbpjUPlue gO0ATwpqsKoeK3tNtNCZasCQe3YahFJ fnRncXv+PC4XjLbxvxDsgkVlVXuqsiE /0dFfXg84fUFETx5mUSTB4Fm8jIJ93c 0dazAC9tEmrmDglBMCj1C218kqntNE8 GdzVLUVwiYeuGbCYUmOY67sgkYGb7hQ etYQBEaZDQZYySzsio4ufmh9QyRje3 xUKYh1X3hiqN0L1vcDm7CsrwcDjnl1w eoyGk3yGWaX6rEDJHJopHb4KSg1bzL X7mymfFJ1Wws82vagXzA7VRTxs5MNP B8JsagjUV1wsnHWJYnlaA/qmi3Xt3tW b7z6liztxOG6TUnqbXvpth3uJT3gZ5u O3m4Sj1jUkaiutofUI5ajPajLncdN1y gcrtuUpN5pHp2w52ZaPnHuwOumTkmZ bJvjn2ZDCYhLGoqaqeYpTxB4iSExbTq WvB/rOyYw3LXmoSw2CtFVzMBLPW5xEO 
YhLA42cJds41hdOhbrpVJvMBMichLD 5iKR71JITFhIqJUzxleY7ESYjkcYDzO KB5zLGUuyQ8I7ljRsiSci2oYpB1JRPA 0hC1NnQ0Bj2QmUINtkEsl3Tlc6Vp9A qVnQV7ka3rukYc1QhSaApS2yx/xgy7 nJQpxiOGa5kpwLZOU0gT3s9KgzPf2Fc U1OcmE8snRE6YWlF5QeWHpAaWEp+UQ xs8tJb9Owvjc0nNK9y3dp7SytKJ0z9I 9SmNLY0ofWvqQ0sjSiNI1S9co1ZaSEy k8ESzdpXRg6YDSQ0sPKX1h6QtKH1v6m NKXlr6k9I2lbyi9b+l9SpmljNJ1S9cp 5ZaSVwdhvGrpKqWhpeS3H+w1S3uU5pb mlD6w9AGlfUvJr2J4nlKjfwYLRUv rE0ieUCkvJ7cwfmbpM0pTS1NKn1r6l NLXlr6m9JGljyhNLCXvBuB0YukOpfYt UF1Sum3pNqVnlp653wvw2TSGroW5ZSv YojSzNKN0w1LySwGOEpaekvNkrNq72v RtE7mvxWrGHazN+PRqkvNYzbiDtXen6 dXk/hSrGR+Qrq/vz16kQErhTn8yt7CM 38LSwv7K4vL3i3e37y7cW23f0F71va +8W56y94P3j3vsdfz9rzI+9P72/vH+3 c+mv91/rf53yfqO1fa70Op/5P/4D+ G/4mg=</latexit> Pr(y|µ, σ2) = 1 p 2⇡σ2 exp \\\\uf8ff −(y −µ)2 2σ2 \" In this case, just the mean Just learn the mean, 𝜇, and assume the variance is fixed,. 63 \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf\\', \\'page\\': 61, \\'date\\': \\'02/01/2024\\\\n Thursday\\', \\'tldr\\': \\'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. This is a key topic to aid you in applying deep learning models to new types of data.\\', \\'title\\': \\'05 - Loss Functions\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_k2t4xjkd\\', \\'suggested_readings\\': \\'UDL Chapter 5\\', \\'score\\': -0.038714982109774976}']\n"
]
}
],
"source": [
"import os\n",
"from dotenv import load_dotenv\n",
"from literalai import LiteralClient\n",
"\n",
"from trulens_eval import Select\n",
"from trulens_eval.tru_virtual import VirtualApp\n",
"\n",
"# Free-form metadata describing the logged app; as the placeholder values\n",
"# themselves say, every field is optional.\n",
"virtual_app = VirtualApp(\n",
"    {\n",
"        \"llm\": {\"modelname\": \"AI Tutor - GPT3 - FAISS\"},\n",
"        \"template\": \"information about the template I used in my app\",\n",
"        \"debug\": \"all of these fields are completely optional\",\n",
"    }\n",
")\n",
"\n",
"# Further metadata can be attached afterwards through selector paths.\n",
"virtual_app[Select.RecordCalls.llm.maxtokens] = 1024\n",
"\n",
"# Selectors for the two virtual components that the logged calls refer to,\n",
"# each registered on the app under a readable label.\n",
"retriever = Select.RecordCalls.retriever\n",
"synthesizer = Select.RecordCalls.synthesizer\n",
"virtual_app[retriever] = \"retriever\"\n",
"virtual_app[synthesizer] = \"synthesizer\"\n",
"\n",
"\n",
"# Load variables from a .env file; the Literal AI client below is built\n",
"# without explicit arguments.\n",
"load_dotenv()\n",
"literal_client = LiteralClient()\n",
"\n",
"# Request threads with `first=1` and dump the result for inspection.\n",
"threads = literal_client.api.get_threads(first=1).data\n",
"print(threads)\n",
"\n",
"# Keep only the steps that were logged under the name \"RAG\".\n",
"rag_steps = [\n",
"    step\n",
"    for thread in threads\n",
"    for step in thread.steps\n",
"    if step.name == \"RAG\"\n",
"]\n",
"\n",
"# Column-oriented log: position i in each of the four lists describes the\n",
"# same RAG step.\n",
"save_log_dict = {\n",
"    \"step_id\": [step.id for step in rag_steps],\n",
"    \"user_input\": [step.input[\"question\"] for step in rag_steps],\n",
"    \"output\": [step.output[\"answer\"] for step in rag_steps],\n",
"    \"context\": [step.output[\"source_documents\"] for step in rag_steps],\n",
"}\n",
"\n",
"print(save_log_dict)\n",
"\n",
"from trulens_eval.tru_virtual import VirtualRecord\n",
"\n",
"# Selectors for the two virtual calls logged on every record. The method\n",
"# names are arbitrary labels kept for readability. They are defined once,\n",
"# outside the loop, so that `context_call` exists for the feedback\n",
"# definitions in the next cell even when no RAG steps were fetched.\n",
"context_call = retriever.get_context\n",
"generation = synthesizer.generate\n",
"\n",
"# Build one VirtualRecord per logged RAG step.\n",
"records = []\n",
"for step_id, user_input, output, context in zip(\n",
"    save_log_dict[\"step_id\"],\n",
"    save_log_dict[\"user_input\"],\n",
"    save_log_dict[\"output\"],\n",
"    save_log_dict[\"context\"],\n",
"):\n",
"    print(f\"Step ID: {step_id}\")\n",
"    print(f\"User Input: {user_input}\")\n",
"    print(f\"Output: {output}\")\n",
"    print(f\"Context: {context}\")\n",
"\n",
"    # The retriever call must return the retrieved context, one entry per\n",
"    # chunk, not the generated answer: the context-relevance and groundedness\n",
"    # feedbacks read `context_call.rets`, so logging the answer there would\n",
"    # score the answer against itself.\n",
"    chunks = context if isinstance(context, (list, tuple)) else [context]\n",
"    context_chunks = [f\"{chunk}\" for chunk in chunks]\n",
"\n",
"    rec = VirtualRecord(\n",
"        main_input=f\"{user_input}\",\n",
"        main_output=f\"{output}\",\n",
"        calls={\n",
"            # Retrieval: question in, context chunks out.\n",
"            context_call: dict(args=[f\"{user_input}\"], rets=context_chunks),\n",
"            # Generation: full context in, answer out.\n",
"            generation: dict(\n",
"                args=[f\"{context}\"],\n",
"                rets=[f\"{output}\"],\n",
"            ),\n",
"        },\n",
"    )\n",
"\n",
"    records.append(rec)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .\n",
"✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.retriever.get_context.rets[:] .\n",
"✅ In Groundedness, input source will be set to __record__.app.retriever.get_context.rets[:].collect() .\n",
"✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .\n",
"✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .\n",
"✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .\n"
]
}
],
"source": [
"from trulens_eval.feedback.provider import OpenAI\n",
"from trulens_eval.feedback.feedback import Feedback\n",
"\n",
"# Feedback scores are produced through the OpenAI provider.\n",
"provider = OpenAI()\n",
"\n",
"# The context under evaluation is whatever the virtual `get_context` call of\n",
"# the virtual `retriever` component returned. Only `rets` is a fixed name;\n",
"# the component and method names are arbitrary.\n",
"context = context_call.rets[:]\n",
"\n",
"# Relevance of each context chunk to the question.\n",
"f_context_relevance = Feedback(\n",
"    provider.context_relevance_with_cot_reasons\n",
").on_input().on(context)\n",
"\n",
"# Groundedness of the final answer in the collected context.\n",
"f_groundedness = Feedback(\n",
"    provider.groundedness_measure_with_cot_reasons,\n",
"    name=\"Groundedness\",\n",
").on(context.collect()).on_output()\n",
"\n",
"# Relevance of the final answer to the overall question.\n",
"f_qa_relevance = Feedback(\n",
"    provider.relevance_with_cot_reasons,\n",
"    name=\"Answer Relevance\",\n",
").on_input_output()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"🦑 Tru initialized with db url sqlite:///default.sqlite .\n",
"🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.\n"
]
}
],
"source": [
"from trulens_eval.tru_virtual import TruVirtual\n",
"\n",
"# Recorder binding the virtual app to the three feedback functions defined\n",
"# above in this notebook.\n",
"virtual_recorder = TruVirtual(\n",
"    app=virtual_app,\n",
"    app_id=\"AI Tutor - GPT3 - FAISS\",\n",
"    feedback_mode=\"deferred\",  # optional\n",
"    feedbacks=[f_context_relevance, f_groundedness, f_qa_relevance],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Hand every reconstructed record to the virtual recorder.\n",
"for record in records:\n",
"    virtual_recorder.add_record(record)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Force stopping dashboard ...\n",
"Starting dashboard ...\n",
"Config file already exists. Skipping writing process.\n",
"Credentials file already exists. Skipping writing process.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c3eb563d17e84983ad0d8eca36904ddb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dashboard started at http://10.0.0.226:8501 .\n"
]
},
{
"data": {
"text/plain": [
"<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from trulens_eval import Tru\n",
"\n",
"tru = Tru()\n",
"\n",
"# With force=True the captured output shows a running dashboard being\n",
"# stopped before the new one is started.\n",
"tru.run_dashboard(force=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Will keep max of 32 feedback(s) running.\n",
"Tasks are spread among max of 128 thread(s).\n",
"Will rerun running feedbacks after a minute.\n",
"Will rerun failed feedbacks after 5 minutes.\n"
]
},
{
"data": {
"text/plain": [
"<Thread(Thread-8 (runloop), started daemon 13681946624)>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "df51361ac4664845b804e574497e1b58",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Feedback Status: 80%|######## | 84/105 [00:00<?, ?feedbacks/s, DONE=84, NONE=21]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b27d0ca707e8414692191c246324466c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Done Runs: 0runs [00:00, ?runs/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d8345150aecc47fe9f37cc84f53fbb33",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Waiting for Runs: 0runs [00:00, ?runs/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Start the evaluator; per the captured output it runs in a daemon thread\n",
"# and works through the pending feedbacks.\n",
"tru.start_evaluator()\n",
"\n",
"# tru.stop_evaluator() # stop if needed"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "devpy11",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|