{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "c32bf0b9-1445-4ede-ae49-7dd63ff3b08e", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:37.268964Z", "iopub.status.busy": "2024-01-17T01:35:37.268658Z", "iopub.status.idle": "2024-01-17T01:35:37.284720Z", "shell.execute_reply": "2024-01-17T01:35:37.282292Z", "shell.execute_reply.started": "2024-01-17T01:35:37.268927Z" } }, "outputs": [], "source": [ "# for use in tutorial and development; do not include this `sys.path` change in production:\n", "import sys ; sys.path.insert(0, \"../\")" ] }, { "cell_type": "markdown", "id": "c8ff5d81-110c-42ae-8aa7-ed4fffea40c6", "metadata": {}, "source": [ "# reproduce results from the \"InGram\" paper" ] }, { "cell_type": "markdown", "id": "1e847d0a-bc6c-470a-9fef-620ebbdbbbc3", "metadata": {}, "source": [ "This is an attempt to reproduce the _graph of relations_ example given in `lee2023ingram`" ] }, { "cell_type": "markdown", "id": "61d8d39a-23e4-48e7-b8f4-0dd724ccf586", "metadata": {}, "source": [ "## environment" ] }, { "cell_type": "code", "execution_count": 2, "id": "22489527-2ad5-4e3c-be23-f511e6bcf69f", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:37.296455Z", "iopub.status.busy": "2024-01-17T01:35:37.295661Z", "iopub.status.idle": "2024-01-17T01:35:45.520968Z", "shell.execute_reply": "2024-01-17T01:35:45.519870Z", "shell.execute_reply.started": "2024-01-17T01:35:37.296419Z" }, "scrolled": true }, "outputs": [], "source": [ "import os\n", "import pathlib\n", "import typing\n", "\n", "from icecream import ic\n", "from pyinstrument import Profiler\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import pyvis\n", "\n", "import textgraphs" ] }, { "cell_type": "code", "execution_count": 3, "id": "438f5775-487b-493e-a172-59b652b94955", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:45.525301Z", "iopub.status.busy": "2024-01-17T01:35:45.524842Z", "iopub.status.idle": 
"2024-01-17T01:35:45.547432Z", "shell.execute_reply": "2024-01-17T01:35:45.546101Z", "shell.execute_reply.started": "2024-01-17T01:35:45.525270Z" } }, "outputs": [], "source": [ "%load_ext watermark" ] }, { "cell_type": "code", "execution_count": 4, "id": "adc052dd-5cca-4d11-b543-3f0999f4f883", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:45.548916Z", "iopub.status.busy": "2024-01-17T01:35:45.548691Z", "iopub.status.idle": "2024-01-17T01:35:45.592124Z", "shell.execute_reply": "2024-01-17T01:35:45.590790Z", "shell.execute_reply.started": "2024-01-17T01:35:45.548889Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Last updated: 2024-01-16T17:35:45.550539-08:00\n", "\n", "Python implementation: CPython\n", "Python version : 3.10.11\n", "IPython version : 8.20.0\n", "\n", "Compiler : Clang 13.0.0 (clang-1300.0.29.30)\n", "OS : Darwin\n", "Release : 21.6.0\n", "Machine : x86_64\n", "Processor : i386\n", "CPU cores : 8\n", "Architecture: 64bit\n", "\n" ] } ], "source": [ "%watermark" ] }, { "cell_type": "code", "execution_count": 5, "id": "6e4618da-daf9-44c9-adbb-e5781dba5504", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:45.597302Z", "iopub.status.busy": "2024-01-17T01:35:45.596553Z", "iopub.status.idle": "2024-01-17T01:35:45.623704Z", "shell.execute_reply": "2024-01-17T01:35:45.621991Z", "shell.execute_reply.started": "2024-01-17T01:35:45.597251Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "matplotlib: 3.8.2\n", "pandas : 2.1.4\n", "pyvis : 0.3.2\n", "textgraphs: 0.5.0\n", "sys : 3.10.11 (v3.10.11:7d4cc5aa85, Apr 4 2023, 19:05:19) [Clang 13.0.0 (clang-1300.0.29.30)]\n", "\n" ] } ], "source": [ "%watermark --iversions" ] }, { "cell_type": "markdown", "id": "1a04e3dc-57d8-43a4-a342-cc38b86fc6a6", "metadata": {}, "source": [ "## load example graph" ] }, { "cell_type": "markdown", "id": "7c567afd-2f44-4391-899a-da6aba3d222e", "metadata": {}, "source": [ "load from a 
JSON file which replicates the data for the \"Figure 3\" example" ] }, { "cell_type": "code", "execution_count": 6, "id": "630430c5-21dc-4897-9a4b-3b01baf3de17", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:45.625764Z", "iopub.status.busy": "2024-01-17T01:35:45.625341Z", "iopub.status.idle": "2024-01-17T01:35:45.633487Z", "shell.execute_reply": "2024-01-17T01:35:45.632477Z", "shell.execute_reply.started": "2024-01-17T01:35:45.625720Z" } }, "outputs": [], "source": [ "graph: textgraphs.GraphOfRelations = textgraphs.GraphOfRelations(\n", " textgraphs.SimpleGraph()\n", ")\n", "\n", "ingram_path: pathlib.Path = pathlib.Path(os.getcwd()) / \"ingram.json\"\n", "\n", "graph.load_ingram(\n", " ingram_path,\n", " debug = False,\n", ")" ] }, { "cell_type": "markdown", "id": "01152885-f301-49b1-ab61-f5b19d81c036", "metadata": {}, "source": [ "set up the statistical stack profiling" ] }, { "cell_type": "code", "execution_count": 7, "id": "2a289117-301d-4027-ae1b-200201fb5f93", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:45.639466Z", "iopub.status.busy": "2024-01-17T01:35:45.639216Z", "iopub.status.idle": "2024-01-17T01:35:45.646105Z", "shell.execute_reply": "2024-01-17T01:35:45.644476Z", "shell.execute_reply.started": "2024-01-17T01:35:45.639439Z" } }, "outputs": [], "source": [ "profiler: Profiler = Profiler()\n", "profiler.start()" ] }, { "cell_type": "markdown", "id": "bf9d4f99-b82b-4d11-a9a4-31d0337f4aa8", "metadata": {}, "source": [ "## decouple graph edges into \"seeds\"" ] }, { "cell_type": "code", "execution_count": 8, "id": "da6fcb0f-b2ac-4f74-af39-2c129c750cab", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:45.648335Z", "iopub.status.busy": "2024-01-17T01:35:45.647905Z", "iopub.status.idle": "2024-01-17T01:35:46.520730Z", "shell.execute_reply": "2024-01-17T01:35:46.518237Z", "shell.execute_reply.started": "2024-01-17T01:35:45.648291Z" }, "scrolled": true }, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "\n", "--- triples in source graph ---\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "ic| edge.src_node: 0, rel_id: 1, edge.dst_node: 1\n", "ic| edge.src_node: 0, rel_id: 0, edge.dst_node: 2\n", "ic| edge.src_node: 0, rel_id: 0, edge.dst_node: 3\n", "ic| edge.src_node: 4, rel_id: 2, edge.dst_node: 2\n", "ic| edge.src_node: 4, rel_id: 2, edge.dst_node: 3\n", "ic| edge.src_node: 4, rel_id: 1, edge.dst_node: 5\n", "ic| edge.src_node: 6, rel_id: 1, edge.dst_node: 5\n", "ic| edge.src_node: 6, rel_id: 2, edge.dst_node: 7\n", "ic| edge.src_node: 6, rel_id: 4, edge.dst_node: 8\n", "ic| edge.src_node: 9, " ] }, { "name": "stdout", "output_type": "stream", "text": [ " Steven_Spielberg Profession Director\n", " Steven_Spielberg Directed Catch_Me_If_Can\n", " Steven_Spielberg Directed Saving_Private_Ryan\n", " Tom_Hanks ActedIn Catch_Me_If_Can\n", " Tom_Hanks ActedIn Saving_Private_Ryan\n", " Tom_Hanks Profession Actor\n", " Mark_Hamil Profession Actor\n", " Mark_Hamil ActedIn Star_Wars\n", " Mark_Hamil BornIn California\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "rel_id: 5, edge.dst_node: 10\n", "ic| edge.src_node: 9, rel_id: 4, edge.dst_node: 10\n", "ic| edge.src_node: 9, rel_id: 3, edge.dst_node: 8\n", "ic| edge.src_node: 11, rel_id: 4, edge.dst_node: 12\n", "ic| edge.src_node: 11, rel_id: 3, edge.dst_node: 12\n", "ic| edge.src_node: 11, rel_id: 3, edge.dst_node: 8\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " Brad_Pitt Nationality USA\n", " Brad_Pitt BornIn USA\n", " Brad_Pitt LivedIn California\n", " Clint_Eastwood BornIn San_Francisco\n", " Clint_Eastwood LivedIn San_Francisco\n", " Clint_Eastwood LivedIn California\n" ] } ], "source": [ "graph.seeds(\n", " debug = True,\n", ")" ] }, { "cell_type": "code", "execution_count": 9, "id": "a9c0fd41-45e9-4019-94bf-8e2cf5c33454", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:46.524005Z", "iopub.status.busy": 
"2024-01-17T01:35:46.523531Z", "iopub.status.idle": "2024-01-17T01:35:46.531929Z", "shell.execute_reply": "2024-01-17T01:35:46.530922Z", "shell.execute_reply.started": "2024-01-17T01:35:46.523965Z" }, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "--- nodes in source graph ---\n", "n: 0, Steven_Spielberg\n", " head: []\n", " tail: [(0, 'Profession', 1), (0, 'Directed', 2), (0, 'Directed', 3)]\n", "n: 1, Director\n", " head: [(0, 'Profession', 1)]\n", " tail: []\n", "n: 2, Catch_Me_If_Can\n", " head: [(0, 'Directed', 2), (4, 'ActedIn', 2)]\n", " tail: []\n", "n: 3, Saving_Private_Ryan\n", " head: [(0, 'Directed', 3), (4, 'ActedIn', 3)]\n", " tail: []\n", "n: 4, Tom_Hanks\n", " head: []\n", " tail: [(4, 'ActedIn', 2), (4, 'ActedIn', 3), (4, 'Profession', 5)]\n", "n: 5, Actor\n", " head: [(4, 'Profession', 5), (6, 'Profession', 5)]\n", " tail: []\n", "n: 6, Mark_Hamil\n", " head: []\n", " tail: [(6, 'Profession', 5), (6, 'ActedIn', 7), (6, 'BornIn', 8)]\n", "n: 7, Star_Wars\n", " head: [(6, 'ActedIn', 7)]\n", " tail: []\n", "n: 8, California\n", " head: [(6, 'BornIn', 8), (9, 'LivedIn', 8), (11, 'LivedIn', 8)]\n", " tail: []\n", "n: 9, Brad_Pitt\n", " head: []\n", " tail: [(9, 'Nationality', 10), (9, 'BornIn', 10), (9, 'LivedIn', 8)]\n", "n: 10, USA\n", " head: [(9, 'Nationality', 10), (9, 'BornIn', 10)]\n", " tail: []\n", "n: 11, Clint_Eastwood\n", " head: []\n", " tail: [(11, 'BornIn', 12), (11, 'LivedIn', 12), (11, 'LivedIn', 8)]\n", "n: 12, San_Francisco\n", " head: [(11, 'BornIn', 12), (11, 'LivedIn', 12)]\n", " tail: []\n", "\n", "--- edges in source graph ---\n", "e: 0, Directed\n", "e: 1, Profession\n", "e: 2, ActedIn\n", "e: 3, LivedIn\n", "e: 4, BornIn\n", "e: 5, Nationality\n" ] } ], "source": [ "graph.trace_source_graph()" ] }, { "cell_type": "markdown", "id": "7e7cb5f3-132c-4999-81eb-4f6167a31c9e", "metadata": {}, "source": [ "## construct a _graph of relations_" ] }, { "cell_type": "markdown", "id": 
"105702ed-7f9c-42ca-a57b-f1b15a206acf", "metadata": { "execution": { "iopub.execute_input": "2024-01-02T22:31:57.839227Z", "iopub.status.busy": "2024-01-02T22:31:57.838113Z", "iopub.status.idle": "2024-01-02T22:31:57.853374Z", "shell.execute_reply": "2024-01-02T22:31:57.851669Z", "shell.execute_reply.started": "2024-01-02T22:31:57.839155Z" } }, "source": [ "Transform the graph data into _graph of relations_" ] }, { "cell_type": "code", "execution_count": 10, "id": "eae8da18-f1be-4673-94e7-7b633bab9bd1", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:46.534228Z", "iopub.status.busy": "2024-01-17T01:35:46.533720Z", "iopub.status.idle": "2024-01-17T01:35:48.718340Z", "shell.execute_reply": "2024-01-17T01:35:48.715493Z", "shell.execute_reply.started": "2024-01-17T01:35:46.534166Z" }, "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "ic| node_id: 0, len(seeds" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "--- transformed triples ---\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "): 3\n", "ic| trans_arc: TransArc(pair_key=(0, 1),\n", " a_rel=1,\n", " b_rel=0,\n", " node_id=0,\n", " a_dir=,\n", " b_dir=)\n", "ic| trans_arc: TransArc(pair_key=(0, 1),\n", " a_rel=1,\n", " b_rel=0,\n", " node_id=0,\n", " a_dir=,\n", " b_dir=)\n", "ic| trans_arc: TransArc(pair_key=(0, 0),\n", " a_rel=0,\n", " b_rel=0,\n", " node_id=0,\n", " a_dir=,\n", " b_dir=)\n", "ic| node_id: 1, len(seeds" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "): 1\n", "ic| node_id: 2, len(seeds): 2\n", "ic| trans_arc: TransArc(pair_key=(0, 2),\n", " a_rel=0,\n", " b_rel=2,\n", " node_id=2,\n", " a_dir=,\n", " b_dir=<" ] }, { "name": "stdout", "output_type": "stream", "text": [ " (0, 2) Directed.head Catch_Me_If_Can ActedIn.head\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "RelDir.HEAD: 0>)\n", "ic| node_id: 3, 
len(seeds): 2\n", "ic| trans_arc: TransArc(pair_key=(0, 2),\n", " a_rel=0,\n", " b_rel=2,\n", " node_id=3,\n", " a_dir=,\n", " b_dir=)\n", "ic| node_id" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " (0, 2) Directed.head Saving_Private_Ryan ActedIn.head\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ ": 4, len(seeds): 3\n", "ic| trans_arc: TransArc(pair_key=(2, 2),\n", " a_rel=2,\n", " b_rel=2,\n", " node_id=4,\n", " a_dir=,\n", " b_dir=)\n", "ic| trans_arc: TransArc(pair_key=(1, 2),\n", " a_rel=2,\n", " b_rel=1,\n", " node_id=4,\n", " a_dir=,\n", " b_dir=)\n", "ic| trans_arc: TransArc(pair_key=(1, 2)" ] }, { "name": "stdout", "output_type": "stream", "text": [ " (2, 2) ActedIn.tail Tom_Hanks ActedIn.tail\n", "\n", " (1, 2) ActedIn.tail Tom_Hanks Profession.tail\n", "\n", " (1, 2) ActedIn.tail Tom_Hanks Profession.tail\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ ",\n", " a_rel=2,\n", " b_rel=1,\n", " node_id=4,\n", " a_dir=,\n", " b_dir=)\n", "ic|" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " node_id: 5, len(seeds): 2\n", "ic| trans_arc: TransArc(pair_key=(1, 1),\n", " a_rel=1,\n", " b_rel=1,\n", " " ] }, { "name": "stdout", "output_type": "stream", "text": [ " (1, 1) Profession.head Actor Profession.head\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "node_id=5,\n", " a_dir=,\n", " b_dir=)\n", "ic| node_id: 6, len(seeds): 3\n", "ic| trans_arc: TransArc(pair_key=(1, 2),\n", " a_rel=1,\n", " b_rel=2,\n", " node_id=6,\n", " a_dir=,\n", " b_dir=)\n", "ic| trans_arc: TransArc(pair_key=(1, 4),\n", " a_rel=1,\n", " b_rel=4,\n", " node_id=6,\n", " a_dir" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " (1, 4) Profession.tail Mark_Hamil BornIn.tail\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "=,\n", " b_dir=)\n", "ic| trans_arc: TransArc(pair_key=(2, 4),\n", " a_rel=2,\n", " 
b_rel=4,\n", " node_id=6,\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " (2, 4) ActedIn.tail Mark_Hamil BornIn.tail\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " a_dir=,\n", " b_dir=)\n", "ic| node_id: 7, len(seeds): 1\n", "ic| node_id: 8, len(seeds): 3\n", "ic| trans_arc: TransArc(pair_key=(3, 4),\n", " a_rel=4,\n", " b_rel=3,\n", " node_id=8,\n", " a_dir=,\n", " b_dir=)\n", "ic| trans_arc: TransArc(pair_key=(3, 4),\n", " a_rel=4,\n", " b_rel=3,\n", " node_id=8,\n", " a_dir=,\n", " b_dir=)\n", "ic| trans_arc: TransArc(pair_key=(3, 3),\n", " a_rel=3,\n", " b_rel=3,\n", " node_id=8,\n", " a_dir=,\n", " b_dir=)\n", "ic| node_id: 9, len(seeds): 3\n", "ic" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " (3, 4) BornIn.head California LivedIn.head\n", "\n", " (3, 3) LivedIn.head California LivedIn.head\n", "\n", " (4, 5) Nationality.tail Brad_Pitt BornIn.tail\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "| trans_arc: TransArc(pair_key=(4, 5),\n", " a_rel=5,\n", " b_rel=4,\n", " node_id=9,\n", " a_dir=,\n", " b_dir=)\n", "ic| trans_arc: TransArc(pair_key=(3, 5),\n", " a_rel=5,\n", " b_rel=3,\n", " node_id=9,\n", " a_dir=,\n", " b_dir=<" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " (3, 5) Nationality.tail Brad_Pitt LivedIn.tail\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "RelDir.TAIL: 1>)\n", "ic| trans_arc: TransArc(pair_key=(3, 4),\n", " a_rel=4,\n", " b_rel=3,\n", " node_id=9,\n", " a_dir=,\n", " b_dir=)\n", "ic| node_id: 10, len(seeds): 2\n", "ic| trans_arc: TransArc(pair_key=(4, 5),\n", " a_rel=5,\n", " b_rel=4,\n", " node_id=10,\n", " a_dir=,\n", " b_dir=)\n", "ic| node_id: 11, len(seeds): 3\n", "ic| trans_arc: TransArc(pair_key=(3, " ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " (3, 4) BornIn.tail Brad_Pitt LivedIn.tail\n", "\n", " (4, 5) Nationality.head USA BornIn.head\n", "\n", " (3, 4) BornIn.tail Clint_Eastwood 
LivedIn.tail\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "4),\n", " a_rel=4,\n", " b_rel=3,\n", " node_id=11,\n", " a_dir=,\n", " b_dir=)\n", "ic" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " (3, 4) BornIn.tail Clint_Eastwood LivedIn.tail\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "| trans_arc: TransArc(pair_key=(3, 4),\n", " a_rel=4,\n", " b_rel=3,\n", " node_id=11,\n", " a_dir=,\n", " b_dir=)\n", "ic| trans_arc: TransArc(pair_key=(3, 3),\n", " a_rel=3,\n", " b_rel=3,\n", " node_id=11,\n", " a_dir=,\n", " b_dir=)\n", "ic| node_id: 12, len(seeds" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " (3, 3) LivedIn.tail Clint_Eastwood LivedIn.tail\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "): 2\n", "ic| trans_arc: TransArc(pair_key=(3, 4),\n", " a_rel=4,\n", " b_rel=3,\n", " node_id=12,\n", " a_dir=,\n", " b_dir=)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " (3, 4) BornIn.head San_Francisco LivedIn.head\n", "\n" ] } ], "source": [ "graph.construct_gor(\n", "\tdebug = True,\n", ")" ] }, { "cell_type": "code", "execution_count": 11, "id": "d5a06b72-c19b-440c-83c7-332f28aa9586", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:48.731674Z", "iopub.status.busy": "2024-01-17T01:35:48.731142Z", "iopub.status.idle": "2024-01-17T01:35:48.745182Z", "shell.execute_reply": "2024-01-17T01:35:48.739573Z", "shell.execute_reply.started": "2024-01-17T01:35:48.731638Z" }, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "--- collect shared entity tallies ---\n", "0 Directed\n", " h: 4 dict_items([(2, 4.0)])\n", " t: 6 dict_items([(0, 3.0), (1, 3.0)])\n", "1 Profession\n", " h: 3 dict_items([(1, 3.0)])\n", " t: 10 dict_items([(0, 3.0), (2, 5.0), (4, 2.0)])\n", "2 ActedIn\n", " h: 4 dict_items([(0, 4.0)])\n", " t: 10 dict_items([(1, 5.0), (2, 3.0), (4, 2.0)])\n", "3 LivedIn\n", " h: 8 dict_items([(3, 
3.0), (4, 5.0)])\n", " t: 10 dict_items([(3, 3.0), (4, 5.0), (5, 2.0)])\n", "4 BornIn\n", " h: 7 dict_items([(3, 5.0), (5, 2.0)])\n", " t: 11 dict_items([(1, 2.0), (2, 2.0), (3, 5.0), (5, 2.0)])\n", "5 Nationality\n", " h: 2 dict_items([(4, 2.0)])\n", " t: 4 dict_items([(3, 2.0), (4, 2.0)])\n" ] } ], "source": [ "scores: typing.Dict[ tuple, float ] = graph.get_affinity_scores(\n", " debug = True,\n", ")" ] }, { "cell_type": "code", "execution_count": 12, "id": "a3d2310b-11c1-476d-82ab-1e34bc496cb1", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:48.749266Z", "iopub.status.busy": "2024-01-17T01:35:48.748905Z", "iopub.status.idle": "2024-01-17T01:35:48.964799Z", "shell.execute_reply": "2024-01-17T01:35:48.957975Z", "shell.execute_reply.started": "2024-01-17T01:35:48.749231Z" }, "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "ic| scores: {(0, 0): 0.3,\n", " (0, 1): 0.2653846153846154,\n", " (0, 2): 0.34285714285714286,\n", " (1, 1): 0.23076923076923078,\n", " (1, 2): 0.3708791208791209,\n", " (1, 4): 0.13247863247863248,\n", " (2, 2): 0.21428571428571427,\n", " (2, 4): 0.12698412698412698,\n", " (3, 3): 0.3333333333333333,\n", " (3, 4): 0.5555555555555556,\n", " (3, 5): 0.2222222222222222,\n", " (4, 5): 0.4444444444444444}\n" ] } ], "source": [ "ic(scores);" ] }, { "cell_type": "markdown", "id": "8b71b841-0cf5-4cc6-af4c-c85344b8f6c5", "metadata": {}, "source": [ "## visualize the transform results" ] }, { "cell_type": "code", "execution_count": 13, "id": "5901a49e-3f90-4061-9c3a-e9d1f05b40f3", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:48.973661Z", "iopub.status.busy": "2024-01-17T01:35:48.973146Z", "iopub.status.idle": "2024-01-17T01:35:49.339291Z", "shell.execute_reply": "2024-01-17T01:35:49.337857Z", "shell.execute_reply.started": "2024-01-17T01:35:48.973607Z" } }, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "graph.render_gor_plt(scores)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 14, "id": "8fa85274-6d16-48eb-b875-01108a9575b8", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:49.341965Z", "iopub.status.busy": "2024-01-17T01:35:49.341537Z", "iopub.status.idle": "2024-01-17T01:35:49.383683Z", "shell.execute_reply": "2024-01-17T01:35:49.382725Z", "shell.execute_reply.started": "2024-01-17T01:35:49.341916Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tmp.fig03.html\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pv_graph: pyvis.network.Network = graph.render_gor_pyvis(scores)\n", "\n", "pv_graph.force_atlas_2based(\n", " gravity = -38,\n", " central_gravity = 0.01,\n", " spring_length = 231,\n", " spring_strength = 0.7,\n", " damping = 0.8,\n", " overlap = 0,\n", ")\n", "\n", "pv_graph.show_buttons(filter_ = [ \"physics\" ])\n", "pv_graph.toggle_physics(True)\n", "\n", "pv_graph.prep_notebook()\n", "pv_graph.show(\"tmp.fig03.html\")" ] }, { "cell_type": "markdown", "id": "07cf6fca-af95-4cf0-9e3b-247521bafbff", "metadata": {}, "source": [ "## analysis" ] }, { "cell_type": "markdown", "id": "97af44dc-4e56-4986-9f54-cbfaff67e3d4", "metadata": {}, "source": [ "As the results below illustrate, the computed _affinity scores_ differ from what is published in `lee2023ingram`. After trying several different variations of interpretation for the paper's descriptions, the current approach provides the closest approximation that we have obtained." 
] }, { "cell_type": "code", "execution_count": 15, "id": "f64462b4-654a-4e2e-bea2-a36bdc5ec967", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:49.387402Z", "iopub.status.busy": "2024-01-17T01:35:49.386218Z", "iopub.status.idle": "2024-01-17T01:35:49.434520Z", "shell.execute_reply": "2024-01-17T01:35:49.432123Z", "shell.execute_reply.started": "2024-01-17T01:35:49.387333Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pairrel_arel_baffinityexpected
0(0, 0)DirectedDirected0.30NaN
1(0, 1)DirectedProfession0.270.22
2(0, 2)DirectedActedIn0.340.50
3(1, 1)ProfessionProfession0.23NaN
4(1, 2)ProfessionActedIn0.370.33
5(1, 4)ProfessionBornIn0.130.11
6(2, 2)ActedInActedIn0.21NaN
7(2, 4)ActedInBornIn0.130.11
8(3, 3)LivedInLivedIn0.33NaN
9(3, 4)LivedInBornIn0.560.81
10(3, 5)LivedInNationality0.220.11
11(4, 5)BornInNationality0.440.36
\n", "
" ], "text/plain": [ " pair rel_a rel_b affinity expected\n", "0 (0, 0) Directed Directed 0.30 NaN\n", "1 (0, 1) Directed Profession 0.27 0.22\n", "2 (0, 2) Directed ActedIn 0.34 0.50\n", "3 (1, 1) Profession Profession 0.23 NaN\n", "4 (1, 2) Profession ActedIn 0.37 0.33\n", "5 (1, 4) Profession BornIn 0.13 0.11\n", "6 (2, 2) ActedIn ActedIn 0.21 NaN\n", "7 (2, 4) ActedIn BornIn 0.13 0.11\n", "8 (3, 3) LivedIn LivedIn 0.33 NaN\n", "9 (3, 4) LivedIn BornIn 0.56 0.81\n", "10 (3, 5) LivedIn Nationality 0.22 0.11\n", "11 (4, 5) BornIn Nationality 0.44 0.36" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df: pd.DataFrame = graph.trace_metrics(scores)\n", "df" ] }, { "cell_type": "markdown", "id": "ff49fe28-e75f-4590-8b87-0d8962928cba", "metadata": {}, "source": [ "## statistical stack profile instrumentation" ] }, { "cell_type": "code", "execution_count": 16, "id": "af4ecb06-370f-4077-9899-29a1673e4768", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:49.437344Z", "iopub.status.busy": "2024-01-17T01:35:49.436840Z", "iopub.status.idle": "2024-01-17T01:35:49.444892Z", "shell.execute_reply": "2024-01-17T01:35:49.444135Z", "shell.execute_reply.started": "2024-01-17T01:35:49.437293Z" } }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "profiler.stop()" ] }, { "cell_type": "code", "execution_count": 17, "id": "d5ac2ce6-15b1-41ad-8215-8a5f76036cf1", "metadata": { "execution": { "iopub.execute_input": "2024-01-17T01:35:49.446514Z", "iopub.status.busy": "2024-01-17T01:35:49.446199Z", "iopub.status.idle": "2024-01-17T01:35:49.728817Z", "shell.execute_reply": "2024-01-17T01:35:49.728098Z", "shell.execute_reply.started": "2024-01-17T01:35:49.446483Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " _ ._ __/__ _ _ _ _ _/_ Recorded: 17:35:45 Samples: 2526\n", " /_//_/// /_\\ / //_// / //_'/ // 
Duration: 3.799 CPU time: 4.060\n", "/ _/ v4.6.1\n", "\n", "Program: /Users/paco/src/textgraphs/venv/lib/python3.10/site-packages/ipykernel_launcher.py -f /Users/paco/Library/Jupyter/runtime/kernel-27f0c564-73f8-45ab-9f64-8b064ae1de10.json\n", "\n", "3.799 IPythonKernel.dispatch_queue ipykernel/kernelbase.py:525\n", "└─ 3.791 IPythonKernel.process_one ipykernel/kernelbase.py:511\n", " [10 frames hidden] ipykernel, IPython\n", " 3.680 ZMQInteractiveShell.run_ast_nodes IPython/core/interactiveshell.py:3394\n", " ├─ 2.176 ../ipykernel_4421/3358887201.py:1\n", " │ └─ 2.176 GraphOfRelations.construct_gor textgraphs/gor.py:311\n", " │ ├─ 1.607 IceCreamDebugger.__call__ icecream/icecream.py:204\n", " │ │ [17 frames hidden] icecream, colorama, ipykernel, thread...\n", " │ │ 1.078 lock.acquire \n", " │ └─ 0.566 GraphOfRelations._transformed_triples textgraphs/gor.py:275\n", " │ └─ 0.563 IceCreamDebugger.__call__ icecream/icecream.py:204\n", " │ [13 frames hidden] icecream, colorama, ipykernel, zmq, t...\n", " ├─ 0.866 ../ipykernel_4421/4061275008.py:1\n", " │ └─ 0.866 GraphOfRelations.seeds textgraphs/gor.py:197\n", " │ └─ 0.865 IceCreamDebugger.__call__ icecream/icecream.py:204\n", " │ [42 frames hidden] icecream, inspect, posixpath, ../ipykernel_4421/559531165.py:1\n", " │ ├─ 0.234 show matplotlib/pyplot.py:482\n", " │ │ [32 frames hidden] matplotlib, matplotlib_inline, IPytho...\n", " │ └─ 0.128 GraphOfRelations.render_gor_plt textgraphs/gor.py:522\n", " │ └─ 0.104 draw_networkx networkx/drawing/nx_pylab.py:127\n", " │ [6 frames hidden] networkx, matplotlib\n", " ├─ 0.197 ../ipykernel_4421/1169542473.py:1\n", " │ └─ 0.197 IceCreamDebugger.__call__ icecream/icecream.py:204\n", " │ [14 frames hidden] icecream, colorama, ipykernel, thread...\n", " └─ 0.041 ../ipykernel_4421/2247466716.py:1\n", "\n", "\n" ] } ], "source": [ "profiler.print()" ] }, { "cell_type": "markdown", "id": "c47bcfd2-2bd6-49a5-8f1a-102d90edde39", "metadata": { "jp-MarkdownHeadingCollapsed": true }, 
"source": [ "## outro" ] }, { "cell_type": "markdown", "id": "68bea4f9-aec2-4b28-8f08-a4034851d066", "metadata": {}, "source": [ "_\\[ more parts are in progress, getting added to this demo \\]_" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.11" } }, "nbformat": 4, "nbformat_minor": 5 }