You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

490 lines
16 KiB
Plaintext

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "2bb27e2a-b568-4319-9a9c-24917d8e9d0e",
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"from operator import itemgetter\n",
"import torch\n",
"from transformers import pipeline\n",
"from transformers import AutoModel, AutoTokenizer\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "511b179c-fc8d-4aba-ab9d-fe0b8510d62a",
"metadata": {},
"outputs": [],
"source": [
"# Configure the PyTorch CUDA allocator with max_split_size_mb=512.\n",
"# NOTE(review): presumably intended to reduce memory fragmentation when the 7B model is loaded;\n",
"# the variable has to be set before CUDA is first initialised to have any effect - confirm.\n",
"os.environ[\"PYTORCH_CUDA_ALLOC_CONF\"] = \"max_split_size_mb:512\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9a18de0f-93bf-4e4e-b24d-107f083aff8e",
"metadata": {},
"outputs": [],
"source": [
"import chromadb\n",
"# Client that the document collection used by the rest of the notebook is created on.\n",
"# NOTE(review): Client() with no arguments is presumably in-memory only, so the collection\n",
"# has to be rebuilt in every new kernel session - confirm.\n",
"chroma_client = chromadb.Client()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a25976c9-4b6c-40d7-914c-ad7840a06636",
"metadata": {},
"outputs": [],
"source": [
"# get_or_create_collection keeps this cell re-runnable in a live kernel: create_collection\n",
"# raises when a collection with this name already exists on the client.\n",
"collection = chroma_client.get_or_create_collection(\"pathfinder7\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8298e128-7c96-43a0-8d69-9c2d9fc3a7fd",
"metadata": {},
"outputs": [],
"source": [
"# Scratch check: apply the boilerplate-stripping pattern to a sample string and display the result.\n",
"re.sub(r\"(\\d{8,}|Eric Ihli|<eihli@owoga.com>|paizo.com)\", \"\", \"paizo.com, Eric Ihli <eihli@owoga.com>, Feb 13, 2023paizo.com, Eric Ihli <eihli@owoga.com>, Feb 13, 2023\\n2365842023658420\\n236584204643808\\n46438084643808\\nAbility Score OverviewAbility Score Overview\\nEach ability score starts at 10, representing human Each ability score starts at 10, representing human \\naverage, but as you make character choices, youll adjust average, but as you make character choices, youll adjust \\nthese scores by applying ability boosts, which increase a these scores by applying ability boosts, which increase a \\nscore, and ability flaws, which decrease a score. As you score, and ability flaws,\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "293950e7-e1ac-4559-b8d4-617b6077f0c3",
"metadata": {},
"outputs": [],
"source": [
"def clean_page(page):\n",
"    \"\"\"Strip boilerplate from a piece of page text and collapse repeated whitespace.\n",
"\n",
"    Removes runs of 8 or more digits and the literal strings \"Eric Ihli\",\n",
"    \"<eihli@owoga.com>\" and \"paizo.com\", then squeezes every run of spaces to a\n",
"    single space and every run of newlines to a single newline.\n",
"\n",
"    Args:\n",
"        page: the text to clean, as a str.\n",
"\n",
"    Returns:\n",
"        The cleaned str.\n",
"    \"\"\"\n",
"    page = re.sub(r\"(\\d{8,}|Eric Ihli|<eihli@owoga.com>|paizo.com)\", \"\", page)\n",
"    # str.replace() matches its argument literally, so \" +\" and \"\\n+\" only work as\n",
"    # patterns when they go through re.sub().\n",
"    page = re.sub(r\" +\", \" \", page)\n",
"    return re.sub(r\"\\n+\", \"\\n\", page)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a5ba05c-65c8-42d7-943a-aabaa1c03f79",
"metadata": {},
"outputs": [],
"source": [
"import textract\n",
"import PyPDF2\n",
"\n",
"\n",
"def chunk_text(text, n_chunks=10):\n",
"    \"\"\"Slice ``text`` into consecutive pieces of ``len(text) // n_chunks`` characters.\n",
"\n",
"    Returns a list of ``(offset, piece)`` pairs, where ``offset`` is the index in\n",
"    ``text`` at which the piece starts. An empty ``text`` gives an empty list, and a\n",
"    ``text`` shorter than ``n_chunks`` characters comes back as one single piece.\n",
"    \"\"\"\n",
"    if not text:\n",
"        return []\n",
"    # For short text the integer division is 0, and range() rejects a step of 0;\n",
"    # fall back to the whole text as one piece in that case.\n",
"    size = len(text) // n_chunks or len(text)\n",
"    return [(offset, text[offset:offset + size]) for offset in range(0, len(text), size)]\n",
"\n",
"\n",
"def load_pdf_chunks(path):\n",
"    \"\"\"Read the PDF at ``path`` and return its cleaned text as ``[chunk_id, text]`` pairs.\n",
"\n",
"    ``chunk_id`` is ``\"<page index>.<character offset within the page>\"``. Each page is\n",
"    sliced with chunk_text() and every slice is passed through clean_page().\n",
"    \"\"\"\n",
"    chunks = []\n",
"    with open(path, \"rb\") as f:\n",
"        reader = PyPDF2.PdfReader(f)\n",
"        for i, page in enumerate(reader.pages):\n",
"            # NOTE(review): pages without extractable text presumably yield an empty\n",
"            # string; \"or\" also covers a None return - confirm against the PyPDF2 version in use.\n",
"            text = page.extract_text() or \"\"\n",
"            for j, piece in chunk_text(text):\n",
"                chunks.append([f\"{i}.{j}\", clean_page(piece)])\n",
"    return chunks\n",
"\n",
"\n",
"world_guide = load_pdf_chunks(\"/home/eihli/Downloads/Pathfinder 2e Lost Omens World Guide.pdf\")\n",
"rulebook = load_pdf_chunks(\"/home/eihli/Downloads/Pathfinder 2e Core Rulebook.pdf\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4811057-7d8c-4422-8491-d1bfd770b40e",
"metadata": {},
"outputs": [],
"source": [
"# Single list holding the [chunk_id, text] pairs of both books, world guide entries first.\n",
"text = world_guide + rulebook"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf98cad9-2661-4eea-add6-e5bfb9bc54ea",
"metadata": {},
"outputs": [],
"source": [
"from tqdm.auto import tqdm"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "30bfb343-8b89-48a8-91d9-436aeff0c524",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"dataset = []\n",
"batch_size = 32\n",
"\n",
"\n",
"def upsert_chunks(chunks, source):\n",
"    \"\"\"Upsert ``[chunk_id, text]`` pairs into ``collection`` in batches of ``batch_size``.\n",
"\n",
"    Args:\n",
"        chunks: list of ``[chunk_id, text]`` pairs.\n",
"        source: label stored as the ``source`` metadata and appended to each id\n",
"            (``\"<chunk_id>:<source>\"``), which keeps ids from different sources apart.\n",
"    \"\"\"\n",
"    for start in tqdm(range(0, len(chunks), batch_size)):\n",
"        batch = chunks[start:start + batch_size]\n",
"        collection.upsert(\n",
"            ids=[f\"{chunk_id}:{source}\" for chunk_id, _ in batch],\n",
"            documents=[document for _, document in batch],\n",
"            metadatas=[{\"source\": source, \"page\": chunk_id} for chunk_id, _ in batch],\n",
"        )\n",
"\n",
"\n",
"upsert_chunks(rulebook, \"rulebook\")\n",
"upsert_chunks(world_guide, \"world_guide\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "48002fdf-460f-4f8c-b539-1cb32b3bef82",
"metadata": {},
"outputs": [],
"source": [
"# Retrieval smoke test: ask the collection for the 2 best-matching stored chunks for a sample question.\n",
"results = collection.query(query_texts=[\"How can a goblin alchemist pair well with a human cleric?\"], n_results=2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e940fb3c-7d5d-4f97-9ff2-b423ba08a0a7",
"metadata": {},
"outputs": [],
"source": [
"results"
]
},
{
"cell_type": "markdown",
"id": "19c78008-2f5b-427a-9a80-b8942ec961df",
"metadata": {},
"source": [
"# Local data\n",
"\n",
"Reads every `.txt` file under the current directory (recursively) and upserts its contents into the collection."
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "8a4eafc1-7d7d-4424-923e-fcf0b545b620",
"metadata": {},
"outputs": [],
"source": [
"# Upsert every .txt file found under the current directory as one document each.\n",
"for dirpath, dirnames, filenames in os.walk(\".\"):\n",
"    for filename in filenames:\n",
"        if not filename.endswith(\".txt\"):\n",
"            continue\n",
"        path = os.path.join(dirpath, filename)\n",
"        # Read as UTF-8 explicitly instead of relying on the platform's default encoding.\n",
"        with open(path, encoding=\"utf-8\") as f:\n",
"            document = f.read()\n",
"        collection.upsert(\n",
"            # The path relative to the walk root is unique per file. The bare filename is not:\n",
"            # same-named files in different directories would share an id and overwrite each other.\n",
"            ids=[os.path.relpath(path, \".\")],\n",
"            documents=[document],\n",
"            # splitext drops only the final extension, even if \".txt\" also occurs earlier in the name.\n",
"            metadatas=[{\"source\": os.path.splitext(filename)[0], \"page\": \"0\"}],\n",
"        )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b7ab0341-e6f1-48e3-a76a-1606851e41f2",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d03391e-009a-4d21-a258-fe51b19109a2",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b78bf7c9-797d-4adf-94ad-dbf5e71783f5",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "abab6622-d805-4e70-b850-9b93b3003240",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "decd813a-0b44-47cb-a47d-09365e24058f",
"metadata": {},
"outputs": [],
"source": [
"from transformers import pipeline\n",
"from transformers import AutoModel, AutoTokenizer"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe2c2a4c-b72e-48cf-a2e9-0ab4da32d397",
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): the four commented-out lines below are disabled tokenizer/model setup code; nothing in this cell uses them.\n",
"# t = AutoTokenizer.from_pretrained(\"mistralai/Mistral-7B-v0.1\", device_map=\"auto\")\n",
"# t.add_special_tokens({\"pad_token\": t.eos_token})\n",
"# m = AutoModel.from_pretrained(\"mistralai/Mistral-7B-v0.1\")\n",
"# m.resize_token_embeddings(len(t))\n",
"# Text-generation pipeline that the prompts built by query() are run through.\n",
"# Sampling is enabled at temperature 0.6 and max_new_tokens is 512.\n",
"# NOTE(review): trust_remote_code=True lets code shipped in the model repository run locally -\n",
"# confirm it is actually required for this model.\n",
"p = pipeline(\n",
" \"text-generation\", model=\"mistralai/Mistral-7B-Instruct-v0.1\", device_map='auto',\n",
" trust_remote_code=True, model_kwargs={\"torch_dtype\": torch.bfloat16, \"load_in_8bit\": True},\n",
" max_new_tokens=512, do_sample=True, temperature=0.6,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a52ef080-84ad-4391-9d1f-ea772d4546f6",
"metadata": {},
"outputs": [],
"source": [
"from string import Template\n",
"template = Template(\"\"\"\n",
"<s>\n",
"[INST]\n",
"Answer questions about a fantasy role playing game like Dungeons and Dragons or Pathfinder.\n",
"[/INST]\n",
"Ok. I'll take into account the context of a fantasy role playing game.\n",
"</s>\n",
"\n",
"<s>\n",
"What follows are pages from the Pathfinder core rulebook and the Lost Omens world guide.\n",
"Lost Omens is in Pathfinder lore.\n",
"Each page is separated by \"------\".\n",
"For example:\n",
"\n",
"------ Page 20 of the core rulebook.\n",
"Alchemist\n",
"You enjoy tinkering with alchemical formulas and \n",
"substances in your spare time, and your studies have \n",
"progressed beyond mere experimentation.\n",
"------\n",
"------ Page 38 of the world guide.\n",
"Cleric\n",
"You are an ordained priest of your deity and have even \n",
"learned how to cast a few divine spells. Though your main \n",
"training lies elsewhere, your religious calling provides \n",
"you divine gifts. \n",
"------\n",
"\n",
"[INST]\n",
"Describe what a Cleric is.\n",
"[/INST]\n",
"A cleric is a priest of a particular deity. They can cast a few divine spells, but their main training lies elsewhere.\n",
"See page 38 of the world guide.\n",
"</s>\n",
"\n",
"<s>\n",
"What follows are pages from the Pathfinder core rulebook and the Lost Omens world guide.\n",
"Lost Omens is in Pathfinder lore.\n",
"Each page is separated by \"------\".\n",
"For example:\n",
"\n",
"------ Page 203 of the core rulebook.\n",
"• Barbarian alchemists can mix mutagens with \n",
"their rage to ferocious effect. \n",
"• Champion alchemists can focus on alchemical \n",
"items that boost defenses and heal others, \n",
"allowing their champions reaction \n",
"and lay on hands to go further. \n",
"• Fighter alchemists can use their \n",
"alchemy to gain additional options \n",
"in situations where their usual \n",
"tactics dont work. \n",
"• Ranger alchemists focus on alchemy \n",
"and snares, getting extra use out of \n",
"their Crafting skill and supplying \n",
"bombs for the bomb snare.\n",
"------\n",
"------ Page 102 of the world guide.\n",
"• Alchemist clerics work well with the chirurgeon \n",
"field, healing various ailments with either alchemy \n",
"or spells. \n",
"• Martial clerics are typically looking for a potent \n",
"domain spell or some healing to use in a pinch. \n",
"• Divine sorcerer clerics double down as the ultimate \n",
"divine spellcasters.\n",
"------\n",
"[INST]\n",
"What are some effective ways to play an alchemist?\n",
"[/INST]\n",
"There are many ways to be an effective alchemist. You could be a barbarian alchemist who mixes mutagens with rage to ferocious effect.\n",
"You could be a ranger alchemist who focuses on snares and getting bonuses to their crafting skill.\n",
"You could be a fighter alchemist who uses bombs, poisons, and traps to gain additional options when traditional tactics don't work.\n",
"</s>\n",
"\n",
"<s>\n",
"$pages\n",
"[INST]\n",
"$prompt\n",
"[/INST]\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12dddff0-a1af-4f4c-bee4-badb741f1885",
"metadata": {},
"outputs": [],
"source": [
"#list(zip(*list(*zip(results[\"ids\"], results[\"metadatas\"], results[\"documents\"]))))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4625ac89-9b0f-49af-a5ca-742b9b47db70",
"metadata": {},
"outputs": [],
"source": [
"def query(prompt):\n",
"    \"\"\"Build the complete LLM prompt for ``prompt`` from the best-matching stored chunks.\n",
"\n",
"    Asks ``collection`` for 6 results, renders each one as a delimited page block and\n",
"    substitutes the blocks and the question into ``template``.\n",
"\n",
"    Args:\n",
"        prompt: the user's question, as a str.\n",
"\n",
"    Returns:\n",
"        The filled-in template, as a str.\n",
"    \"\"\"\n",
"    results = collection.query(query_texts=[prompt], n_results=6)\n",
"    # Each field of the result holds one inner list per query text; a single query\n",
"    # text is sent, so its hits are at index 0.\n",
"    metadatas = results[\"metadatas\"][0]\n",
"    documents = results[\"documents\"][0]\n",
"    pages = \"\"\n",
"    for md, doc in zip(metadatas, documents):\n",
"        # Six dashes, matching the \"------\" separator the template tells the model to expect.\n",
"        pages += f\"------ Page {md['page']} of the {md['source']}\\n\"\n",
"        pages += doc + \"\\n\"\n",
"        pages += \"------\\n\"\n",
"    return template.substitute(pages=pages, prompt=prompt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "255a2e8c-df5c-46c2-9ec5-56b638cb7a5a",
"metadata": {
"jupyter": {
"source_hidden": true
},
"scrolled": true
},
"outputs": [],
"source": [
"# Print the fully rendered prompt (few-shot examples, retrieved pages and question) without running the model.\n",
"print(query(\"I'm a goblin alchemist who likes fire. What alchemical bomb should I use? What other spells, armor, or items would help my bombs be most effective?\"))"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "21b030a8-a104-43b4-a9cf-eae4ec1522d3",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
]
}
],
"source": [
"# Build the prompt with query() and run it through the text-generation pipeline `p`.\n",
"response = p(query(\"What spell should Kestrel use against undead?\"), return_full_text=False)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "e76b2e8f-45e6-4a99-ae58-9767799837e0",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Kestrel could use Disrupt Undead on an undead creature. It is a cantrip spell that deals 1d6 positive damage plus Kestrel's spellcasting ability modifier to the target. If the creature critically fails its Fortitude saving throw, it is stunned for 1 minute and becomes temporarily immune to any spell or other magical effect for 1 minute after that.\n",
"\n",
"If Kestrel wants to focus on dealing damage to undead creatures, they could use Necrotic Infusion. It is a feat that allows Kestrel to pour negative energy into their undead subject to empower its attacks. If the next action Kestrel uses is to cast Harm to restore Hit Points to a single undead creature, the target then deals an additional 1d6 negative damage with its melee weapons and unarmed attacks until the end of its next turn. If the Harm spell is a spell, Kestrel can choose to expend one of their Harm spells or one of their Heal spells to empower the undead creature instead.\n"
]
}
],
"source": [
"# Show the generated text of the first entry in the pipeline's response.\n",
"print(response[0][\"generated_text\"])"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "67cc80cb-1430-4183-9022-dd2c06ce70de",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"It is said that Krogoth is a much better gollum player at the Crow's Cask than Locke. He gets drunk less often and is able to use his knowledge of the game to gain an advantage.\n"
]
}
],
"source": [
"# Same flow as above - build the prompt with query(), generate with `p`, print the first result -\n",
"# for a question about the Crow's Cask.\n",
"response = p(query(\"Tell me who the best gollum player is at the Crow's Cask.\"), return_full_text=False)\n",
"print(response[0][\"generated_text\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b6ed234-c72d-4505-abf8-52d5d3e2473e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}