From 39ea2297f817fd03ca01f28c448177603972234e Mon Sep 17 00:00:00 2001 From: eliselavy <35602279+eliselavy@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:00:06 +0100 Subject: [PATCH] author's correction /https://github.com/C2DH/jdh-notebook/issues/187 --- article.ipynb | 427 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 304 insertions(+), 123 deletions(-) diff --git a/article.ipynb b/article.ipynb index da15f1e..8e9262f 100644 --- a/article.ipynb +++ b/article.ipynb @@ -2,6 +2,7 @@ "cells": [ { "cell_type": "markdown", + "id": "f51f3b", "metadata": { "collapsed": false, "editable": true, @@ -21,6 +22,7 @@ }, { "cell_type": "markdown", + "id": "287abc", "metadata": { "collapsed": false, "jupyter": { @@ -38,11 +40,16 @@ }, { "cell_type": "markdown", + "id": "a82152", "metadata": { "collapsed": false, + "editable": true, "jupyter": { "outputs_hidden": false }, + "slideshow": { + "slide_type": "" + }, "tags": [ "copyright" ] @@ -52,30 +59,19 @@ "©. Published by De Gruyter in cooperation with the University of Luxembourg Centre for Contemporary and Digital History. This is an Open Access article distributed under the terms of the [Creative Commons Attribution License CC-BY](https://creativecommons.org/licenses/by/4.0/)\n" ] }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, - "tags": [ - "copyright" - ] - }, - "source": [ - "[![cc-by-nc-nd](https://licensebuttons.net/l/by-nc-nd/4.0/88x31.png)](https://creativecommons.org/licenses/by-nc-nd/4.0/) \n", - "©. Published by De Gruyter in cooperation with the University of Luxembourg Centre for Contemporary and Digital History. This is an Open Access article distributed under the terms of the [Creative Commons Attribution License CC-BY-NC-ND](https://creativecommons.org/licenses/by-nc-nd/4.0/)\n" - ] - }, { "cell_type": "code", "execution_count": 6, + "id": "24d512", "metadata": { "collapsed": false, + "editable": true, "jupyter": { "outputs_hidden": false }, + "slideshow": { + "slide_type": "" + }, "tags": [ "cover" ] @@ -89,7 +85,8 @@ ] }, "execution_count": 6, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" } ], @@ -101,11 +98,16 @@ }, { "cell_type": "markdown", + "id": "40e496", "metadata": { "collapsed": false, + "editable": true, "jupyter": { "outputs_hidden": false }, + "slideshow": { + "slide_type": "" + }, "tags": [ "keywords" ] @@ -116,21 +118,28 @@ }, { "cell_type": "markdown", + "id": "d15bd9", "metadata": { "collapsed": false, + "editable": true, "jupyter": { "outputs_hidden": false }, + "slideshow": { + "slide_type": "" + }, "tags": [ "abstract" ] }, "source": [ - "This article examines how digital historians are using large language models (LLMs) in their research and teaching, along with the critical and ethical debates surrounding their use. The article first assesses the historical capacities of LLMs as measured by machine learning benchmarks, and how such assessments can help historians understand the capacities and limits of these technologies. The utility of LLMs as digital tools are then demonstrated through a series of case studies using GPT-4 and other generative AI models for oral history transcriptions, correcting optical character recognition (OCR) errors, and metadata extraction. These case studies also demonstrate how frameworks for using LLMs, such as prompt engineering and retrieval augmented generation (RAG), are used to ground LLM outputs for consistency and greater accuracy. 
Acknowledging the significant ethical challenges posed by LLMs, the article emphasizes the need for critical engagement and the development of responsible frameworks for implementing these technologies in historical scholarship. By combining disciplinary expertise with innovative computational approaches, historians are discovering new ways to navigate the \"unheard-of historical abundance\" of the digital age, contributing to approaches to generative AI that enriches, rather than distorts, our understanding of the past." + "This article examines how digital historians are using large language models (LLMs) in their research and teaching, along with the critical and ethical debates surrounding their use. The article first assesses the historical capacities of LLMs as measured by machine learning benchmarks, and how such assessments can help historians understand the capacities and limits of these technologies. The utility of LLMs as digital tools are then demonstrated through a series of case studies using GPT-4 and other generative AI models for oral history transcriptions, correcting optical character recognition (OCR) errors, and metadata extraction. These case studies also demonstrate how frameworks for using LLMs, such as prompt engineering and retrieval augmented generation (RAG), are used to ground LLM outputs for consistency and greater accuracy. Acknowledging the significant ethical challenges posed by LLMs, the article emphasizes the need for critical engagement and the development of responsible frameworks for implementing these technologies in historical scholarship. By combining disciplinary expertise with innovative computational approaches, historians are discovering new ways to navigate the \"unheard-of historical abundance\" of the digital age, contributing to approaches to generative AI that enriches, rather than distorts, our understanding of the past.\n", + "\n" ] }, { "cell_type": "markdown", + "id": "8f7a3c", "metadata": { "collapsed": false, "jupyter": { @@ -143,6 +152,7 @@ }, { "cell_type": "markdown", + "id": "367c6a", "metadata": { "citation-manager": { "citations": { @@ -180,11 +190,12 @@ "source": [ "In 2003, Roy Rosenzweig predicted that digital historians would need to develop new techniques \"to research, write, and teach in a world of unheard-of historical abundance.\" (Rosenzweig, “Scarcity or Abundance?”) Over the past two decades historians have risen to this challenge, embracing digital mapping, network analysis, distant reading of large text collections, and machine learning as part of their growing methodological toolkit. (Graham, Milligan, and Weingart, Exploring Big Historical Data.) Generative artificial intelligence (AI) has emerged as another potential tool for historians, particularly large language models (LLMs), the most prominent form of this technology. These models possess striking capacities to generate, interpret, and manipulate data across a range of modalities. The rapidly-expanding scope of these capabilities and their limits remain intensely debated, as do their broader social, economic, cultural, and environmental impacts. Yet while still an emerging technology, historians are already demonstrating generative AI's potential as a versatile digital tool. Historians are also contributing to the critical discourse surrounding this new domain, raising key questions about how these models are created, their propensity to reinforce existing inequalities, and their potential to distort our understanding of the past. 
(Meadows and Sternfeld, “Artificial Intelligence and the Practice of History.”)\n", "\n", - "This article contributes to these debates by demonstrating how digital historians are using generative AI to explore the past and the disciplinary contributions historians can offer in these broader debates concerning generative AI. (Dzieza, “What AI Can Do for Historians.”) We begin by assessing the metrics commonly used to measure the historical knowledge of LLMs, and examine how such metrics can give us insights into the capacities and limits this technology. We then examine how generative AI can be used in tasks as varied as preparing datasets, exploring text collections, and offering novel (and controversial) methods of representing the past. We conclude with a call to historians to contribute to ongoing research and debates concerning the ethical use of generative AI. Given the rapid pace of innovation in this field, it is crucial that the profession addresses the implications of this technology for our research and teaching. Historians will have much to offer in contextualizing these technologies and their potential impacts on society." + "This article contributes to these debates by demonstrating how digital historians are using generative AI to explore the past and the disciplinary contributions historians can offer in these broader debates concerning this technology. (Dzieza, “What AI Can Do for Historians.”) We begin by assessing the metrics commonly used to measure the historical knowledge of LLMs, and examine how such metrics can give us insights into the capacities and limits this technology. We then examine how generative AI can be used in tasks as varied as preparing datasets, exploring text collections, and offering novel (and controversial) methods of representing the past. We conclude with a call to historians to contribute to ongoing research and debates concerning the ethical use of generative AI. Given the rapid pace of innovation in this field, it is crucial that the profession addresses the implications of this technology for our research and teaching. Historians will have much to offer in contextualizing these technologies and their potential impacts on society." ] }, { "cell_type": "markdown", + "id": "a95803", "metadata": { "collapsed": false, "jupyter": { @@ -197,6 +208,7 @@ }, { "cell_type": "markdown", + "id": "28cb81", "metadata": { "citation-manager": { "citations": { @@ -239,9 +251,15 @@ } }, "collapsed": false, + "editable": true, "jupyter": { "outputs_hidden": false - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + ] }, "source": [ "As historians explore the possibilities of generative AI, it is important to understand how these technologies are created and assessed. 
With this knowledge we can better evaluate their potential utility and their limits.\n", @@ -251,6 +269,7 @@ }, { "cell_type": "markdown", + "id": "f53f41", "metadata": { "collapsed": false, "jupyter": { @@ -263,6 +282,7 @@ }, { "cell_type": "markdown", + "id": "bc5d7e", "metadata": { "citation-manager": { "citations": { @@ -302,24 +322,31 @@ }, { "cell_type": "markdown", + "id": "875972", "metadata": { "citation-manager": { "citations": { - "pl7am": [ + "4lr4q": [ { - "id": "27937/MVDFMR8K", + "id": "27937/FMW5DCWM", "source": "zotero" } ], - "sj7gk": [ + "e4unb": [ { "id": "27937/EZNK3CE3", "source": "zotero" } ], - "xw5wn": [ + "g0kqk": [ { - "id": "27937/FMW5DCWM", + "id": "27937/MVDFMR8K", + "source": "zotero" + } + ], + "v35u4": [ + { + "id": "27937/78DL3V96", "source": "zotero" } ] @@ -331,13 +358,14 @@ } }, "source": [ - "While such claims have sparked both excitement and alarm, any assessment of LLMs must first be tempered with humility. LLMs are often described as possessing “knowledge” and “understanding,” yet direct engagement with these models can quickly reveal both their remarkable breadth and their narrow limits. Incisive critics of this technology characterize LLMs as “stochastic parrots” that excel at uncanny mimicry of human intelligence. (Bender et al., “On the Dangers of Stochastic Parrots | Proceedings of the 2021 ACM Conference on Fairness, Accountability, and Transparency.”) A form of this mimicry has proven convincing in the past. The first attribution of true artificial intelligence to a computer program occurred in 1966 with a scripted chatbot named ELIZA, developed by AI pioneer Joseph Weizenbaum. (McCorduck, Machines Who Think a Personal Inquiry into the History and Prospects of Artificial Intelligence.) A recent replication of this phenomenon occurred in June 2022 when a Google AI engineer declared the LLM he was training had become sentient. Such attributions will likely increase as newer LLMs demonstrate increasing proficiency in seemingly distinct human qualities, like humor. (Chowdhery et al., “PaLM.”) The means by which LLMs process, interpret, and generate information is a highly technical field requiring specialization in natural language processing, statistics, computational linguistics, and machine learning. While many historians may lack the technical knowledge to effectively evaluate the merits of these debates, when it comes to our own domain we are well equipped to offer informed insights.\n", + "While such claims have sparked both excitement and alarm, any assessment of LLMs must first be tempered with humility. LLMs are often described as possessing “knowledge” and “understanding,” yet direct engagement with these models can quickly reveal both their remarkable breadth and their narrow limits. Incisive critics of this technology characterize LLMs as “stochastic parrots” that excel at uncanny mimicry of human intelligence. (Bender et al., “On the Dangers of Stochastic Parrots | Proceedings of the 2021 ACM Conference on Fairness, Accountability, and Transparency.”) A form of this mimicry has proven convincing in the past. The first attribution of true artificial intelligence to a computer program occurred in 1966 with a scripted chatbot named ELIZA, developed by AI pioneer Joseph Weizenbaum. (McCorduck, Machines Who Think a Personal Inquiry into the History and Prospects of Artificial Intelligence.) A recent replication of this phenomenon occurred in June 2022 when a Google AI engineer declared the LLM he was training had become sentient. 
Such attributions will likely increase as newer LLMs demonstrate increasing proficiency in seemingly distinct human qualities, like humor. (Chowdhery et al., “PaLM.”) The means by which LLMs process, interpret, and generate information is a highly technical field requiring specialization in natural language processing, statistics, computational linguistics, and machine learning. While many historians may lack the technical knowledge to effectively evaluate the merits of these debates, when it comes to our own domain we are well equipped to offer informed insights.\n", "\n", "Indeed, the standard measurement for an LLM’s historical knowledge was inadvertently created by historians. One widely-used measure for LLM performance is the Massive Multitask Language Understanding (MMLU) benchmark, developed in 2021 by researchers led by Dan Hendrycks. This benchmark contains nearly 16,000 questions from 57 academic disciplines ranging in difficulty from an elementary educational level to postgraduate curricula in professional domains like law and medicine. History is measured in this benchmark through some six hundred questions taken from the Advanced Placement (A.P.) curricula for U.S., European, and World history. Hundreds of thousands of secondary students across the globe annually enroll in these curricula, which are designed to replicate the rigors of an introductory, university-level history course. The educators who developed and refined these programs likely never imagined their work would serve as a technical benchmark, and the appropriateness of such a standard can be debated. (Marshall, “The Strange World of AP U.S. History.”) Yet this benchmark, however imperfect, offers historians an accessible means to evaluate this highly technical domain."
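
The benchmark is also openly published, which means historians can inspect exactly what it asks and score a model against it themselves. The sketch below is a minimal illustration of that workflow, assuming the Hugging Face `datasets` library and the public copy of the benchmark at `cais/mmlu`; the `query_model` function is a placeholder for whichever LLM API a reader prefers, not an implementation used in this article.

```python
# Minimal sketch: scoring an LLM on the MMLU A.P. history subsets.
# Assumes `pip install datasets`; query_model() is a placeholder for any LLM API.
from datasets import load_dataset

HISTORY_SUBSETS = ["high_school_us_history",
                   "high_school_european_history",
                   "high_school_world_history"]
LETTERS = ["A", "B", "C", "D"]

def format_prompt(item):
    """Render one MMLU item as a zero-shot multiple-choice prompt."""
    options = "\n".join(f"{letter}. {choice}"
                        for letter, choice in zip(LETTERS, item["choices"]))
    return (f"Answer with a single letter (A, B, C, or D).\n\n"
            f"{item['question']}\n{options}\nAnswer:")

def query_model(prompt: str) -> str:
    """Placeholder: call your preferred LLM here and return its answer."""
    raise NotImplementedError

def score_subset(subset, limit=50):
    """Simple accuracy over the first `limit` test questions of a subset."""
    questions = load_dataset("cais/mmlu", subset, split="test")
    correct = 0
    for item in list(questions)[:limit]:
        reply = query_model(format_prompt(item)).strip().upper()[:1]
        correct += (reply == LETTERS[item["answer"]])  # "answer" is an index 0-3
    return correct / limit

# Example usage:
# for subset in HISTORY_SUBSETS:
#     print(subset, score_subset(subset))
```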
] }, { "cell_type": "markdown", + "id": "dc8ecc", "metadata": { "collapsed": false, "jupyter": { @@ -375,6 +403,7 @@ }, { "cell_type": "markdown", + "id": "a68f2f", "metadata": { "citation-manager": { "citations": { @@ -397,6 +426,7 @@ }, { "cell_type": "markdown", + "id": "2c3d1e", "metadata": { "citation-manager": { "citations": { @@ -426,6 +456,7 @@ }, { "cell_type": "markdown", + "id": "d3d25f", "metadata": { "collapsed": false, "jupyter": { @@ -439,6 +470,7 @@ { "cell_type": "code", "execution_count": 1, + "id": "2d13a5", "metadata": { "collapsed": false, "editable": true, @@ -448,7 +480,9 @@ "slideshow": { "slide_type": "" }, - "tags": [] + "tags": [ + "table-1-*" + ] }, "outputs": [ { @@ -461,7 +495,8 @@ ] }, "execution_count": 1, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" } ], @@ -469,13 +504,13 @@ "from IPython.display import Image\n", "from IPython.display import display\n", "\n", - "# Load the image from the article GitHub URL\n", "table_1_url = 'https://raw.githubusercontent.com/Dr-Hutchinson/jdh_submission/main/media/Table%201%20-%20MMLU%20Benchmark%20Performance.png'\n", "display(Image(url=table_1_url, width=850))" ] }, { "cell_type": "markdown", + "id": "75a019", "metadata": { "citation-manager": { "citations": { @@ -511,6 +546,7 @@ }, { "cell_type": "markdown", + "id": "1f8c89", "metadata": { "citation-manager": { "citations": { @@ -625,9 +661,15 @@ } }, "collapsed": false, + "editable": true, "jupyter": { "outputs_hidden": false - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + ] }, "source": [ "Rapid improvement on this benchmark have been made in just a few years, with a variety of commercial and open-source LLMs now demonstrating expert-level accuracy on all three of the subject exams. These findings mirror the striking performance of models like GPT-4 in other knowledge domains such as medical school curricula (Nori et al., “Capabilities of GPT-4 on Medical Challenge Problems.”), American bar exams, (Katz, “GPT Takes the Bar Exam.”), and a host of other standardized assessments. 
(OpenAI, “GPT-4 Technical Report.”)\n", @@ -648,11 +690,19 @@ { "cell_type": "code", "execution_count": 2, + "id": "3fe86a", "metadata": { "collapsed": false, + "editable": true, "jupyter": { "outputs_hidden": false - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "figure-hayseed-*" + ] }, "outputs": [ { @@ -665,7 +715,8 @@ ] }, "execution_count": 2, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" }, { @@ -687,7 +738,8 @@ ] }, "execution_count": 2, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" } ], @@ -718,6 +770,7 @@ { "cell_type": "code", "execution_count": 8, + "id": "ea08b7", "metadata": { "collapsed": false, "jupyter": { @@ -810,7 +863,8 @@ ] }, "execution_count": 8, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" } ], @@ -843,6 +897,7 @@ { "cell_type": "code", "execution_count": 9, + "id": "9df000", "metadata": { "collapsed": false, "jupyter": { @@ -907,24 +962,27 @@ { "cell_type": "code", "execution_count": 3, + "id": "ab0b4b", "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, - "outputs": [], + "outputs": [ + ], "source": [ "# Enter OpenAI API key in the space below.\n", "# Access to OpenAI's API keys can be found here: https://beta.openai.com/signup\n", "\n", "import os\n", - "os.environ[\"OPENAI_API_KEY\"] = \" \"" + "os.environ[\"OPENAI_API_KEY\"] = \"\"" ] }, { "cell_type": "code", "execution_count": 10, + "id": "062e99", "metadata": { "collapsed": false, "jupyter": { @@ -1063,7 +1121,8 @@ ] }, "execution_count": 10, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" } ], @@ -1103,6 +1162,7 @@ }, { "cell_type": "markdown", + "id": "bc375b", "metadata": { "citation-manager": { "citations": { @@ -1127,6 +1187,7 @@ }, { "cell_type": "markdown", + "id": "092e7b", "metadata": { "collapsed": false, "jupyter": { @@ -1139,6 +1200,7 @@ }, { "cell_type": "markdown", + "id": "3ce67d", "metadata": { "citation-manager": { "citations": { @@ -1167,6 +1229,7 @@ }, { "cell_type": "markdown", + "id": "1e318f", "metadata": { "collapsed": false, "jupyter": { @@ -1179,6 +1242,7 @@ }, { "cell_type": "markdown", + "id": "f20e37", "metadata": { "citation-manager": { "citations": { @@ -1209,6 +1273,7 @@ }, { "cell_type": "markdown", + "id": "643ba2", "metadata": { "citation-manager": { "citations": { @@ -1241,6 +1306,7 @@ { "cell_type": "code", "execution_count": 11, + "id": "8b3e4c", "metadata": { "collapsed": false, "jupyter": { @@ -1299,11 +1365,14 @@ ] }, "execution_count": 11, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" } ], "source": [ + "# Code for transcribing oral history segment with Whisper API\n", + "\n", "import requests\n", "from openai import OpenAI\n", "import time\n", @@ -1341,11 +1410,11 @@ "# Calculate the actual transcription time\n", "automation_time = end_time - start_time\n", "\n", - "# Calculate the estimated transcription time for 1 hour (3600 seconds) based on the transcription time for audio segment\n", + "# Calculate the estimated transcription time for 1 hour based on the transcription time for audio segment\n", "audio_length_seconds = 153 # 2 minutes and 33 seconds in seconds\n", "estimated_time_for_one_hour = (automation_time / audio_length_seconds) * 3600 # Time for 1 hour (3600 seconds)\n", "\n", - "# Convert estimated time for better readability (hours, minutes, and seconds)\n", + "# Convert estimated time for better readability\n", "hours = int(estimated_time_for_one_hour // 3600)\n", "minutes = 
int((estimated_time_for_one_hour % 3600) // 60)\n", "seconds = int(estimated_time_for_one_hour % 60)\n", @@ -1367,6 +1436,7 @@ }, { "cell_type": "markdown", + "id": "c666d9", "metadata": { "collapsed": false, "jupyter": { @@ -1379,12 +1449,20 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 5, + "id": "f5449d", "metadata": { "collapsed": false, + "editable": true, "jupyter": { "outputs_hidden": false - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "sound-franklin-*" + ] }, "outputs": [ { @@ -1403,33 +1481,38 @@ "\u001b[2;4;94mhttps://docsouth.unc.edu/sohp/A-0339/menu.html\u001b[0m\n" ] }, - "execution_count": 12, - "metadata": {}, + "execution_count": 5, + "metadata": { + }, "output_type": "execute_result" }, { "data": { - "text/html": [ - "\n", - " \n", - " " - ], + "text/html": "\n \n ", "text/plain": [ "" ] }, - "execution_count": 12, - "metadata": {}, + "execution_count": 5, + "metadata": { + }, "output_type": "execute_result" } ], "source": [ "from IPython.display import Audio\n", + "from rich.console import Console\n", + "\n", + "# Initialize the console for rich output\n", + "console = Console()\n", + "\n", + "# URL for the audio file on GitHub\n", + "audio_url = \"https://github.com/Dr-Hutchinson/jdh_submission/raw/refs/heads/main/media/A-0339_edited.mp3\"\n", + "\n", + "# Save location for the downloaded audio file\n", + "file_path = \"./A-0339_edited.mp3\"\n", "\n", - "# Add your citation text\n", + "# Displaying citation\n", "citation_text = (\n", " \"[bold]Citation:[/bold]\\n\"\n", " \"[italic]“John Hope Franklin and John Egerton, Conducted by Oral History Interview with John Hope Franklin, \"\n", @@ -1440,14 +1523,13 @@ "# Print the citation with console output\n", "console.print(citation_text, width=console.size.width)\n", "\n", - "\n", "# Load and play the saved audio file\n", - "Audio(file_path)\n", - "\n" + "Audio(file_path)" ] }, { "cell_type": "markdown", + "id": "3cc184", "metadata": { "collapsed": false, "jupyter": { @@ -1455,7 +1537,7 @@ } }, "source": [ - "Based on the professional standard, this transcription would take approximately fifteen to twenty minutes to manually transcribe. Whisper achieved this in less than ten seconds.\n", + "Based on the professional standard, this excerpt would take approximately fifteen to twenty minutes to manually transcribe. Whisper achieved this in less than ten seconds.\n", "\n", "How accurate is the model compared to a human-produced transcript? Due to the stochastic nature of these models, each time you run this code slightly different variations might occur, particularly in the most challenging segments. The code block below visualizes a sample transcription produced by Whisper that was annotated and compared against the original. Notable omissions and discrepancies are highlighted. Whisper’s accuracy is then calculated via a standard benchmark for audio transcription, the word error rate (WER)." 
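
For readers unfamiliar with the metric: WER counts the substitutions, deletions, and insertions needed to turn the model's output into the reference transcript, divided by the number of words in the reference. The snippet below is a minimal primer using the `jiwer` library, one common Python implementation of the metric; the sentence pair is an invented example, not drawn from the interview.

```python
# Minimal WER primer using the jiwer library (pip install jiwer).
# WER = (substitutions + deletions + insertions) / words in the reference.
import jiwer

reference = "the committee met in durham in the fall of nineteen sixty"   # hypothetical reference
hypothesis = "the committee met at durham in the fall of nineteen sixty"  # hypothetical ASR output

print(f"WER: {jiwer.wer(reference, hypothesis):.2%}")  # one substitution over eleven words, about 9%
```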
] @@ -1463,11 +1545,18 @@ { "cell_type": "code", "execution_count": 6, + "id": "5fd91f", "metadata": { "collapsed": false, + "editable": true, "jupyter": { "outputs_hidden": false - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + ] }, "outputs": [ { @@ -1494,7 +1583,8 @@ ] }, "execution_count": 6, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" } ], @@ -1562,6 +1652,7 @@ }, { "cell_type": "markdown", + "id": "67db38", "metadata": { "citation-manager": { "citations": { @@ -1606,6 +1697,7 @@ }, { "cell_type": "markdown", + "id": "391dcd", "metadata": { "collapsed": false, "jupyter": { @@ -1618,6 +1710,7 @@ }, { "cell_type": "markdown", + "id": "95f2b3", "metadata": { "citation-manager": { "citations": { @@ -1649,11 +1742,19 @@ { "cell_type": "code", "execution_count": 22, + "id": "2421d4", "metadata": { "collapsed": false, + "editable": true, "jupyter": { "outputs_hidden": false - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "figure-lotse-6-30-1945-*" + ] }, "outputs": [ { @@ -1664,7 +1765,8 @@ ] }, "execution_count": 22, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" }, { @@ -1682,7 +1784,8 @@ ] }, "execution_count": 22, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" } ], @@ -1705,9 +1808,9 @@ "# Resize the image for better visualization\n", "resized_image = image2.resize((new_width, new_height), Image.LANCZOS)\n", "\n", - "# Convert the PIL image to a format compatible with IPython display to avoid the \"Result\" tag\n", - "resized_image.save(\"/tmp/resized_image.png\") # Temporarily save the image\n", - "display(IPImage(filename=\"/tmp/resized_image.png\")) # Display using IPImage to avoid the Result tag\n", + "# Convert the PIL image to a format compatible with IPython display \n", + "resized_image.save(\"/tmp/resized_image.png\") \n", + "display(IPImage(filename=\"/tmp/resized_image.png\")) \n", "\n", "# Format the citation text using rich\n", "citation_text = (\n", @@ -1721,6 +1824,7 @@ }, { "cell_type": "markdown", + "id": "c008c5", "metadata": { "citation-manager": { "citations": { @@ -1743,6 +1847,7 @@ }, { "cell_type": "markdown", + "id": "434058", "metadata": { "citation-manager": { "citations": { @@ -1785,6 +1890,7 @@ { "cell_type": "code", "execution_count": 5, + "id": "5fe8ca", "metadata": { "collapsed": false, "jupyter": { @@ -1813,7 +1919,8 @@ ] }, "execution_count": 5, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" }, { @@ -1826,7 +1933,8 @@ ] }, "execution_count": 5, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" }, { @@ -1850,7 +1958,8 @@ ] }, "execution_count": 5, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" }, { @@ -1863,7 +1972,8 @@ ] }, "execution_count": 5, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" } ], @@ -1987,6 +2097,7 @@ }, { "cell_type": "markdown", + "id": "7571cd", "metadata": { "collapsed": false, "jupyter": { @@ -1994,7 +2105,7 @@ } }, "source": [ - "While the image quality is satisfactory and the text is printed using modern typefaces, the OCR still generates errors requiring human correction. Correcting even minor errors necessitates review, representing significant labor when processing a sizable text corpus. 
The LLM accelerates that task in this case by correcting OCR errors ahead of human review, particularly when guided by detailed instructions and a few examples tailored to the dataset or OCR task.\n", + "While the image quality is satisfactory and the text is printed using modern typefaces, the OCR scan still generates errors requiring human correction. Correcting even minor errors necessitates review, representing significant labor when processing a sizable text corpus. The LLM accelerates that task in this case by correcting OCR errors ahead of human review, particularly when guided by detailed instructions and a few examples tailored to the dataset or OCR task.\n", "\n", "However, there are limits to this prompt engineering technique. Accuracy falls for both OCR models and LLMs alike when processing images containing considerable ‘noise’ and distortion, as in the image below. But recent LLMs like GPT-4 have been trained on multi-modal data, allowing them to process images as well as text. In the code below, GPT-4 is fed a specialized [prompt](https://raw.githubusercontent.com/Dr-Hutchinson/jdh_submission/refs/heads/main/media/prompts/gpt_vision_prompt.txt), a [few examples](https://raw.githubusercontent.com/Dr-Hutchinson/jdh_submission/refs/heads/main/media/prompts/vision_few_shot.txt), the raw OCR output, as well as the original image to help guide the model in correcting OCR errors." ] @@ -2002,11 +2113,19 @@ { "cell_type": "code", "execution_count": 20, + "id": "2bd1c3", "metadata": { "collapsed": false, + "editable": true, "jupyter": { "outputs_hidden": false - } + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "figure-lotse-3-15-1945-*" + ] }, "outputs": [ { @@ -2017,7 +2136,8 @@ ] }, "execution_count": 20, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" }, { @@ -2035,7 +2155,8 @@ ] }, "execution_count": 20, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" } ], @@ -2059,8 +2180,8 @@ "resized_image = image2.resize((new_width, new_height), Image.LANCZOS)\n", "\n", "# Prepare the image for IPython display\n", - "resized_image.save(\"/tmp/resized_image.png\") # Temporarily save the image\n", - "display(IPImage(filename=\"/tmp/resized_image.png\")) # Display using IPImage to avoid the Result tag\n", + "resized_image.save(\"/tmp/resized_image.png\") \n", + "display(IPImage(filename=\"/tmp/resized_image.png\")) \n", "\n", "# Create formatted citation text with rich\n", "citation_text = (\n", @@ -2075,6 +2196,7 @@ { "cell_type": "code", "execution_count": 41, + "id": "c47057", "metadata": { "collapsed": false, "jupyter": { @@ -2103,7 +2225,8 @@ ] }, "execution_count": 41, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" }, { @@ -2116,7 +2239,8 @@ ] }, "execution_count": 41, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" }, { @@ -2140,7 +2264,8 @@ ] }, "execution_count": 41, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" }, { @@ -2153,7 +2278,8 @@ ] }, "execution_count": 41, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" } ], @@ -2167,7 +2293,7 @@ "# Function to fetch text content from a URL\n", "def fetch_text_from_url(url):\n", " response = requests.get(url)\n", - " response.raise_for_status() # Ensure the request was successful\n", + " response.raise_for_status() \n", " return response.text.strip()\n", "\n", "# Function to encode the image to base64 for API call\n", @@ -2232,8 +2358,8 @@ "}\n", "\n", "# Prepare the payload and get GPT-4 OCR 
output for image\n", - "image_filename = os.path.basename(file_urls[\"image_2\"]) # Get the image filename for reference\n", - "payload = prepare_payload(encoded_image, gpt_vision_prompt, ocr_output, image_filename) # Pass filename to prompt\n", + "image_filename = os.path.basename(file_urls[\"image_2\"]) \n", + "payload = prepare_payload(encoded_image, gpt_vision_prompt, ocr_output, image_filename) \n", "gpt4_ocr_output = get_gpt4_ocr_output(payload)\n", "\n", "# Run comparisons for image_2\n", @@ -2257,6 +2383,7 @@ }, { "cell_type": "markdown", + "id": "7441f7", "metadata": { "citation-manager": { "citations": { @@ -2293,6 +2420,7 @@ }, { "cell_type": "markdown", + "id": "05ad3b", "metadata": { "collapsed": false, "jupyter": { @@ -2305,6 +2433,7 @@ }, { "cell_type": "markdown", + "id": "ccca49", "metadata": { "citation-manager": { "citations": { @@ -2336,6 +2465,7 @@ { "cell_type": "code", "execution_count": 7, + "id": "98d530", "metadata": { "citation-manager": { "citations": { @@ -2524,7 +2654,8 @@ ] }, "execution_count": 7, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" }, { @@ -2660,7 +2791,8 @@ ] }, "execution_count": 7, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" } ], @@ -2719,6 +2851,7 @@ }, { "cell_type": "markdown", + "id": "75cfbb", "metadata": { "citation-manager": { "citations": { @@ -2761,6 +2894,7 @@ }, { "cell_type": "markdown", + "id": "b60cb2", "metadata": { "collapsed": false, "jupyter": { @@ -2773,6 +2907,7 @@ }, { "cell_type": "markdown", + "id": "4aecb6", "metadata": { "citation-manager": { "citations": { @@ -2819,6 +2954,7 @@ { "cell_type": "code", "execution_count": 18, + "id": "01120b", "metadata": { "collapsed": false, "jupyter": { @@ -2855,7 +2991,8 @@ ] }, "execution_count": 18, - "metadata": {}, + "metadata": { + }, "output_type": "execute_result" } ], @@ -2893,6 +3030,7 @@ }, { "cell_type": "markdown", + "id": "9a1143", "metadata": { "collapsed": false, "jupyter": { @@ -2905,6 +3043,7 @@ }, { "cell_type": "markdown", + "id": "ba9daa", "metadata": { "collapsed": false, "jupyter": { @@ -2929,6 +3068,7 @@ }, { "cell_type": "markdown", + "id": "b8b14f", "metadata": { "citation-manager": { "citations": { @@ -2975,6 +3115,7 @@ }, { "cell_type": "markdown", + "id": "541c38", "metadata": { "collapsed": false, "jupyter": { @@ -2982,11 +3123,12 @@ } }, "source": [ - "# The Past as Latent Space - Exploring New Frontiers in Digital History" + "# The Past as Latent Space: Exploring New Frontiers in Digital History" ] }, { "cell_type": "markdown", + "id": "ed57cf", "metadata": { "citation-manager": { "citations": { @@ -3070,7 +3212,7 @@ } }, "source": [ - "These case studies demonstrate how LLMs can be prompted to assist digital historians in familiar tasks—transcribing and correcting text, extracting data, even finding connections within archival collections. Yet, it’s often in unexpected domains where LLMs reveal their most intriguing and ethically complex implications. As Ted Underwood argues, these technologies enable us to explore the “latent spaces” of culture - the underlying patterns, assumptions, and potential meanings embedded within the vast datasets that shape our understanding of the past. 
(Underwood, “Mapping the Latent Spaces of Culture.”) As historians increasingly integrate LLMs into their work, they are discovering that generative AI is not simply streamlining old practices; it is also empowering entirely new forms of historical representation, for both good and ill.\n", + "These case studies demonstrate how LLMs can be prompted to assist digital historians in familiar tasks - transcribing and correcting text, extracting data, even finding connections within archival collections. Yet, it’s often in unexpected domains where LLMs reveal their most intriguing and ethically complex implications. As Ted Underwood argues, these technologies enable us to explore the “latent spaces” of culture - the underlying patterns, assumptions, and potential meanings embedded within the vast datasets that shape our understanding of the past. (Underwood, “Mapping the Latent Spaces of Culture.”) As historians increasingly integrate LLMs into their work, they are discovering that generative AI is not simply streamlining old practices; it is also empowering entirely new forms of historical representation, for both good and ill.\n", "\n", "One area where this technology has been felt is in the classroom. For many instructors, their first encounter with generative AI involves suspicion: are student submissions genuine or AI-produced? This concern, compounded by the frustrating unreliability of current LLM-detection tools, has led many to “AI-proof” their assignments and even some to declare that “the essay is dead.” (Marche, “Will ChatGPT Kill the Student Essay? - The Atlantic.”) Collective responses to the impact of this technology are emerging from organizations like the MLA-CCCC’s Working Group on Writing and AI. (MLA-CCCC Joint Task Force on Writing and AI, “Using the Student Guide to AI Literacy – MLA-CCCC Joint Task Force on Writing and AI.”) Historians are joining these efforts in crafting disciplinary responses to generative AI. (Meadows and Sternfeld, “Artificial Intelligence and the Practice of History.”)\n", "\n", @@ -3078,13 +3220,14 @@ "\n", "While creative pedagogies offer exciting possibilities for using and critiquing LLMs in the classroom, the ethical dilemmas posed by these technologies extend far beyond educational contexts, particularly when it comes to representing historical figures. One notable niche among generative AI services are programmable chatbot ‘personalities,’ such as prominent personalities from the past. Following the release of ChatGPT in November 2022, dialogues with AI-impersonations of iconic and controversial figures were widely shared over social media. App developers quickly tapped into what they perceived as an engaging approach to representing the past. However, these apps did little to address the problems of LLM “hallucinations,” nor did they take into account the impact of training biases in LLM “knowledge.” Users soon reported disturbing conversations with both humanity’s greatest luminaries and its greatest villains. (“Chatbot That Lets You Talk to Jesus and Hitler Is Latest AI Controversy.”) The ability of these apps to “bring history to life” soon gave way to an appreciation that perhaps some parts of the past are better off dead.\n", "\n", - "Such concerns are particularly important as such resurrection can now occur at scale. 
Researchers are exploring how LLMs can be prompted to emulate human behaviors and serve as proxies for human research subjects in fields as varied as psychology (Cui, Li, and Zhou, “Can AI Replace Human Subjects?”), behavioral economics (Xie et al., “Can Large Language Model Agents Simulate Human Trust Behaviors?”), and public health. (Stade et al., “Large Language Models Could Change the Future of Behavioral Healthcare.”) A compelling study by Stanford researchers in 2023 demonstrates how such emulation could be scaled to simulate entire communities. In their paper “Generative Agents: Interactive Simulacra of Human Behavior,” researchers created a framework to generate a small town populated with residents possessing programmed behaviors and beliefs. LLMs then guided each individual resident as they completed daily tasks and encountered the world around them. As time progressed the inhabitants interacted with each other, reflected on their encounters, and updated their “memories” based on their experiences. Researchers observing the LLM-powered “agents” noted the emergence of spontaneous actions and autonomous planning, leading to unexpected but plausible collective behaviors. (Park et al., “Generative Agents.”) Such paradigms offer potential methods to model historical behaviors at scale and perhaps enable simulation as a new empirical approach. However, historians of earlier forms of computational modeling of human behavior would be quick to remind us of the abuses and errors from previous generations of “artificial intelligence.” (Lepore, If Then.)\n", + "Such concerns are particularly important as such resurrection can now occur at scale. Researchers are exploring how LLMs can be prompted to emulate human behaviors and serve as proxies for human research subjects in fields as varied as psychology (Cui, Li, and Zhou, “Can AI Replace Human Subjects?”), behavioral economics (Xie et al., “Can Large Language Model Agents Simulate Human Trust Behaviors?”), and public health. (Stade et al., “Large Language Models Could Change the Future of Behavioral Healthcare.”) A compelling study by Stanford researchers in 2023 demonstrates how such emulation could be scaled to simulate entire communities. In their paper “Generative Agents: Interactive Simulacra of Human Behavior,” researchers created a framework to generate a small town populated with residents possessing programmed behaviors and beliefs. LLMs then guided each individual resident as they completed daily tasks and encountered the world around them. As time progressed the inhabitants interacted with each other, reflected on their encounters, and updated their “memories” based on their experiences. Researchers observing the LLM-powered “agents” noted the emergence of spontaneous actions and autonomous planning, leading to unexpected but plausible collective behaviors. (Park et al., “Generative Agents.”) Such paradigms offer potential methods to model historical behaviors at scale and perhaps enable simulation as a new empirical approach. However, historians of earlier forms of computational modeling of human behavior would be quick to remind us of the abuses and errors from previous generations of “artificial intelligence” and warn us of the risks of such approaches. (Lepore, If Then.)\n", "\n", - "Indeed, as LLMs increasingly become part of our digital lives, historians should move beyond simply using these technologies to using their scholarly expertise to shape their development and implementation. 
We have a distinctive perspective to share. LLMs are not just tools; they are also historical sources. And like every source they are flawed, anchored in their time and place, and influenced by a particular and often distorted view of the world. But within these limitations lies real potential to apply the vast historical data on which they are trained. Historians are already using LLMs to explore the “latent space” of the past, and offering pointed critiques on how to contextualize the significance of this technology. By approaching LLMs critically, ethically, and collaboratively, digital historians are contributing to Roy Rosenzweig’s vision of navigating the “unheard-of historical abundance” of the digital age in a manner that ensures that these technologies deepen our understanding of the past, and not distort it." + "Indeed, as LLMs increasingly become part of our digital lives, historians should move beyond simply using these technologies to using their scholarly expertise to shape the debates concerning their development and implementation. We have a distinctive perspective to share. LLMs are not just tools; they are also historical sources. And like every source they are flawed, anchored in their time and place, and influenced by a particular and often distorted view of the world. But within these limitations lies real potential to apply the vast historical data on which they are trained. Historians are already using LLMs to explore the “latent space” of the past, and offering pointed critiques on how to contextualize the significance of this technology. By approaching LLMs critically, ethically, and collaboratively, digital historians are contributing to Roy Rosenzweig’s vision of navigating the “unheard-of historical abundance” of the digital age in a manner that ensures that these technologies deepen our understanding of the past, and not distort it." ] }, { "cell_type": "markdown", + "id": "c2580e", "metadata": { "collapsed": false, "editable": true, @@ -3094,7 +3237,8 @@ "slideshow": { "slide_type": "" }, - "tags": [] + "tags": [ + ] }, "source": [ "I am grateful to Abraham Gibson for extending an invitation to present the preliminary research findings of this article with the Digital History Working Group in May 2022, organized by the Consortium For History of Science, Technology, and Medicine. I would also like to express my appreciation to my colleagues William Mattingly, Patrick Wadden, and Ian Crowe for their insightful commentary on the article, and to the editorial staff and reviewers for the Journal of Digital History. This article was facilitated by a sabbatical semester generously granted by the Office of Academic Affairs at Belmont Abbey College. My thanks to Provost Travis Feezell and Vice Provost David Williams for their support of this project." @@ -3102,6 +3246,7 @@ }, { "cell_type": "markdown", + "id": "a8abe1", "metadata": { "collapsed": false, "jupyter": { @@ -3113,24 +3258,14 @@ ] }, { - "cell_type": "code", - "execution_count": 2, + "cell_type": "markdown", + "id": "114f9c", "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, - "outputs": [ - { - "ename": "SyntaxError", - "evalue": "invalid character '“' (U+201C) (2626943952.py, line 3)", - "output_type": "error", - "traceback": [ - "\u001b[0;36m File \u001b[0;32m\"/tmp/ipykernel_436/2626943952.py\"\u001b[0;36m, line \u001b[0;32m3\u001b[0m\n\u001b[0;31m
Abdin, Marah, Jyoti Aneja, Hany Awadalla, Ahmed Awadallah, Ammar Ahmad Awan, Nguyen Bach, Amit Bahree, et al. “Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone.” arXiv, August 30, 2024. http://arxiv.org/abs/2404.14219.
\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid character '“' (U+201C)\n" - ] - } - ], "source": [ "\n", "
\n", @@ -3175,6 +3310,7 @@ "
Lewis, Patrick, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, et al. “Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks.” arXiv, April 12, 2021. https://doi.org/10.48550/arXiv.2005.11401.
\n", "
Mai, Yifan, and Percy Liang. “Massive Multitask Language Understanding (MMLU) on HELM.” Blog. Center for Research on Foundation Models, Stanford University, May 1, 2024. https://crfm.stanford.edu/2024/05/01/helm-mmlu.html.
\n", "
Marche, Stephen. “Will ChatGPT Kill the Student Essay?” The Atlantic, December 6, 2022. https://www.theatlantic.com/technology/archive/2022/12/chatgpt-ai-writing-college-student-essays/672371/.
\n", + "
Marshall, Lindsay. “The Strange World of AP U.S. History.” Contingent Magazine, October 20, 2020. https://contingentmagazine.org/2020/10/20/apush/.
\n", "
McCorduck, Pamela. Machines Who Think: A Personal Inquiry into the History and Prospects of Artificial Intelligence. 25th anniversary update. Natick, Mass.: A.K. Peters, 2004. http://site.ebrary.com/id/10158052.
\n", "
McMahon, Liv, and Zoe Kleinman. “Google AI Search Tells Users to Glue Pizza and Eat Rocks.” BBC News. Accessed October 16, 2024. https://www.bbc.com/news/articles/cd11gzejgz4o.
\n", "
Meadows, R. Darrell, and Joshua Sternfeld. “Artificial Intelligence and the Practice of History.” The American Historical Review 128, no. 3 (September 26, 2023): 1345–49. https://doi.org/10.1093/ahr/rhad362.
\n", @@ -3212,18 +3348,6 @@ "
\n", "" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [], - "source": [] } ], "metadata": { @@ -4204,6 +4328,40 @@ "title": "Using Named Entity Recognition to Enhance Access to a Museum Catalog – Document Blog", "type": "post-weblog" }, + "27937/78DL3V96": { + "URL": "https://contingentmagazine.org/2020/10/20/apush/", + "abstract": "Born out of the Cold War, the course has a great contradiction at its heart: why do we teach history?", + "accessed": { + "date-parts": [ + [ + 2024, + 11, + 19 + ] + ] + }, + "author": [ + { + "family": "Marshall", + "given": "Lindsay" + } + ], + "container-title": "CONTINGENT", + "id": "27937/78DL3V96", + "issued": { + "date-parts": [ + [ + 2020, + 10, + 20 + ] + ] + }, + "language": "en-US", + "system_id": "zotero|27937/78DL3V96", + "title": "The Strange World of AP U.S. History", + "type": "webpage" + }, "27937/7D6BEHLB": { "URL": "https://programminghistorian.org/en/lessons/understanding-creating-word-embeddings", "accessed": { @@ -8083,9 +8241,30 @@ "style": "chicago-note-bibliography.csl" }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "argv": [ + "/usr/bin/python3", + "-m", + "ipykernel", + "--HistoryManager.enabled=False", + "--matplotlib=inline", + "-c", + "%config InlineBackend.figure_formats = set(['retina'])\nimport matplotlib; matplotlib.rcParams['figure.figsize'] = (12, 7)", + "-f", + "{connection_file}" + ], + "display_name": "Python 3 (system-wide)", + "env": { + }, "language": "python", - "name": "python3" + "metadata": { + "cocalc": { + "description": "Python 3 programming language", + "priority": 100, + "url": "https://www.python.org/" + } + }, + "name": "python3", + "resource_dir": "/ext/jupyter/kernels/python3" }, "language_info": { "codemirror_mode": { @@ -8097,22 +8276,24 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.12" }, "toc": { "base_numbering": 1, - "nav_menu": {}, + "nav_menu": { + }, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, - "toc_position": {}, + "toc_position": { + }, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file