diff --git a/2022_012_lehmann_et_al_v1_1/klassifikation_2022_v1_1.pdf b/2022_012_lehmann_et_al_v1_1/klassifikation_2022_v1_1.pdf new file mode 100644 index 0000000000000000000000000000000000000000..615e109acaf0ceb55e65dff689c805e3f74e60da Binary files /dev/null and b/2022_012_lehmann_et_al_v1_1/klassifikation_2022_v1_1.pdf differ diff --git a/2022_012_lehmann_et_al_v1_1/klassifikation_2022_v1_1.xml b/2022_012_lehmann_et_al_v1_1/klassifikation_2022_v1_1.xml new file mode 100644 index 0000000000000000000000000000000000000000..ebdc3d1924d43f912a404f6b77db2171666f7121 --- /dev/null +++ b/2022_012_lehmann_et_al_v1_1/klassifikation_2022_v1_1.xml @@ -0,0 +1,2445 @@ +<?xml version="1.0" encoding="utf-8"?> +<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:html="http://www.w3.org/1999/html" + xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:xlink="http://www.w3.org/1999/xlink" + xmlns:xhtml="http://www.w3.org/1999/xhtml"> + <teiHeader> + <fileDesc> + <titleStmt> + <title> + <biblStruct> + <analytic> + <title level="a">Classification of Tragedies and Comedies in Calderón de la + Barca’s Comedias Nuevas</title> + <respStmt> + <resp> + <persName> + <name role="marc_aut"> + <forename>Jörg</forename> + <surname>Lehmann</surname> + </name> + <email>joerg.lehmann@sbb.spk-berlin.de</email> + <idno type="gnd">1054732310</idno> + <idno type="orcid">0000-0003-1334-9693</idno> + </persName> + </resp> + <orgName>Eberhard Karls Universität Tübingen</orgName> + </respStmt> + <respStmt> + <resp> + <persName> + <name role="marc_aut"> + <forename>Sebastian</forename> + <surname>Padó</surname> + </name> + <email>pado@ims.uni-stuttgart.de</email> + <idno type="gnd">1033924393</idno> + <idno type="orcid">0000-0002-7529-6825</idno> + </persName> + </resp> + <orgName>Universität Stuttgart, Institut für Maschinelle + Sprachverarbeitung</orgName> + </respStmt> + <idno type="doi">10.17175/2022_012</idno> + <idno type="ppn">181820763X</idno> + <idno type="zfdg">2022.012</idno> + <idno 
type="url">https://www.zfdg.de/node/363</idno> + <date when="2022-12-29">29.12.2022</date> + </analytic> + <monogr> + <title level="j">Zeitschrift für digitale Geisteswissenschaften</title> + <respStmt> + <resp>Publiziert von</resp> + <orgName role="marc_pbl">Herzog August Bibliothek</orgName> + </respStmt> + <respStmt> + <resp>Transformation der Word Vorlage nach TEI</resp> + <persName/> + <name role="marc_trc"> + <surname>Baumgarten</surname> + <forename>Marcus</forename> + <idno type="gnd">1192832655</idno> + </name> + </respStmt> + <availability status="free"> + <p>Available at <ref target="https://www.zfdg.de">https://www.zfdg.de</ref></p> + </availability> + <imprint> + <biblScope unit="year">2022</biblScope> + <biblScope unit="artikel">12</biblScope> + </imprint> + </monogr> + </biblStruct> + </title> + </titleStmt> + <editionStmt> + <edition>Elektronische Ausgabe nach TEI P5</edition> + </editionStmt> + <publicationStmt> + <distributor> + <name> + <orgName>Herzog August Bibliothek Wolfenbüttel</orgName> + </name> + </distributor> + <idno type="doi">10.17175/zfdg.01</idno> + <idno type="ppn">0819494402</idno> + <authority> + <name>Herzog August Bibliothek</name> + <address> + <addrLine>Lessingplatz 1</addrLine> + <addrLine>38304 Wolfenbüttel</addrLine> + </address> + </authority> + <authority> + <name>Forschungsverbund Marbach Weimar Wolfenbüttel</name> + <address> + <addrLine>Burgplatz 4</addrLine> + <addrLine>99423 Weimar </addrLine> + </address> + </authority> + <availability status="free"> + <p> Sofern nicht anders angegeben </p> + <licence target="http://creativecommons.org/licenses/by/4.0/">CC BY SA 4.0</licence> + </availability> + <availability status="free"> + <p> Available at <ref target="workID">https://www.zfdg.de; (c) Forschungsverbund + MWW</ref> + </p> + </availability> + </publicationStmt> + <sourceDesc> + <p>Einreichung als Fachartikel in der ZfdG durch die Autor*innen</p> + </sourceDesc> + </fileDesc> + <encodingDesc> + <editorialDecl> + 
<p>Medienrechte liegen bei den Autor*innen</p> + <p>Transformation der WORD-Vorlage nach XML/TEI-P5 durch TEI-Oxgarage und + XSLT-Skripten</p> + <p xml:lang="de">Lektorat des Textes durch die Redaktion in Person von <persName>Martin de la Iglesia</persName>.</p> + <p>All links checked<date when="2022-11-24">24.11.2022</date></p> + </editorialDecl> + </encodingDesc> + <profileDesc> + <creation>Einreichung als Artikel der Zeitschrift für digitale + Geisteswissenschaften</creation> + <langUsage> + <language ident="en">Text in Englisch</language> + <language ident="de">Abstract in Deutsch</language> + <language ident="en">Abstract in Englisch</language> + </langUsage> + <textClass> + <keywords scheme="gnd"> + <term>Drama<ref target="4012899-4"/></term> + <term>Klassifikation<ref target="4030958-7"/></term> + <term>Cluster-Analyse<ref target="4070044-6"/></term> + <term>Siglo de oro<ref target="4181251-7"/></term> + <term>Calderón de la Barca, Pedro *1600-1681*<ref target="118518399"/></term> + <term>Hispanistik<ref target="4159974-3"/></term> + </keywords> + </textClass> + </profileDesc> + <revisionDesc> + <change when="2023-04-20" who="iglesia" n="1.1" status="published"><p>The following changes were made: addition in paragraph 1 in response to the reviews. E-mail address of Jörg Lehmann updated.</p></change> + </revisionDesc> + </teiHeader> + <text> + <body> + <div> + <div type="abstract"> + <argument xml:lang="en"> + <p>In this study, we aim at distinguishing comedies and tragedies among 112 dramas + written by Calderón de la Barca, using procedures established by distributional + semantics. 15 each of these <term type="dh">comedias nuevas</term> have already + been classified by qualitative researchers as either tragedies or comedies, + respectively; for another 82 dramas the classification was unknown. 
Four + unsupervised <term type="dh">document embedding</term> methods are explored, which differ from each + other in matrix creation and reduction, and in the calculation of similarity or + distance matrices. The best results – measured against the pre-established + classification of these dramas – are obtained through the classification + procedure that applied the strongest matrix reduction. In addition, a + contrastive vocabulary analysis with <term type="dh">word embeddings</term> is carried out, based + either on word lists produced by the four tested methods, or on the <term + type="dh">log-likelihood </term>probability distribution for two sub-corpora + containing only dramas already determined to be comedies or tragedies. This + step permits the identification of 130 terms that are each discriminative + either of comedies or of tragedies. The outcome shows that the explored methods + identify tragedies with greater accuracy than comedies, indicating that + tragedies have more distinctive features. It also becomes apparent that one + could more appropriately consider classifications such as ›tragedy‹ and + ›comedy‹ as poles between which gradual differences can be observed, whereby + the ensuing transitional area contains <hi rend="italic">comedias nuevas + </hi>that have been described in prior research as <term type="dh" + >tragicomedias</term> or <term type="dh">comedias mitológicas</term>.</p> + </argument> + <argument xml:lang="de"> + <p>In dieser Studie klassifizieren wir Komödien und Tragödien in einem Korpus von + 112 Dramen Calderón de la Barcas, wobei wir Verfahren der distributionellen + Semantik anwenden. Je 15 dieser <term type="dh">comedias nuevas</term> sind + bereits von qualitativen Forscher*innen als Tragödien bzw. Komödien + klassifiziert worden; bei weiteren 82 Dramen war die Klassifikation unbekannt. 
+ Es werden vier unüberwachte <term type="dh">document embedding</term>-Verfahren + eingesetzt, die sich durch Matrixerstellung und -reduktion sowie durch die + Berechnung von Ähnlichkeits- oder Distanzmatrizen voneinander unterscheiden. + Die besten Ergebnisse – gemessen gegenüber der vorab vorgenommenen + Klassifikation dieser Dramen – erzielt dabei jenes Klassifikationsverfahren, + bei dem die stärkste Matrixreduktion vorgenommen wurde. Darüber hinaus wird + eine kontrastive Vokabularanalyse mit <term type="dh">word embeddings</term> + durchgeführt. Diese basiert entweder auf den Wortlisten der vier erprobten + Verfahren oder auf der <term type="dh" + >Log-Likelihood</term>-Wahrscheinlichkeitsverteilung für zwei Subkorpora, die + ausschließlich als Komödien oder Tragödien bestimmte Dramen enthielten. Dieser + Arbeitsschritt ermöglicht die Identifikation von je 130 Begriffen, die für + Komödien oder Tragödien diskriminativ sind. Das Ergebnis zeigt, dass die + explorierten Verfahren Tragödien mit größerer Treffsicherheit identifizieren + als Komödien, was darauf hindeutet, dass Tragödien mehr distinktive Merkmale + aufweisen. Es zeigt sich aber auch, dass es angemessener ist, Klassifikationen + wie ›Tragödie‹ und ›Komödie‹ als Pole zu denken, zwischen denen graduelle + Unterschiede bestehen und in deren Übergangsbereich <hi rend="italic" + >comedias nuevas</hi> enthalten sind, die in der Forschung als <term + type="dh">tragicomedias</term> oder <term type="dh">comedias + mitológicas</term> bezeichnet wurden.</p> + </argument> + </div> + <div type="chapter"> + <head>1. Preface</head> + + <p>Pedro Calderón de la Barca (1600–1681) counts, along with Félix Lope de Vega + Carpio (1562–1635), as one of the most important playwrights of the Spanish + baroque, also known as the ›Golden Age‹ (<term type="dh">siglo de oro</term>). 
His extraordinary and immense productivity rested on his unusual dexterity in re-using trivial plots, vocabulary symptomatic of any genre, and character types. However, Calderón’s uniqueness arises from his ability to juxtapose the God-given balance of power with civil law and morality, and to present this opposition as an irresolvable, tragic conflict or an entertaining comedy. His + works include 84 Corpus Christi plays (<term type="dh">autos + sacramentales</term>), 112 <term type="dh">comedias</term> and 41 short pieces + (<term type="dh">bailes, entremeses, jácaras, mojigangas</term>; contemporary + terms also used by Calderón himself). A nearly complete collection of his works + first appeared in the mid-20<hi rend="super">th</hi> century from the + Madrid-based publisher Aguilar.<note type="footnote"> <ref type="bibliography" target="#calderon_obras_1951">Calderón de la Barca + 1951–1956</ref>. This publication, however, does not conform to the standards of a + historico-critical edition.</note> Those of his <hi rend="italic">comedias</hi> + which had been published during his lifetime specified the dramas with terms such + as <term type="dh">gran comedia</term> or <term type="dh">comedia famosa</term>. + However, these descriptions did not differentiate between comedies and tragedies. + This was in keeping with the use of language during the Golden Age, as the term + ›comedia‹ was interchangeable with ›play‹ or ›theater piece‹: + <quote>Though the etymology of comedia is simple enough – a play of high + spirits and laughter with a happy ending, – in Early Modern Spain the term + comedia meant ›a play‹ or ›work for the stage‹ in a quite neutral + sense.</quote><note type="footnote"> <ref type="bibliography" target="#sullivan_drama_2018">Sullivan 2018</ref>, p. 
33.</note> Because Calderón had never + written any poetics himself, Lope de Vega’s programmatical work <bibl> + <title type="desc">Arte nuevo de hacer comedias en este tiempo</title> + </bibl> + <note type="footnote"> <ref type="bibliography" target="#vega_arte_2010">Lope de Vega 1621</ref>.</note> from 1609 is considered to be a + contemporary reference by whose pragmatic rules Calderón generally oriented + himself, despite some slight modifications. Here, Lope de Vega defines the <term + type="dh">comedia nueva</term> as a play in three acts, and distinguishes the + comedy as a fictional drama involving everyday people, from the tragedy as + pertaining to members of the royal family or people of high descendance and being + based on historic events. Furthermore, Lope characterizes the <hi rend="italic" + >comedia nueva</hi> as a mixture of comedic and tragic elements, thus referring + to the combination of both dramatic genres.<note type="footnote"> This may be + considered a reference to a third genre, which has received little attention up + to now in research. Cf. here <ref type="bibliography" target="#couderc_theatre_2012">Couderc 2012</ref>, pp. 65–75 and 102–109.</note> + Thus, the Spanish playwrights of the 17<hi rend="super">th</hi> century had at + their disposal a central poetological reference, which – superseding Aristotelian + poetics – defined the ›Spanish style‹ as an original idea applying not only to + comedy, but also to tragedy.</p> + <p>After a phase of degradation as being ›irregular‹ according to the doctrines of + French classicism, the historical reception of the Spanish <hi rend="italic" + >comedia nueva</hi> – and especially its understanding of tragedies – became + vitally influenced through the German Enlightenment, the Romantic period and + Idealism. Gotthold Ephraim Lessing (1729–1781) was one of the first in the + German-speaking regions to recognize Calderón’s work. 
He focused intensely on the + tragedies of the Spanish Golden Age and implemented his theoretical aspirations on + a practical level in a newly founded genre of the middle-class tragic drama. He + was later followed by the Romantics Ludwig Tieck, August Wilhelm and Friedrich + Schlegel, the brothers Grimm and Alexander and Wilhelm von Humboldt, who had all + studied Spanish in Göttingen.<note type="footnote"> Comprehensively in detail + <ref type="bibliography" target="#sullivan_landen_2017">Sullivan 2017</ref>.</note> August Wilhelm Schlegel translated five of Calderón’s + plays for his <bibl> + <title type="desc">Spanisches Theater</title> + </bibl> (Vol. I: 1803, Vol. II: 1809) and examined Calderón in great detail in his <bibl> + <title type="desc">Vorlesungen über dramatische Kunst und Literatur</title> + </bibl> (Lectures on Dramatic Arts and Literature) in Vienna (1809). Wilhelm + Joseph Schelling developed his own theory of tragedies in his presentation <bibl> + <title type="desc">Abhandlung über die Tragödie</title> + </bibl> (Essay on Tragedy) based on Calderón’s work. Even Hegel and Schopenhauer + grappled with the subject of Calderón, and thus it is no wonder that Walter + Benjamin keeps returning to Calderón and his notion of the tragedy again and again + in his <bibl> + <title type="desc">Ursprung des deutschen Trauerspiels</title> + </bibl> (Origin of the German Tragedy).<note type="footnote"> <ref type="bibliography" target="#benjamin_ursprung_1978">Benjamin + 1978</ref>.</note> + </p> + <p>While the interest in the German-speaking regions lay mostly on Calderón’s + tragedies and was, therefore, focused on only a few plays, it was first in the + mid-20<hi rend="super">th</hi> century when serious attempts were made at + examining and classifying the entire body of Calderónian <hi rend="italic" + >comedias nuevas. 
</hi>It was initially the publishers of Calderón’s <bibl> + <title type="desc">Obras completas</title> + </bibl>, who, in 1951, undertook a binary division of these theater pieces into + <term type="dh">dramas</term> and <term type="dh">comedies</term> + <hi rend="italic">, </hi>thereby distinguishing between ›serious‹ relative to + those resembling tragedies and ›light‹ relative to entertainment-oriented dramas. + In this manner, the modern-day editors of the Aguilar publishing house quite + obviously approached the provided examples of Calderón’s <hi rend="italic" + >comedias</hi> according to the poetic traditions of Antiquity, which, since + the time of Aristotle, have been based on the clear separation of comedy and + tragedy; however the editors proceeded with insufficiently explicit criteria.<note + type="footnote"> Cf. here the introduction <ref type="bibliography" target="#calderon_obras_1951"> + Calderón de la Barca 1951</ref>, pp. 9–34.</note> At the same time, they posed a + pivotal question with this differentiation, which has been heatedly discussed with + opposing positions in the literary research of Calderón’s work from the second + half of the 20<hi rend="super">th</hi> century to the present day. The British + Calderón school (Alexander A. Parker, Bruce Wardropper, Anthony Irving Watson, + Henry W. Sullivan among others) was intensely occupied with Calderónian tragedies. + Their attempts at classification were subjected to a rigorously methodical + critique at the beginning of this millennium by the Spanish researcher Jesús G. + Maestro, who commented, not without sarcasm, on the ›impotence of literary theory‹ + regarding the dramatic genres and the ever-changing attributions accompanying + them.<note type="footnote"> Cf. 
<ref type="bibliography" target="#maestro_limites_2003">Maestro 2003</ref> and also the discussion by + <ref type="bibliography" target="#arellano_dramaticos_2018">Arellano 2018</ref> on the limits of compiling taxonomies.</note> Now it was left to + the British researcher Henry W. Sullivan to identify, from a qualitative + perspective, twelve criteria according to which the tragic drama of the <hi + rend="italic">siglo de oro</hi> can be characterized. In doing so, Sullivan + focused mainly on thematic traits (father-son conflicts, revenge and honor-based + dramas), extra-literary indications (persons of high social standing),<note + type="footnote"> Usually, the high social standing is explicitly indicated in + the list of <hi rend="italic">dramatis personae</hi> of Calderón’s works, such + as <quote>emperador</quote>, <quote>rey</quote>, <quote>reina</quote>, + <quote>don</quote>, <quote>doña</quote>, <quote>infanta</quote> or + <quote>infante</quote> (emperor, king, queen, esquire, lady, infanta or + infante).</note> characteristics of the plot (unfair judgements or death of the + protagonist), or attributes of reception (creation of <term type="dh">eleos</term> + and <term type="dh">pathos</term> or cathartic endings). He also formulated + exclusionary criteria like the prevalence of themes such as redemption and + damnation, and he also excluded martyr dramas, thus defining tragedies + narrowly.<note type="footnote"> <ref type="bibliography" target="#sullivan_drama_2018">Sullivan 2018</ref>, pp. 
362–364.</note> Within the + framework of these criteria, Sullivan was able to identify at least 14 tragedies + in the complete works of Calderónian <hi rend="italic">comedias nuevas.</hi> + </p> + <p>In light of the monumental works of Calderón it is, on the one hand, not + surprising that the classification of the <hi rend="italic">comedias nuevas</hi> – + aside from the Aguilar edition – was never carried out comprehensively:<note + type="footnote"> An attempt at this is being made by the portal <ref + target="http://calderondigital.tespasiglodeoro.it/">Calderón Digital</ref>, + by which around 80 of Calderón’s written texts can be filtered according to + genre characteristics; the researchers responsible for these classifications + are also indicated.</note> Which researcher is prepared to study and classify + 112 dramas? At the same time, it is evident that just this sort of written work is + suitable for the implementation of computational procedures. On the other hand, it + must be understood that a data-based, computational classification of the entire + body of the <hi rend="italic">comedias</hi> was not possible until + spring 2022, when all of them were made available in an electronic form.<note + type="footnote"> The full collection is available in TEI-XML at <ref + target="https://dracor.org/cal">DraCor</ref>. Not only the 110 <hi + rend="italic">comedias nuevas</hi> listed in the Aguilar edition were made + available, but also two further <hi rend="italic">comedias</hi> attributed to + Calderón, namely <bibl> + <title type="desc">La selva confusa</title> + </bibl> and <bibl> + <title type="desc">Cómo se comunican dos estrellas contrarias</title> + </bibl>. For the discussion of this attribution, see <ref type="bibliography" target="#coenen_selva_2016">Coenen 2016</ref>. The authors + of this study are very thankful to Dr. 
Simon Kroll and his team at the + University of Vienna for the contribution of more than 50 dramas to this + corpus.</note> Hence, Calderón’s works – with the exception of only a few + studies – have also not yet been analyzed with any methods provided by the <term + type="dh">digital humanities</term> + <hi rend="italic">, </hi>although such a massive corpus quite obviously lends + itself to the examination of structural similarities among works in a particular + genre or differences between dramas of varying genres.<note type="footnote"> For + example, <ref type="bibliography" target="#pena_teatro_2011">Peña-Pimentel 2011</ref>; + <ref type="bibliography" target="#pena_aplicacion_2012">Peña-Pimentel 2012</ref>; + <ref type="bibliography" target="#rosa_role_2018">de la Rosa et al. 2018</ref>; + <ref type="bibliography" target="#ehrlicher_poetica_2020">Ehrlicher et al. + 2020</ref>.</note> Calderón’s work stands out as a rare case in that such a large + body of theater pieces was written by one author within a relatively short period + during the 17<hi rend="super">th</hi> century.</p> + <p>The study at hand<note type="footnote"> This study arose as a part of the project <bibl><title type="desc">QUOTE. Comprehensive Modeling of Conversational Contributions in Prose Texts</title></bibl>, sponsored by the German Research Foundation (Deutsche + Forschungsgemeinschaft, project No. 350397899). The authors thank Prof. Dr. + Hanno Ehrlicher (University of Tübingen), who commented on the first version of + the article.</note> represents an attempt, based on a total of 112 <hi + rend="italic">comedias,</hi> to critically assess the validity of the + distinction between the comedy and the tragedy among these dramas. 
This goes hand + in hand with assessing the methodical possibilities made available by the digital humanities’ application of <term type="dh" + >distributional semantics </term>procedures for this problem.<note + type="footnote"> Comparable studies on classical French drama have been thus + far presented by, for instance, <ref type="bibliography" target="#schoech_exploration_2017">Schöch 2017</ref> and + <ref type="bibliography" target="#schoech_tools_2013">Schöch 2013</ref>, who approached + the subject with <term type="dh">topic modeling</term> and stylometric methods. For stylometric analysis of + dramas in the <hi rend="italic">siglo de oro</hi> cf. in particular <ref type="bibliography" target="#campion_original_2021">Campión + Larumbel / Cuéllar 2021</ref> and + <ref type="bibliography" target="#cuellar_stylometry_2022">Cuéllar 2022</ref>.</note> Because + thus far only a small portion of the Calderónian <hi rend="italic">comedias</hi> + have been studied, and the majority of them remain entirely unexplored, we expect + that the proven methods can deliver important indications for the classification + of the plays which have yet to be thoroughly analyzed.</p> + </div> + <div type="chapter"> + <head>2. Methodology</head> + + <div type="subchapter"> + <head>2.1 Methodical Basis</head> + + <p>Nowadays, the concept of distributional semantics is used widely in the realm + of computational linguistics. The basic assumption is that the meaning of a + word is established according to how much it is used and how often it co-occurs + with other words within a specific context. Words and documents are represented + in a high-dimensional space; semantic relationships are inferred from the + similarities within that space. For the representation of documents, the + frequencies (absolute or relative) of the words in each document are stored as + matrices of vectors where each word corresponds to a column of the matrix and + every document to a row. 
The cells of the matrix contain co-occurrence + frequencies; pure frequencies are often replaced through degrees of statistical + association, such as <term type="dh">pointwise mutual information</term> or + <term type="dh">tf-idf</term> (<term type="dh">term frequency–inverse document frequency</term>), in order to counteract the Zipf distribution of + words.<note type="footnote"> Cf. <ref type="bibliography" target="#lowe_theory_2001">Lowe 2001</ref> for details.</note> To + represent the meanings of words, the same kind of matrix is created, with the + target terms forming rows and contextual words forming columns. Such matrices + can serve to compute the distances between single words or texts, to compare + them to each other, to cluster them into groups, and to visualize them. As a + rule, these very large matrices contain thousands of columns and are sparse, i. + e. most of their elements are zero. This calls for reduction to a much smaller + number of dimensions in order to be appropriate for the computation of distance + or similarity matrices. The resulting low dimensional vectors are often + referred to as <term type="dh">word</term> or <term type="dh">document + embeddings</term> and are probably the most common practice for semantic + representation in natural language processing (NLP). They are related to, but + not identical to topic models. The reduction of dimensions is a purely + technical requirement and hardly alters the underlying intention.<note + type="footnote"> A short introduction is given in <ref type="bibliography" target="#jockers_macroanalysis_2013">Jockers 2013</ref>, pp. + 63–67.</note> + </p> + <p>The choice of a distributional approach for the task at hand is based on our + starting assumption, namely the hypothesis that comedies and tragedies – in + accordance with the treatment of each of the different themes – can be + differentiated by observing word choice and word usage. 
Simply put, it can be + expected that in Calderónian tragedies, terms such as ›honor‹, ›power‹ and + ›death‹ strongly co-occur, while the comedies tend to combine words like + ›love‹, ›disguise‹ and ›jealousy‹. This is quite obviously an approach that + represents an oversimplification – narrative patterns or plot structures, + however, cannot be characterized in this manner. At the same time, the wide + success of approaches based on frequency and co-occurrence of words and common + methods for author recognition demonstrates that such analyses allow for + surprisingly deep understandings even of literary texts.</p> + </div> + <div type="subchapter"> + <head>2.2 Data Basis</head> + + <p>Beginning with the fourteen tragedies identified by Sullivan, yet another was added + to the examined texts, which had apparently remained unknown to him: <bibl><title type="desc">Saber del bien y del mal.</title></bibl><note type="footnote"> Cf. recently to this identification <ref type="bibliography" target="#escudero_amor_2021">Escudero Baztán + 2021</ref>, p. 21.</note> 15 further dramas, which were identified by qualitative + research as comedies and which are often called <term type="dh">comedias + cómicas</term> (or <term type="dh">urbanas</term> or <term type="dh" + >palatinas</term>),<note type="footnote"> See for the most recent overview + of this classification <ref type="bibliography" target="#kroll_sonido_2022">Kroll 2022</ref>, pp. 63–65. Cf. 
also + <ref type="bibliography" target="#calderon_obras_1951">Calderón de la + Barca 1951</ref>; + <ref type="bibliography" target="#escudero_amor_2021">Escudero Baztán 2021</ref>; + <ref type="bibliography" target="#ehrlicher_einfuehrung_2012">Ehrlicher 2012</ref>; + <ref type="bibliography" target="#maestro_limites_2003">Maestro 2003</ref>; + <ref type="bibliography" target="#parker_mind_1988">Parker 1988</ref>; + <ref type="bibliography" target="#pena_teatro_2011">Peña-Pimentel 2011</ref>; + <ref type="bibliography" target="#tobar_rotonda_2000">Tobar 2000</ref>; + <ref type="bibliography" target="#prat_historia_1950">Valbuena Prat 1950</ref>.</note> make up the + counterpart to the tragedies in this body of work. The other 82 Calderónian <hi + rend="italic">comedias</hi> are available as full digital texts in + modernized and normalized Spanish.<note type="footnote"> For the most part, + these dramas are available under the portal: <ref + target="http://www.cervantesvirtual.com/">Cervantes Virtual</ref> and + the <ref target="http://www.comedias.org/">Association for Hispanic + Classical Theater</ref>. A current overview of all sources can be found + at: <ref target="http://etso.es/">Estilometría aplicada al Teatro del Siglo + de Oro</ref>. Because diacritical symbols used in modern Spanish can be + used according to context, the spelling of certain terms may vary (ex.: solo + / sólo – solo as an adjective means ›sole‹ or ›alone‹, whereas sólo as an + adverb means ›barely‹ or ›merely‹).</note> The spoken texts of the <term + type="dh">dramatis personae</term> were extracted from all 112 plays and + collected for analysis; stage instructions or similar additional texts were not + included. 
The 15 tragedies were each marked with a T and a consecutive number, + the comedies with a C, and the remaining 82 plays were marked + <quote>Test</quote> and also numbered.<note type="footnote"> See the + appendix below in which this abbreviation was removed and the results of the + applied methods are presented.</note> + </p> + </div> + <div type="subchapter"> + <head>2.3 Research Goal</head> + + <p>In the absence of suitably large bodies of dramatic works beyond the + Spanish-language world, the classification of genre with word or document embeddings is still relatively + new.<note type="footnote"> One exception is the study by <ref type="bibliography" target="#willand_2017">Willand / Reiter + 2017</ref>, cf. here pp. 190–194.</note> Thus, the goal of our study is to + explore various methods and combinations thereof, and to compare the results. + We will compare four approaches, which all follow the same general unsupervised + scheme: 1) pre-filtering of the vocabulary; 2) calculation of document embeddings, and, if applicable, dimension reduction; 3) clustering of embeddings; 4) visualization and evaluation. Our corpus provides us with an excellent basis, + as the categories are known in about a quarter of the plays, but not in the + remaining dramas. In this manner, we can simultaneously review the quality of + the process (on the basis of the known categories) and obtain findings on the + yet unclassified dramas. 
We find this type of methodical comparison to be + important, because it is known that the findings from unsupervised distributional + methods depend heavily on the parametrization of the process.<note + type="footnote"> <ref type="bibliography" target="#turney_frequency_2010">Turney / Pantel 2010</ref>; + <ref type="bibliography" target="#bullinaria_representation_2007">Bullinaria / Levy 2007</ref>.</note> + </p> + </div> + <div type="subchapter"> + <head>2.4 Practical Application</head> + + <p>All analyses were implemented with the statistics software R. The + pre-processing of the texts was mostly carried out using the R package + quanteda, as it also enables the exclusion of Spanish stop words, punctuation + and numbers, and the conversion of the prepared corpus of texts to be processed + in other packages. As was revealed in the course of exploration, only a small + number (viz., 308) of Spanish stop words were retained in the quanteda package. + One exploration showed that the exclusion of function words from the matrices + led to significantly better results, thus the stop word list was + considerably expanded manually.<note type="footnote"> These word lists are + documented in the R code, which was published together with the body of + dramas on <ref target="https://doi.org/10.5281/zenodo.6669603">Zenodo</ref>. + Cf. <ref type="bibliography" target="#lehmann_classifikation_2022">Lehmann 2022</ref>.</note> Furthermore, the analysis of the different + methods employed, in particular the tf-idf statistics, showed that the grouping + results were quite negatively affected by names of characters, places, and + countries within the texts, also in their adjectivized form, as these elements + of speech tend to reflect idiosyncrasies of single pieces rather than + stereotypical genre characteristics. 
These proper names were likewise – + primarily through the list of dramatis personae<hi rend="italic"> – + </hi>compiled and removed from the texts; the number of terms to be excluded + from the corpus thus rose above 800, additionally to the 308 stop words + contained in the quanteda package. As a rule, the frequency of the words in + each drama was calculated, subsequently the frequencies were normalized per + document. This took place wherever the distance and similarity matrices for + grouping were generated. When calculating the similarity between documents + using cosine similarity this could be omitted, because they remain constant in + relation to the vector lengths. Consistently throughout the analyses, work was + done with inflected or conjugated forms of words; a lemmatization or a stemming + of these words was not carried out. In this way linguistic information that + might help in the classification of literary genres (and with respect to style, + authorial signals or diachronic positionality) was preserved.</p> + </div> + </div> + <div type="chapter"> + <head>3. Results</head> + + <div type="subchapter"> + <head>3.1 Experiment 0</head> + + <p>In a first exploration, we applied a well-established method, Skip-gram,<note + type="footnote"> <ref type="bibliography" target="#mikolov_representations_2013">Mikolov et al. 2013</ref>.</note> to the body of text in order to + assess whether <hi rend="italic">word embeddings</hi> could tell us something + interesting about the text and which word pairs within the entire body of 112 + dramas exhibited the highest number of similarities. We reduced the matrix to + the 1,000 terms with the highest log-likelihoods and calculated the cosine + similarity between all pairs of vectors. Cosine similarity, or more precisely, + the cosine of the angle between two vectors, is a widely used measure of + similarity which determines to what extent two vectors ›point‹ in the same + direction in the high dimensional space. 
Cosine ranges between 0 and 1, and a + high cosine indicates that two terms are found in similar contexts.</p> + <p>Word pairings with a very high cosine similarity value of more than 0.75 are, + for instance, <quote>cielo</quote> and <quote>muerte</quote> (heaven, death), + <quote>esperanza</quote> and <quote>desdichas</quote> (hope, despair), + <quote>poder</quote> and <quote>temor</quote> (power, fear), + <quote>poder</quote> and <quote>gusto</quote> (power, taste), + <quote>honor</quote> and <quote>alma</quote> (honor, soul) or + <quote>alma</quote> and <quote>muerte</quote> (soul, death). One of the + highest cosine similarity values, at 0.96, showed that the word pairing + <quote>honor</quote> and <quote>muerte</quote> – honor and death – can be + determined as a major theme throughout the entire body of work. Indeed, these + first results proved to be surprisingly clear, in that, by using the Skip-gram + algorithm, central themes in the Calderónian <hi rend="italic">comedias</hi> + could be identified, even when they deal with the intersection of social + conventions (honor) and individuality (taste, soul, fear, social or actual + death).</p> + <p>Conversely, word pairings like <quote>honor</quote> and <quote>poder</quote> + (honor, power; 0.58), <quote>amores</quote> and <quote>agravios</quote> + (love, infidelity, each in plural form; 0.69), <quote>gracia</quote> and + <quote>corte</quote> (grace, court; 0.63) or <quote>gracia</quote> and + <quote>culpa</quote> (grace, guilt; 0.60) showed lesser cosine similarity + values. 
Cosine similarity values under 0.5 exhibit only weakly developed + commonalities in the contexts; this could be observed for the word pairings + <quote>amar</quote> and <quote>honra</quote> (loving, reputation), + <quote>muere</quote> and <quote>sepulcro</quote> (he / she / it dies, + grave), <quote>muerte</quote> and <quote>engaño</quote> (death, deceit), + <quote>mueran</quote> and <quote>suerte</quote> (they may die, fate), + <quote>amores</quote> and <quote>honra</quote> (love, reputation) and also + <quote>mentira</quote> and <quote>gracia</quote> (lie, grace). First and + foremost, it is apparent that the central themes in Calderón’s works + (<quote>Amor, honor y poder</quote><note type="footnote"> Cf. <ref type="bibliography" target="#escudero_amor_2021">Escudero Baztán 2021</ref>.</note> – love, honor, and + power) do not necessarily have to be interconnected with one another. This can + be attributed to the fact that comedies and tragedies can be distinguished from + each other through differing combinations of these terms. It is to be expected + that the combination <quote>honor</quote> and <quote>poder</quote> is more + characteristic of tragedies, and the combination <quote>amar</quote> and + <quote>honra</quote> is more characteristic for comedies, but not for the + entire body of work. We will come back to this point later.</p> + </div> + <div type="subchapter"> + <head>3.2 Experiment 1</head> + + <p>With the first experiment, our goal was to be able to explore the validity of + the <hi rend="italic">document embeddings</hi>. We take advantage of the known + (or: labeled) tragedies and comedies to evaluate our document clusterings as + follows, in the spirit of cluster purity<note type="footnote"> <ref type="bibliography" target="#manning_introduction_2008">Manning et al. + 2008</ref>.</note> analysis: we assign each cluster to the class that the majority + of documents with known affiliation belongs to. 
We then consider the other + known classes of documents in this cluster, and compute purity, that is, the + degree of agreement between these classes and the majority class, as a measure + of success of our clustering. Our setup has the additional aspect that our data + set includes documents for which the ›true‹ class is unknown. Since purity only + considers documents with known classes, this makes the measure hard to + interpret for clusters that consist predominantly or entirely of such + documents. For such clusters — which we call underdetermined — we refrain from + discussing purity in detail. After carrying out the preprocessing steps + described above, we explored the following four methods: 1) Reduction of the + matrix through the deletion of words according to their frequency and + appearance within the texts; calculating the distance matrix according to + relative frequencies, clustering with the Ward.D2 algorithm<note + type="footnote"> <ref type="bibliography" target="#ward_function_1963">Ward 1963</ref>.</note> based on the Euclidian distance. 2) + Reduction of the matrix through the deletion of <term type="dh">sparse + terms</term> which only appear in a few documents, calculation of the + distance matrix based on relative frequencies, clustering based on the + Euclidian distance with the Ward.D2 distance algorithm. 3) Part-of-speech + tagging in each of the dramas, extraction of verbs, nouns and adjectives, + calculation of the cosine similarity values between the documents, calculation + of the distance matrix, clustering with the Ward.D2 distance algorithm. 4) + Calculation of the tf-idf statistics, calculation of the cosine similarity + values between the documents, calculation of the distance matrix and clustering + with the Ward.D2 distance algorithm. 
We discuss the results of each method.</p> + <p>The first method represented a conservative approach: only the 1,094 words with + a frequency > 120 and appearing in at least half of the documents were + included. The document word matrix was filled with mere frequencies; no + dimension reduction was carried out. The grouping was carried out through a + clustering with the Ward.D2 distance algorithm. <ref type="graphic" + target="#klassifikation_2022_001">Figure 1</ref> shows the resulting dendrogram. + Recall that among the documents that form the leaf nodes of the dendrogram, + some are known as comedies (CXX), some as tragedies (TXX), but most are unknown + regarding their status (<quote>Test</quote>).</p> + <figure> + <graphic xml:id="klassifikation_2022_001" + url=".../medien/klassifikation_2022_001.png"> + <desc> + <ref type="graphic" target="#abb1">Fig. 1</ref>: Ward.D2 clustering of + 112 Calderónian Comedias. [Lehmann 2022] <ref type="graphic" + target="#klassifikation_2022_001"/> + </desc> + </graphic> + </figure> + <p>Read from left to right, the first cluster represents a pure tragedy cluster + which includes 29 dramas; 10 of these had already been characterized as + tragedies. The third cluster from the left side depicts a pure comedy cluster; + here 22 dramas are included, of which 10 had already been classified as + comedies. The two additional clusters must be described either as undefined or + mixed clusters, as they either contain only 1 comedy (second cluster from the + left, comprising 39 dramas) and therefore cannot be described as pure, or 4 comedies and 5 tragedies (the cluster to the right, comprising 22 dramas). + Together, these two clusters contain more than half of the plays, namely 61 + works. 
We conclude that with regard to the main research question, this + approach does not appear to be especially effective, as only 20 of the 30 + previously marked dramas (or 67%) were assigned in a clear fashion, while the + remaining 10 comedies and tragedies mutually appeared in the clusters. + However, the still relatively high dimensionality of the <hi rend="italic" + >document embeddings</hi> makes a failure analysis challenging.</p> + <p>The goal of the second process is to create a low dimensional representation + that is easier to interpret, in order to gain more insight into the + distribution of the two genres. First, only terms which appear in at least 80% + of all of the documents (i. e. in at least 90 plays) are retained; in other + words, the sparsity is limited to 20%. This reduces the number of terms to a + more compact total of 496. Again, a frequency-based word-document matrix is + established and normalized, whereby the frequency of each of the remaining + terms in each drama is divided by the sum of frequencies of <hi rend="italic" + >all</hi> the words in the text. Finally, a distance matrix is established, + based upon the Euclidian distance, and again, clustering is conducted using the + Ward.D2 distance algorithm. </p> + <figure> + <graphic xml:id="klassifikation_2022_002" + url=".../medien/klassifikation_2022_002.png"> + <desc> + <ref type="graphic" target="#abb2">Fig. 2</ref>: Ward.D2 clustering of + 112 Calderónian Comedias. Euclidian distance on the basis of a sparsity + of 20%. [Lehmann 2022] <ref type="graphic" + target="#klassifikation_2022_002"/> + </desc> + </graphic> + </figure> + <p>The dendrogram illustrates three clusters: In the first cluster to the left, + all 15 comedies and 16 further dramas appear. The cluster on the right contains + 14 tragedies and, likewise, 33 dramas of unknown classification. 
The cluster in + the middle is mixed; it contains 1 tragedy (T4: <bibl> + <title type="desc">El mayor monstruo del mundo</title> + </bibl>) and 33 additional dramas of unknown classification. Through this + process, which only deals with 496 words, 29 of 30 classified dramas, or 97%, + were correctly assigned.<note type="footnote"> Basically, we attempted to alter + only one parameter between each of the analyses, thus using the Euclidian + distance. As an alternative, during the second procedure, we also used the + Manhattan distance, whereby the distance is defined by the sum of absolute + values. The results were clearly less satisfactory than the above + representations resulting from the use of the Euclidian distance: Only two + thirds (67%) of all previously identified tragedies and comedies were + correctly clustered.</note> + </p> + <p>Both of these automatic procedures, in which the fundamental matrices are + reduced on the basis of word frequencies, establish a transitional zone between + tragedy and comedy. This observation presents us with the question of whether + it would be more appropriate, in light of distributional semantics, to consider + classifications like ›tragedy‹ and ›comedy‹ as poles between which gradual + differences appear, showing the resulting overlap in regard to the applied word + selection. In the matter of Calderónian dramas, this seems quite sensible, as + themes such as ›honor‹ and ›power‹ can just as well be included in comedic + plots as in those of the famous honor tragedies. </p> + <p>Comedies may also present serious subjects in a lighthearted, entertaining + manner. 
For example, power struggles between royal families can be indirectly + alluded to within the framework of a mythological play; the allegory would have + been quite understandable for the court audience at the time.<note + type="footnote"> This possibility was already mentioned by <ref type="bibliography" target="#greer_power_1988">Greer 1988</ref> in an + example from <bibl> + <title type="desc">Fieras afemina amor</title> + </bibl>.</note> + </p> + <p>One possible fundamental critique on simple <hi rend="italic">document + embedding </hi>methods, like those we have observed thus far, is the total + absence of linguistic structure. For this reason, we made the decision to + subject all of the dramas to <term type="dh">part-of-speech tagging</term> + <hi rend="italic">,</hi> including only verbs, nouns and adjectives from each + play in the corpus for clustering.<note type="footnote"> This kind of method + was used by <ref type="bibliography" target="#willand_2017">Willand / Reiter 2017</ref>, pp. 191f.</note> For testing the third + procedure, therefore, a second corpus is established, in which each of the + drama texts include only verbs, nouns and adjectives in their basic forms. All + proper names are once more filtered out of the matrix created for this purpose + – they had been falsely recognized as adjectives – and subsequently a + calculation is made, based on the non-normalized frequencies of the cosine + similarities. This similarity matrix is converted to a distance matrix and, + once again, clustered with the Ward.D2 algorithm. The results are depicted in a + dendrogram.</p> + <figure> + <graphic xml:id="klassifikation_2022_003" + url=".../medien/klassifikation_2022_003.png"> + <desc> + <ref type="graphic" target="#abb3">Fig. 3</ref>: Ward.D2 clustering of + 112 Calderónian Comedias. Cosine similarity based on verbs, nouns and + adjectives. 
[Lehmann 2022] <ref type="graphic" + target="#klassifikation_2022_003"/> + </desc> + </graphic> + </figure> + <p>The first cluster to the left, which might be identified as a comedy cluster, + contains 14 comedies, 5 tragedies (T1: <bibl> + <title type="desc">A secreto agravio, secreta venganza</title> + </bibl>; T2: <bibl> + <title type="desc">El alcalde de Zalamea</title> + </bibl>; T5: <bibl> + <title type="desc">El médico de su honra</title> + </bibl>;<note type="footnote"> This outcome is especially interesting, because, + according to <ref type="bibliography" target="#couderc_theatre_2012">Couderc 2012</ref>, p. 104<bibl>, + <title type="desc">A secreto agravio, secreta venganza</title> + </bibl> and <bibl> + <title type="desc">El médico de su honra</title> + </bibl> can be described as tragicomedies and <bibl> + <title type="desc">A secreto agravio, secreta venganza</title> + </bibl> is the only play by Calderón which uses the term + <quote>tragicomedia</quote> (tragicomedy) in the spoken text.</note> T6: <bibl> + <title type="desc">El pintor de su deshonra</title> + </bibl>; T13: <bibl> + <title type="desc">Las tres justicias en una</title> + </bibl>) and 18 additional plays of unknown classification. The cluster to the + right is mostly a tragedy cluster, because it contains 10 tragedies and 49 + additional plays, but also 1 comedy (C3: <bibl> + <title type="desc">El encanto sin encanto</title> + </bibl>). In the middle between these two categories is an undefined cluster, + containing 15 plays marked with <quote>Test</quote>. With regard to the plays + identified thus far as tragedies and comedies, 80% of these dramas were + correctly clustered; however, this result applies only if clusters are + identified by the majority of previously identified dramas.<note + type="footnote"> As an alternative, a normalized matrix was established and + a Ward.D2 clustering based on the Euclidian distance was carried out. 
The + results are clearer, since 4 tragedies as well as 14 comedies were + assigned to a non-mixed cluster. However, the remaining 11 tragedies and 1 comedy formed a mixed cluster, so that all in all only a purity of 60% + in the clustering was reached. See the R code in <ref type="bibliography" target="#lehmann_classifikation_2022">Lehmann 2022</ref>.</note> + </p> + <p>Taking into consideration the previously tested methods, it seems advisable to + focus on every term that carries meaning and thus leads to a differentiation + between the categories. The fourth method we tried was based on the tf-idf + statistics, thus underlying a measure of association commonly used in <term + type="dh">text mining</term>, whereby terms can be evaluated for their + significance within a document or body of work. With the tf-idf statistics, the + weight of each term per document is calculated; the <hi rend="italic">term + frequency</hi> (<hi rend="italic">tf</hi>) is multiplied by the <hi + rend="italic">inverse document frequency</hi> (<hi rend="italic">idf</hi>). + The latter depends not on individual documents, but rather on the total number + of all documents in the corpus. In this way, the tf-idf statistics considers + the relative significance of words which appear frequently in the corpus to + determine how relevant the term is for a document within the corpus under + study. Once more, the proper names are removed, the cosine similarity for the + vectors is calculated, the similarity matrix is converted into a distance + matrix and clustering is carried out with a Ward.D2 algorithm. The results are + depicted in a dendrogram.</p> + <figure> + <graphic xml:id="klassifikation_2022_004" + url=".../medien/klassifikation_2022_004.png"> + <desc> + <ref type="graphic" target="#abb4">Fig. 4</ref>: Ward.D2 clustering of + 112 Calderónian Comedias. Cosine similarity on the basis of tf-idf values. 
+ [Lehmann 2022] <ref type="graphic" target="#klassifikation_2022_004"/> + </desc> + </graphic> + </figure> + <p>This image shows three clusters: The first one to the left can best be + described as a comedy cluster. In addition to all of the 15 comedies, however, + it also contains 5 tragedies, exactly the same five ones as in the + part-of-speech based analysis conducted previously (T1, T2, T5, T6, T13), as + well as 20 other dramas. The cluster on the right, with 8 tragedies and 51 + further plays, can be considered a tragedy cluster. The smallest one in the + middle is hard to define, since it is only weakly determined and thus cannot be + understood as pure; it contains only 2 dramas clearly identified as tragedies + and eleven others of unknown classification. In comparison with the dramas + already identified as tragedies or comedies, this result shows that 8 of 15 + tragedies and, respectively, all comedies have been clustered correctly; this + correlates to a recognition rate of 76%.<note type="footnote"> Here, + alternatively, a Ward.D2 clustering was also carried out based on the + Euclidian distance. The result shows five clusters, three of which contain + four dramas labeled with <quote>Test</quote>. The remaining two clusters + consisted of one mixed cluster containing 15 comedies, 12 tragedies, and + 40 further plays; and another cluster containing 3 tragedies as well as + 38 further plays. These results confirm the unreliability of this approach + with respect to clustering.</note> Compared against the models considered + above, this recognition rate seems to be satisfactory. </p> + <p>The four methods explored here differ by the choice of data as well as by the + choice of distance or similarity metrics. Three of the four generated robust to + very good results. The process of employing the strongest matrix reduction + produced the best findings. 
However, only one approach yielded a clustering + result that would arguably approximate the classification of researchers + applying qualitative analyses.</p> + </div> + <div type="subchapter"> + <head>3.3 Experiment 2</head> + + <p>In a second experiment, we assess to what extent the document clusterings we + found in the first experiment were based on word choice or word use being + consistent with the two genres. To do so, we analyze the word lists upon which + the clusters found by the four methods were based. In addition, we calculate + the log-likelihood distribution over the vocabulary for the sets of (predicted) + comedies and tragedies of each method. This approach determines the 200 words + with the highest log-likelihood values for each genre, and these lists can be + compared across methods (contrastive vocabulary analysis with <hi rend="italic" + >word embeddings</hi>).</p> + <p>Recall that the first procedure in experiment 1 (Ward.D2 clustering based on + the Euclidian distance between normalized word frequencies) created a + clustering in which only the first and the fourth clusters could be clearly + assessed as comedy or, respectively, tragedy clusters. For both of these + clusters, the probability margin for each word is evaluated based on the + previously established matrix, and the 15 terms with the highest probability + margin for each were investigated. These 15 selected terms for both comedy and + tragedy clusters with the highest probability margins give an impression of the + cluster formation. 
For the comedy cluster, the terms <quote>don</quote>, + <quote>casa</quote>, <quote>calle</quote>, <quote>papel</quote>, + <quote>caballero</quote>, <quote>puerta</quote>, <quote>dama</quote>, + <quote>padre</quote>, <quote>hermano</quote>, <quote>saber</quote>, + <quote>cuarto</quote>, <quote>amigo</quote>, <quote>hombre</quote>, + <quote>sé</quote>, and <quote>señora</quote> (esquire, house, street, paper, + knight, door, lady, father, brother, knowledge, room, friend, man, I know, and + madam) appeared. Very interesting is the word <quote>papel</quote>, since it + points to the paper or card fanning the intrigue; however, beyond this term, + the word list does not seem to be significantly distinctive of comedies. By + contrast, for the tragedy cluster, the words <quote>rey</quote>, + <quote>muerte</quote>, <quote>dios</quote>, <quote>cielo</quote>, + <quote>hoy</quote>, <quote>vida</quote>, <quote>sol</quote>, + <quote>valor</quote>, <quote>mar</quote>, <quote>tierra</quote>, + <quote>gran</quote>, <quote>rigor</quote>, <quote>mundo</quote>, + <quote>quiero</quote>, and <quote>poder</quote> (king, death, God, heaven, + today, life, sun, value / valor, sea, earth, grand, severity, world, I want, + and power) were especially frequent. At any rate, people of high social + standing, death, God, valor and power stand out as being characteristic terms + relating to these storylines.</p> + <p>The 496 words selected for their sparsity of 20% enable a preview of terms + which carry a strong distinction with regard to the separation of comedies and + tragedies. 
For the comedy cluster, meaningful terms like <quote>don</quote>, + <quote>casa</quote>, <quote>dama</quote>, <quote>calle</quote>, + <quote>puerta</quote>, <quote>sé</quote>, <quote>señor</quote>, + <quote>caballero</quote>, <quote>bien</quote>, <quote>cuarto</quote>, + <quote>papel</quote>, <quote>señora</quote>, <quote>saber</quote>, + <quote>amigo</quote>, and <quote>celos</quote> (esquire, house, lady, + street, door, I know, lord, knight, good, room, paper, madam, knowledge, + friend, and zeal) are present. For the tragedy cluster, words like + <quote>rey</quote>, <quote>señor</quote>, <quote>dios</quote>, + <quote>hoy</quote>, <quote>muerte</quote>, <quote>cielo</quote>, + <quote>sol</quote>, <quote>quiero</quote>, <quote>rigor</quote>, + <quote>mundo</quote>, <quote>gran</quote>, <quote>valor</quote>, + <quote>alma</quote>, <quote>viento</quote>, and <quote>sangre</quote> (king, + lord, God, today, death, heaven, sun, I want, severity, world, grand, + value / valor, soul, wind, and blood) appear. At first glance, the high degree + of consistency of both lists of words from the first and second procedures may + come as a surprise. Then again, it appears that the high degree of purity in + the clustering of the second procedure quite obviously depends upon the + condensed and precise selection of distinct terms. 
</p> + <p>With regard to the third procedure – based upon a <hi rend="italic" + >part-of-speech </hi>tagged corpus – the most frequent words found in the + clusters in the underlying matrix illustrate why it does not lead to compelling + results: Not surprisingly, the most frequent words here are the verbs + <quote>ser</quote> (to be) and <quote>haber</quote> (to have), followed by a + list of much less frequent additional verbs, like <quote>ver</quote>, + <quote>decir</quote>, <quote>estar</quote>, <quote>dar</quote>, + <quote>poder</quote>, <quote>saber</quote>, <quote>hacer</quote>, + <quote>tener</quote>, <quote>ir</quote>, <quote>querer</quote>, + <quote>venir</quote> (seeing, saying, being, giving, being able, knowing, + doing, having, going, wanting, and coming). This is then followed by a list of + nouns, like <quote>señor</quote>, <quote>vida</quote>, <quote>cielo</quote> or + <quote>don</quote> (lord, life, heaven or esquire). In light of the fact + that these frequently used words seem to have little ability to distinguish + between comedies and tragedies, the results of the clustering can be described + as rather poor.</p> + <p>In the fourth procedure – based on the tf-idf matrix – an approach analogous to + methods 1 and 2 is applied. The 15 terms that show the highest probability + margin within the comedy cluster are: <quote>don</quote>, <quote>doña</quote>, + <quote>tapada</quote>, <quote>hermana</quote>, <quote>calle</quote>, + <quote>hermano</quote>, <quote>coche</quote>, <quote>amiga</quote>, + <quote>anoche</quote>, <quote>papel</quote>, <quote>cuarto</quote>, + <quote>aposento</quote>, <quote>reja</quote>, and <quote>casa</quote> + (esquire, lady, veil, sister, street, brother, carriage, friend, last night, + paper, room, chamber, grid, and house). 
In the tragedy cluster, terms such as + <quote>arma</quote>, <quote>dioses</quote>, <quote>cristianos</quote>, + <quote>templo</quote>, <quote>montes</quote>, <quote>cueva</quote>, + <quote>ciencias</quote>, <quote>cruz</quote>, <quote>muro</quote>, + <quote>reino</quote>, <quote>pastor</quote>, <quote>rey</quote>, + <quote>cristiano</quote>, <quote>cajas</quote>, and <quote>guerra</quote> + (arms, gods, Christians, temple, mountains, cave, sciences, cross, wall, + kingdom, pastor, king, Christian, crates, and war) are characteristic. While + the frequent terms selected for the comedy cluster seem, for the most part, to + be less discriminating, save for the typical allusions to veiling and masking + or intrigue through forgery, the terms relating to tragedy reflect, at least, + military and Christian themes as well as the aristocratic descent of the + protagonists.</p> + <p>An open question at this point is how robust these methods are. Thus, in the + next step, we test the word lists created in the steps above and base them on a + larger body of works. We extend our data basis to clusters, expand the body of + plays identified as comedies or tragedies and create two somewhat larger + subgroups. From the dramas hitherto marked as <quote>Test</quote>, we choose 16 + which were unanimously clustered as being ›tragedy‹ by all four procedures, as + well as ten which were unanimously clustered as ›comedy‹. For the comedies, we + corroborated this classification on the basis of secondary literature;<note + type="footnote"> Nearly all of these dramas fall in the category + <quote>Comedias cómicas</quote> described by <ref type="bibliography" target="#kroll_sonido_2022">Kroll 2022</ref>, pp. 64–65. 
+ However, there are two exceptions: In contrast to Kroll’s estimation, who + puts <bibl> + <title type="desc">No hay cosa como callar</title> + </bibl> into the category <quote>Tragedias y dramas de honor</quote>, we + classify this drama as comedy, since all the four employed methods were in + agreement. By comparison, we dismissed <bibl> + <title type="desc">Las manos blancas no ofenden</title> + </bibl> from the list of comedies, since the estimation of <ref type="bibliography" target="#prat_historia_1950">Valbuena Prat + 1950</ref>, who counts this play amongst <quote>obras exclusivamente + cómicas</quote> (p. 541), was not corroborated by the procedures applied + by us.</note> moreover, all of these dramas were included in the collection + of comedies by the editors of the Aguilar edition. In this manner, we generate + two new subgroups, one for tragedies, containing 31 plays, and one for + comedies, containing 25 plays.<note type="footnote"> Cf. for a comparative + method <ref type="bibliography" target="#peirsman_identification_2010">Peirsman et al. 2010</ref>.</note> Both of these subgroups are converted + into matrices using the prevalent preprocessing techniques, whereby all of the + terms found in less than four of the plays are filtered out. For the remaining + words, the 200 most informative for each subgroup are identified for inclusion, + using the log-likelihood function, with which discriminative terms can be + found. The comparison of the results for each subgroup shows that only 70 terms + appear in both lists, while 130 terms for each (almost exactly two-thirds) are + discriminative for either the tragedy or the comedy subgroup.</p> + <p>The analysis of these 130 discriminative terms for each subgroup proves to be + very revealing. 
In the case of the comedies, we discover references to certain + themes (<quote>ama</quote>, <quote>amiga</quote>, <quote>carta</quote>, + <quote>celoso</quote>, <quote>desdichas</quote>, <quote>desengaño</quote>, + <quote>escondido</quote>, <quote>favor</quote>, <quote>joyas</quote>, + <quote>juego</quote>, <quote>máscara</quote>, <quote>papeles</quote>, + <quote>secreto</quote>, <quote>tapada</quote>, <quote>vestido</quote> – + mistress, girlfriend, letter, jealous, misfortune, disappointment, veiled, + favor, jewelry, game, mask, papers, secret, hidden / stashed, disguise), + typical indications relating to the mythological background of the comedies + (<quote>astrólogo</quote>, <quote>duende</quote>, <quote>forastero</quote>, + <quote>jardines</quote>, <quote>ninfas</quote> – astrologer, elf / gnome, + foreigner, gardens, nymphs) and also the appearance of some rather surprising + terms (like <quote>enemigo</quote>, <quote>pendencia</quote>, + <quote>razón</quote> or <quote>saber</quote> – enemy, brawl, reason or + knowledge).</p> + <p>By contrast, among the tragedies we find references to the (mostly high) + standing of the characters (<quote>convento</quote>, <quote>corona</quote>, + <quote>emperador</quote>, <quote>esclavo</quote>, <quote>infanta</quote>, + <quote>infante</quote>, <quote>majestad</quote>, <quote>reina</quote>, + <quote>reinar</quote>, <quote>reino</quote>, <quote>rey</quote>, + <quote>tirano</quote>, <quote>villano</quote> – cloister, crown, emperor, + slave, infant, infanta, highness, queen, ruling, kingdom, king, tyrant or + villain), the contents of the plot (<quote>cristo</quote>, <quote>cruz</quote>, + <quote>desdichado</quote>, <quote>divina</quote>, <quote>esperanza</quote>, + <quote>gloria</quote>, <quote>laurel</quote>, <quote>lealtad</quote>, + <quote>libertad</quote>, <quote>morir</quote>, <quote>poder</quote>, + <quote>salud</quote>, <quote>sangre</quote>, <quote>traición</quote>, + <quote>triste</quote>, <quote>triunfo</quote>, 
<quote>venganza</quote>, + <quote>victoria</quote> – Christ, cross, misery, divine, hope, fame, laurel, + devotion, freedom, dying, power, health, blood, treason, sad, triumph or + revenge, victory) and a few surprises as well (<quote>ciencias</quote>, + <quote>enamorado</quote>, <quote>sueño</quote> – sciences, enamored or + dream). Altogether, the word lists determined by log-likelihoods in the two + subgroups outline the contents of the comedies and tragedies much more + precisely than the word lists based on each cluster.</p> + </div> + <div type="subchapter"> + <head>3.4 Experiment 3</head> + + <p>In our final experiment, we move beyond the analysis of documents in terms of + words, as in experiment 2, to an analysis of the usage of individual words + across the two genres. For this purpose, we used the embedding method + fastText<note type="footnote"> <ref type="bibliography" target="#bojanowski_word_2017">Bojanowski et al. 2017</ref>.</note> and the R + package of the same name. In each subgroup, the ten nearest neighbor terms of + interest are established, so that each word which was identified as pertaining + to both genres is visible, along with the terms found closest to it within the + text. In contrast to Skip-gram, fastText is more appropriate for smaller bodies + of text, as it does not compute an <hi rend="italic">embedding </hi>for each + word. Instead, <hi rend="italic">embeddings</hi> for parts of words are + calculated (for instance, for <quote>honor</quote>: <quote>hon</quote>, + <quote>ono</quote>, <quote>nor</quote>, etc.) and accumulated to create an + <hi rend="italic">embedding</hi> for the whole word. In this way, more + robust representations emerge for rarely used or unknown words.<note + type="footnote"> <ref type="bibliography" target="#papay_scenarios_2018">Papay et al. 
2018</ref>.</note> + </p> + <p>In order to contrast the terms in each subgroup, we will illustrate in the + following the ten nearest neighbor terms per subgroup together with the + similarities for each, whereby the maximum possible similarity is represented + by the number 1.</p> + <p>The keyword <quote>honor</quote>, which is found not only in comedies, but also + in tragedies, when assessed within the comedy subgroup, shows no common + neighboring terms in the tragedy subgroup, nor were they found for the word + <quote>hado</quote> (fate). In other words, both terms are used in comedies + and tragedies, but within completely different contexts according to each. It + becomes apparent that the terms ›honor‹ and ›fate‹ appearing in tragedies are + more clearly outlined within the context and the meaning of the terms more + precisely defined. For example, ›honor‹, within the context of the tragedy, + refers to the loss thereof, or defamation, for which the remedy is obviously + associated with possible death.</p> + <table> + <row> + <cell>Comedia</cell> + <cell>Tragedia</cell> + </row> + <row> + <cell cols="2">honor</cell> + </row> + <row> + <cell> + <list type="unordered"> + <item>pundonor 0.81 (honorability)</item> + <item>ofrecer 0.80 (offer)</item> + <item>lograr 0.79 (achieve)</item> + <item>honrar 0.79 (to honor)</item> + <item>obedecer 0.78 (obey)</item> + <item>menor 0.78 (minor)</item> + <item>reconocer 0.78 (acknowledge)</item> + <item>rencor 0.77 (grudge)</item> + <item>confesar 0.77 (confess)</item> + <item>ofender 0.77 (offend)</item> + </list> + </cell> + <cell> + <list type="unordered"> + <item>satisfación 0.81 (satisfaction)</item> + <item>sujeción 0.78 (subjection)</item> + <item>oración 0.77 (prayer)</item> + <item>rigor 0.76 (rigor)</item> + <item>maldición 0.76 (curse)</item> + <item>opinión 0.75 (opinion)</item> + <item>satisfecha 0.75 (satisfied)</item> + <item>satisfacción 0.75 (satisfaction)</item> + <item>honra 0.75 (honor)</item> +
<item>acción 0.75 (action)</item> + </list> + </cell> + </row> + <row> + <cell cols="2">hado (fate)</cell> + </row> + <row> + <cell> + <list type="unordered"> + <item>hallado 0.92 (found)</item> + <item>amado 0.91 (loved)</item> + <item>hablado 0.91 (spoken)</item> + <item>madrugado 0.90 (gotten up at dawn)</item> + <item>echado 0.90 (thrown)</item> + <item>mirado 0.89 (looked)</item> + <item>negado 0.89 (denied)</item> + <item>pecado 0.89 (sinned)</item> + <item>tocado 0.87 (touched)</item> + <item>enfadado 0.87 (angry)</item> + </list> + </cell> + <cell> + <list type="unordered"> + <item>estimado 0.92 (estimated)</item> + <item>librado 0.91 (liberated)</item> + <item>engañado 0.90 (enchanted)</item> + <item>sobrado 0.88 (surplus)</item> + <item>nombrado 0.88 (named)</item> + <item>tratado 0.88 (treated)</item> + <item>rendido 0.87 (surrendered)</item> + <item>desengañado 0.87 (disenchanted)</item> + <item>mostrado 0.87 (shown)</item> + <item>estrado 0.87 (stage)</item> + </list> + </cell> + </row> + <trailer xml:id="tab01"> + <ref type="intern" target="#tab1">Tab. 1</ref>: 10 nearest neighbor terms + for »honor« and »hado«. [Lehmann / Padó 2022] </trailer> + </table> + + <p>The many similar word endings in this table may be baffling at first glance, + but are hardly surprising: All of Calderón’s plays are written in verse. Through + this metric alone, the selection of possible neighboring words is drastically + limited.<note type="footnote"> An example from the tragedy <bibl> + <title type="desc">La gran Cenobia</title> + </bibl> + <hi rend="italic">,</hi> where <quote>honor</quote> rhymes with + <quote>rigor</quote>: <quote>[Libio:] Por verme con alto honor, / La + muerte á Abdenato di, / Mi misma sangre vendí, / A mi patria fui traidor.
+ / Llegó el rigor / A castigarme, y á ser / Mi verdugo osado y fuerte; / + Pues advierte, / ¿Qué tengo ya que perder, / Perdido el miedo á la + muerte?</quote> There are also examples of two words that rhyme within a + verse, such as in the comedy <bibl> + <title type="desc">Cuál es mayor perfección, hermosura o + discreción</title> + </bibl>, where <quote>honor</quote> rhymes with <quote>pundonor</quote>: + <quote>[Beatriz:] ¿Félix, restado su honor / y yo sabidora de ello / y no + tratar de enmendarlo? / Eso no; que por mi mesmo / pundonor debo + acudirle.</quote> + </note> To make things worse, the similar inflections and conjugations of the + Spanish language also left Calderón with a very narrow selection of possible + neighboring words when composing his dramatic works.</p> + <p>Other terms which were used in both subgroups also produce a similar pattern. + The words <quote>fineza</quote>, <quote>justicia</quote>, and + <quote>amistad</quote> (nicety, justice, friendship) yielded only one or two + common neighboring words within both subgroups (represented in bold type); + these terms are found in both comedies and tragedies alike, but within very + different contexts. 
While these three terms within the comedic context tend to + reflect the profane, their appearance in the tragic context reflects the formal + authority of the court and its jurisdiction as well as seriousness and the + realm of divine providence and justice.</p> + <table> + <row> + <cell>Comedia</cell> + <cell>Tragedia</cell> + </row> + <row> + <cell cols="2">fineza (nicety)</cell> + </row> + <row> + <cell> + <list type="unordered"> + <item><hi rend="bold">firmeza</hi> 0.84 (firmness)</item> + <item>fianza 0.81 (pledge)</item> + <item>importuna 0.81 (important)</item> + <item>fina 0.80 (fine)</item> + <item>impida 0.80 (impede)</item> + <item>implica 0.79 (implies)</item> + <item>naturaleza 0.79 (nature)</item> + <item>nobleza 0.78 (nobility)</item> + <item>templanza 0.78 (temperance)</item> + <item>belleza 0.77 (beauty)</item> + </list> + </cell> + <cell> + <list type="unordered"> + <item>fiereza 0.84 (fierceness)</item> + <item>gloria 0.78 (glory)</item> + <item>peregrina 0.77 (pilgrim)</item> + <item>indignación 0.77 (indignation)</item> + <item>insignia 0.77 (insignia)</item> + <item>ofrecí 0.76 (offered)</item> + <item>grandeza 0.76 (greatness)</item> + <item><hi rend="bold">firmeza</hi> 0.75 (firmness)</item> + <item>imperial 0.75 (imperial)</item> + <item>ignorancia 0.75 (ignorance)</item> + </list> + </cell> + </row> + <row> + <cell cols="2">justicia (justice)</cell> + </row> + <row> + <cell> + <list type="unordered"> + <item><hi rend="bold">justa</hi> 0.83 (just)</item> + <item>hidalga 0.78 (noble)</item> + <item>acompañada 0.77 (accompanied)</item> + <item><hi rend="bold">malicia</hi> 0.77 (malice)</item> + <item>salida 0.76 (departure)</item> + <item>diligencia 0.75 (diligence)</item> + <item>hidalguía 0.75 (nobility)</item> + <item>historia 0.75 (history)</item> + <item>dispensación 0.75 (dispensation)</item> + <item>traición 0.75 (treason)</item> + </list> + </cell> + <cell> + <list type="unordered"> + <item><hi rend="bold">justa</hi> 0.83
(just)</item> + <item>justiciero 0.82 (avenging)</item> + <item>licencia 0.80 (licence)</item> + <item>precia 0.79 (precious)</item> + <item>milicia 0.79 (militia)</item> + <item><hi rend="bold">malicia</hi> 0.78 (malice)</item> + <item>usted 0.77 (you)</item> + <item>gusta 0.77 (like)</item> + <item>estudiar 0.77 (study)</item> + <item>condición 0.76 (condition)</item> + </list> + </cell> + </row> + <row> + <cell cols="2">amistad (friendship)</cell> + </row> + <row> + <cell> + <list type="unordered"> + <item>dad 0.85 (giving)</item> + <item>vanidad 0.83 (vanity)</item> + <item><hi rend="bold">mitad</hi> 0.83 (half)</item> + <item>debéis 0.83 (owe)</item> + <item>decid 0.81 (decide)</item> + <item><hi rend="bold">calidad</hi> 0.81 (quality)</item> + <item>mirad 0.80 (look)</item> + <item>libertad 0.80 (freedom)</item> + <item>perdonad 0.79 (forgive)</item> + <item>podáis 0.79 (can)</item> + </list> + </cell> + <cell> + <list type="unordered"> + <item>acudid 0.82 (attend)</item> + <item><hi rend="bold">calidad</hi> 0.82 (quality)</item> + <item>ofrezca 0.81 (offer)</item> + <item>seguridad 0.81 (safety)</item> + <item>fealdad 0.77 (ugliness)</item> + <item>temeridad 0.77 (recklessness)</item> + <item><hi rend="bold">mitad</hi> 0.77 (half)</item> + <item>sacad 0.76 (pull)</item> + <item>firmeza 0.76 (firmness)</item> + <item>salid 0.76 (get out)</item> + </list> + </cell> + </row> + <trailer xml:id="tab02"> + <ref type="intern" target="#tab2">Tab. 2</ref>: 10 nearest neighbor terms + for »fineza«, »justicia« and »amistad«. 
[Lehmann / Padó 2022] </trailer> + </table> + + <p>However, other terms clearly show overlaps with regards to the nearest neighbor + terms; for instance, <quote>celos</quote>, <quote>gusto</quote> or + <quote>muera</quote> (zeal / jealousy, taste, he / she / it dies) each share + three or four nearest neighbor terms within the ten words in the selection.</p> + <table> + <row> + <cell>Comedia</cell> + <cell>Tragedia</cell> + </row> + <row> + <cell cols="2">celos (zeal, jealousy)</cell> + </row> + <row> + <cell> + <list type="unordered"> + <item><hi rend="bold">celosos</hi> 0.91 (jealous)</item> + <item><hi rend="bold">recelos</hi> 0.90 (suspicions)</item> + <item>duelos 0.89 (duel)</item> + <item><hi rend="bold">cielos</hi> 0.85 (heavens)</item> + <item>puestos 0.84 (posts)</item> + <item>palos 0.83 (sticks)</item> + <item>dellos 0.83 (from them)</item> + <item>desconsuelos 0.82 (hopelessness)</item> + <item>opuestos 0.82 (opposites)</item> + <item>laberintos 0.82 (mazes)</item> + </list> + </cell> + <cell> + <list type="unordered"> + <item>consuelos 0.91 (consolations)</item> + <item><hi rend="bold">recelos</hi> 0.91 (suspicions)</item> + <item><hi rend="bold">celosos</hi> 0.90 (jealous)</item> + <item>antojos 0.89 (cravings)</item> + <item>pueblos 0.89 (villages)</item> + <item>regalos 0.88 (gifts)</item> + <item>demos 0.88 (we give)</item> + <item><hi rend="bold">cielos</hi> 0.87 (heavens)</item> + <item>caballos 0.87 (horses)</item> + <item>verlos 0.87 (see them)</item> + </list> + </cell> + </row> + <row> + <cell cols="2">gusto (taste)</cell> + </row> + <row> + <cell> + <list type="unordered"> + <item>admito 0.87 (admitted)</item> + <item>visto 0.86 (seen)</item> + <item>susto 0.86 (scare)</item> + <item><hi rend="bold">justo</hi> 0.84 (just)</item> + <item>gasto 0.84 (expense)</item> + <item><hi rend="bold">disgusto</hi> 0.84 (disgust)</item> + <item>pedido 0.83 (order)</item> + <item>considero 0.82 (consider)</item> + <item>adentro 0.82 (in)</item> + 
<item>pecado 0.82 (sin)</item> + </list> + </cell> + <cell> + <list type="unordered"> + <item><hi rend="bold">justo</hi> 0.87 (just)</item> + <item>desprecio 0.85 (contempt)</item> + <item>precio 0.84 (prize)</item> + <item>justiciero 0.84 (righteousness)</item> + <item><hi rend="bold">disgusto</hi> 0.83 (displeasure)</item> + <item>precepto 0.82 (precept)</item> + <item>preciso 0.82 (precise)</item> + <item>profano 0.82 (profane)</item> + <item>favorecido 0.82 (favored)</item> + <item>convencido 0.82 (convinced)</item> + </list> + </cell> + </row> + <row> + <cell cols="2">muera (he / she / it dies)</cell> + </row> + <row> + <cell> + <list type="unordered"> + <item><hi rend="bold">muriera</hi> 0.89 (dying)</item> + <item><hi rend="bold">muerta</hi> 0.89 (dead)</item> + <item>defuera 0.85 (outside)</item> + <item><hi rend="bold">muralla</hi> 0.85 (wall)</item> + <item>muestra 0.84 (sample)</item> + <item><hi rend="bold">manera</hi> 0.83 (way)</item> + <item>mira 0.82 (look)</item> + <item>enferma 0.81 (sick)</item> + <item>dondequiera 0.81 (anywhere)</item> + <item>cólera 0.81 (anger)</item> + </list> + </cell> + <cell> + <list type="unordered"> + <item>viviera 0.94 (living)</item> + <item><hi rend="bold">muriera</hi> 0.94 (dying)</item> + <item><hi rend="bold">muerta</hi> 0.92 (dead)</item> + <item><hi rend="bold">muralla</hi> 0.91 (wall)</item> + <item>diera 0.90 (giving)</item> + <item>madera 0.90 (wood)</item> + <item><hi rend="bold">manera</hi> 0.90 (way)</item> + <item>viera 0.90 (watching)</item> + <item>hermosura 0.89 (beauty)</item> + <item>matara 0.89 (kill)</item> + </list> + </cell> + </row> + <trailer xml:id="tab03"> + <ref type="intern" target="#tab3">Tab. 3</ref>: 10 nearest neighbor terms + for »celos«, »gusto« and »muera«. 
[Lehmann / Padó 2022] </trailer> + </table> + + <p>This analysis illustrates that the differences between tragedies and comedies + do not merely consist of different vocabularies, but rather, that even shared + vocabularies are substantially <hi rend="italic">used in a different way</hi>. + The more central for the genre, the more distinguishable the usage – at least, + this is the tendency our results have shown so far.</p> + </div> + </div> + <div type="chapter"> + <head>4. Discussion of the Results and Outlook</head> + + <p>The comparison of the methods shows that with two of them – clustering of dramas + on the basis of verbs, nouns, and adjectives and clustering on the basis of tf-idf + values – results can be reached that approximate expert judgments. Both methods + are considered standard procedures in text mining. In order + for the clustering to reach a purity of 70% and beyond, however, comprehensive + filtering was needed, extending beyond the usual punctuation and stop words to + further function words, proper nouns and their adjectivized forms. A part of the + latter can only be manually assembled for each corpus under study, which requires + considerable effort. A rather good purity of the clustering can be reached fairly + fast by conducting a massive reduction of the output matrix to a sparsity of 20%, + thus considering only terms which appear in at least 80% of all of the documents. </p> + <p>The preliminary observations of this study considering the comparison of the four + explored methods permit us to identify further dramas of each category (sixteen + tragedies and ten comedies) which could be regarded, with a high probability, as + being either tragedies or comedies. They also point to characteristic mixtures of + the vocabularies in use as well as to contradictory results. 
This particularly + concerns comedic passages in the dramas – even when they appear within a tragedy – + but also any terms that reflect themes that are typical for comedies or tragedies, + extra-literary attributes or plot characteristics. </p> + <p>One particular example would be <bibl> + <title type="desc">Amor, honor y poder</title> + </bibl>, a title unknown to the authors in this study before the analysis began. + Though it is commonly classified as a comedy because of its happy ending, the + intrigue deals with unhappy relations between two pairs of characters and is + therefore dominated by a semantics typical of tragedies. While the methods + employed in this study all classify this drama as a tragedy, another exception is + formed by <bibl> + <title type="desc">No hay cosa como callar</title> + </bibl>. Again, all the four procedures classify this drama unanimously, in this + case as a comedy, and so does the Aguilar edition. The judgments of qualitative + research, however, are more divided: While Alexander A. Parker classified it in + 1962 as a tragedy, he later revised his judgment and described it as a + <quote>comedy of intrigue</quote>, and Simon Kroll puts it into the section + <quote>Tragedias y dramas de honor</quote>.<note type="footnote"> Cf. + <ref type="bibliography" target="#parker_definition_1962">Parker + 1962</ref>, p. 228; + <ref type="bibliography" target="#parker_mind_1988">Parker 1988</ref>, pp. + 181–182; <ref type="bibliography" target="#kroll_sonido_2022">Kroll 2022</ref>, p. 
63.</note> Certainly, + the analysis conducted here will inspire further debates, since such variations in + the classification of a drama may be resolved by a differentiated examination: The + vocabulary in <bibl> + <title type="desc">No hay cosa como callar</title> + </bibl> may be one typical for comedies, but the plot as well as other qualitative + criteria might support its classification as a tragedy.</p> + <p>Also interesting is the insight that the Calderónian tragedies, obviously because + of the way the words are used within the text, are much more reliably identifiable + than the comedies. This is underlined by the way in which all of the four applied + methods identified the group of so-called <term type="dh">comedias + religiosas</term>: <bibl> + <title type="desc">El José de las mujeres, El purgatorio de san Patricio, Judas + Macabeo, La cisma de Ingalaterra, La exaltación de la cruz, La sibila del + Oriente y gran reina de Sabá, Las cadenas del demonio, Los dos amantes del + cielo, and Origen, pérdida y restauración de la Virgen del Sagrario</title> + </bibl>. All these dramas are consistently marked by the use of a tragic + vocabulary. On the other hand and with regard to the comedies, it is quite obvious + that they are much harder to define than tragedies. This is true, for example, + with respect to a group of comedies which are frequently regarded as <hi + rend="italic">comedias mitológicas</hi>. 
The mythological plays <bibl> + <title type="desc">El castillo de Lindabridis, El mayor encanto amor, La puente + de Mantible</title> + </bibl>, and <bibl> + <title type="desc">Los tres mayores prodigios</title> + </bibl> exhibit very strong tragedy signals in our analysis, whereas most other + dramas classified as <hi rend="italic">comedias mitológicas</hi> exhibit mixed + signals.<note type="footnote"> For the assessments of these works as <hi + rend="italic">comedias mitológicas</hi>, see <ref type="bibliography" target="#kroll_sonido_2022">Kroll 2022</ref>; + <ref type="bibliography" target="#castro_alquimia_2001">Castro de Moux + 2001</ref>; <ref type="bibliography" target="#greer_power_1988">Greer 1988</ref>; + <ref type="bibliography" target="#cancelliere_teatro_2000">Cancelliere 2000</ref>; + <ref type="bibliography" target="#arellano_teatro_2000">Arellano 2000</ref>; + <ref type="bibliography" target="#pena_teatro_2011">Peña-Pimentel 2011</ref>. </note> + </p> + <p>Certainly, with regard to dramas stipulated on the basis of our analysis which, up + to now, have received very little attention, the binary separation of <hi + rend="italic">dramas</hi> and <hi rend="italic">comedias</hi> previously + conducted by the publishers of the Aguilar edition must be viewed with a critical + eye. A good example for this is provided by <bibl> + <title type="desc">Amar después de la muerte</title> + </bibl>, which stood out through the use of tragic vocabulary as identified by the + most precise clustering approach (method 2). This classification was verified by + the historical-critical edition presented by Jorge Checa.<note type="footnote"> + <ref type="bibliography" target="#checa_calderon_2010">Checa (Ed.) 
2010</ref>.</note> Since Checa, in the preface of his analysis, discusses a + series of criteria regarding the designation of tragedies according to Parker and + Sullivan, this insight presents an invitation to qualitatively working researchers + to work systematically and to consistently implement these established criteria + for classification on an entire sequence of plays. The status of the group of + dramas called <hi rend="italic">comedias mitológicas</hi> – as with those + recognized by Parker and Sullivan as being <quote>on the brink of tragedy</quote><note type="footnote"> Cf. <ref type="bibliography" target="#parker_mind_1988">Parker 1988</ref>, pp. 58, 181, 182; + <ref type="bibliography" target="#sullivan_drama_2018">Sullivan 2018</ref>, pp. 70, + 316, 321.</note> – should therefore be discussed anew with regard to their + designated categories and the vocabularies used. The same is true concerning the + scarcely examined group of dramas which can be classified as + <quote>tragicomedias</quote>. The intermediate area found between comedies and + tragedies throughout these methods points to this in an emphatic way. In the sense + of the digital humanities, this conclusion represents an + invitation to qualitative researchers to take a deeper look at the texts they have + already examined and to create lists of characteristic words for each category to + be distinguished. </p> + <p>The approach performed through distributional semantics contributes only one + factor among others – albeit an arguably important one – to the classification of + plays, in particular when, as is the case here, lexical and semantic analyses go + hand in hand. This is especially relevant in view of the large number of works + which have yet been only scarcely researched or not at all. 
The systematic + comparison of various methods, as carried out here, presents the opportunity to + better evaluate the results of heterogeneous corpora (plays by various playwrights + or from different centuries). The implementation of these tested procedures on, + for example, all available dramas in the <hi rend="italic">siglo de oro, + </hi>would provide a broader basis for the achieved results upon which + characteristic lexica for comedies and tragedies could be identified. Precisely, + however, the example of Calderón with his 112 <hi rend="italic">comedias + nuevas</hi> illustrates that the methods explored here provide qualitative + researchers with information, which may stimulate further analyses. Potentially, + the current undertakings aiming at the presentation of all of the Calderónian + dramas as historical-critical editions<note type="footnote"> A critical new + edition of the complete body of <hi rend="italic">comedias </hi>is in progress + under the direction of Ignacio Arellano within the series <bibl> + <title type="desc">Biblioteca Aurea hispánica</title> + </bibl> from the Vervuert publishing house. Currently, however, only 21 titles + have been published. This editing project can be seen as the most reliable + textual basis; the editing principles are clarified in <ref type="bibliography" target="#arellano_editar_2007">Arellano 2007</ref>. + Additionally, the <bibl> + <title type="desc">Partes de las comedias</title> + </bibl>, which appeared during Calderón’s lifetime, are available in a modern + edition in six volumes through the Madrid-based publisher Fundación José + Antonio de Castro, newly edited under the direction of Luis Iglesias + Feijo.</note> can take up the findings presented in this study.</p> + </div> + <div type="chapter"> + <head>5. 
Appendix</head> + + <p>Abbreviations: T = Tragedy, C = Comedy, M = Mixed Cluster, U = Undefined + Cluster</p> + <table> + <row> + <cell>Brief description and name of drama</cell> + <cell>Euklid Ward.D2</cell> + <cell>Euklid Ward Sparse20</cell> + <cell>POS Cosine</cell> + <cell>tf-idf Cosine</cell> + </row> + <row> + <cell>T1-A secreto agravio</cell> + <cell>M</cell> + <cell>T</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>T2-El alcalde de Zalamea</cell> + <cell>M</cell> + <cell>T</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>T3-El mágico prodigioso</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>T4-El mayor monstruo del mundo</cell> + <cell>T</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>T5-El médico de su honra</cell> + <cell>M</cell> + <cell>T</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>T6-El pintor de su deshonra</cell> + <cell>M</cell> + <cell>T</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>T7-El príncipe constante</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>T8-La devoción de la Cruz</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>T9-La hija del aire. Primera parte</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>T10-La hija del aire.
Segunda parte</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>T11-La vida es sueño</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>T12-La gran Cenobia</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>U</cell> + </row> + <row> + <cell>T13-Las tres justicias en una</cell> + <cell>M</cell> + <cell>T</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>T14-Los cabellos de Absalon</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>T15-Saber del bien y del mal</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>U</cell> + </row> + <row> + <cell>C1-Casa con dos puertas mala es de guardar</cell> + <cell>M</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>C2-También hay duelo en las damas</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>C3-El encanto sin encanto</cell> + <cell>M</cell> + <cell>C</cell> + <cell>T</cell> + <cell>C</cell> + </row> + <row> + <cell>C4-Fuego de dios en el querer bien</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>C5-El astrólogo fingido</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>C6-El maestro de danzar</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>C7-La dama duende</cell> + <cell>M</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>C8-Los empeños de un acaso</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>C9-Mejor está que estaba</cell> + <cell>M</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>C10-Peor está que estaba</cell> + <cell>M</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + 
</row> + <row> + <cell>C11-Primero soy yo</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>C12-Mañanas de abril y mayo </cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>C13-Antes que todo es mi dama</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>C14-No siempre lo peor es cierto</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>C15-Dicha y desdicha del nombre</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>Test1-Afectos de odio y amor</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test2-El galan fantasma</cell> + <cell>M</cell> + <cell>C</cell> + <cell>T</cell> + <cell>U</cell> + </row> + <row> + <cell>Test3-Las fortunas de Androméda y Perseo</cell> + <cell>M</cell> + <cell>M</cell> + <cell>U</cell> + <cell>T</cell> + </row> + <row> + <cell>Test4-Los dos amantes del cielo (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test5-Amor, honor y poder (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test6-La cisma de Ingalaterra (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test7-En esta vida todo es verdad y todo mentira</cell> + <cell>M</cell> + <cell>M</cell> + <cell>U</cell> + <cell>T</cell> + </row> + <row> + <cell>Test8-La aurora en Copacabana</cell> + <cell>M</cell> + <cell>M</cell> + <cell>U</cell> + <cell>T</cell> + </row> + <row> + <cell>Test9-Las cadenas del demonio (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test10-Amado y aborrecido</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + 
<cell>Test11-Amar después de la muerte o el Tuzaní de la Alpujarra</cell> + <cell>M</cell> + <cell>T</cell> + <cell>C</cell> + <cell>T</cell> + </row> + <row> + <cell>Test12-Las armas de la hermosura</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test13-Celos, aun del aire, matan</cell> + <cell>M</cell> + <cell>M</cell> + <cell>U</cell> + <cell>T</cell> + </row> + <row> + <cell>Test14-Darlo todo y no dar nada</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test15-Eco y Narciso</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test16-Fieras afemina amor</cell> + <cell>M</cell> + <cell>M</cell> + <cell>U</cell> + <cell>T</cell> + </row> + <row> + <cell>Test17-Luis Pérez el Gallego</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + <cell>C</cell> + </row> + <row> + <cell>Test18-El mayor encanto, amor (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test19-La púrpura de la rosa</cell> + <cell>M</cell> + <cell>M</cell> + <cell>U</cell> + <cell>T</cell> + </row> + <row> + <cell>Test20-El sitio de Breda</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test21-Nadie fíe su secreto</cell> + <cell>M</cell> + <cell>T</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>Test22-No hay burlas con el amor (C)</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>Test23-El escondido y la tapada (C)</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>Test24-No hay cosa como callar (C)</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>Test25-Las Manos Blancas No Ofenden</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>C</cell> + </row> + <row> +
<cell>Test26-Con quien vengo, vengo (C)</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>Test27-Céfalo y Pocris (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test28-La puente de Mantible (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test29-El castillo de Lindabridis (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test30-El monstruo de los jardines</cell> + <cell>M</cell> + <cell>M</cell> + <cell>U</cell> + <cell>T</cell> + </row> + <row> + <cell>Test31-La fiera el rayo y la piedra</cell> + <cell>M</cell> + <cell>M</cell> + <cell>U</cell> + <cell>T</cell> + </row> + <row> + <cell>Test32-Para vencer a amor, querer vencerle</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test33-Lances de amor y fortuna</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>U</cell> + </row> + <row> + <cell>Test34-Hombre pobre todo es trazas (C)</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>Test35-Judas Macabeo (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test36-El alcaide de sí mismo (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>C</cell> + </row> + <row> + <cell>Test37-El purgatorio de san Patricio (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test38-La banda y la flor</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + <cell>U</cell> + </row> + <row> + <cell>Test39-Un castigo en tres venganzas</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + <cell>U</cell> + </row> + <row> + <cell>Test40-Bien vengas mal</cell> + <cell>C</cell> + <cell>C</cell> + <cell>T</cell> + <cell>C</cell> + </row> + <row>
+ <cell>Test41-Mañana será otro día (C)</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>Test42-La sibila del Oriente y gran reina de Sabá (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test43-Argenis y Poliarco (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test44-El jardin de Falerina</cell> + <cell>M</cell> + <cell>M</cell> + <cell>C</cell> + <cell>T</cell> + </row> + <row> + <cell>Test45-Los tres mayores prodigios (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test46-Origen, pérdida y restauración de la Virgen del Sagrario + (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test47-La desdicha de la voz (C)</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>Test48-El secreto a voces</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + <cell>U</cell> + </row> + <row> + <cell>Test49-El Faetonte</cell> + <cell>M</cell> + <cell>M</cell> + <cell>C</cell> + <cell>T</cell> + </row> + <row> + <cell>Test50-La exaltación de la cruz (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test51-El agua mansa (C)</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>Test52-La niña de Gómez Arias</cell> + <cell>M</cell> + <cell>T</cell> + <cell>C</cell> + <cell>T</cell> + </row> + <row> + <cell>Test53-Los hijos de la fortuna, Teágenes y Cariclea</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test54-Agradecer y no amar</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>C</cell> + </row> + <row> + <cell>Test55-Amigo amante y leal</cell> + <cell>M</cell> + <cell>C</cell> + <cell>C</cell> +
<cell>C</cell> + </row> + <row> + <cell>Test56-El golfo de las sirenas</cell> + <cell>M</cell> + <cell>M</cell> + <cell>U</cell> + <cell>T</cell> + </row> + <row> + <cell>Test57-Gustos y disgustos son no más que imaginación</cell> + <cell>M</cell> + <cell>T</cell> + <cell>C</cell> + <cell>U</cell> + </row> + <row> + <cell>Test58-El acaso y el error</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + <cell>U</cell> + </row> + <row> + <cell>Test59-El José de las mujeres (T)</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test60-Los tres afectos de amor piedad desmayo y valor</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test61-Cada uno para sí</cell> + <cell>C</cell> + <cell>C</cell> + <cell>U</cell> + <cell>C</cell> + </row> + <row> + <cell>Test62-El conde Lucanor</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>U</cell> + </row> + <row> + <cell>Test63-Dar tiempo al tiempo (C)</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>Test64-Mujer, llora y vencerás</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test65-Cuál es mayor perfección, hermosura o discreción (C)</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>Test66-El laurel de Apolo</cell> + <cell>M</cell> + <cell>M</cell> + <cell>U</cell> + <cell>T</cell> + </row> + <row> + <cell>Test67-Ni amor se libra de amor</cell> + <cell>M</cell> + <cell>M</cell> + <cell>U</cell> + <cell>T</cell> + </row> + <row> + <cell>Test68-El mayor monstruo los celos</cell> + <cell>T</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test69-El postrer duelo de españa</cell> + <cell>M</cell> + <cell>C</cell> + <cell>C</cell> + <cell>C</cell> + </row> + <row> + <cell>Test70-El gran príncipe de Fez</cell> + <cell>M</cell> + 
<cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test71-Fineza contra fineza</cell> + <cell>M</cell> + <cell>M</cell> + <cell>U</cell> + <cell>T</cell> + </row> + <row> + <cell>Test72-El segundo Scipión</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test73-La señora y la criada</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + <cell>U</cell> + </row> + <row> + <cell>Test74-Basta callar</cell> + <cell>M</cell> + <cell>C</cell> + <cell>T</cell> + <cell>U</cell> + </row> + <row> + <cell>Test75-De una causa dos efectos</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test76-Hado y divisa de Leonido y Marfisa</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test77-La estatua de Prometeo</cell> + <cell>M</cell> + <cell>M</cell> + <cell>U</cell> + <cell>T</cell> + </row> + <row> + <cell>Test78-Apolo y Climene</cell> + <cell>M</cell> + <cell>M</cell> + <cell>U</cell> + <cell>T</cell> + </row> + <row> + <cell>Test79-Duelos de amor y lealtad</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test80-Auristela y Lisidante</cell> + <cell>M</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + </row> + <row> + <cell>Test81-Cómo se comunican dos estrellas contrarias</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + <cell>C</cell> + </row> + <row> + <cell>Test82-La selva confusa</cell> + <cell>M</cell> + <cell>T</cell> + <cell>T</cell> + <cell>U</cell> + </row> + </table> + </div> + </div> + <div type="bibliography"> + <head>Bibliography</head> + <listBibl> + <bibl xml:id="arellano_teatro_2000">Ignacio Arellano: El Teatro de Corte y Calderón. In: Atti della Tavola Rotonda sulla + Singolarità Storica e Estetica di »La púrpura de la rosa« di Calderón de la Barca. + Ed. by María Luisa Tobar. Messina 2000, pp. 31–53. 
<ptr type="gbv" cRef="345272080"/></bibl> + <bibl xml:id="arellano_editar_2007">Ignacio Arellano: Editar a Calderón. Hacia una edición crítica de las comedias + completas. Frankfurt / Main 2007. (= Comedias completas de Calderón, 5) <ptr type="gbv" cRef="538319577"/></bibl> + <bibl xml:id="arellano_dramaticos_2018">Ignacio Arellano: Calderón y los géneros dramáticos, con otras cuestiones anejas. + Honor, amor, legitimación política y autoridad de las taxonomías. In: Rilce. Revista + de Filología Hispánica 34 (2018), pp. 100–126. DOI: <ref + target="https://doi.org/10.15581/008.34.1.100-26">10.15581/008.34.1.100-26</ref> <ptr type="gbv" cRef="171309456"/></bibl> + <bibl xml:id="benjamin_ursprung_1978">Walter Benjamin: Ursprung des deutschen Trauerspiels. Frankfurt / Main 1978. (= + Suhrkamp-Taschenbuch Wissenschaft, 225) <ptr type="gbv" cRef="011350512"/></bibl> + <bibl xml:id="bojanowski_word_2017">Piotr Bojanowski / Edouard Grave / Armand Joulin / Tomas Mikolov: Enriching Word + Vectors with Subword Information. In: Transactions of the Association for + Computational Linguistics 5 (2017), pp. 135–146. PDF. [<ref + target="https://aclanthology.org/Q17-1010.pdf">online</ref>] </bibl> + <bibl xml:id="bullinaria_representation_2007">John Andrew Bullinaria / Joseph P. Levy: Extracting Semantic Representations from Word Co-occurrence Statistics. A Computational Study. In: Behavior Research Methods + 39 (2007), pp. 510–526. DOI: <ref target="https://doi.org/10.3758/BF03193020" + >10.3758/BF03193020</ref> <ptr type="gbv" cRef="129578975"/></bibl> + <bibl xml:id="calderon_obras_1951">Pedro Calderón de la Barca: Obras completas. Textos íntegros según las primeras + ediciones y los manuscritos autógrafos. Ed. by Ángel Valbuena Briones / Luis Astrana + Marín. 3 vols. Madrid 1951–1956. <ptr type="gbv" cRef="125347006"/></bibl> + <bibl xml:id="calderon_comedias_2007">Pedro Calderón de la Barca: Comedias y otras obras. Madrid 2007–2010. 
<ptr type="gbv" cRef="547883641"/></bibl> + <bibl xml:id="campion_original_2021">Miguel Campión Larumbe / Álvaro Cuéllar: Discernir entre original y refundición en el teatro del Siglo de Oro a través de la estilometría. El caso de El mejor amigo, el + muerto. In: Talía. Revista de estudios teatrales 3 (2021), pp. 59–69. DOI: <ref + target="https://doi.org/10.5209/tret.74021">10.5209/tret.74021</ref> + </bibl> + <bibl xml:id="cancelliere_teatro_2000">Enrica Cancelliere: Calderón e il Teatro di Corte. In: Atti della Tavola Rotonda + sulla Singolarità Storica e Estetica di »La púrpura de la rosa« di Calderón de la + Barca. Ed. by María Luisa Tobar. Messina 2000, pp. 55–76. <ptr type="gbv" cRef="345272080"/></bibl> + <bibl xml:id="castro_alquimia_2001">María Esther Castro de Moux: Alquimia y gnosticismo en Fortunas de Andrómeda y Perseo + de Calderón. In: Actas del V Congreso Internacional. Ed. by Christoph Strosetzki. + (Asociación Internacional Siglo de Oro (AISO), Münster, 20.–24.07.1999) Frankfurt / + Main 2001, pp. 319–330. <ptr type="gbv" cRef="337332312"/></bibl> + <bibl xml:id="checa_calderon_2010">Jorge Checa (Ed.): Pedro Calderón de la Barca: Amar después de la muerte. Edición y + estudio. Kassel 2010. (= Teatro del Siglo de Oro / Ediciones críticas, 167) + <ptr type="gbv" cRef="623997010"/></bibl> + <bibl xml:id="coenen_selva_2016">Erik Coenen: »La selva confusa« y »Cómo se comunican dos estrellas contrarias«: + comedias gemelas. In: Revista de filología española 96 (2016), pp. 61–80. DOI: <ref + target="https://doi.org/10.3989/rfe.2016.03">10.3989/rfe.2016.03</ref> + </bibl> + <bibl xml:id="couderc_theatre_2012">Christophe Couderc: Le théâtre tragique au Siècle d’or. Cristóbal de Virués, Lope de + Vega, Calderón de la Barca. Paris 2012. <ptr type="gbv" cRef="1601095228"/></bibl> + <bibl xml:id="cuellar_stylometry_2022">Álvaro Cuéllar: Stylometry and Spanish Golden Age Theatre: An Evaluation of + Authorship Attribution in a Control Group of Undisputed Plays. 
In: Digital + Stylistics in Romance Studies and Beyond. Ed. by Christof Schöch / José Calvo Tello / + Ulrike Henny-Krahmer / Robert Hesselbach / Daniel Schlör. [Forthcoming]</bibl> + <bibl xml:id="ehrlicher_einfuehrung_2012">Hanno Ehrlicher: Einführung in die spanische Literatur und Kultur des Siglo de Oro. + Berlin 2012. <ptr type="gbv" cRef="715983598"/></bibl> + <bibl xml:id="ehrlicher_poetica_2020">Hanno Ehrlicher / Jörg Lehmann / Nils Reiter / Marcus Willand: La poética dramática + desde una perspectiva cuantitativa: la obra de Calderón de la Barca. In: Revista de + Humanidades Digitales 5 (2020), pp. 1–25. DOI: <ref + target="https://doi.org/10.5944/rhd.vol.5.2020.27716" + >10.5944/rhd.vol.5.2020.27716</ref> + </bibl> + <bibl xml:id="escudero_amor_2021">Juan Manuel Escudero Baztán: Amor, honor y poder o el universo dramático de Calderón. + Madrid et al. 2021. (= Comedias completas de Calderón, 24) <ptr type="gbv" cRef="1749191083"/></bibl> + <bibl xml:id="greer_power_1988">Margaret Rich Greer: The Play of Power: Calderón’s »Fieras afemina amor« and »La + estatua de Prometeo«. In: Hispanic Review 56 (1988), issue 3, pp. 319–341. <ptr type="gbv" cRef="129509124"/></bibl> + <bibl xml:id="jockers_macroanalysis_2013">Matthew Jockers: Macroanalysis. Digital Methods &amp; Literary History. Urbana, IL + et al. 2013. <ptr type="gbv" cRef="726039551"/></bibl> + <bibl xml:id="kroll_sonido_2022">Simon Kroll: Sonido y afecto en Calderón. Un estudio de las asonancias. Kassel 2022. + <ptr type="gbv" cRef="1804113506"/></bibl> + <bibl xml:id="lehmann_classifikation_2022">Jörg Lehmann: Classification of Tragedies and Comedies in Calderón de la Barca’s + Comedias Nuevas [Data set]. In: zenodo.org. Version 1 from 20.06.2022. DOI: <ref + target="https://doi.org/10.5281/zenodo.6669603">10.5281/zenodo.6669603</ref> + </bibl> + <bibl xml:id="vega_arte_2010">Félix Lope de Vega: Arte nuevo de hacer comedias en este tiempo. Dirigido a la + Academia de Madrid. Madrid 1621 [1609]. 
In: books.google.de. Original from la + Biblioteca de Catalunya, digitized on 31.03.2010. [<ref + target="https://books.google.de/books?id=Ihh5oI6I4TsC">online</ref>] </bibl> + <bibl xml:id="lowe_theory_2001">Will Lowe: Towards a Theory of Semantic Space. In: Proceedings of the Annual Meeting of + the Cognitive Science Society 23 (2001), pp. 576–581. [<ref + target="https://escholarship.org/uc/item/0wk159m0">online</ref>]</bibl> + <bibl xml:id="maestro_limites_2003">Jesús G. Maestro: Los límites de una interpretación trágica y contemporánea del + teatro calderoniano: El príncipe constante. In: Teatro calderoniano sobre el + tablado: Calderón y su puesta en escena a través de los siglos. Ed. by Manfred Tietz. + (Coloquio Anglogermano sobre Calderón, Firenze 10.–14.07.2002) Stuttgart 2003, pp. + 285–327. (= Archivum Calderonianum, 10) <ptr type="gbv" cRef="371289408"/></bibl> + <bibl xml:id="manning_introduction_2008">Christopher D. Manning / Prabhakar Raghavan / Hinrich Schütze: Introduction to + Information Retrieval. Cambridge, UK 2008. <ptr type="gbv" cRef="538088214"/></bibl> + <bibl xml:id="mikolov_representations_2013">Tomas Mikolov / Ilya Sutskever / Kai Chen / Greg Corrado / Jeffrey Dean: Distributed + Representations of Words and Phrases and Their Compositionality. In: Advances in Neural Information Processing Systems 26: 27th Annual Conference on Neural + Information Processing Systems. Ed. by Chris Burges et al. (NeurIPS 26, Lake Tahoe, + NV, 05.–10.12.2013), pp. 3111–3119. [<ref + target="https://papers.nips.cc/paper/2013/hash/9aa42b31882ec039965f3c4923ce901b-Abstract.html" + >online</ref>] <ptr type="gbv" cRef="783828063"/></bibl> + <bibl xml:id="papay_scenarios_2018">Sean Papay / Sebastian Padó / Ngoc Thang Vu: Addressing Low-Resource Scenarios with + Character-aware Embeddings. In: Subword and Character Level Models in NLP – + proceedings of the second workshop. Ed. by Association for Computational Linguistics. 
+ (NAACL-HLT 16, New Orleans, 06.06.2018) Stroudsburg, PA, 2018, pp. 32–37. DOI: <ref + target="http://dx.doi.org/10.18653/v1/W18-1204">10.18653/v1/W18-1204</ref> + </bibl> + <bibl xml:id="parker_definition_1962">Alexander Augustine Parker: Towards a Definition of Calderonian Tragedy. In: Bulletin + of Hispanic Studies 39 (1962), pp. 222–237. <ptr type="gbv" cRef="129851906"/></bibl> + <bibl xml:id="parker_mind_1988">Alexander Augustine Parker: The Mind and Art of Calderón. Essays on the Comedias. Ed. + by Deborah Kong. Cambridge et al. 1988. <ptr type="gbv" cRef="025317172"/></bibl> + <bibl xml:id="pena_teatro_2011">Miriam A. Peña-Pimentel: El Gracioso en el Teatro de Calderón. Un Análisis desde las + Humanidades Digitales. London / Ontario 2011. (= Electronic Thesis and Dissertation + Repository, 307) [<ref target="https://ir.lib.uwo.ca/etd/3070">online</ref>]</bibl> + <bibl xml:id="peirsman_identification_2010">Yves Peirsman / Dirk Geeraerts / Dirk Speelman: The Automatic Identification of Lexical Variation between Language Varieties. In: Natural Language Engineering 16 + (2010), issue 4, pp. 469–491. DOI: <ref + target="https://dx.doi.org/10.1017/S1351324910000161" + >10.1017/S1351324910000161</ref> <ptr type="gbv" cRef="188854029"/></bibl> + <bibl xml:id="pena_aplicacion_2012">Miriam A. Peña-Pimentel: Aplicación de mapas de tópicos al análisis semántico de + algunas comedias de Calderón. In: Calderón virtual. Anuario calderoniano 5 (2012), + pp. 115–130. <ptr type="gbv" cRef="727545906"/></bibl> + <bibl xml:id="rosa_role_2018">Javier de la Rosa / Adriana Soto-Corominas / Juan Luis Suárez: The Role of Emotions + in the Characters of Pedro Calderón de la Barca’s autos sacramentales. In: Emotion + and the Seduction of the Senses, Baroque to Neo-Baroque. Ed. by Lisa Beaven / Angela + Ndalianis. (Conference, Melbourne, 27.–29.11.2013) Kalamazoo 2018, pp. 99–125. 
(= + Studies in medieval and early modern culture, 59) <ptr type="gbv" cRef="1645566358"/></bibl> + <bibl xml:id="schoech_tools_2013">Christof Schöch: Fine-Tuning our Stylometric Tools. Investigating Authorship and + Genre in French Classical Drama. In: Digital Humanities Conference 2013. Ed. by + European Association for Digital Humanities. (DH 2013, Lincoln, NE, 16.–19.07.2013) + Lincoln, NE 2013. <ptr type="gbv" cRef="771005539"/></bibl> + <bibl xml:id="schoech_exploration_2017">Christof Schöch: Topic Modeling Genre: An Exploration of French Classical and + Enlightenment Drama. In: Digital Humanities Quarterly 11 (2017), pp. 1–53. [<ref + target="http://www.digitalhumanities.org/dhq/vol/11/2/000291/000291.html" + >online</ref>] </bibl> + <bibl xml:id="sullivan_landen_2017">Henry Wells Sullivan: Calderón in deutschen und niederen Landen. Eine + dreihundertjährige Rezeptionsgeschichte. Berlin 2017. <ptr type="gbv" cRef="825775477"/></bibl> + <bibl xml:id="sullivan_drama_2018">Henry Wells Sullivan: Tragic Drama in the Golden Age of Spain. Kassel 2018. (= Teatro + del Siglo de Oro / Estudios de literatura, 133) <ptr type="gbv" cRef="1028592345"/></bibl> + <bibl xml:id="tobar_rotonda_2000">María Luisa Tobar: Atti della Tavola Rotonda sulla Singolarità Storica e Estetica di + »La púrpura de la rosa« di Calderón de la Barca. Messina 2000. <ptr type="gbv" cRef="345272080"/></bibl> + <bibl xml:id="turney_frequency_2010">Peter D. Turney / Patrick Pantel: From Frequency to Meaning: Vector Space Models of + Semantics. In: Journal of Artificial Intelligence Research 37 (2010), pp. 141–188. + DOI: <ref target="https://doi.org/10.1613/jair.2934">10.1613/jair.2934</ref> + <ptr type="gbv" cRef="215076869"/></bibl> + <bibl xml:id="prat_historia_1950">Ángel Valbuena Prat: Historia de la literatura española. 4 vols. 3rd edition. + Barcelona 1950. Vol. 2: Los Siglos de oro, pp. 479–571. <ptr type="gbv" cRef="08470523X"/></bibl> + <bibl xml:id="ward_function_1963">Joe H. 
Ward: Hierarchical Grouping to Optimize an Objective Function. In: Journal of + the American Statistical Association 58 (1963), pp. 236–244. <ptr type="gbv" cRef="129497681"/></bibl> + <bibl xml:id="willand_2017">Marcus Willand / Nils Reiter: Geschlecht und Gattung. Digitale Analysen von Kleists + ›Familie Schroffenstein‹. In: Kleist-Jahrbuch 2017. Ed. by Andrea Allerkamp / Günter + Blamberger / Ingo Breuer / Barbara Gribnitz / Hannah Lotte Lund / Martin Roussel. + Stuttgart 2017, pp. 177–195. <ptr type="gbv" cRef="897046382"/></bibl> + + </listBibl> + </div> + + + <div type="abbildungsnachweis"> + <head>List of Figures and Tables</head> + + <desc type="graphic" xml:id="abb1">Ward.D2 clustering of 112 Calderónian Comedias. [Lehmann 2022]<ref type="graphic" target="#klassifikation_2022_001"/></desc> + <desc type="graphic" xml:id="abb2">Ward.D2 clustering of 112 Calderónian Comedias. Euclidean distance on the + basis of a sparsity of 20%. [Lehmann 2022]<ref type="graphic" target="#klassifikation_2022_002"/></desc> + <desc type="graphic" xml:id="abb3">Ward.D2 clustering of 112 Calderónian Comedias. Cosine similarity based on + verbs, nouns and adjectives. [Lehmann 2022]<ref type="graphic" target="#klassifikation_2022_003"/></desc> + <desc type="graphic" xml:id="abb4">Ward.D2 clustering of 112 Calderónian Comedias. Cosine similarity on the + basis of tf-idf values. [Lehmann 2022]<ref type="graphic" target="#klassifikation_2022_004"/></desc> + <desc type="table" xml:id="tab1"><ref target="#tab01" type="intern">Tab. 1</ref>: 10 + nearest neighbor terms for »honor« and »hado«. [Lehmann / Padó 2022]<ref + type="graphic" target="#klassifikation_2022_t1"/> + </desc> + <desc type="table" xml:id="tab2"><ref target="#tab02" type="intern">Tab. 2</ref>: 10 + nearest neighbor terms for »fineza«, »justicia« and »amistad«. [Lehmann / Padó + 2022]<ref type="graphic" target="#klassifikation_2022_t2"/> + </desc> + <desc type="table" xml:id="tab3"><ref target="#tab03" type="intern">Tab. 
3</ref>: 10 + nearest neighbor terms for »celos«, »gusto« and »muera«. [Lehmann / Padó 2022]<ref + type="graphic" target="#klassifikation_2022_t3"/> + </desc> + </div> + </body> + </text> +</TEI>