index.json

[{"authors":null,"categories":null,"content":"I\u0026rsquo;m Elena. I\u0026rsquo;m a computational linguist: I\u0026rsquo;m interested in Linguistics, technology and the intersection between them. I currently work at the NLP\u0026amp;IR research group at UNED University, where I\u0026rsquo;m pursuing my PhD under the supervision of Julio Gonzalo and Constantine Lignos. I\u0026rsquo;m particularly interested in studying how we can use technology to understand language contact and language change. My research has led to the creation of Observatorio Lázaro, an observatory that automatically monitors anglicism usage in the Spanish press.\nPrior to that, I spent a decade working on different language technology projects at various organizations, such as the Information Sciences Institute at University of Southern California, Fundéu, Molino de Ideas, McLean Hospital or UNED Digital Humanities Lab.\nI am also highly involved in dissemination activities that bridge the gap between Linguistics and the general public: I write a column about language at Spanish newspaper elDiario.es, a column that was awarded with the Miguel Delibes National Journalism Award in 2017. I sometimes write at linguistics magazine Archiletras, where I also serve as editorial board member. In 2016 I wrote the pop linguistics book Anatomía de la Lengua.\n","date":-62135596800,"expirydate":-62135596800,"kind":"section","lang":"en","lastmod":-62135596800,"objectID":"598b63dd58b43bce02403646f240cd3c","permalink":"/author/admin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/author/admin/","section":"author","summary":"I\u0026rsquo;m Elena. I\u0026rsquo;m a computational linguist: I\u0026rsquo;m interested in Linguistics, technology and the intersection between them. I currently work at the NLP\u0026amp;IR research group at UNED University, where I\u0026rsquo;m pursuing my PhD under the supervision of Julio Gonzalo and Constantine Lignos. 
I\u0026rsquo;m particularly interested in studying how we can use technology to understand language contact and language change. My research has led to the creation of Observatorio Lázaro, an observatory that automatically monitors anglicism usage in the Spanish press.","tags":null,"title":"","type":"author"},{"authors":null,"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"section","lang":"en","lastmod":-62135596800,"objectID":"d41d8cd98f00b204e9800998ecf8427e","permalink":"/author/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/author/","section":"author","summary":"","tags":null,"title":"Authors","type":"author"},{"authors":["Andrew Rueda","Elena Álvarez Mellado","Constantine Lignos"],"categories":null,"content":"","date":1716588000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1716588000,"objectID":"b66ff99dd3d9062edbc2f66183ab0776","permalink":"/publication/lrec2024/","publishdate":"2024-05-25T00:00:00+02:00","relpermalink":"/publication/lrec2024/","section":"publication","summary":"","tags":[],"title":"CoNLL#: Fine-grained Error Analysis and a Corrected Test Set for CoNLL-03 English","type":"publication"},{"authors":[],"categories":null,"content":"","date":1715810400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1715810400,"objectID":"92e234ee33cee4d2b986708880276281","permalink":"/talk/fondosue/","publishdate":"2024-05-16T00:00:00+02:00","relpermalink":"/talk/fondosue/","section":"talk","summary":"Round table organized by Spanish newspaper elDiario.es on artificial intelligence","tags":[],"title":"Digitalización e inteligencia artificial","type":"talk"},{"authors":[],"categories":null,"content":"","date":1687816800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1687816800,"objectID":"bd07d1db0a210e2c09a0c98a85eff2e4","permalink":"/talk/elex2023/","publishdate":"2023-06-27T00:00:00+02:00","relpermalink":"/talk/elex2023/","section":"talk","summary":"Keynote on Lázaro 
Observatory and automatic detection of anglicisms at eLex 2023. Recipient of the Adam Kilgarriff Prize.","tags":[],"title":"Adam Kilgarriff Lecture at eLex 2023","type":"talk"},{"authors":[],"categories":null,"content":"   ","date":1683928800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1683928800,"objectID":"c2b2c04fc75ed368882c46613b30ee2f","permalink":"/talk/asetrad/","publishdate":"2023-05-13T00:00:00+02:00","relpermalink":"/talk/asetrad/","section":"talk","summary":"Socia de honor de Asetrad (Asociación Española de Traductores, Correctores e Intérpretes)","tags":[],"title":"Socia de honor de Asetrad: acceptance speech","type":"talk"},{"authors":[],"categories":null,"content":"","date":1678143600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1678143600,"objectID":"6ee1cae0f2097bfdf58ae5b5c8e02da5","permalink":"/talk/untemaaldia_solo/","publishdate":"2023-03-07T00:00:00+01:00","relpermalink":"/talk/untemaaldia_solo/","section":"talk","summary":"Interview for the daily podcast from elDiario.es Un tema al día with Juanlu Sánchez.","tags":[],"title":"Un tema al día: ¿Solo o sólo?","type":"talk"},{"authors":[],"categories":null,"content":"","date":1674428400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1674428400,"objectID":"344da643cefe5b9adcc3f0135562b771","permalink":"/talk/noosfera/","publishdate":"2023-01-23T00:00:00+01:00","relpermalink":"/talk/noosfera/","section":"talk","summary":"Radio interview at Noosfera on Linguistics and Computational Linguistics","tags":[],"title":"Radio interview at Noosfera","type":"talk"},{"authors":null,"categories":null,"content":"","date":1659045600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1659045600,"objectID":"de5195bbb4d671b0e74fea90a0b3eafe","permalink":"/project/pylazaro/","publishdate":"2022-07-29T00:00:00+02:00","relpermalink":"/project/pylazaro/","section":"project","summary":"A Python library that automatically detects lexical borrowings (or 
loanwords) in Spanish","tags":["Demo"],"title":"pylazaro","type":"project"},{"authors":["Elena Álvarez Mellado","Constantine Lignos"],"categories":null,"content":"","date":1653429600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1653429600,"objectID":"2faa615609cd4fb98b2b3228c8459f17","permalink":"/publication/lrec2022/","publishdate":"2022-05-25T00:00:00+02:00","relpermalink":"/publication/lrec2022/","section":"publication","summary":"","tags":[],"title":"Borrowing or Codeswitching? Annotating for Finer-Grained Distinctions in Language Mixing","type":"publication"},{"authors":["Elena Álvarez Mellado","Constantine Lignos"],"categories":null,"content":"","date":1653343200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1653343200,"objectID":"5be1be70461546cfd6caa523a80b2509","permalink":"/publication/acl2022/","publishdate":"2022-05-24T00:00:00+02:00","relpermalink":"/publication/acl2022/","section":"publication","summary":"This work presents a new resource for borrowing identification and analyzes the performance and errors of several models on this task. We introduce a new annotated corpus of Spanish newswire rich in unassimilated lexical borrowings—words from one language that are introduced into another without orthographic adaptation—and use it to evaluate how several sequence labeling models (CRF, BiLSTM-CRF, and Transformer-based models) perform. The corpus contains 370,000 tokens and is larger, more borrowing-dense, OOV-rich, and topic-varied than previous corpora available for this task. 
Our results show that a BiLSTM-CRF model fed with subword embeddings along with either Transformer-based embeddings pretrained on codeswitched data or a combination of contextualized word embeddings outperforms results obtained by a multilingual BERT-based model.","tags":[],"title":"Detecting Unassimilated Borrowings in Spanish: An Annotated Corpus and Approaches to Modeling","type":"publication"},{"authors":null,"categories":null,"content":"","date":1640905200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1640905200,"objectID":"b1065c95b7fe1cdad6fad0e7e7376b38","permalink":"/project/coalas/","publishdate":"2021-12-31T00:00:00+01:00","relpermalink":"/project/coalas/","section":"project","summary":"COrpus of AngLicisms in the SpAnish PresS. With Constantine Lignos","tags":["Demo"],"title":"COALAS 🐨","type":"project"},{"authors":null,"categories":null,"content":"","date":1640905200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1640905200,"objectID":"b19a4b8696613acb90af3e43b6ad2d5e","permalink":"/project/observatoriolazaro/","publishdate":"2021-12-31T00:00:00+01:00","relpermalink":"/project/observatoriolazaro/","section":"project","summary":"An observatory of anglicism usage in the Spanish press.","tags":["Demo"],"title":"Observatorio Lázaro","type":"project"},{"authors":null,"categories":null,"content":"","date":1638226800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1638226800,"objectID":"9f6452f501c1db3668b6638c15d561ec","permalink":"/project/lazarobot/","publishdate":"2021-11-30T00:00:00+01:00","relpermalink":"/project/lazarobot/","section":"project","summary":"A Twitter bot that tweets new anglicisms found in the Spanish 
press.","tags":["Demo"],"title":"@LazaroBot","type":"project"},{"authors":[],"categories":null,"content":"","date":1634940000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1634940000,"objectID":"766117193f50e862c9a2e561f8f93e99","permalink":"/talk/trabalengua2021/","publishdate":"2021-10-23T00:00:00+02:00","relpermalink":"/talk/trabalengua2021/","section":"talk","summary":"Cosas que aprendí mirando 200.000 anglicismos.","tags":[],"title":"Trabalengua 2021: Cosas que aprendí mirando 200.000 anglicismos","type":"talk"},{"authors":[],"categories":null,"content":"","date":1632348000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1632348000,"objectID":"58fe1989ae0954c54a4f84ff6b828b71","permalink":"/talk/unidiomasinfronteras/","publishdate":"2021-09-23T00:00:00+02:00","relpermalink":"/talk/unidiomasinfronteras/","section":"talk","summary":"Radio interview at Un idioma sin fronteras at the Spanish National Radio","tags":[],"title":"Radio interview at RNE","type":"talk"},{"authors":["Elena Álvarez Mellado","Luis Espinosa Anke","Julio Gonzalo Arroyo","Constantine Lignos","Jordi Porta Zamorano"],"categories":null,"content":"","date":1632088800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1632088800,"objectID":"c56fd508fb94a1499ce2d41d07ad007c","permalink":"/publication/iberlef2021/","publishdate":"2021-09-20T00:00:00+02:00","relpermalink":"/publication/iberlef2021/","section":"publication","summary":"This paper summarizes the main findings of the ADoBo 2021 shared task, proposed in the context of IberLef 2021. In this task, we invited participants to detect lexical borrowings (coming mostly from English) in Spanish newswire texts. This task was framed as a sequence classification problem using BIO encoding. We provided participants with an annotated corpus of lexical borrowings which we split into training, development and test splits. We received submissions from 4 teams with 9 different system runs overall. 
The results, which range from F1 scores of 37 to 85, suggest that this is a challenging task, especially when out-of-domain or OOV words are considered, and that traditional methods informed with lexicographic information would benefit from taking advantage of current NLP trends.","tags":[],"title":"Overview of ADoBo 2021: Automatic Detection of Unassimilated Borrowings in the Spanish Press","type":"publication"},{"authors":[],"categories":null,"content":"","date":1623535200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1623535200,"objectID":"d6f6f2b42852ca5dfb6a267c68620717","permalink":"/talk/lengwitch/","publishdate":"2021-06-13T00:00:00+02:00","relpermalink":"/talk/lengwitch/","section":"talk","summary":"Divulgación lingüística en la red (conversation on Twitch with Lengwitch)","tags":[],"title":"Twitch: Divulgación lingüística en la red","type":"talk"},{"authors":["Elena Álvarez-Mellado"],"categories":null,"content":"","date":1612911600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1612911600,"objectID":"88e4e87db40459974cb5a2f9c2b67ddc","permalink":"/publication/scil/","publishdate":"2021-02-10T00:00:00+01:00","relpermalink":"/publication/scil/","section":"publication","summary":"In this work, we present: (1) a corpus of European Spanish newswire annotated with anglicisms; (2) a sequence labeling model to extract English lexical borrowings (or anglicisms) from Spanish newswire; and (3) a tracking corpus of anglicism usage in the Spanish press.","tags":[],"title":"Extracting English Lexical Borrowings from Spanish Newswire","type":"publication"},{"authors":null,"categories":null,"content":"","date":1612134000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1612134000,"objectID":"f9c86d784fabd8d05837d11b9cfb417e","permalink":"/project/adobo/","publishdate":"2021-02-01T00:00:00+01:00","relpermalink":"/project/adobo/","section":"project","summary":"A shared task on automatic detection of borrowings at IberLEF 2021. 
Organized with Luis Espinosa Anke, Julio Gonzalo, Constantine Lignos and Jordi Porta.","tags":null,"title":"ADoBo","type":"project"},{"authors":[],"categories":null,"content":"","date":1604271600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1604271600,"objectID":"8b265dd93a8717df0051b57f29533fd2","permalink":"/talk/cope/","publishdate":"2020-11-02T00:00:00+01:00","relpermalink":"/talk/cope/","section":"talk","summary":"Interview for radio program La Tarde at COPE with Pilar Cisneros.","tags":[],"title":"Radio interview: La Tarde de la COPE","type":"talk"},{"authors":[],"categories":null,"content":"","date":1603231200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1603231200,"objectID":"5153c70f2eb1bc93c2ec54a6f0ed4ff2","permalink":"/talk/jelo/","publishdate":"2020-10-21T00:00:00+02:00","relpermalink":"/talk/jelo/","section":"talk","summary":"Observatorio Lázaro featured at Julia en la Onda radio program at Onda Cero.","tags":[],"title":"Radio interview: Julia en la Onda","type":"talk"},{"authors":[],"categories":null,"content":"","date":1603144800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1603144800,"objectID":"72999388c4c8b73e7f37e52154cdae50","permalink":"/talk/grijelmo/","publishdate":"2020-10-20T00:00:00+02:00","relpermalink":"/talk/grijelmo/","section":"talk","summary":"Article by Álex Grijelmo for El País about Observatorio Lázaro.","tags":[],"title":"El País: 20 anglicismos nuevos cada día","type":"talk"},{"authors":null,"categories":null,"content":"","date":1601503200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1601503200,"objectID":"06e993e050860041e5cfc6ebf9498261","permalink":"/project/caravaggio/","publishdate":"2020-10-01T00:00:00+02:00","relpermalink":"/project/caravaggio/","section":"project","summary":"A PyTorch model that classifies Spanish text as being easy to read (plain language) or 
not.","tags":["Demo"],"title":"Caravaggio","type":"project"},{"authors":null,"categories":null,"content":"","date":1601416800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1601416800,"objectID":"5f9bc1c615b1210836b7cb7ac02eb2e2","permalink":"/project/morssa/","publishdate":"2020-09-30T00:00:00+02:00","relpermalink":"/project/morssa/","section":"project","summary":"A scraper for extracting the text of news articles via RSS.","tags":null,"title":"Morssa","type":"project"},{"authors":[],"categories":null,"content":"","date":1599688800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1599688800,"objectID":"0c213c83454780e75560655639d02e75","permalink":"/talk/conlalenguafuera/","publishdate":"2020-09-10T00:00:00+02:00","relpermalink":"/talk/conlalenguafuera/","section":"talk","summary":"Interview for linguistic podcast Con la lengua fuera with Macarena Gil and Nerea Fernández de Gobeo.","tags":[],"title":"Podcast: Interview at Con la lengua fuera","type":"talk"},{"authors":["Elena Álvarez-Mellado"],"categories":null,"content":"","date":1590962400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1590962400,"objectID":"e558c014fcfe9598914db811026633f2","permalink":"/publication/iberlef/","publishdate":"2020-06-01T00:00:00+02:00","relpermalink":"/publication/iberlef/","section":"publication","summary":"This paper documents two sequence-labeling models for NER in Spanish: a conditional random field model with handcrafted features and a BiLSTM-CRF model with word and character embeddings. Both models were trained and tested using CAPITEL (an annotated corpus of newswire written in European Spanish) and were submitted to the shared task on Spanish NER at IberLEF 2020. 
The best result was obtained by the CRF model, which produced an F1 score of 84.39 on the test set and was ranked #6 on the shared task.","tags":[],"title":"Two models for Named Entity Recognition in Spanish: submission for the CAPITEL Shared Task at IberLEF 2020","type":"publication"},{"authors":["Elena Álvarez-Mellado. Advisor: Constantine Lignos"],"categories":null,"content":"","date":1589493600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1589493600,"objectID":"1e523bb1019aa408f6ace45239e6c188","permalink":"/publication/lazaro/","publishdate":"2020-05-15T00:00:00+02:00","relpermalink":"/publication/lazaro/","section":"publication","summary":"The use of lexical borrowings from English (often called anglicisms) in the Spanish press evokes great interest, both in the Hispanic linguistics community and among the general public. Anglicism usage in Spanish language has been previously studied within the field of corpus linguistics. Prior work has traditionally relied on manual inspection of corpora, with the limitations that implies. This thesis proposes a model for automatic extraction of unadapted anglicisms in Spanish newswire. This thesis introduces: (1) an annotated corpus of 21,570 newspaper headlines (325,665 tokens) written in European Spanish annotated with unadapted anglicisms and (2) two sequence-labeling models to perform automatic extraction of unadapted anglicisms: a conditional random field model with handcrafted features and a BiLSTM-CRF model with word and character embeddings. The best results are obtained by the CRF model, with an F1 score of 89.60 on the development set and 87.82 on the test set. 
Finally, a practical application of the CRF model is presented: an automatic pipeline that performs daily extraction of anglicisms from the main national newspapers of Spain.","tags":[],"title":"Lázaro: An Extractor of Emergent Anglicisms in Spanish Newswire","type":"publication"},{"authors":["Elena Álvarez-Mellado"],"categories":null,"content":"","date":1583103600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1583103600,"objectID":"7ae7fb904f1cc88af68516172808592a","permalink":"/publication/anglicisms/","publishdate":"2020-03-02T00:00:00+01:00","relpermalink":"/publication/anglicisms/","section":"publication","summary":"The extraction of anglicisms (lexical borrowings from English) is relevant both for lexicographic purposes and for NLP downstream tasks. We introduce a corpus of European Spanish newspaper headlines annotated with anglicisms and a baseline model for anglicism extraction. In this paper we present: (1) a corpus of 21,570 newspaper headlines written in European Spanish annotated with emergent anglicisms and (2) a conditional random field baseline model with handcrafted features for anglicism extraction. We present the newspaper headlines corpus, describe the annotation tagset and guidelines and introduce a CRF model that can serve as baseline for the task of detecting anglicisms. The presented work is a first step towards the creation of an anglicism extractor for Spanish newswire.","tags":[],"title":"An Annotated Corpus of Emerging Anglicisms in Spanish Newspaper Headlines","type":"publication"},{"authors":["Elena Álvarez-Mellado"],"categories":null,"content":"","date":1580598000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1580598000,"objectID":"1150d3ba333f1cea3379e2157f806787","permalink":"/publication/political/","publishdate":"2020-02-02T00:00:00+01:00","relpermalink":"/publication/political/","section":"publication","summary":"This paper documents a corpus of political speeches in Spanish. 
The documents in the corpus belong to the Christmas speeches that have been delivered yearly by the head of state of Spain since 1937. The historical period covered by these speeches ranges from the Spanish Civil War and the Francoist dictatorship up until today. As a result, the corpus reflects some of the most significant events and political changes in the recent history of Spain. Up until now, the speeches as a whole had not been collected into a single, systematic and reusable resource, as most of the texts were scattered among different sources. The paper describes: (1) the composition of the corpus; (2) the Python interface that facilitates querying and analyzing the corpus using the NLTK and spaCy libraries and (3) a set of HTML visualizations aimed at the general public to navigate the corpus and explore differences between TF-IDF frequencies.","tags":[],"title":"A corpus of Spanish political speeches from 1937 to 2019","type":"publication"},{"authors":[],"categories":null,"content":"","date":1577142000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1577142000,"objectID":"1830eff1f4b567b168aa1015a0d0166d","permalink":"/talk/franco/","publishdate":"2019-12-24T00:00:00+01:00","relpermalink":"/talk/franco/","section":"talk","summary":"Project of visualization of Spanish political speeches featured at elDiario.es.","tags":[],"title":"elDiario.es: Así han evolucionado los discursos de Navidad","type":"talk"},{"authors":null,"categories":null,"content":"","date":1576796400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1576796400,"objectID":"4e34b6cb5ef23df8bf0194c9563e1a49","permalink":"/project/orgulloysatisfa/","publishdate":"2019-12-20T00:00:00+01:00","relpermalink":"/project/orgulloysatisfa/","section":"project","summary":"Analysis and visualizations in Python of a corpus of Spanish political speeches from 1937 to 2019.","tags":["Demo"],"title":"Corpus of political 
speeches","type":"project"},{"authors":null,"categories":null,"content":"","date":1576796400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1576796400,"objectID":"bc7eadbab5f5f97e13945177ebd6267a","permalink":"/project/ner4podcasts/","publishdate":"2019-12-20T00:00:00+01:00","relpermalink":"/project/ner4podcasts/","section":"project","summary":"Named Entity Recognition for podcast transcripts. With Julian Fernandez, Kristen Sheets and Linxuan Yang.","tags":["Demo"],"title":"NER4Podcasts","type":"project"},{"authors":["Elena Álvarez-Mellado","Eben Holderness","Nicholas Miller","Fyonn Dhang","Philip Cawkwell","Kirsten Bolton","James Pustejovsky","Mei-Hua Hall"],"categories":null,"content":"","date":1565388000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1565388000,"objectID":"1135ca95f94aa8dd5d050eb390c91e83","permalink":"/publication/louhi/","publishdate":"2019-08-10T00:00:00+02:00","relpermalink":"/publication/louhi/","section":"publication","summary":"Predicting which patients are more likely to be readmitted to a hospital within 30 days after discharge is a valuable piece of information in clinical decision-making. Building a successful readmission risk classifier based on the content of Electronic Health Records (EHRs) has proved, however, to be a challenging task. Previously explored features include mainly structured information, such as sociodemographic data, comorbidity codes and physiological variables. 
In this paper we assess incorporating additional clinically interpretable NLP-based features such as topic extraction and clinical sentiment analysis to predict early readmission risk in psychiatry patients.","tags":[],"title":"Assessing the Efficacy of Clinical Sentiment Analysis and Topic Extraction in Psychiatric Readmission Risk Prediction","type":"publication"},{"authors":null,"categories":null,"content":"","date":1559253600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1559253600,"objectID":"a518097d90871df0af4d168cc8912a47","permalink":"/project/figurativelanguage/","publishdate":"2019-05-31T00:00:00+02:00","relpermalink":"/project/figurativelanguage/","section":"project","summary":"A project on annotation and classification of non literal tweets. With Qingwen Ye and Julia Cathcart.","tags":null,"title":"Figurative language classification","type":"project"},{"authors":["Elena Álvarez-Mellado","Leticia Martín-Fuertes Moreno"],"categories":null,"content":"","date":1542668400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1542668400,"objectID":"13ded017eb042ae47bfb24fcdb77e857","permalink":"/publication/aracne/","publishdate":"2018-11-20T00:00:00+01:00","relpermalink":"/publication/aracne/","section":"publication","summary":"Aracne es un proyecto de lingüística de corpus que tiene como propósito medir cuantitativamente cómo ha evolucionado el lenguaje de la prensa española entre 1914 y 2014, con especial atención a los rasgos de riqueza léxica. El proyecto ha consistido en la creación de un corpus de prensa de dos millones de palabras, confeccionado a partir de noticias extraídas de las hemerotecas de cuatro periódicos centenarios (El Norte de Castilla, El Diario de Mallorca, El Heraldo de Aragón y La Vanguardia). A partir de este corpus, se han obtenido distintas medidas de riqueza por décadas (variación, densidad léxica y complejidad de los artículos) y se han comparado los valores obtenidos en las distintas épocas. 
Los resultados muestran que la riqueza y la complejidad de los textos periodísticos se han mantenido notablemente estables en los últimos cien años.","tags":[],"title":"Aracne: estudio de la variación lingüística en la prensa española entre 1914 y 2014 (Aracne: a study of language change in the Spanish press between 1914 and 2014)","type":"publication"},{"authors":[],"categories":null,"content":"   ","date":1516921200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1516921200,"objectID":"3ed8ba2828c135c15c31bc773b1f2093","permalink":"/talk/valladolid/","publishdate":"2018-01-26T00:00:00+01:00","relpermalink":"/talk/valladolid/","section":"talk","summary":"Acceptance speech for the [Miguel Delibes National Journalism Award](https://es.wikipedia.org/wiki/Premio_Nacional_de_Periodismo_Miguel_Delibes) 2017. The Miguel Delibes National Award of Journalism is awarded by Valladolid Press Association (Asociación de Prensa de Valladolid) to one person each year for newspaper articles on language and linguistics.","tags":[],"title":"Premio Nacional de Periodismo Miguel Delibes: acceptance speech","type":"talk"},{"authors":[],"categories":null,"content":"","date":1516662000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1516662000,"objectID":"205e6c4ce308a9070ea236af6c1ca25a","permalink":"/talk/uned/","publishdate":"2018-01-23T00:00:00+01:00","relpermalink":"/talk/uned/","section":"talk","summary":"Interview for UNED University.","tags":[],"title":"Interview for UNED","type":"talk"},{"authors":[],"categories":null,"content":"   ","date":1513551600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1513551600,"objectID":"80105bfc678f7452c917e3724cc6a125","permalink":"/talk/apv/","publishdate":"2017-12-18T00:00:00+01:00","relpermalink":"/talk/apv/","section":"talk","summary":"Media clip and interview for the Miguel Delibes Journalism Award awarded by the Valladolid Press Association. 
Includes interview with elDiario.es newspaper director Ignacio Escolar.","tags":[],"title":"Interview for Asociación de Prensa de Valladolid","type":"talk"},{"authors":null,"categories":null,"content":"","date":1485903600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485903600,"objectID":"546ae2fa78edb6fdf7c294340639a8d7","permalink":"/project/subtitles/","publishdate":"2017-02-01T00:00:00+01:00","relpermalink":"/project/subtitles/","section":"project","summary":"A corpus of Spanish subtitles from LOTR, Star Wars, OITNB, GoT, HIMYM, etc.","tags":["Demo"],"title":"Subtitles Corpus","type":"project"},{"authors":[],"categories":null,"content":"   ","date":1473026400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1473026400,"objectID":"a78ec067a145e65fa04dfede3d0e4f89","permalink":"/talk/yorokobu/","publishdate":"2016-09-05T00:00:00+02:00","relpermalink":"/talk/yorokobu/","section":"talk","summary":"Interview for Yorokobu magazine by Mar Abad","tags":[],"title":"Interview for Yorokobu magazine by Mar Abad","type":"talk"},{"authors":[],"categories":null,"content":"   ","date":1453590000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1453590000,"objectID":"8c90b75b9ef259bbc9ee94e2854a8009","permalink":"/talk/aracne/","publishdate":"2016-01-24T00:00:00+01:00","relpermalink":"/talk/aracne/","section":"talk","summary":"Aracne is a corpus linguistics project developed by Fundeu on the evolution of the Spanish language on the media during the 20th century. 
Project report, visualization and conclusions at: http://fundeu.es/aracne/.","tags":[],"title":"Proyecto Aracne","type":"talk"},{"authors":null,"categories":null,"content":"","date":1451516400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1451516400,"objectID":"2d070a6937adb7cd87e2d90b27c4a534","permalink":"/project/aracne/","publishdate":"2015-12-31T00:00:00+01:00","relpermalink":"/project/aracne/","section":"project","summary":"A corpus linguistics project supported by Fundeu on the evolution of the Spanish language on the media during the 20th century. With Leticia Martín-Fuertes and Molino de Ideas.","tags":["Demo"],"title":"Aracne","type":"project"},{"authors":[],"categories":null,"content":"   ","date":1445119200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1445119200,"objectID":"87907585ecce9615c7193acc4fc624ee","permalink":"/talk/desayuno/","publishdate":"2015-10-18T00:00:00+02:00","relpermalink":"/talk/desayuno/","section":"talk","summary":"Introducing the projects on Computational Linguistics and Digital Humanities developed by Molino de Ideas.","tags":[],"title":"Desayuno con Molino de Ideas","type":"talk"},{"authors":[],"categories":null,"content":"   ","date":1433368800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1433368800,"objectID":"61961deccfa59783607744f39b4a7a32","permalink":"/talk/apidays/","publishdate":"2015-06-04T00:00:00+02:00","relpermalink":"/talk/apidays/","section":"talk","summary":"   ","tags":[],"title":"Introducing the Natural Language Processing track","type":"talk"},{"authors":[],"categories":null,"content":"   ","date":1416265200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1416265200,"objectID":"d7893336d015eb89831caf0691493c70","permalink":"/talk/papis/","publishdate":"2014-11-18T00:00:00+01:00","relpermalink":"/talk/papis/","section":"talk","summary":"Human sentiments are broad and diverse. 
Products, brands and people can make us feel a wide range of emotions: happiness, anger, disappointment… Users and consumers express these feelings and opinions on Twitter, Facebook, blogs and comments, within the reach of companies and organizations. Understanding language is the key to learn what the community thinks about a particular product and to make predictions about it. Mastering language, however, is tricky: the diversity of vocabulary, the differences between regions and the use of irony and metaphors makes of sentiment analysis a complex and fascinating task.","tags":[],"title":"Overcoming challenges in understanding text automatically","type":"talk"},{"authors":[],"categories":null,"content":"   ","date":1387321200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1387321200,"objectID":"d77b608ff92ffa4dea2c2ce92b5228b5","permalink":"/talk/zaragoza/","publishdate":"2013-12-18T00:00:00+01:00","relpermalink":"/talk/zaragoza/","section":"talk","summary":"Introductory seminar on language technology for undergrads in Linguistics at Universidad de Zaragoza.","tags":[],"title":"Todo lo que un filólogo no sabe que sabe hacer","type":"talk"},{"authors":["Elena Álvarez-Mellado"],"categories":null,"content":"","date":1257807600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1257807600,"objectID":"2c7c8049736f3907ea55798e072015a0","permalink":"/publication/azrael/","publishdate":"2009-11-10T00:00:00+01:00","relpermalink":"/publication/azrael/","section":"publication","summary":"AZRAEL is a rule-based automatic language detector based on the syllable structure coded in Prolog. The first version only detected Spanish language and was released in 2009. Since then, French, Italian, Portuguese, Catalan, Latin and Basque language have been added to the project. The doc folder contains the report and poster for the 2009 version of AZRAEL. 
This project received in 2009 the First Award at the VIII Arquimedes Contest for Young Researchers awarded by the Spanish Ministry of Science and Education.","tags":[],"title":"AZRAEL: A-Z Reconocedor Automático del Español (Automatic Language Detector of Spanish)","type":"publication"},{"authors":null,"categories":null,"content":"","date":1251756000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1251756000,"objectID":"8fa145813ec0398351935a202b20c998","permalink":"/project/azrael/","publishdate":"2009-09-01T00:00:00+02:00","relpermalink":"/project/azrael/","section":"project","summary":"A rule-based automatic language detector based on the syllable structure of words. Current supported languages: Spanish, French, Italian, Portuguese, Catalan, Latin and Basque.","tags":["Demo"],"title":"AZRAEL","type":"project"}]