Neue Datei hochladen

368563f6 · Marcus Baumgarten · 368563f6
Commit 368563f6 authored 2 years ago by Marcus Baumgarten
--- a/klfnameresolver.py
+++ b/klfnameresolver.py
+# This module contains functions for resolving abbreviations according to the rules presented in:
+# http://wiki-de.genealogy.net/Kartei_Leipziger_Familien
+import collections
+import re
+from functools import lru_cache
# Abbreviations that resolve to the same name regardless of gender, as ordered
# (abbreviation, expansion) pairs. They are applied top to bottom with plain
# substring replacement, so the order is significant: "d. J." has to be handled
# before "J.", which is a substring of it.
# The wiki states that J can stand for either Johann or Hans; since Johann is
# treated separately ("Joh."), "J." is resolved as Hans.
# "Matth." could be either Matthias or Matthäus; Matthias is used.
_ABBREVIATIONS = (
    ("Ad.", "Adolf"),
    ("Alfr.", "Alfred"),
    ("Alb.", "Albrecht"),
    ("A.", "Anna"),
    ("B.", "Barbara"),
    ("C.", "Catharina"),
    ("Cl.", "Clara"),
    ("d. J.", "der Jüngere"),
    ("Dan.", "Daniel"),
    ("Dietr.", "Dietrich"),
    ("E.", "Elisabeth"),
    ("Ed.", "Eduard"),
    ("El.", "Eleonore"),
    ("Em.", "Emmanuel"),
    ("F.", "Friedrich"),
    ("Fd.", "Ferdinand"),
    ("Flor.", "Florentine"),
    ("Fr.", "Friederike"),
    ("Franz.", "Franziska"),
    ("Fz.", "Franz"),
    ("G.", "Georg"),
    ("Gf.", "Gottfried"),
    ("Gi.", "Gottlieb"),
    ("Go.", "Gottlob"),
    ("Greg.", "Gregor"),
    ("Gust.", "Gustav"),
    ("H.", "Heinrich"),
    ("Hen.", "Henriette"),
    ("Herm.", "Hermann"),
    ("Hier.", "Hieronymus"),
    ("J.", "Hans"),
    ("Joach.", "Joachim"),
    ("Jul.", "Juliane"),
    ("K.", "Karl"),
    ("L.", "Luise"),
    ("Leb.", "Leberecht"),
    ("Leop.", "Leopold"),
    ("Lud.", "Ludwig"),
    ("M.", "Maria"),
    ("Ma.", "Martha"),
    ("Matth.", "Matthias"),
    ("Math.", "Mathilde"),
    ("Md.", "Magdalena"),
    ("Mg.", "Margaretha"),
    ("Mich.", "Michael"),
    ("Mz.", "Moritz"),
    ("Nik.", "Nikolaus"),
    ("Osk.", "Oskar"),
    ("Osw.", "Oswald"),
    ("Ott.", "Ottilie"),
    ("P.", "Paul"),
    ("Pa.", "Pauline"),
    ("Phil.", "Philipp"),
    ("Reg.", "Regina"),
    ("Rich.", "Richard"),
    # The trailing dot is essential: without it a spelled-out "Robert" would
    # be rewritten to "Robertert".
    ("Rob.", "Robert"),
    ("Ros.", "Rosina"),
    ("Rud.", "Rudolf"),
    ("S.", "Sophia"),
    ("Sab.", "Sabina"),
    ("Sam.", "Samuel"),
    ("Seb.", "Sebastian"),
    ("Sib.", "Sibylle"),
    ("Sid.", "Sidonia"),
    ("Sieg.", "Siegmund"),
    ("Sus.", "Susanne"),
    ("Ther.", "Theresia"),
    ("Tob.", "Tobias"),
    ("U.", "Ursula"),
    ("W.", "Wilhelm"),
    ("We.", "Wilhelmine"),
    ("Wilh.", "Wilhelm"),
    ("Wolfg.", "Wolfgang"),
)

# Abbreviations whose expansion depends on the (guessed) gender of the name.
_MALE_ABBREVIATIONS = (
    ("Aug.", "August"),
    ("Ant.", "Anton"),
    ("Fried.", "Friedrich"),
    ("Joh.", "Johann"),
)
_FEMALE_ABBREVIATIONS = (
    ("Ant.", "Antonie"),
    ("Aug.", "Auguste"),
    ("Fried.", "Friederike"),
    ("Joh.", "Johanna"),
)

# Patterns. Not all the rules of the wiki are implemented yet, mainly the ones
# found in the sample. They are always used with .search(), because .match()
# would only test the beginning of the string.
_FEMALE_PATTERN = re.compile(r"[A-Za-z\s.]+[ae]$")  # name ends in "a" or "e"
_BRACKET_PATTERN = re.compile(r"[(].*[)]")  # an opening and a later closing bracket
_CHRIST_PATTERN = re.compile(r"[X][ophian]{3}")  # e.g. "Xoph", "Xian"
_NAMES_PATTERN = re.compile(r"[A-Za-z]+[.]")  # at least one "letters." token

# Expansion of "d. J."; it ends in "e" and must not influence the gender guess.
_YOUNGER_EPITHET = "der Jüngere"


def _looks_female(name):
    """Return True if *name* ends in "a" or "e", ignoring a trailing "der Jüngere"."""
    basis = name
    if basis.endswith(_YOUNGER_EPITHET):
        basis = basis[:-len(_YOUNGER_EPITHET)].rstrip()
    return _FEMALE_PATTERN.search(basis) is not None


@lru_cache(maxsize=None)
def resolvename(name: str) -> str:
    """
    Resolve the abbreviations contained in a name.

    The name is checked against the implemented abbreviation rules and every
    abbreviation found is replaced by its long form. Results are memoized, so
    repeated names are only resolved once.

    Processing order:
      1. one known spelling error ("Joh. Christia") is corrected,
      2. gender-independent abbreviations are expanded,
      3. the Chi abbreviation "X" is expanded to "Christ"/"Christian",
      4. gender-dependent abbreviations ("Aug.", "Ant.", "Fried.", "Joh.") are
         expanded; a name ending in "a" or "e" is treated as female,
      5. round brackets are removed when the name has an opening bracket
         followed later by a closing one.

    :param name: the input name
    :return: the name with all recognised abbreviations resolved
    """
    # resolve a spelling error
    if name == "Joh. Christia":
        name = "Johann Christian"

    # resolve all names that are abbreviated equally for both genders
    if _NAMES_PATTERN.search(name):
        for abbreviation, resolved in _ABBREVIATIONS:
            name = name.replace(abbreviation, resolved)

    # resolve all the Chis: "X" followed by three letters out of "ophian"
    # (Xoph, Xian, ...) only stands for "Christ", any other "X" for "Christian".
    # NOTE(review): every capital "X" is treated as an abbreviation, so a
    # spelled-out name such as "Xaver" would be altered as well.
    if "X" in name:
        chi = "Christ" if _CHRIST_PATTERN.search(name) else "Christian"
        name = name.replace("X", chi)

    # resolve the abbreviations whose long form depends on the gender
    if _NAMES_PATTERN.search(name):
        if _looks_female(name):
            gendered = _FEMALE_ABBREVIATIONS
        else:
            gendered = _MALE_ABBREVIATIONS
        for abbreviation, resolved in gendered:
            name = name.replace(abbreviation, resolved)

    # get rid of brackets
    if _BRACKET_PATTERN.search(name):
        name = name.replace("(", "").replace(")", "")

    return name