Skip to content
Snippets Groups Projects
Commit 368563f6 authored by Marcus Baumgarten's avatar Marcus Baumgarten
Browse files

Neue Datei hochladen

parents
No related branches found
No related tags found
No related merge requests found
# This module contains functions for resolving abbreviations according to the rules presented in:
# http://wiki-de.genealogy.net/Kartei_Leipziger_Familien
import collections
import re
from functools import lru_cache
# Abbreviation -> resolved name. Entries are applied in insertion order with plain substring
# replacement, so every key must carry its trailing dot; otherwise it would also match inside
# names that are already written out.
# The wiki states that "J." can stand for either Johann or Hans; since "Joh." is resolved
# separately below, "J." is resolved as Hans.
# "Matth." could be either Matthias or Matthäus; Matthias is used here.
_ABBREVIATIONS = {
    "Ad.": "Adolf",
    "Alfr.": "Alfred",
    "Alb.": "Albrecht",
    "A.": "Anna",
    "B.": "Barbara",
    "C.": "Catharina",
    "Cl.": "Clara",
    "d. J.": "der Jüngere",
    "Dan.": "Daniel",
    "Dietr.": "Dietrich",
    "E.": "Elisabeth",
    "Ed.": "Eduard",
    "El.": "Eleonore",
    "Em.": "Emmanuel",
    "F.": "Friedrich",
    "Fd.": "Ferdinand",
    "Flor.": "Florentine",
    "Fr.": "Friederike",
    "Franz.": "Franziska",
    "Fz.": "Franz",
    "G.": "Georg",
    "Gf.": "Gottfried",
    "Gi.": "Gottlieb",
    "Go.": "Gottlob",
    "Greg.": "Gregor",
    "Gust.": "Gustav",
    "H.": "Heinrich",
    "Hen.": "Henriette",
    "Herm.": "Hermann",
    "Hier.": "Hieronymus",
    "J.": "Hans",
    "Joach.": "Joachim",
    "Jul.": "Juliane",
    "K.": "Karl",
    "L.": "Luise",
    "Leb.": "Leberecht",
    "Leop.": "Leopold",
    "Lud.": "Ludwig",
    "M.": "Maria",
    "Ma.": "Martha",
    "Matth.": "Matthias",
    "Math.": "Mathilde",
    "Md.": "Magdalena",
    "Mg.": "Margaretha",
    "Mich.": "Michael",
    "Mz.": "Moritz",
    "Nik.": "Nikolaus",
    "Osk.": "Oskar",
    "Osw.": "Oswald",
    "Ott.": "Ottilie",
    "P.": "Paul",
    "Pa.": "Pauline",
    "Phil.": "Philipp",
    "Reg.": "Regina",
    "Rich.": "Richard",
    # The trailing dot was missing here, which turned "Robert" into "Robertert".
    "Rob.": "Robert",
    "Ros.": "Rosina",
    "Rud.": "Rudolf",
    "S.": "Sophia",
    "Sab.": "Sabina",
    "Sam.": "Samuel",
    "Seb.": "Sebastian",
    "Sib.": "Sibylle",
    "Sid.": "Sidonia",
    "Sieg.": "Siegmund",
    "Sus.": "Susanne",
    "Ther.": "Theresia",
    "Tob.": "Tobias",
    "U.": "Ursula",
    "W.": "Wilhelm",
    "We.": "Wilhelmine",
    "Wilh.": "Wilhelm",
    "Wolfg.": "Wolfgang",
}

# Abbreviations whose resolution depends on the gender guessed from the name's ending.
_MALE_FORMS = (
    ("Aug.", "August"),
    ("Ant.", "Anton"),
    ("Fried.", "Friedrich"),
    ("Joh.", "Johann"),
)
_FEMALE_FORMS = (
    ("Ant.", "Antonie"),
    ("Aug.", "Auguste"),
    ("Fried.", "Friederike"),
    ("Joh.", "Johanna"),
)

# Patterns (compiled once at import time instead of on every call).
# Not all the rules are implemented yet, mainly the ones found in the sample.
# A name counts as female when it ends in "a" or "e".
_FEMALE_PATTERN = re.compile(r"[A-Za-z\s.]+[ae]$")
# An opening bracket followed somewhere later by a closing bracket.
_BRACKET_PATTERN = re.compile(r"[(].*[)]")
# NOTE: "[ae]*" may match nothing, so this matches any "X" in the name.
_CHRISTIAN_PATTERN = re.compile(r"[X][ae]*")
# "X" followed by three letters out of "ophian", e.g. "Xoph" or "Xian".
_CHRIST_PATTERN = re.compile(r"[X][ophian]{3}")
# Any run of ASCII letters followed by a dot, i.e. something that looks like an abbreviation.
_ABBREVIATION_PATTERN = re.compile(r"[A-Za-z]+[.]")


@lru_cache(maxsize=None)
def resolvename(name: str) -> str:
    """
    Resolve the abbreviations contained in a name.

    The name is checked against the abbreviation rules and every abbreviation that is found is
    replaced by its written-out form. The steps are, in order: a fixed spelling correction, the
    gender-independent abbreviation table, the "X" shorthand for "Christ"/"Christian", the
    gender-dependent abbreviations ("Aug.", "Ant.", "Fried.", "Joh.") and finally the removal of
    brackets. Results are memoised by ``lru_cache``.

    :param name: the input name
    :return: the name with all known abbreviations resolved
    """
    # ``search`` is used throughout because ``match`` only checks the beginning of the string.

    # resolve a spelling error
    if name == "Joh. Christia":
        name = "Johann Christian"

    # resolve all names that are abbreviated the same way for both genders
    if _ABBREVIATION_PATTERN.search(name):
        for abbreviation, resolved in _ABBREVIATIONS.items():
            name = name.replace(abbreviation, resolved)

    # resolve all the "X" shorthands: "Xoph" -> "Christoph", "Xian" -> "Christian", ...
    if _CHRIST_PATTERN.search(name):
        name = name.replace("X", "Christ")
    # any "X" still left (there is none if the branch above ran) stands for "Christian",
    # so "Xe" becomes "Christiane"
    if _CHRISTIAN_PATTERN.search(name):
        name = name.replace("X", "Christian")

    # resolve the gender-dependent abbreviations; none of the male forms ends in "a" or "e",
    # so a single either/or decision is equivalent to testing both genders one after another
    if _ABBREVIATION_PATTERN.search(name):
        forms = _FEMALE_FORMS if _FEMALE_PATTERN.search(name) else _MALE_FORMS
        for abbreviation, resolved in forms:
            name = name.replace(abbreviation, resolved)

    # get rid of brackets, but only when an opening one is followed by a closing one
    if _BRACKET_PATTERN.search(name):
        name = name.replace("(", "").replace(")", "")

    return name
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment