Skip to content
Snippets Groups Projects
Commit 2dbb899a authored by Marcus Baumgarten's avatar Marcus Baumgarten
Browse files

Neue Datei hochladen

parent 715e1270
No related branches found
No related tags found
No related merge requests found
# This module contains functions for resolving abbreviations according to the rules presented in:
# http://wiki-de.genealogy.net/Kartei_Leipziger_Familien
import collections
import re
from functools import lru_cache
# Abbreviation -> expansion pairs. They are applied one after another in
# exactly this order, each on the result of the previous replacement, so the
# order is significant (e.g. "- u." must be handled before "u. " and "-").
# Not all KLF rules are implemented yet, mainly the ones found in the sample.
_OCCU_ABBREVIATIONS = (
    # Extension of the KLF specification. Problem: e.g. red and tan tanners;
    # "$" marks that the preceding part is not a separate profession.
    ("- u.", "$"),
    ("B.", "&Bürger"),  # "&" distinguishes legal status from profession
    ("Bg.", "Bürger"),  # extension of the KLF specification
    ("Bgmstr.", "Bürgermeister"),  # extension of the KLF specification
    ("Br.", "Brauer"),
    ("Brbr.", "Branntweinbrenner"),
    # NOTE(review): the original comment here mentions the "&" legal-status
    # marker, but the expansion carries none - confirm which one is intended.
    ("E.", "Einwohner"),
    ("Fl.", "Fleischer"),
    ("Gl.", "Glaser"),
    # NOTE(review): the later "-" rule also rewrites the hyphen of this
    # expansion, which leaves two consecutive spaces in the result.
    ("GuS.", "Gold- und Silberdrahtzieher"),
    ("h.", "händler"),  # ending of a word
    ("Kr.", "Kramer"),
    ("L.", "Leinenweber"),
    ("m.", "macher"),  # ending of a word
    ("Z.", "Zimmermann"),
    ("Zg.", "Zimmergeselle"),
    # "u." stands for "und" (and) and has to be deleted; extension of the KLF
    # specification.
    ("u. ", ""),
    # Extension of the KLF specification: occupations are partly separated
    # with a hyphen instead of "u.".
    ("-", " "),
    # Extension of the KLF specification: more than two professions are
    # separated with commas.
    (",", ""),
    # Extension of the KLF specification: a place indication is not a
    # profession; "@" marks the place indication.
    ("zu ", "@"),
)

# At least one ASCII letter directly followed by a period, anywhere in the text.
_OCCU_ABBREVIATION_PATTERN = re.compile(r"[A-Za-z]+[.]")
# An opening bracket followed (not necessarily directly) by a closing bracket.
_OCCU_BRACKET_PATTERN = re.compile(r"[(].*[)]")


@lru_cache(maxsize=None)
def resolveroccu(occu):
    """
    Resolve the abbreviations in an occupational designation.

    If the designation contains a letter directly followed by a period, every
    rule of the abbreviation table is applied in order as a plain substring
    replacement. This includes the separator rules for "-", "," and "zu ",
    which are therefore left untouched in designations without such an
    abbreviation. Afterwards, round brackets are removed if the text contains
    an opening bracket that is followed by a closing one.

    Results are memoised per input string with an unbounded cache.

    :param occu: the occupational designation to resolve
    :return: the designation with the abbreviations resolved
    """
    # search() instead of match(): the abbreviation may occur anywhere in the
    # string, not only at its beginning.
    if _OCCU_ABBREVIATION_PATTERN.search(occu):
        for abbreviation, expansion in _OCCU_ABBREVIATIONS:
            occu = occu.replace(abbreviation, expansion)

    # Get rid of brackets, keeping the text they enclose.
    if _OCCU_BRACKET_PATTERN.search(occu):
        occu = occu.replace("(", "").replace(")", "")

    return occu
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment