Skip to content
Snippets Groups Projects
Commit 43edfb81 authored by Marcus Baumgarten's avatar Marcus Baumgarten
Browse files

Neue Datei hochladen

parent e3e8b6b3
No related branches found
No related tags found
No related merge requests found
import julian
import placefinder
import time
def provincesURI(time):
    """
    This function defines the URIs of different regions at different times.
    :param time: year to which an administrative assignment should be made
    :return: dictionary of GOV object URIs and the textual description of the respective province;
             an empty dictionary for the years 1872 to 1989, for which no assignment is defined here
    """
    # for times before 1872
    if time <= 1871:
        return {
            "object_190122": "A 01 Provinz Holstein",
            # Problem in the GOV: The places are not linked to the historical offices, so the province is never found
            "adm_131053": "A 02 Provinz Lauenburg",
            "object_1081716": "A 03 Provinz Brandenburg (ohne Berlin)",
            # if Berlin is meant, it will be recorded before
            "object_190330": "A 04 Provinz Hessen-Nassau",
            "object_268785": "A 05 Provinz Hohenzollern",
            "object_284443": "A 05 Provinz Hohenzollern",
            # Hohenzollern-Sigmaringen goes 1850 to Hohenzollerschen Landen
            "adm_368500": "A 06 Provinz Ostpreußen",
            "adm_368480": "A 07 Provinz Pommern",
            "object_211667": "A 08 Provinz Posen",
            "object_279654": "A 09 Provinz Sachsen",
            "adm_368470": "A 10 Provinz Schlesien",
            "object_190325": "A 11 Provinz Westfalen",
            "object_213750": "A 12 Provinz Westpreußen",
            "object_1047283": "A 13 Rheinprovinz",  # Provinz Jülich-Kleve-Berg until 1822
            "object_405464": "A 13 Rheinprovinz",  # Provinz Großherzogtum Niederrhein until 1822
            "object_190337": "A 13 Rheinprovinz",
            "BERLINJO62PM": "A 14 Provinz Berlin",
            "object_257607": "B 01 Amt Bergedorf",
            "adm_369040": "B 02 Hansestadt Bremen",
            "adm_369020": "B 03 Stadt Hamburg",
            "LUBECKJO53IU": "B 04 Stadt Lübeck",
            "adm_136412": "B 05 Stadt Frankfurt am Main",
            "object_217406": "B 06 Fürstentum Lippe-Detmold",
            "object_217818": "B 07 Fürstentum Schaumburg-Lippe",
            "object_218152": "B 08 Fürstentum Waldeck-Pyrmont",
            "object_352387": "B 09 Großherzogtum Oldenburg",
            "object_217952": "B 10 Großherzogtum Baden",
            "object_218147": "B 11 Hessen",
            "object_217750": "B 12 Großherzogtum Mecklenburg-Schwerin",
            "object_217749": "B 13 Großherzogtum Mecklenburg-Strelitz (einschließlich des Fürstentums Ratzeburg)",
            "object_190873": "B 14 Herzogtum Anhalt",
            "object_217954": "B 15 Herzogtum Braunschweig",
            "object_218153": "B 16 Herzogtum Nassau",
            "object_190098": "B 17 Herzogtum Schleswig",
            "object_190729": "B 18 Königreich Württemberg",
            "object_217953": "B 19 Königreich Bayern",
            "object_190327": "B 20 Königreich Hannover",
            "object_218149": "B 21 Königreich Sachsen",
            "object_275299": "B 22 Kurfürstentum Hessen",  # here equated with Kurhessen
            "object_284442": "B 23 Landgrafschaft Hessen-Homburg",
            "": "B 24 Thüringische Staaten",  # is divided into many sub-states as follows
            "object_218143": "B 24 Thüringische Staaten",  # Sachsen-Weimar-Eisenach
            "object_284441": "B 24 Thüringische Staaten",  # Reuß Jüngere Linie
            "object_218134": "B 24 Thüringische Staaten",  # Reuß Ältere Linie
            "object_218137": "B 24 Thüringische Staaten",  # Sachsen-Altenburg
            "object_218138": "B 24 Thüringische Staaten",  # Sachsen-Coburg-Gotha
            "object_265487": "B 24 Thüringische Staaten",  # Sachsen Gotha
            "object_218142": "B 24 Thüringische Staaten",  # Sachsen-Meiningen
            "object_218150": "B 24 Thüringische Staaten",  # Schwarzburg-Rudolstadt
            "object_218151": "B 24 Thüringische Staaten",  # Schwarzburg-Sondershausen
            "object_218141": "B 24 Thüringische Staaten",  # Sachsen-Hildburghausen, has no subordinate objects
        }
    # for times after 1989
    if time >= 1990:
        return {
            "BERLINJO62PM": "Land Berlin",
            "object_218149": "Freistaat Sachsen",
            "adm_369080": "Land Baden-Württemberg",
            "adm_369090": "Freistaat Bayern",
            "adm_369120": "Land Brandenburg",
            "adm_369040": "Freie Hansestadt Bremen",
            "object_1259992": "Freie und Hansestadt Hamburg",
            "adm_369060": "Land Hessen",
            "adm_369130": "Land Mecklenburg-Vorpommern",
            "adm_369030": "Land Niedersachsen",
            "adm_369050": "Land Nordrhein-Westfalen",
            "adm_369070": "Land Rheinland-Pfalz",
            "adm_369100": "Saarland",
            "adm_369150": "Land Sachsen-Anhalt",
            "adm_369010": "Land Schleswig-Holstein",
            "adm_369160": "Freistaat Thüringen",
        }
    # no assignment is defined for 1872-1989: return an empty dictionary rather than an implicit None,
    # so that callers can always treat the result as a dictionary
    return {}
def _isAllowedObject(ref, client, bannedObjectTypes):
    """
    This function checks whether the first type entry of a GOV object is not a prohibited object type.
    :param ref: GOV identifier of the object to check
    :param client: connection to the GOV-Webservice
    :param bannedObjectTypes: collection of prohibited object type values
    :return: True if the object type is not prohibited, otherwise False
    """
    return callWebservice(ref, client)["type"][0]["value"] not in bannedObjectTypes


def provinceFinder(govid, referenceYear, client):
    """
    This function determines the historical-administrative affiliation to an object at a given time.
    Starting at the given object, it moves up the membership tree ("part-of", otherwise "located-in")
    one level per iteration (at most 10 levels) until an object listed by provincesURI() is reached.
    :param govid: GOV identifier (string)
    :param referenceYear: year to which an administrative assignment should be made
    :param client: connection to the GOV-Webservice
    :return: province or "None"
    """
    # if this variable is 1, the program will be stopped for one second in case of internet connection failures
    # this prevents an abort of the program due to internet problems, but leads to a longer runtime
    withSleeping = 0
    # definition of prohibited object types
    bannedObjectTypes = placefinder.bannedObjects()
    # assignment of objects to be found and historical-administrative units
    provinces = provincesURI(referenceYear)
    # if GOV identifier empty, then return None
    if govid == "":
        return "None"
    # if no province table is available for the reference year, nothing can ever match;
    # skip the webservice queries and report "None" directly
    if not provinces:
        return "None"
    govidBefore = 0  # initialization
    # following loop jumps one level up in the membership tree per iteration
    # number of 10 is currently chosen arbitrarily, in the hope that no tree has more levels
    for _ in range(0, 10):
        # here, possible superordinate objects are included, which are appropriate in time
        govidsList = []  # list A, priority
        # List B (non priority) is required if no object fits so well that it is included in List A.
        nonPrioGovidsList = []  # list B, non priority
        # termination condition: if the same object is examined twice in a row, then abort
        # query is used to improve the runtime, so that the same object is not searched max. 10 times
        if govid == govidBefore:
            print("Error: Object can no longer take a meaningful step (GOV-ID, GOV-ID before):", govid, govidBefore)
            break
        # since "govid" changes, the previous one must be cached
        govidBefore = govid
        # check if the object already matches a province
        if govid in provinces:
            return provinces[govid]
        # information about the object is obtained from the web service (a dictionary that is composed of dictionary)
        if withSleeping == 1:
            for run in range(1000):
                try:
                    govidInfo = callWebservice(govid, client)
                    break  # request succeeded, no further attempt necessary
                except Exception:  # if the connection is just gone the program should not crash
                    if run == 999:
                        # all attempts failed: do not continue with missing or outdated data
                        print("Status: Connection error")
                        raise
                    time.sleep(1)
                    print("Status: Sleeping for 1 s.")
        else:
            govidInfo = callWebservice(govid, client)
        # from this the entry "part-of" is required
        govidInfoSuperior = govidInfo['part-of']
        # if "part-of" is empty, then the info is in "located-in" if necessary
        if len(govidInfoSuperior) == 0:
            govidInfoSuperior = govidInfo["located-in"]
        # every superior object is now searched
        # The date can be in three places: 1. in timespan, 2. in begin-year, end-year, 3. in year
        for superior in govidInfoSuperior:
            superiorRef = superior["ref"]
            # if timespan is not None, use the years from it
            if superior["timespan"] is not None:
                yearBegin = begincalculator(superior)
                yearEnd = endcalculator(superior)
                # check if the timespan matches the searched time
                # if yes list A is extended, otherwise list B
                if yearBegin <= referenceYear <= yearEnd:
                    if _isAllowedObject(superiorRef, client, bannedObjectTypes):
                        govidsList.append(superiorRef)
                else:
                    if _isAllowedObject(superiorRef, client, bannedObjectTypes):
                        nonPrioGovidsList.append(superiorRef)
            # if timespan not available
            else:
                try:
                    # begin is determined
                    yearBegin = superior["begin-year"]
                    if yearBegin is None:
                        yearBegin = 1  # sets begin to year 1
                    # end is determined
                    yearEnd = superior["end-year"]
                    if yearEnd is None:
                        yearEnd = 9999  # set end to year 9999
                    # if an object has an assumed time (start 1, end 9999), then always list B
                    # (problem otherwise e.g. with KIRORFJO40NS, adm_137138)
                    if yearBegin == 1 or yearEnd == 9999:
                        if _isAllowedObject(superiorRef, client, bannedObjectTypes):
                            nonPrioGovidsList.append(superiorRef)
                    # comparison with reference time
                    elif yearBegin <= referenceYear <= yearEnd:
                        if _isAllowedObject(superiorRef, client, bannedObjectTypes):
                            govidsList.append(superiorRef)
                    else:
                        if _isAllowedObject(superiorRef, client, bannedObjectTypes):
                            nonPrioGovidsList.append(superiorRef)
                except TypeError:
                    # the affected object is passed as an argument of the same print call
                    print(
                        "Error: A problem has occurred in the calculation of time spans. Presumably there are letters as numbers:",
                        superior)
        # if one of the objects in list A or B is one of the target objects, then take it
        for candidate in govidsList:  # list A
            if candidate in provinces:
                return provinces[candidate]  # Search was successful!
        for candidate in nonPrioGovidsList:  # list B
            if candidate in provinces:
                return provinces[candidate]  # Search was successful!
        # if list A is empty, then list B should be used
        if len(govidsList) == 0:
            # if list B is also empty, then you should try to fill it further
            if len(nonPrioGovidsList) == 0:  # Example: Case LIEHA2JO62RV, which has no part-of
                for superior in govidInfoSuperior:
                    # the type of the following object is of interest (not the previous one)
                    if _isAllowedObject(superior["ref"], client, bannedObjectTypes):
                        nonPrioGovidsList.append(superior["ref"])
            govidsList = nonPrioGovidsList
        # rate objects in list A or B
        # delete duplicate values while keeping the order of first occurrence, so that the tie-breaking below
        # does not depend on the arbitrary iteration order of a set
        # duplicate affiliations to the same object at different times may exist (e.g. adm_144024), but this is recognized below
        govidsList = list(dict.fromkeys(govidsList))
        # if list contains only one object, then this is the appropriate one to perform the next iteration
        if len(govidsList) == 1:
            govid = govidsList[0]
        # if list contains no object, then cancel
        elif len(govidsList) == 0:
            # mandatory abort, because no object could be determined to perform the next iteration
            break
        else:  # case where list contains more than one value
            closerInTime = []  # initialization
            # each object in the list is checked to see how close the time limits are to the reference time
            for elementGovidsList in govidsList:
                # a simple list comprehension to find the index is inappropriate, since the searched value can occur several times
                # therefore a list is created
                indexList = []  # results are stored in this list
                for counter, resultPartOf in enumerate(govidInfoSuperior):
                    if resultPartOf["ref"] == elementGovidsList:
                        indexList.append(counter)
                if len(indexList) == 0:
                    print("Error: The object name does not occur.")
                for index in indexList:
                    if govidInfoSuperior[index]["timespan"] is not None:
                        # if timespan is given, then it is more detailed
                        yearBegin = begincalculator(govidInfoSuperior[index])
                        yearEnd = endcalculator(govidInfoSuperior[index])
                    # if only one year, but no begin or end
                    elif govidInfoSuperior[index]["begin-year"] is None and \
                            govidInfoSuperior[index]["end-year"] is None and \
                            govidInfoSuperior[index]["year"] is not None:
                        yearBegin = govidInfoSuperior[index]["year"]
                        yearEnd = govidInfoSuperior[index]["year"]
                    else:  # if no timespan
                        yearBegin = govidInfoSuperior[index]["begin-year"]
                        if yearBegin is None:  # if there is no value
                            yearBegin = 1
                        yearEnd = govidInfoSuperior[index]["end-year"]
                        if yearEnd is None:
                            yearEnd = 9999
                    diffBegin = abs(yearBegin - referenceYear)
                    diffEnd = abs(yearEnd - referenceYear)
                    clusterDict = {
                        "object": elementGovidsList,
                        "diffbegin": diffBegin,
                        "diffend": diffEnd,
                        "begin-year": yearBegin,
                        "end-year": yearEnd
                    }
                    closerInTime.append(clusterDict)  # list of dictionaries
            diff = 9999  # initialization
            # start with the first entry so that the index is defined even if no difference is below the initial value
            closestInTime = 0
            # In the following it is examined which of the chronologically obvious results is the closest in time.
            # it is irrelevant whether the difference lies before or after the reference time
            for counter, i in enumerate(closerInTime):
                # Equal comparisons are critical in cases where time limits overlap (e.g. object_289942 --> until 1920, since 1920)
                if int(i["diffbegin"]) < diff:
                    diff = int(i["diffbegin"])
                    closestInTime = counter
                elif int(i["diffbegin"]) == diff:
                    # search the absolute value of the start (not the difference)
                    yearBegin = i["begin-year"]
                    # if the reference year is not after the begin year
                    if referenceYear <= yearBegin:
                        # if it is "begin" and the other "end", then take the one with the end
                        # if the previous is no end (then neither + nor - 0), then take the new one
                        if (closerInTime[closestInTime]["diffend"] + diff) != 0 and (
                                closerInTime[closestInTime]["diffend"] - diff) != 0:
                            closestInTime = counter
                    # larger
                    elif referenceYear > yearBegin:
                        # if the previous one is no beginning (then neither + nor - 0), then move to new one
                        if (closerInTime[closestInTime]["diffbegin"] + diff) != 0 and (
                                closerInTime[closestInTime]["diffbegin"] - diff) != 0:
                            closestInTime = counter
                if int(i["diffend"]) < diff:
                    diff = int(i["diffend"])
                    closestInTime = counter
                elif int(i["diffend"]) == diff:
                    # search the absolute value of the end (not the difference)
                    yearEnd = i["end-year"]
                    # if the reference year is not after the end year
                    if referenceYear <= yearEnd:
                        # take this if the previous (closestInTime) is a start or no end
                        if (closerInTime[closestInTime]["diffend"] + diff) != 0 and (
                                closerInTime[closestInTime]["diffend"] - diff) != 0:
                            closestInTime = counter
                    # larger
                    elif referenceYear > yearEnd:
                        # take this if the previous one is not a beginning
                        if (closerInTime[closestInTime]["diffbegin"] + diff) != 0 and (
                                closerInTime[closestInTime]["diffbegin"] - diff) != 0:
                            closestInTime = counter
            # object with the closest reference time is selected
            # Reason for preferring later entries on ties: In several regularly occurring special cases
            # (e.g. some places in Poznan) the right one is rather behind
            govid = closerInTime[closestInTime]["object"]
    return "None"
def callWebservice(govid, client):
    """
    This function queries the GOV webservice for a single object.
    An internet connection is required.
    :param govid: GOV identifier
    :param client: connection to the GOV-Webservice
    :return: information of the GOV about the corresponding GOV identifier
    """
    service = client.service
    return service.getObject(govid)
def begincalculator(data):
    """
    This function derives the year in which an affiliation begins from the timespan data of an object
    (the begin is available as Julian date).
    :param data: time information about administrative affiliations
    :return: year as integer (1 if the timespan has no begin)
    """
    begin = data["timespan"]["begin"]
    if begin is None:
        # no begin given: assume a very early year
        return 1
    # NOTE(review): the modified Julian date is conventionally JD - 2400000.5, here 2400000 is subtracted;
    # confirm that the resulting half-day shift is intended
    shiftedJd = begin["jd"] - 2400000
    beginDate = julian.from_jd(shiftedJd, fmt='mjd')
    return int(beginDate.year)  # must be int to compare it
def endcalculator(data):
    """
    This function derives the year in which an affiliation ends from the timespan data of an object
    (the end is available as Julian date).
    :param data: time information about administrative affiliations
    :return: year as integer (9999 if the timespan has no end)
    """
    end = data["timespan"]["end"]
    if end is None:
        # no end given: assume a very late year
        return 9999
    # NOTE(review): the modified Julian date is conventionally JD - 2400000.5, here 2400000 is subtracted;
    # confirm that the resulting half-day shift is intended
    shiftedJd = end["jd"] - 2400000
    endDate = julian.from_jd(shiftedJd, fmt='mjd')
    return int(endDate.year)  # must be int to compare it
def mainProvinceFinder(resultPlaceFinder, filename, client, time):
    """
    This function assigns each identified urbanonym of one source to a historical province.
    :param resultPlaceFinder: list of dictionaries, which contains the identification for each location
    :param filename: name of the file/source
    :param client: connection to the GOV-Webservice
    :param time: year to which an administrative assignment should be made
    :return: list of dictionaries containing urbanonym, source, GOV-identifier and assigned provinces
    """
    assignments = []
    for entry in resultPlaceFinder:
        # entries of other sources are skipped (only happens with data loaded from CSV)
        if entry["filename"] != filename:
            continue
        identifier = entry["id"]  # GOV identifier
        if identifier == "NONE":
            # identification has failed, so clustering cannot be successful
            assignedProvince = "NONE"
        else:
            # identification was successful, so the clustering is triggered
            assignedProvince = provinceFinder(identifier, time, client)
        assignments.append({
            "original name": entry["original name"],
            "filename": entry["filename"],
            "id": identifier,
            "province": assignedProvince
        })
    return assignments
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment