"""Identification of place names (urbanonyms) of GEDCOM files in the Mini-GOV."""

import copy

from Levenshtein import distance
from haversine import haversine

import qualitychecker


# sentinel values produced by dataCleaner() and recognised by find()
_PROHIBITED_CONTENT = "unrealisticSequenceOfStringsAlpha"
_PROHIBITED_SPECIFICATION = "unrealisticSequenceOfStringsBeta"

# all 26 lower-case letters (a set, so that only single characters can match)
_LETTERS = frozenset("abcdefghijklmnopqrstuvwxyz")

# words that must not be contained in an urbanonym
# banning abbreviations of states is critical because they are beginnings of other places
_BANNED_CONTAINED_WORDS = (
    "kanada",
    "canada",
    "america",
    "united states",
    " usa",
    "alabama",
    "alaska",
    "arizona",
    "arkansas",
    "california",
    "colorado",
    "connecticut",
    "delaware",
    "florida",
    "georgia",
    "hawaii",
    "idaho",
    "illinois",
    "indiana",
    "iowa",
    "kansas",
    "kentucky",
    "louisiana",
    "maine",
    "maryland",
    "massachusetts",
    "michigan",
    "minnesota",
    "mississippi",
    "missouri",
    "montana",
    "nebraska",
    "nevada",
    "new hapshire",  # historical misspelling, kept for backward compatibility
    "new hampshire",
    "new jersey",
    "new york",
    "north carolina",
    "north dakota",
    "ohio",
    "oklahoma",
    "oregon",
    "pennsylvania",
    "rohde island",  # historical misspelling, kept for backward compatibility
    "rhode island",
    "south carolina",
    "south dakota",
    "tennessee",
    "texas",
    "utah",
    "vermont",
    "virginia",
    "washington",
    "west virginia",
    "wisconsin",
    "wyoming",
    "england",
    "united kingdom",
    "australia",
    "spain",
    "espagne",
    "glamorga",
    "russia",
    "luxembourg",
    "scotland",
    "irland",
    "norway",
    "griechenland",
    "turkey",
    "südafrika",
    "brasil",
    "france",
)

# words that must not be equal to the urbanonym (exact match only, not "contains")
_BANNED_EXACT_WORDS = ("germany", "poland", "france", "russland")

# deletes digits and repairs umlauts/sharp s that arrive as single mis-decoded characters
# NOTE(review): the source bytes look like Mac Roman / DOS code page residue - confirm
_CHARACTER_TABLE = str.maketrans("\xa7\x94\x9a\x8a\x9f", "ßööäü", "0123456789")


def placeFinder(locNameClean, miniGOV, gedcomMetaInfos, bannedObjectTypes):
    """
    This function identifies a cleaned urbanonym and reads the data of the hit from the Mini-GOV.
    :param locNameClean: name of place after cleansing
    :param miniGOV: list of all objects in the Mini-GOV
    :param gedcomMetaInfos: content for one line of the file "quality.csv"
    :param bannedObjectTypes: list of banned object types
    :return: list of GOV identifier, longitude, latitude, and information about identification process
    """
    # cluster midpoints of the current source
    clusterMeanList = gedcomMetaInfos["cluster midpoints"]

    # find() returns the row in the Mini-GOV (-1 if nothing was selected) and an explanation
    positionMiniGOV, selectInfo = find(miniGOV, locNameClean, clusterMeanList, bannedObjectTypes)

    # without a hit every value is named "NONE"
    govid = "NONE"
    longitude = "NONE"
    latitude = "NONE"
    if positionMiniGOV != -1:
        selectedObject = miniGOV[positionMiniGOV]
        govid = selectedObject["GOV-Kennung"]  # ID of GOV object
        longitude = selectedObject["geographische Länge"]
        latitude = selectedObject["geographische Breite"]
    return [govid, longitude, latitude, selectInfo]


def _candidateNames(locNameClean):
    """
    This function splits a cleaned urbanonym into the sub-urbanonyms that are examined one after another.
    :param locNameClean: name of place/urbanonym after cleansing
    :return: list that starts with the entire designation, followed by its components
    """
    valueList = [locNameClean]
    # hyphen: part before and part after the first hyphen
    if "-" in locNameClean:
        positionMinus = locNameClean.find("-")
        valueList.append(locNameClean[:positionMinus])
        valueList.append(locNameClean[positionMinus + 1:])
    # brackets: part in front of the brackets and content of the brackets
    if "(" in locNameClean and ")" in locNameClean:
        positionBracketOpen = locNameClean.find("(")
        positionBracketClose = locNameClean.find(")")
        # rstrip instead of a fixed "-1": a missing space before the bracket must not cost a letter
        valueList.append(locNameClean[:positionBracketOpen].rstrip(" "))
        valueList.append(locNameClean[positionBracketOpen + 1:positionBracketClose])
    # last resort: the part up to the first space
    if " " in locNameClean:
        valueList.append(locNameClean[:locNameClean.find(" ")])
    return valueList


def _approximatePosition(miniGOV, name, keyMiniGOV):
    """
    This function searches the approximate row of a name with a halving search.
    The comparison uses the lower-cased names, i.e. it relies on the Mini-GOV being ordered by them.
    :param miniGOV: list of all objects in the Mini-GOV
    :param name: urbanonym (lower case)
    :param keyMiniGOV: key of the column that contains the name of an object
    :return: row at which the search stopped
    """
    position = len(miniGOV) // 2  # start in the middle of the Mini-GOV
    furtherPosition = len(miniGOV)  # previous position; not 0, so that the first step is half of the list
    # execute loop until the new position is less than 10 lines away from the old one
    while (furtherPosition - position) not in range(-10, 10):
        positionCache = position
        currentName = miniGOV[position][keyMiniGOV].lower()
        if name == currentName:
            break  # runtime improvement, it cannot be more precise
        step = abs(furtherPosition - position) // 2
        if name > currentName:
            position += step
        else:
            position -= step
        furtherPosition = positionCache
    return position


def find(miniGOV, locNameClean, clusterMeanList, bannedObjectTypes):
    """
    This function identifies an adjusted urbanonym.
    :param miniGOV: list of all objects in the Mini-GOV
    :param locNameClean: name of place/urbanonym after cleansing
    :param clusterMeanList: list of means of coordinates for the clusters in a source
    :param bannedObjectTypes: list of banned object types
    :return: index of the line in "miniGOV" of the identified location (-1 if none) and information about the type of identification
    """
    # headline of the column with the relevant information (actual name) of objects
    keyMiniGOV = "aktueller Name"

    # banned characteristics of value
    if locNameClean == _PROHIBITED_CONTENT:
        return [-1, "Not selected on the basis of prohibited content."]
    if locNameClean == _PROHIBITED_SPECIFICATION:
        return [-1, "Not selected based on prohibited specification."]

    # rank order of some types (priority)
    # if there are several matches, it is more likely to be a "Stadt" (more urban) than a "Ort" (more rural)
    orderRankObjectTypes = ["Kreisfreie Stadt",
                            "Stadt",
                            "Dorf",
                            "Pfarrdorf",
                            "Ort",
                            "Ortsteil",
                            "Ortschaft",
                            "Wohnplatz",
                            "Weiler"]

    # cleaned location data can contain several urbanonyms (e.g. places in brackets, hyphen as separation)
    # these are checked individually; the first component that leads to a decision ends the search
    valueList = _candidateNames(locNameClean)

    for counter, newLocValueClean in enumerate(valueList):
        position = _approximatePosition(miniGOV, newLocValueClean, keyMiniGOV)
        # examine up to 30 lines before and after the found position (several places can share a name)
        # the window is clamped explicitly: a negative index would not raise an IndexError,
        # it would silently address the end of the Mini-GOV
        startPosition = max(position - 30, 0)
        endPosition = min(position + 30, len(miniGOV))

        equalList = []  # line numbers of objects with the same name
        similarList = []  # line numbers of objects with a similar name

        # similarity analysis only makes sense if "newLocValueClean" is not empty
        if newLocValueClean != "":
            for i in range(startPosition, endPosition):
                levenshteinDistance = distance(miniGOV[i][keyMiniGOV].lower(), newLocValueClean)
                if levenshteinDistance == 0:
                    equalList.append(i)
                # relative levenshtein distance of at most 0.17 counts as similar
                elif levenshteinDistance / len(newLocValueClean) <= 0.17:
                    similarList.append(i)
            # "equalList" has priority over "similarList"
            if equalList:
                similarList = []

        # "selectInfo" (second element of the result) explains if and how an identification takes place
        if len(equalList) == 1:
            return [equalList[0], "Selected based on a single matching hit"]
        if len(equalList) > 1:
            return areaSearch(equalList,
                              "",
                              miniGOV,
                              clusterMeanList,
                              bannedObjectTypes,
                              orderRankObjectTypes)
        if len(similarList) == 1:
            # even if there is only one hit, it must not have a banned object type
            if miniGOV[similarList[0]]["Objekttyp als Zahl"] in bannedObjectTypes:
                return [-1, "Not selected because nothing was found in the Mini-GOV (with similarity analysis)"]
            return [similarList[0], "Selected based on a single matching hit in the similarity analysis"]
        if len(similarList) > 1:
            return areaSearch(similarList,
                              "(with similarity analysis)",
                              miniGOV,
                              clusterMeanList,
                              bannedObjectTypes,
                              orderRankObjectTypes)
        # neither equal nor similar hit: give up only after the last element of "valueList"
        if (counter + 1) == len(valueList):
            return [-1, "Not selected because nothing was found in the Mini-GOV (with similarity analysis)"]

    # if nothing is found until here, then return -1
    return [-1, "Nothing selected because nothing was found in the Mini-GOV"]


def _selectByObjectType(coordList, supplementSelectInfo, miniGOV, bannedObjectTypes, orderRankObjectTypes):
    """
    This function selects one of several candidates via their object types (no coordinates are available).
    :param coordList: list of candidates, each starting with its line number in the Mini-GOV
    :param supplementSelectInfo: text that is appended to the selection information
    :param miniGOV: list of all objects in the Mini-GOV
    :param bannedObjectTypes: list of banned object types
    :param orderRankObjectTypes: list that defines rank order of some object types
    :return: list of selected position (-1 if none) and an information about the selection
    """
    # filter out the candidates with unauthorized types
    allowedPositions = [entry[0] for entry in coordList
                        if miniGOV[entry[0]]["Objekttyp als Zahl"] not in bannedObjectTypes]
    if len(allowedPositions) == 1:
        return [allowedPositions[0], "Selected based on the only valid type " + supplementSelectInfo]
    if len(allowedPositions) == 0:
        return [-1, "None selected, because none has a valid type " + supplementSelectInfo]

    # several are left over: walk through the object types in order of their rank
    for objectType in orderRankObjectTypes:
        objectTypeRankList = [position for position in allowedPositions
                              if miniGOV[position]["Objekttyp als Text"] == objectType]
        # e.g. a city was found and preferred over a village
        if len(objectTypeRankList) == 1:
            return [objectTypeRankList[0], "Selected on the basis of a suitable type " + supplementSelectInfo]
        # multiple hits of the same rank, none can be selected
        if len(objectTypeRankList) > 1:
            return [-1, "Not selected based on too many matching types " + supplementSelectInfo]
        # no hit: continue with the next object type
    return [-1, "Not selected, because no heuristic gives a result " + supplementSelectInfo]


def areaSearch(similarList, supplementSelectInfo, miniGOV, clusterMeanList, bannedObjectTypes, orderRankObjectTypes):
    """
    This function selects one of several possible locations.
    The basis for this is the distance to the cluster midpoints of the source.
    If no distance can be calculated, the object types decide.
    :param similarList: list of line numbers in the Mini-GOV that match the urbanonym
    :param supplementSelectInfo: text that can be appended to "selectInfo"
    :param miniGOV: list of all objects in the Mini-GOV
    :param clusterMeanList: list of means of coordinates for the clusters in a source
    :param bannedObjectTypes: list of banned object types
    :param orderRankObjectTypes: list that defines rank order of some object types
    :return: list of selected position and an information about the selection/identification process
    """
    # read coordinates from the Mini-GOV: [line number, longitude, latitude]
    coordList = [[i, miniGOV[i]["geographische Länge"], miniGOV[i]["geographische Breite"]]
                 for i in similarList]

    # one distance per candidate with valid coordinates and per cluster midpoint
    geoDistanceList = []
    if len(clusterMeanList) != 0:
        for entry in coordList:
            longitude = entry[1]
            latitude = entry[2]
            # ignore entries without valid coordinates
            if longitude in ("", "NONE") or latitude in ("", "NONE"):
                continue
            for clusterMean in clusterMeanList:
                # haversine expects (latitude, longitude) for both points
                geoDistance = haversine((float(latitude), float(longitude)),
                                        (float(clusterMean[0]), float(clusterMean[1])))
                geoDistanceList.append([entry, geoDistance])

    # no distance was determined, maybe the object types allow a selection
    if len(geoDistanceList) == 0:
        return _selectByObjectType(coordList,
                                   supplementSelectInfo,
                                   miniGOV,
                                   bannedObjectTypes,
                                   orderRankObjectTypes)

    # the candidate with the smallest distance to any cluster midpoint is selected
    positionMiniGOV = min(geoDistanceList, key=lambda element: element[1])[0][0]
    # one distance is created per cluster, so a single candidate yields exactly len(clusterMeanList) distances
    if len(geoDistanceList) == len(clusterMeanList):
        selectInfo = "Selected because it was the only one with coordinates " + supplementSelectInfo
    else:
        selectInfo = "Selected on the basis of geographical proximity " + supplementSelectInfo
    return [positionMiniGOV, selectInfo]


def stringFunc1(behindTag, string):
    """
    This function removes a forbidden string from "behindTag".
    If the string is at the beginning, every occurrence of it is deleted.
    If it first occurs later, everything from this occurrence on is cut off.
    :param behindTag: urbanonym
    :param string: forbidden string
    :return: urbanonym purged from the forbidden string
    """
    position = behindTag.find(string)
    if position > 0:  # is not at the beginning
        return behindTag[:position]
    if position == 0:  # is at the beginning
        return behindTag.replace(string, "")
    return behindTag


def stringFunc2(behindTag, string):
    """
    This function replaces a forbidden string by a space if "behindTag" starts with it.
    In this case all occurrences of the string are replaced.
    :param behindTag: urbanonym
    :param string: forbidden string
    :return: urbanonym purged from the forbidden string
    """
    if behindTag.startswith(string):
        return behindTag.replace(string, " ")
    return behindTag


def dataCleaner(dataForCleansing):
    """
    This function is used to clean up an urbanonym.
    Attention: the order of the cleansing operations is relevant.
    :param dataForCleansing: urbanonym (string)
    :return: adjusted urbanonym (string) or one of the two "unrealisticSequenceOfStrings..." markers
    """
    # data behind GEDCOM tag "PLAC" (the urbanonym), lower case for better cleansing
    behindTag = dataForCleansing.lower()

    # exclude the possibility that an abbreviation of a US state appears at the end (", xx")
    if behindTag[-4:-2] == ", " and behindTag[-2:-1] in _LETTERS and behindTag[-1:] in _LETTERS:
        return _PROHIBITED_CONTENT
    if any(bannedWord in behindTag for bannedWord in _BANNED_CONTAINED_WORDS):
        return _PROHIBITED_CONTENT
    if behindTag in _BANNED_EXACT_WORDS:
        return _PROHIBITED_SPECIFICATION

    # if there is no space behind the (first) dot, it is added
    # the rules below ("ksp. ", "b. ", ...) rely on the space following the dot
    positionDot = behindTag.find(".")
    if positionDot != -1 and behindTag[positionDot + 1:positionDot + 2] != " ":
        behindTag = behindTag[:positionDot + 1] + " " + behindTag[positionDot + 1:]

    # removal of defined strings
    for unwanted in (">", "<", "_", "'", "rk.", "ev.", "waldfriedhof", "friedhof", "wahrscheinlich", "aus "):
        behindTag = behindTag.replace(unwanted, "")
    # remove numbers and repair mis-decoded special characters
    behindTag = behindTag.translate(_CHARACTER_TABLE)
    # "(?)" before "?", otherwise empty brackets remain; " -" only with space in front, not as hyphen
    for unwanted in ("(?)", "?", " -"):
        behindTag = behindTag.replace(unwanted, "")

    # definition of strings to be removed (see stringFunc1)
    stringFunc1List = ["standesamt ",
                       "sta ",
                       "ksp. ",
                       "ksp ",
                       "kirchspiel ",
                       "kirche ",
                       "pfarramt ",
                       "ambt ",
                       "oder ",
                       "gemeinde ",
                       "gmde. ",
                       "gmde ",
                       "pfarrei ",
                       "gericht ",
                       "ksp. "
                       ]
    for forbidden in stringFunc1List:
        behindTag = stringFunc1(behindTag, forbidden)

    # definition of strings to be deleted if they are at the beginning
    stringFunc2List = [" bei ",
                       " b. ",
                       " in ",
                       " im "
                       ]
    for forbidden in stringFunc2List:
        behindTag = stringFunc2(behindTag, forbidden)

    # writing out abbreviations
    behindTag = behindTag.replace("berg. ", "bergisch ")  # Example: Bergisch Gladbach
    behindTag = behindTag.replace("b. ", "bei ")  # Example: Lichtenau b. Ansbach

    # deletion of not needed content: cut off everything from the first marker on
    # Examples: "Grone jetzt Göttingen", "... um 12 Uhr", "Lipke bei Landsberg",
    #           "Bronn Kr. Mergentheim", "Crossen/Oder"
    # partition() leaves the text untouched if the marker is missing
    for marker in (" jetzt", " heute", " um", " bei", " kr.", " amt", "/"):
        behindTag = behindTag.partition(marker)[0]
    behindTag = behindTag.lstrip(",")  # delete preceding commas
    behindTag = behindTag.partition(",")[0]  # Example: Arendzhain, Kreis Luckau
    _, separator, remainder = behindTag.partition(" in ")  # Example: Taufe in Ogkeln
    if separator:
        behindTag = remainder

    # eliminate multiple spaces
    while "  " in behindTag:
        behindTag = behindTag.replace("  ", " ")
    # eliminate surrounding spaces
    return behindTag.strip(" ")


def bannedObjects():
    """
    This function defines banned object types.
    Banned object types are object types in the GOV that should not be used for identification.
    Currently banned are ecclesiastical objects, legal objects (e.g. courts) and administrative
    divisions outside Germany that make allocation difficult.
    List of object types: http://gov.genealogy.net/type/list (retrieved on 8 December 2020)
    Sometimes there is no English translation of the names of the object types.
    :return: list of banned object types (numbers as strings; some numbers are listed twice)
    """
    return ["124",  # imperial abbey
            "250",  # Apostolische Administratur
            "6",  # diocese
            "91",  # Bistumsregion
            "9",  # deanery
            "260",  # Delegaturbezirk
            "11",  # diocese
            "12",  # Dompfarrei
            "13",  # filial church
            "249",  # Erzbischöfliches Amt
            "96",  # archbishopric
            "219",  # Expositur
            "245",  # chapel
            "26",  # church
            "210",  # Kirchenbund
            "92",  # Kirchengemeinde
            "27",  # Kirchenkreis
            "28",  # Kirchenprovinz
            "29",  # parish
            "153",  # Kommissariat
            "35",  # national church
            "243",  # Propstei
            "244",  # Nebenkirche
            "245",  # chapel
            "249",  # Erzbischöfliches Amt
            "41",  # Pfarr-Rektorat
            "42",  # parish
            "155",  # region
            "43",  # Pfarrkuratie
            "44",  # Pfarrverband
            "155",  # region
            "206",  # selsoviet
            "253",  # religious organization
            "49",  # sprengel
            "260",  # Delegaturbezirk
            "263",  # Landratsbezirk
            "151",  # Oberlandesgericht
            "105",  # judicial
            "3",  # Magistrates' Court
            "223",  # Landgericht
            "224",  # Pfleggericht
            "228",  # Gerichtsamt
            "19",  # Gerichtsbezirk
            "70",  # bailiwick
            "79",  # hundred
            "114",  # Vest
            "154",  # Honschaft
            "202",  # Amtsgerichtsbezirk
            "257",  # Landgemeinde PL
            "264",  # Mairie
            "135",  # canton
            "134",  # arrondissement
            "25"  # canton
            ]


def mainPlaceFinder(data, resultQualityChecker, filename, miniGov):
    """
    This function attempts to assign a GOV identifier to each location in a GEDCOM file.
    :param data: content of one GEDCOM file (list of rows); it is not modified
    :param resultQualityChecker: content for one line of the file "quality.csv"
    :param filename: name of the file/source
    :param miniGov: list of merged entries of the Mini-GOV
    :return: list of dictionaries, which contains the identification for each location
    """
    gedcomMetaInfo = resultQualityChecker

    # object types in the GOV that should not be used for identification
    bannedObjectTypes = bannedObjects()

    # the cleaned rows are written into a copy, "data" itself must stay untouched
    initialGedcomData = copy.copy(data)
    gedcomData = copy.copy(data)

    # clean up every urbanonym in the GEDCOM file
    for cleanCounter, _ in enumerate(gedcomData):
        resultParser = qualitychecker.gedcomRowParser(gedcomData, cleanCounter)  # separate data of one row
        tag = resultParser[2]  # GEDCOM tag
        if tag == "PLAC":
            # dataCleaner() lower-cases the data behind the GEDCOM tag itself
            dataCleaned = dataCleaner(resultParser[3])
            # overwrite the GEDCOM line with the cleaned text
            gedcomData[cleanCounter] = resultParser[0] + " " + tag + " " + dataCleaned

    # list of locations: adjusted urbanonym, original urbanonym, name of file
    # assumes rows of the form "<one-digit level> PLAC <name>" (tag in columns 2-5)
    locList = [[row[7:], initialGedcomData[counter][7:], filename]
               for counter, row in enumerate(gedcomData)
               if row[2:6] == "PLAC"]

    # delete duplicates in a source
    locList = sorted(set(map(tuple, locList)), reverse=True)

    # creation of a list containing the identifying data per urbanonym in a dictionary
    resultList = []
    for locNameClean, locName, fileName in locList:
        # placeFinder() returns [GOV identifier, longitude, latitude, selection information]
        govid, longitude, latitude, selectInfo = placeFinder(locNameClean,
                                                             miniGov,
                                                             gedcomMetaInfo,
                                                             bannedObjectTypes
                                                             )
        resultList.append({
            "id": govid,
            "latitude": latitude,
            "longitude": longitude,
            "selection information": selectInfo,
            "adjusted name": locNameClean,
            "original name": locName,
            "filename": fileName
        })
    return resultList