Using the data from the Dispatch, a social network is to be created. For this, the necessary information for the edges (source, target, weight) had to be extracted. In the following code, the places mentioned within one article are considered: if two cities are mentioned in the same article, an edge is formed between them. A city that is mentioned more than once within the same article is only counted once. These edges are then collected and counted per year, so that if a connection between two cities (their co-mention in the same article) occurs in several articles, the weight of that edge is increased.

As the dataset was too large to be handled by Gephi, all edges with a weight below 5 were excluded.
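As a minimal illustration of this counting logic (with two made-up articles and TGN IDs, not the actual code below), co-mentions within one article are turned into alphabetically sorted pairs, and repeated pairs across articles increase the weight:

import itertools
from collections import Counter

# hypothetical articles, each reduced to the set of TGN IDs it mentions
articles = [["7013964", "7013943", "7007709"],
            ["7013943", "7013964"]]

weights = Counter()
for ids in articles:
    for a, b in itertools.combinations(sorted(set(ids)), 2):
        weights[(a, b)] += 1  # sorting avoids counting A-B and B-A separately

print(weights)  # ('7013943', '7013964') has weight 2, the other pairs weight 1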

import re
import os
import itertools

source_location = "./practice3/"   # the folder in which the "dispatch" is
target_location = "./edges/" # the folder in which the new files will be saved
mapData_source = "tgn_coordinates" 

 
lof=os.listdir(source_location) # getting all files from a folder



with open(mapData_source + ".tsv", "r", encoding ="utf8") as f9:
            mapData = f9.read()

def updateDic(dic, key):
    # increment the counter for a key, starting at 1 if the key is new
    if key in dic:
        dic[key] += 1
    else:
        dic[key] = 1

def edges(edgesList, edgesDic):
    # every unordered pair of toponyms mentioned in the same article forms an edge
    pairs = list(itertools.combinations(edgesList, 2))
    for e in pairs:
        key = "\t".join(sorted(e)) # sorted alphabetically, to avoid counting A > B and B > A separately
        updateDic(edgesDic, key)
            
def collectTaggedToponyms(xmlText, dic):
    # collect the TGN IDs of all tagged place names in one issue and turn them into edges
    xmlText = re.sub(r"\s+", " ", xmlText)
    topList = []

    for t in re.findall(r"<placeName[^<]+</placeName>", xmlText):
        t = t.lower()
        if re.search(r'"(tgn,\d+)', t):
            reg = re.search(r'"(tgn,\d+)', t).group(1)
            reg = re.search(r"(\d+)", reg).group(1) # keep only the numeric TGN ID

            if reg in mapData: # keep only IDs for which coordinates are available
                if reg not in topList: # a city mentioned several times is counted once
                    topList.append(reg)

    edges(topList, dic)

def tgn(year):
    weight = []
    source = []
    target = []
    edgesDic = {}

    for f in lof: # looping through all the files
        with open(source_location + f, "r", encoding="utf8") as f1:
            text0 = f1.read() # reads the file in

        issue_year = re.search(r"(\d\d\d\d)", text0).group(1)

        if int(issue_year) == int(year):
            collectTaggedToponyms(text0, edgesDic)

    new_file = str(year) + "_edges.tsv"
    header = "source\ttarget\tweight\n"
    for key in edgesDic:
        if edgesDic[key] >= 5: # drop weak connections so the file stays manageable for Gephi
            weight.append(edgesDic[key])
            splitting = key.split("\t")
            source.append(splitting[0])
            target.append(splitting[1])

    with open(target_location + new_file, "w") as f3: # "w" so reruns do not append duplicate rows
        f3.write(header)
        for i in range(len(source)):
            f3.write(source[i] + "\t" + target[i] + "\t" + str(weight[i]) + "\n")
        
 
print("1860")
tgn(1860)

print("1861")
tgn(1861)

print("1862")
tgn(1862)
 
print("1863")
tgn(1863)
 
print("1864")
tgn(1864)
 
print("1865")
tgn(1865)
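The six print/tgn calls could also be written as a single loop over the years, which makes it easy to add further years later:

for year in range(1860, 1866):
    print(year)
    tgn(year)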

Unfortunately, I did not manage to include the coordinates of the source and target cities, as my code kept running without terminating or showing any error messages. The code written for this was:

import re
import os

source_location = "./edges/"   # the folder with the yearly edge files
target_location = "./edges/" # the folder in which the merged files will be saved


lof = os.listdir(source_location) # getting all files from the edges folder

def tgn(year):
    weight = []
    source = []
    target = []

    tgn_ids = []
    placeName = []
    lat = []
    lon = []

    for f in lof: # looping through the files, keeping only the edge file of the given year
        if not f.endswith(str(year) + "_edges.tsv"):
            continue
        with open(source_location + f, "r", encoding="utf8") as f1:
            text0 = f1.read().split("\n") # reads the file in
            for x in text0:
                x = x.split("\t")
                if len(x) < 3: # skip empty lines
                    continue
                source.append(x[0])
                target.append(x[1])
                weight.append(x[2])

    new_file = str(year) + "_edges_probe_merge.tsv"
    header = "source\ttarget\tweight\tlat_source\tlon_source\tlat_target\tlon_target\n"

    with open("tgn_coordinates.tsv", "r", encoding="utf8") as f4:
        text1 = f4.read().split("\n")
        for r in text1:
            r = r.split("\t")
            if len(r) < 4: # skip empty lines
                continue
            tgn_ids.append(r[0])
            placeName.append(r[1])
            lat.append(r[2])
            lon.append(r[3])

    with open(target_location + new_file, "w") as f3: # "w" so reruns do not append duplicate rows
        f3.write(header)
        for i in range(1, len(source)): # index 0 is the header row of the edge file
            ID = source[i]
            ID_tar = target[i]
            ID_weight = weight[i]

            # look up the coordinates of both endpoints (linear search in the ID list)
            val_lat = lat[tgn_ids.index(ID)]
            val_lon = lon[tgn_ids.index(ID)]
            val_tar_lat = lat[tgn_ids.index(ID_tar)]
            val_tar_lon = lon[tgn_ids.index(ID_tar)]

            f3.write(ID + "\t" + ID_tar + "\t" + ID_weight + "\t" + val_lat + "\t" + val_lon + "\t" + val_tar_lat + "\t" + val_tar_lon + "\n")

print("1860")
tgn(1860)

print("1861")
tgn(1861)

print("1862")
tgn(1862)
 
print("1863")
tgn(1863)
 
print("1864")
tgn(1864)
 
print("1865")
tgn(1865)

As merging the coordinates into the files failed as well, the network could not be plotted on a geographical map.
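One possible reason for the very long run time is the repeated .index() lookup, which scans the whole list of TGN IDs for every single edge. As a sketch of an alternative (not the code actually used; the "_edges_coord" file name is made up here), the coordinates could be read into a dictionary once and then looked up in constant time while merging each yearly edge file:

# coordinates lookup: TGN ID -> (lat, lon), read once from the coordinates table
coords = {}
with open("tgn_coordinates.tsv", "r", encoding="utf8") as f:
    for line in f:
        parts = line.rstrip("\n").split("\t")
        if len(parts) >= 4:
            coords[parts[0]] = (parts[2], parts[3])

def merge_coordinates(year):
    in_file = "./edges/" + str(year) + "_edges.tsv"
    out_file = "./edges/" + str(year) + "_edges_coord.tsv"
    with open(in_file, "r", encoding="utf8") as fin, open(out_file, "w", encoding="utf8") as fout:
        fout.write("source\ttarget\tweight\tlat_source\tlon_source\tlat_target\tlon_target\n")
        next(fin)  # skip the header line of the edge file
        for line in fin:
            parts = line.rstrip("\n").split("\t")
            if len(parts) < 3:
                continue  # skip empty lines
            src, tgt, w = parts[0], parts[1], parts[2]
            if src in coords and tgt in coords:  # skip IDs without coordinates
                fout.write("\t".join([src, tgt, w,
                                      coords[src][0], coords[src][1],
                                      coords[tgt][0], coords[tgt][1]]) + "\n")

for year in range(1860, 1866):
    merge_coordinates(year)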

This is an example excerpt from one of the output files, to show how they are structured (year 1863):

source  target  weight
7013943 7013964 193
7013623 7013964 12
7007709 7013964 210
7013964 7014076 29
7013964 7014197 54
2110785 7013964 25
2100483 7013964 17
7007825 7013964 207
7013889 7013964 44
7012149 7013964 269
7013964 7014538 91
2075343 7013964 31
2682786 7013964 1
1127725 7013964 5
7013964 7014404 203
7013964 7014631 104
2515855 7013964 1
2117793 7013964 6
7013962 7013964 250
7013927 7013964 19

The Gephi networks for each year are shown below. The color of a node indicates its modularity class (nodes that are interconnected within one cluster share the same color). The size of a node indicates its degree (the number of edges connected to it). The brightness of an edge indicates its weight (the darker the edge, the higher the weight).

1861: [Gephi network for 1861]

1862: [Gephi network for 1862]

1863: [Gephi network for 1863]

1864: [Gephi network for 1864]

1865: [Gephi network for 1865]
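As a rough cross-check of what these Gephi statistics encode, degree and community structure can also be computed directly from one of the yearly edge files, for example with networkx. This is only a sketch: the community-detection function used here is not the same algorithm as Gephi's modularity, and the file path is taken from the script above.

import csv
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities

# build a weighted, undirected graph from one of the yearly edge files
G = nx.Graph()
with open("./edges/1863_edges.tsv", encoding="utf8") as f:
    reader = csv.DictReader(f, delimiter="\t")
    for row in reader:
        G.add_edge(row["source"], row["target"], weight=int(row["weight"]))

# degree = number of connecting edges (what the node size encodes in Gephi)
degrees = dict(G.degree())
print(sorted(degrees.items(), key=lambda x: x[1], reverse=True)[:5])

# community detection; comparable clusters to Gephi's modularity classes
communities = greedy_modularity_communities(G, weight="weight")
print(len(communities), "communities")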