Mathematical Genealogy
This is an attempt to automatically build a mathematical genealogy using the data from http://genealogy.math.ndsu.nodak.edu/. It is used as an example to show how easy it is to scrape data from the web and to draw beautiful trees with R. We will use rvest and stringr to parse the website, and igraph and DiagrammeR to construct the tree and display it. This script is inspired by geneagrapher (http://www.davidalber.net/geneagrapher/) and by a post on the blog of Nathalie Villa Vialaneix (http://tuxette.nathalievilla.org/?p=1292&lang=en).
Site parsing
We first define a function that extracts all the ancestors of a given mathematician, as well as the advisor relationships between them.
library("rvest")
library("stringr")
# Extract the raw text that follows the first closing </span> of a degree entry.
#
# Args:
#   x: a single degree entry, either an HTML string or an object whose
#      as.character() method yields its HTML (ExtractGenealogy passes the
#      nodes returned by html_nodes()).
#
# Returns:
#   A length-one character vector: the text between the first and the second
#   "</span>" (untrimmed; "" when nothing follows the first "</span>"), or
#   NA when x contains no "</span>" at all.
ExtractYear <- function(x) {
  Delimiter <- "</span>"
  # Serialise explicitly instead of relying on the string functions to coerce
  # a node object behind our back.
  Html <- as.character(x)[1]
  if (is.na(Html)) {
    return(NA_character_)
  }
  # strsplit() drops a trailing empty field; appending one delimiter keeps it,
  # so an entry ending right after the first "</span>" still yields "".
  Fields <- strsplit(paste0(Html, Delimiter), Delimiter, fixed = TRUE)[[1]]
  Fields[2]
}
# Extract the genealogy id from the attributes of an advisor link.
#
# Args:
#   x: the attributes of one <a> element (named character vector or list)
#      containing an "href" entry such as "id.php?id=7298".
#
# Returns:
#   The numeric value of the `id` query parameter, as a string. When the href
#   has no such parameter, everything from the 11th character on is returned,
#   which is what the former fixed-offset implementation did.
ExtractAdvisorIdG <- function(x) {
  Href <- x[["href"]]
  # Parse the query parameter rather than counting characters, so absolute
  # URLs and hrefs with extra parameters give the right id too.
  Match <- regmatches(Href, regexec("[?&]id=([0-9]+)", Href))[[1]]
  if (length(Match) == 2) {
    return(Match[2])
  }
  substring(Href, 11)
}
# Crawl the genealogy site upwards from one mathematician.
#
# Starting at `StartIdG`, each individual's page is downloaded, the name and
# the institution/year of every degree are recorded, and every advisor that
# has not been seen yet is appended to the list of individuals to visit.
# The name and the "institution - year" lines are printed as the crawl goes.
#
# Args:
#   StartIdG: id of the starting mathematician (number or string), i.e. the
#             value of `id` in the site's "id.php?id=" URLs.
#
# Returns:
#   An unnamed list of three elements:
#     [[1]] data frame of individuals with character columns IdG, Name,
#           Institution and Year (several degrees are joined with " / ");
#     [[2]] data frame of links with columns from (advisor id), to (student
#           id) and weight (always 1);
#     [[3]] StartIdG, exactly as supplied.
ExtractGenealogy <- function(StartIdG) {
  BaseUrl <- "http://genealogy.math.ndsu.nodak.edu/id.php?id="
  # as.character() turns round numbers into scientific notation
  # (100000 -> "1e+05"), which would produce a broken URL.
  if (is.numeric(StartIdG)) {
    StartKey <- format(StartIdG, scientific = FALSE, trim = TRUE)
  } else {
    StartKey <- as.character(StartIdG)
  }
  Individuals <- data.frame(
    "IdG" = StartKey,
    "Name" = NA_character_,
    "Institution" = NA_character_,
    "Year" = NA_character_,
    stringsAsFactors = FALSE
  )
  Links <- data.frame(
    "from" = character(),
    "to" = character(),
    "weight" = numeric(),
    stringsAsFactors = FALSE
  )
  # Walk the rows in order with an explicit cursor. New individuals are only
  # ever appended, so this visits them in the same order as looking for the
  # first missing name, but it cannot loop forever on a page without a name.
  CurNb <- 1
  while (CurNb <= nrow(Individuals)) {
    CurIdG <- Individuals[CurNb, "IdG"]
    # Get the corresponding webpage
    CurPage <- read_html(paste0(BaseUrl, CurIdG))
    # Name (inspired by Geneagrapher)
    CurName <- str_trim(html_text(html_node(CurPage, "h2")), side = "both")
    print(CurName)
    Individuals[CurNb, "Name"] <- CurName
    # Institution and year: one entry per degree listed on the page
    CurInstitution <- html_text(
      html_nodes(CurPage, 'span[style*="margin-right: 0.5em"] span')
    )
    CurYearRaw <- html_nodes(CurPage, 'span[style*="margin-right: 0.5em"]')
    CurYear <- str_trim(unlist(lapply(CurYearRaw, ExtractYear)), side = "both")
    print(paste(CurInstitution, CurYear, sep = " - "))
    Individuals[CurNb, "Institution"] <- paste(CurInstitution, collapse = " / ")
    Individuals[CurNb, "Year"] <- paste(CurYear, collapse = " / ")
    # Advisors: every link found in a paragraph mentioning "Advisor"
    AdvisorAnchors <- html_nodes(
      html_nodes(CurPage, 'p:contains("Advisor")'),
      "a"
    )
    AdvisorsIdG <- lapply(html_attrs(AdvisorAnchors), ExtractAdvisorIdG)
    for (AdvisorIdG in AdvisorsIdG) {
      # Queue the advisor only once, even when reached through several students
      if (!(AdvisorIdG %in% Individuals[["IdG"]])) {
        Individuals <- rbind(
          Individuals,
          data.frame(
            "IdG" = AdvisorIdG,
            "Name" = NA_character_,
            "Institution" = NA_character_,
            "Year" = NA_character_,
            stringsAsFactors = FALSE
          )
        )
      }
      Links <- rbind(
        Links,
        data.frame(
          "from" = AdvisorIdG,
          "to" = CurIdG,
          "weight" = 1,
          stringsAsFactors = FALSE
        )
      )
    }
    CurNb <- CurNb + 1
  }
  list(Individuals, Links, StartIdG)
}
We can then use this function to retrieve the genealogy of a random mathematician (me…).
# Crawl all the ancestors of the mathematician whose id on the site is 81086.
# The crawl prints each name and its "institution - year" lines as it goes.
Genealogy <- ExtractGenealogy(81086)
## [1] "Erwan Le Pennec"
## [1] "École Polytechnique - 2002"
## [1] "Stephane Georges Mallat"
## [1] "University of Pennsylvania - 1988"
## [1] "Ruzena Kucera Bajcsy"
## [1] "Stanford University - 1973"
## [1] "John McCarthy"
## [1] "Princeton University - 1951"
## [1] "Solomon Lefschetz"
## [1] "Clark University - 1911"
## [1] "William Edward Story"
## [1] "Universität Leipzig - 1875"
## [1] "Carl Gottfried Neumann"
## [1] "Universität Königsberg - 1856"
## [1] "Wilhelm Scheibner"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1848"
## [2] "Universität Leipzig - 1853"
## [1] "Friedrich Julius Richelot"
## [1] "Universität Königsberg - 1831"
## [1] "Otto Hesse"
## [1] "Universität Königsberg - 1840"
## [1] "Carl Gustav Jacob Jacobi"
## [1] "Humboldt-Universität zu Berlin - 1825"
## [1] "Enno Heeren Dirksen"
## [1] "Georg-August-Universität Göttingen - 1820"
## [1] "Johann Tobias Mayer"
## [1] "Georg-August-Universität Göttingen - 1773"
## [1] "Bernhard Friedrich Thibaut"
## [1] "Georg-August-Universität Göttingen - 1796"
## [1] "Abraham Gotthelf Kästner"
## [1] "Universität Leipzig - 1739"
## [1] "Georg Christoph Lichtenberg"
## [1] "Georg-August-Universität Göttingen - 1765"
## [1] "Christian August Hausen"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1713"
## [1] "Johann Christoph Wichmannshausen"
## [1] "Universität Leipzig - 1685"
## [1] "Johann Andreas Planer"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1686"
## [2] "Eberhard-Karls-Universität Tübingen - 1709"
## [1] "Otto Mencke"
## [1] "Universität Leipzig - 1665"
## [1] "Johann Pasch"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1683"
## [1] "Rudolf Jakob Camerarius"
## [1] "Eberhard-Karls-Universität Tübingen - 1684, 1686"
## [1] "Jakob Thomasius"
## [1] "Universität Leipzig - 1643"
## [1] "Michael d. J. Walther"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1656"
## [2] "Martin-Luther-Universität Halle-Wittenberg - 1660"
## [3] "Martin-Luther-Universität Halle-Wittenberg - 1687"
## [1] "Georg Balthasar Metzger"
## [1] "Friedrich-Schiller-Universität Jena - 1646"
## [2] "Universität Basel - 1650"
## [1] "Elias Rudolph Camerarius, Sr."
## [1] "Eberhard-Karls-Universität Tübingen - 1663"
## [1] "Friedrich Leibniz"
## [1] "Universität Leipzig - 1622"
## [1] "Aegidius Strauch"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1633"
## [1] "Constantin Ziegra"
## [1] " - "
## [1] "Gottfried Möbius"
## [1] "Friedrich-Schiller-Universität Jena - 1640"
## [1] "Johann Jakob von Brunn"
## [1] "Universität Basel - 1615"
## [1] "Nicolaus Zapf"
## [1] "Friedrich-Schiller-Universität Jena - 1622"
## [2] "Martin-Luther-Universität Halle-Wittenberg - 1625"
## [3] "Martin-Luther-Universität Halle-Wittenberg - 1629"
## [1] "Werner Rolfinck"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1618"
## [2] "Università degli Studi di Padova - 1625"
## [1] "Johannes Nicolaus Stupanus"
## [1] "Universität Basel - 1569"
## [1] "Erasmus Schmidt"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1592"
## [1] "Abraham Heinecke"
## [1] " - "
## [1] "Jacobus Martini"
## [1] "Universität Helmstedt - 1596"
## [1] "Daniel Sennert"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1594, 1599"
## [1] "Adriaan van den Spieghel"
## [1] "Università degli Studi di Padova and Université Catholique de Louvain - 1603"
## [1] "Theodor Zwinger"
## [1] "Collège de France - 1553"
## [2] "Università degli Studi di Padova - 1559"
## [1] "Sethus Calvisius"
## [1] "Universität Leipzig - 1582"
## [1] "Petrus Otto"
## [1] " - "
## [1] "Cornelius Martini"
## [1] "Universität Helmstedt - 1591" "Universität Helmstedt - 1592"
## [1] "Jan Jessenius"
## [1] "Universität Leipzig - 1588"
## [2] "Università degli Studi di Padova - 1591"
## [1] "Hieronymus (Girolamo Fabrici d'Acquapendente) Fabricius"
## [1] "Università degli Studi di Padova - 1559"
## [1] "Petrus (Pierre de La Ramée) Ramus"
## [1] "Collège de Navarre - 1536"
## [1] "Bassiano Landi"
## [1] "Università degli Studi di Padova - 1542"
## [1] "Vittore Trincavelli"
## [1] "Università degli Studi di Padova - "
## [1] "Nikolaus Selnecker"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1570"
## [1] "Moritz Valentin Steinmetz"
## [1] "Universität Leipzig - 1550, 1567"
## [1] "Albert Clampius"
## [1] "Ruprecht-Karls-Universität Heidelberg - "
## [1] "Duncan Liddel"
## [1] "Europa-Universität Viadrina Frankfurt an der Oder and Universität Breslau - 1582"
## [2] "Universität Helmstedt - 1596"
## [1] "Gabriele Falloppio"
## [1] "Università degli Studi di Padova and Università degli Studi di Ferrara - 1547"
## [1] "Johannes (Johann Sturm) Sturmius"
## [1] "Université Catholique de Louvain - 1527"
## [1] "Jacques Toussain"
## [1] "Université de Paris - 1521"
## [1] "Giovanni Battista della Monte"
## [1] "Università degli Studi di Padova - "
## [2] "Università degli Studi di Ferrara - "
## [1] "Pietro Pomponazzi"
## [1] "Università degli Studi di Padova - 1487"
## [1] "Georg Joachim von Leuchen Rheticus"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1535"
## [1] "Johann Hoffmann"
## [1] " - "
## [1] "Reinhard Bachoff von Echt"
## [1] "Ruprecht-Karls-Universität Heidelberg - 1598"
## [1] "John Craig"
## [1] "Universität Basel - 1580"
## [1] "Paul Wittich"
## [1] "Martin-Luther-Universität Halle-Wittenberg and Universität Leipzig - 1566"
## [1] "Antonio Musa Brasavola"
## [1] "Università degli Studi di Ferrara - 1520"
## [1] "Matteo Realdo (Renaldus Columbus) Colombo"
## [1] "Università degli Studi di Padova - 1544"
## [1] "Nicolas (Nicolaes Cleynaerts) Clénard"
## [1] "Université Catholique de Louvain - 1515, 1521"
## [1] "Johannes Winter von Andernach"
## [1] "Université Catholique de Louvain - 1527"
## [2] "Collège de Tréguier - 1532"
## [1] "Guillaume Budé"
## [1] "Université d'Orléans and Université de Paris - 1486, 1491"
## [1] "Marco Musuro"
## [1] "Università degli Studi di Firenze - 1486"
## [1] "Niccolò Leoniceno"
## [1] "Scuola Pubblica di Vicenza - 1446"
## [2] "Università degli Studi di Padova - 1453"
## [1] "Nicoletto Vernia"
## [1] "Università degli Studi di Padova - "
## [1] "Pietro Roccabonella"
## [1] "Università degli Studi di Padova - "
## [1] "Johannes Volmar"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1515"
## [1] "Nicolaus (Mikołaj Kopernik) Copernicus"
## [1] "Università di Bologna and Università degli Studi di Padova and Uniwersytet Jagielloński and Università degli Studi di Ferrara - 1499"
## [1] "Valentin Thau"
## [1] "Universität Leipzig - 1555"
## [1] "Andreas (Andries van Wesel) Vesalius"
## [1] "Università degli Studi di Padova and Université Catholique de Louvain - 1537"
## [1] "Jacobus (Jacques Masson) Latomus"
## [1] "Collège de Montaigu - 1502"
## [2] "Katholieke Universiteit Leuven - 1519"
## [1] "Jan (Johannes Campensis) van Campen"
## [1] "Universität Ingolstadt - "
## [2] "Université Catholique de Louvain - 1519"
## [1] "Rutger Rescius"
## [1] "Université de Paris - 1513"
## [1] "Jacobus (Jacques Dubois) Sylvius"
## [1] "Université de Montpellier - " "Université de Paris - 1530"
## [1] "Georgius Hermonymus"
## [1] " - "
## [1] "Janus Lascaris"
## [1] "Università degli Studi di Padova - 1472"
## [1] "Ognibene (Omnibonus Leonicenus) Bonisoli da Lonigo"
## [1] "Università di Mantova - "
## [1] "Pelope"
## [1] " - "
## [1] "Gaetano da Thiene"
## [1] " - "
## [1] "Sigismondo Polcastro"
## [1] " - "
## [1] "Bonifazius Erasmi"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1509"
## [1] "Leonhard (Leonard Vitreatoris z Dobczyc) von Dobschütz"
## [1] "Uniwersytet Jagielloński - 1489"
## [1] "Domenico Maria Novara da Ferrara"
## [1] "Università degli Studi di Firenze - 1483"
## [1] "Johannes Hommel"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1543"
## [1] "Gemma (Jemme Reinerszoon) Frisius"
## [1] "Université Catholique de Louvain - 1529, 1536"
## [1] "Jan Standonck"
## [1] "Collège Sainte-Barbe - 1474" "Collège de Montaigu - 1490"
## [1] "Johann (Johannes Kapnion) Reuchlin"
## [1] "Universität Basel - 1477" "Université de Poitiers - 1481"
## [1] "Matthaeus Adrianus"
## [1] " - "
## [1] "Girolamo (Hieronymus Aleander) Aleandro"
## [1] "Università degli Studi di Padova - 1499, 1508"
## [1] "Jean Tagault"
## [1] " - "
## [1] "François Dubois"
## [1] "Université de Paris - 1516"
## [1] "Basilios Bessarion"
## [1] "Mystras - 1436"
## [1] "Demetrios Chalcocondyles"
## [1] "Mystras and Accademia Romana - 1452"
## [1] "Vittorino da Feltre"
## [1] "Università degli Studi di Padova - 1416"
## [1] "Paolo (Nicoletti) da Venezia"
## [1] " - "
## [1] "Johannes Müller Regiomontanus"
## [1] "Universität Leipzig and Universität Wien - 1457"
## [1] "Luca Pacioli"
## [1] " - "
## [1] "Erasmus Reinhold"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1535"
## [1] "Philipp Melanchthon"
## [1] "Ruprecht-Karls-Universität Heidelberg - 1511"
## [2] "Eberhard-Karls-Universität Tübingen - 1514"
## [1] "Petrus (Pieter de Corte) Curtius"
## [1] "Université Catholique de Louvain - 1513, 1530"
## [1] "Johannes Argyropoulos"
## [1] "Università degli Studi di Padova - 1444"
## [1] "Jacob ben Jehiel Loans"
## [1] " - "
## [1] "Moses Perez"
## [1] " - "
## [1] "Scipione Fortiguerra"
## [1] "Università degli Studi di Firenze - 1493"
## [1] "Georgios Plethon Gemistos"
## [1] " - 1380, 1393"
## [1] "Theodoros Gazes"
## [1] "Università di Mantova and Constantinople - 1433"
## [1] "Guarino da Verona"
## [1] " - 1408"
## [1] "Georg von Peuerbach"
## [1] "Universität Wien - 1440"
## [1] "Jakob Milich"
## [1] "Albert-Ludwigs-Universität Freiburg im Breisgau - 1520"
## [2] "Universität Wien - 1524"
## [1] "Johannes Stöffler"
## [1] "Universität Ingolstadt - 1476"
## [1] "Maarten (Martinus Dorpius) van Dorp"
## [1] "Université Catholique de Louvain - 1504, 1515"
## [1] "Angelo Poliziano"
## [1] "Università degli Studi di Firenze - 1477"
## [1] "Demetrios Kydones"
## [1] " - "
## [1] "Elissaeus Judaeus"
## [1] " - "
## [1] "Manuel Chrysoloras"
## [1] " - "
## [1] "Johannes von Gmunden"
## [1] "Universität Wien - 1406"
## [1] "Desiderius Erasmus"
## [1] "Collège de Montaigu - 1497 /1506" "University of Turin - 1506"
## [1] "Ulrich Zasius"
## [1] "Albert-Ludwigs-Universität Freiburg im Breisgau - 1501"
## [1] "Leo Outers"
## [1] "Université Catholique de Louvain - 1485"
## [1] "Marsilio Ficino"
## [1] "Università degli Studi di Firenze - 1462"
## [1] "Cristoforo Landino"
## [1] " - "
## [1] "Nilos Kabasilas"
## [1] " - 1363"
## [1] "Heinrich von Langenstein"
## [1] "Université de Paris - 1363" "Université de Paris - 1375"
## [1] "Alexander Hegius"
## [1] " - 1474"
## [1] "Gregory Palamas"
## [1] " - "
## [1] "Nicole Oresme"
## [1] " - "
## [1] "Rudolf Agricola"
## [1] "Università degli Studi di Ferrara - 1478"
## [1] "Thomas von Kempen à Kempis"
## [1] " - "
## [1] "Theodore Metochites"
## [1] " - 1315"
## [1] "Geert Gerardus Magnus Groote"
## [1] " - "
## [1] "Florens Florentius Radwyn Radewyns"
## [1] " - "
## [1] "Manuel Bryennios"
## [1] " - "
## [1] "Gregory Chioniadis"
## [1] "Ilkhans Court at Tabriz - 1296"
## [1] "Shams al‐Dīn al‐Bukhārī"
## [1] "Maragheh Observatory - "
## [1] "Nasir al-Dīn al-Ṭūsī"
## [1] " - "
## [1] "Kamāl al-Dīn Ibn Yūnus"
## [1] " - "
## [1] "Sharaf al-Dīn al-Ṭūsī"
## [1] " - "
Transformation into a tree
It remains to transform this list into a tree (or more generally a graph).
# define the tree as an igraph object
library(igraph)
# Turn the result of ExtractGenealogy() into a directed igraph graph.
#
# Args:
#   Genealogy: list whose first element is the data frame of individuals
#              (columns IdG, Name and Year are used) and whose second element
#              is the data frame of links (from, to, weight).
#
# Returns:
#   A directed igraph graph built from the links, whose `label` vertex
#   attribute is a copy of its `name` vertex attribute.
TransformIntoGraph <- function(Genealogy) {
  People <- Genealogy[[1]]
  # The first column carries the ids used by the link table. The second one is
  # called `name` on purpose; presumably it ends up as the vertex `name`
  # attribute, i.e. the displayed text -- TODO confirm against igraph docs.
  # The separator is a backslash followed by a newline.
  Vertices <- data.frame(
    id = People[["IdG"]],
    name = paste(People[["Name"]], People[["Year"]], sep = "\\\n"),
    stringsAsFactors = FALSE
  )
  GenealogyTree <- graph_from_data_frame(
    Genealogy[[2]],
    directed = TRUE,
    vertices = Vertices
  )
  V(GenealogyTree)$label <- V(GenealogyTree)$name
  GenealogyTree
}
# Build the graph from the crawled individuals and advisor links.
GenealogyTree <- TransformIntoGraph(Genealogy)
Tree
We can now use DiagrammeR to plot it in this HTML page.
library("igraph")
# Write the genealogy graph to a Graphviz dot file and render it.
#
# Args:
#   GenealogyTree: igraph graph with `label` and `name` vertex attributes.
#   IdG: identifier inserted into the output file name, "Tree_<IdG>.dot".
#
# Returns:
#   The value of DiagrammeR::grViz() called on the written file with a
#   2000 x 1500 (height x width) canvas.
#
# Side effect: creates or overwrites "Tree_<IdG>.dot" in the working directory.
PlotTreeDiagrammeR <- function(GenealogyTree, IdG) {
  DotFile <- sprintf("Tree_%s.dot", IdG)
  # Rewrite the backslashes found in the labels.
  # NOTE(review): confirm the intended effect of this substitution on the
  # backslash + newline separator once it has gone through the dot writer.
  V(GenealogyTree)$label <- gsub("\\\\", "\\", V(GenealogyTree)$label)
  # Put a backslash in front of every apostrophe in both text attributes
  # (presumably so that names such as "d'Acquapendente" survive rendering).
  V(GenealogyTree)$label <- gsub("\'", "\\'", V(GenealogyTree)$label, fixed = TRUE)
  V(GenealogyTree)$name <- gsub("\'", "\\'", V(GenealogyTree)$name, fixed = TRUE)
  write_graph(GenealogyTree, file = DotFile, format = "dot")
  DiagrammeR::grViz(DotFile, height = 2000, width = 1500)
}
# Render the tree; Genealogy[[3]] is the starting id, used in the dot file name.
PlotTreeDiagrammeR(GenealogyTree, Genealogy[[3]])
If you want to obtain a PDF, just run the dot command from Graphviz on the produced dot file, e.g. dot -Tpdf Tree_81086.dot -o Tree_81086.pdf.
The visNetwork way
We can also obtain a (far from perfect) interactive display using visNetwork, a package based on vis.js.
library(visNetwork)
# Interactive display: individuals become nodes (id = IdG, label = name and
# year on two lines), advisor links become edges, laid out hierarchically.
visNetwork(
  nodes = dplyr::mutate(
    Genealogy[[1]],
    id = IdG,
    label = paste(Name, Year, sep = "\n")
  ),
  edges = Genealogy[[2]],
  height = "800px"
) %>%
  visHierarchicalLayout(sortMethod = "directed", levelSeparation = 450) %>%
  visNodes(font = "30px", shape = "ellipse")