Mathematical Genealogy

This is an attempt to automatically build a mathematical genealogy using the data from http://genealogy.math.ndsu.nodak.edu/. It is used as an example to show how easy it is to scrape data from the web and to draw beautiful trees with R. We will use rvest and stringr to parse the website, and igraph and DiagrammeR to construct the tree and display it. This script is inspired by geneagrapher (http://www.davidalber.net/geneagrapher/) and by a post on the blog of Nathalie Villa Vialaneix (http://tuxette.nathalievilla.org/?p=1292&lang=en).

Site parsing

We first define a function that extracts all the ancestors of a given mathematician, as well as the advisor relationships between them.

library("rvest")
library("stringr")

#' Extract the text that sits between the first and the second "</span>".
#'
#' @param x A node (or a character string) whose serialised markup has the
#'   form `<span ...><span ...>Institution</span> Year</span>`.
#' @return A length-one character vector holding the raw (untrimmed) text
#'   found after the first closing `</span>`; `NA` when the markup contains
#'   no `</span>` at all or when `x` is empty/`NA`.
ExtractYear <- function(x) {
  Delimiter <- "</span>"
  # Serialise explicitly instead of relying on implicit coercion of a node
  # object by the string functions.
  Markup <- as.character(x)[1]
  if (is.na(Markup)) {
    return(NA_character_)
  }
  # The delimiter is a literal, so match it as a fixed string, not a regex.
  FirstHit <- regexpr(Delimiter, Markup, fixed = TRUE)
  if (FirstHit < 0) {
    return(NA_character_)
  }
  Remainder <- substring(Markup, FirstHit + nchar(Delimiter))
  SecondHit <- regexpr(Delimiter, Remainder, fixed = TRUE)
  if (SecondHit < 0) {
    Remainder
  } else {
    substring(Remainder, 1, SecondHit - 1)
  }
}

#' Extract the genealogy id from the attributes of an advisor link.
#'
#' @param x A named character vector of anchor attributes containing an
#'   `href` such as `"id.php?id=12345"`.
#' @return The numeric id as a length-one character vector, or `NA` when
#'   there is no `href` or the `href` does not point to an `id.php?id=` page.
ExtractAdvisorIdG <- function(x) {
  Href <- if ("href" %in% names(x)) x[["href"]] else NA_character_
  # Match the id by pattern rather than by a fixed character offset, so that
  # absolute URLs and trailing query parameters are handled as well.
  IdPart <- regmatches(Href, regexpr("id\\.php\\?id=[0-9]+", Href))
  if (length(IdPart) == 0) {
    return(NA_character_)
  }
  sub("^.*=", "", IdPart)
}

#' Crawl the genealogy website upwards from one mathematician.
#'
#' Starting from `StartIdG`, fetches each individual's page, records the
#' name, institution(s) and year(s), and queues every advisor found on the
#' page until no unvisited individual is left.
#'
#' @param StartIdG Id of the mathematician to start from.
#' @param BaseUrl Prefix to which an id is appended to obtain a page URL.
#' @return An unnamed list of three elements: a data frame of individuals
#'   (`IdG`, `Name`, `Institution`, `Year`), a data frame of advisor links
#'   (`from` advisor, `to` student, `weight`), and `StartIdG`.
ExtractGenealogy <- function(StartIdG,
                             BaseUrl = "http://genealogy.math.ndsu.nodak.edu/id.php?id=") {

  Individuals <- data.frame("IdG" = as.character(StartIdG),
                            "Name" = NA_character_,
                            "Institution" = NA_character_,
                            "Year" = NA_character_, stringsAsFactors = FALSE)
  Links <- data.frame("from" = character(), "to" = character(),
                      "weight" = numeric(), stringsAsFactors = FALSE)

  DegreeSelector <- 'span[style*="margin-right: 0.5em"]'

  # Newly discovered advisors are appended at the end, so visiting the rows
  # in order processes everybody exactly once. Driving the loop by row index
  # (rather than by "Name is still NA") guarantees termination even when a
  # page yields no name.
  CurNb <- 1L
  while (CurNb <= nrow(Individuals)) {
    CurIdG <- Individuals[CurNb, "IdG"]
    # Get the corresponding webpage
    CurPage <- read_html(paste0(BaseUrl, CurIdG))

    # Name (inspired by Geneagrapher)
    CurName <- str_trim(html_text(html_node(CurPage, "h2")), side = "both")
    print(CurName)
    Individuals[CurNb, "Name"] <- CurName

    # Institution and Year (one entry per degree listed on the page)
    CurInstitution <- html_text(html_nodes(CurPage, paste(DegreeSelector, "span")))
    CurYearRaw <- html_nodes(CurPage, DegreeSelector)
    CurYear <- str_trim(unlist(lapply(CurYearRaw, ExtractYear)), side = "both")

    print(paste(CurInstitution, CurYear, sep = " - "))
    Individuals[CurNb, "Institution"] <- paste(CurInstitution, collapse = " / ")
    Individuals[CurNb, "Year"] <- paste(CurYear, collapse = " / ")

    # Advisors: only anchors that actually carry an href can hold an id
    AdvisorAnchors <- html_nodes(html_nodes(CurPage, 'p:contains("Advisor")'),
                                 "a[href]")
    AdvisorsIdG <- unlist(lapply(html_attrs(AdvisorAnchors), ExtractAdvisorIdG))
    # Drop links from which no usable id could be extracted
    AdvisorsIdG <- AdvisorsIdG[!is.na(AdvisorsIdG) & nzchar(AdvisorsIdG)]

    for (AdvisorIdG in AdvisorsIdG) {
      if (!(AdvisorIdG %in% Individuals[["IdG"]])) {
        Individuals <- rbind(Individuals,
                             data.frame("IdG" = AdvisorIdG,
                                        "Name" = NA_character_,
                                        "Institution" = NA_character_,
                                        "Year" = NA_character_,
                                        stringsAsFactors = FALSE))
      }
      Links <- rbind(Links,
                     data.frame("from" = AdvisorIdG, "to" = CurIdG,
                                "weight" = 1, stringsAsFactors = FALSE))
    }

    CurNb <- CurNb + 1L
  }
  list(Individuals, Links, StartIdG)
}

We can then use this function to retrieve the genealogy of a random mathematician (me…).

# Crawl all the ancestors of the mathematician whose id is 81086.
Genealogy <- ExtractGenealogy(StartIdG = 81086)
## [1] "Erwan  Le Pennec"
## [1] "École Polytechnique - 2002"
## [1] "Stephane Georges Mallat"
## [1] "University of Pennsylvania - 1988"
## [1] "Ruzena Kucera Bajcsy"
## [1] "Stanford University - 1973"
## [1] "John  McCarthy"
## [1] "Princeton University - 1951"
## [1] "Solomon  Lefschetz"
## [1] "Clark University - 1911"
## [1] "William Edward Story"
## [1] "Universität Leipzig - 1875"
## [1] "Carl Gottfried Neumann"
## [1] "Universität Königsberg - 1856"
## [1] "Wilhelm  Scheibner"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1848"
## [2] "Universität Leipzig - 1853"                       
## [1] "Friedrich Julius Richelot"
## [1] "Universität Königsberg - 1831"
## [1] "Otto  Hesse"
## [1] "Universität Königsberg - 1840"
## [1] "Carl Gustav Jacob Jacobi"
## [1] "Humboldt-Universität zu Berlin - 1825"
## [1] "Enno Heeren Dirksen"
## [1] "Georg-August-Universität Göttingen - 1820"
## [1] "Johann Tobias Mayer"
## [1] "Georg-August-Universität Göttingen - 1773"
## [1] "Bernhard Friedrich Thibaut"
## [1] "Georg-August-Universität Göttingen - 1796"
## [1] "Abraham Gotthelf Kästner"
## [1] "Universität Leipzig - 1739"
## [1] "Georg Christoph Lichtenberg"
## [1] "Georg-August-Universität Göttingen - 1765"
## [1] "Christian August Hausen"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1713"
## [1] "Johann Christoph Wichmannshausen"
## [1] "Universität Leipzig - 1685"
## [1] "Johann Andreas Planer"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1686"
## [2] "Eberhard-Karls-Universität Tübingen - 1709"       
## [1] "Otto  Mencke"
## [1] "Universität Leipzig - 1665"
## [1] "Johann  Pasch"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1683"
## [1] "Rudolf Jakob Camerarius"
## [1] "Eberhard-Karls-Universität Tübingen - 1684, 1686"
## [1] "Jakob  Thomasius"
## [1] "Universität Leipzig - 1643"
## [1] "Michael d. J.  Walther"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1656"
## [2] "Martin-Luther-Universität Halle-Wittenberg - 1660"
## [3] "Martin-Luther-Universität Halle-Wittenberg - 1687"
## [1] "Georg Balthasar Metzger"
## [1] "Friedrich-Schiller-Universität Jena - 1646"
## [2] "Universität Basel - 1650"                  
## [1] "Elias Rudolph Camerarius, Sr."
## [1] "Eberhard-Karls-Universität Tübingen - 1663"
## [1] "Friedrich  Leibniz"
## [1] "Universität Leipzig - 1622"
## [1] "Aegidius  Strauch"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1633"
## [1] "Constantin  Ziegra"
## [1] " - "
## [1] "Gottfried  Möbius"
## [1] "Friedrich-Schiller-Universität Jena - 1640"
## [1] "Johann Jakob von Brunn"
## [1] "Universität Basel - 1615"
## [1] "Nicolaus  Zapf"
## [1] "Friedrich-Schiller-Universität Jena - 1622"       
## [2] "Martin-Luther-Universität Halle-Wittenberg - 1625"
## [3] "Martin-Luther-Universität Halle-Wittenberg - 1629"
## [1] "Werner  Rolfinck"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1618"
## [2] "Università degli Studi di Padova - 1625"          
## [1] "Johannes Nicolaus Stupanus"
## [1] "Universität Basel - 1569"
## [1] "Erasmus  Schmidt"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1592"
## [1] "Abraham  Heinecke"
## [1] " - "
## [1] "Jacobus   Martini"
## [1] "Universität Helmstedt - 1596"
## [1] "Daniel  Sennert"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1594, 1599"
## [1] "Adriaan  van den Spieghel"
## [1] "Università degli Studi di Padova and Université Catholique de Louvain - 1603"
## [1] "Theodor  Zwinger"
## [1] "Collège de France - 1553"               
## [2] "Università degli Studi di Padova - 1559"
## [1] "Sethus  Calvisius"
## [1] "Universität Leipzig - 1582"
## [1] "Petrus   Otto"
## [1] " - "
## [1] "Cornelius  Martini"
## [1] "Universität Helmstedt - 1591" "Universität Helmstedt - 1592"
## [1] "Jan  Jessenius"
## [1] "Universität Leipzig - 1588"             
## [2] "Università degli Studi di Padova - 1591"
## [1] "Hieronymus (Girolamo Fabrici d'Acquapendente) Fabricius"
## [1] "Università degli Studi di Padova - 1559"
## [1] "Petrus (Pierre de La Ramée) Ramus"
## [1] "Collège de Navarre - 1536"
## [1] "Bassiano  Landi"
## [1] "Università degli Studi di Padova - 1542"
## [1] "Vittore  Trincavelli"
## [1] "Università degli Studi di Padova - "
## [1] "Nikolaus  Selnecker"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1570"
## [1] "Moritz Valentin Steinmetz"
## [1] "Universität Leipzig - 1550, 1567"
## [1] "Albert  Clampius"
## [1] "Ruprecht-Karls-Universität Heidelberg - "
## [1] "Duncan  Liddel"
## [1] "Europa-Universität Viadrina Frankfurt an der Oder and Universität Breslau - 1582"
## [2] "Universität Helmstedt - 1596"                                                    
## [1] "Gabriele  Falloppio"
## [1] "Università degli Studi di Padova and Università degli Studi di Ferrara - 1547"
## [1] "Johannes (Johann Sturm) Sturmius"
## [1] "Université Catholique de Louvain - 1527"
## [1] "Jacques  Toussain"
## [1] "Université de Paris - 1521"
## [1] "Giovanni Battista  della Monte"
## [1] "Università degli Studi di Padova - " 
## [2] "Università degli Studi di Ferrara - "
## [1] "Pietro  Pomponazzi"
## [1] "Università degli Studi di Padova - 1487"
## [1] "Georg Joachim von Leuchen Rheticus"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1535"
## [1] "Johann  Hoffmann"
## [1] " - "
## [1] "Reinhard  Bachoff von Echt"
## [1] "Ruprecht-Karls-Universität Heidelberg - 1598"
## [1] "John  Craig"
## [1] "Universität Basel - 1580"
## [1] "Paul  Wittich"
## [1] "Martin-Luther-Universität Halle-Wittenberg and Universität Leipzig - 1566"
## [1] "Antonio Musa Brasavola"
## [1] "Università degli Studi di Ferrara - 1520"
## [1] "Matteo Realdo (Renaldus Columbus) Colombo"
## [1] "Università degli Studi di Padova - 1544"
## [1] "Nicolas (Nicolaes Cleynaerts) Clénard"
## [1] "Université Catholique de Louvain - 1515, 1521"
## [1] "Johannes Winter von Andernach"
## [1] "Université Catholique de Louvain - 1527"
## [2] "Collège de Tréguier - 1532"             
## [1] "Guillaume  Budé"
## [1] "Université d'Orléans and Université de Paris - 1486, 1491"
## [1] "Marco  Musuro"
## [1] "Università degli Studi di Firenze - 1486"
## [1] "Niccolò  Leoniceno"
## [1] "Scuola Pubblica di Vicenza - 1446"      
## [2] "Università degli Studi di Padova - 1453"
## [1] "Nicoletto  Vernia"
## [1] "Università degli Studi di Padova - "
## [1] "Pietro  Roccabonella"
## [1] "Università degli Studi di Padova - "
## [1] "Johannes  Volmar"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1515"
## [1] "Nicolaus (Mikołaj Kopernik) Copernicus"
## [1] "Università di Bologna and Università degli Studi di Padova and Uniwersytet Jagielloński and Università degli Studi di Ferrara - 1499"
## [1] "Valentin  Thau"
## [1] "Universität Leipzig - 1555"
## [1] "Andreas (Andries van Wesel) Vesalius"
## [1] "Università degli Studi di Padova and Université Catholique de Louvain - 1537"
## [1] "Jacobus (Jacques Masson) Latomus"
## [1] "Collège de Montaigu - 1502"           
## [2] "Katholieke Universiteit Leuven - 1519"
## [1] "Jan (Johannes Campensis) van Campen"
## [1] "Universität Ingolstadt - "              
## [2] "Université Catholique de Louvain - 1519"
## [1] "Rutger  Rescius"
## [1] "Université de Paris - 1513"
## [1] "Jacobus (Jacques Dubois) Sylvius"
## [1] "Université de Montpellier - " "Université de Paris - 1530"  
## [1] "Georgius  Hermonymus"
## [1] " - "
## [1] "Janus  Lascaris"
## [1] "Università degli Studi di Padova - 1472"
## [1] "Ognibene (Omnibonus Leonicenus) Bonisoli da Lonigo"
## [1] "Università di Mantova - "
## [1] "Pelope"
## [1] " - "
## [1] "Gaetano  da Thiene"
## [1] " - "
## [1] "Sigismondo  Polcastro"
## [1] " - "
## [1] "Bonifazius  Erasmi"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1509"
## [1] "Leonhard (Leonard Vitreatoris z Dobczyc) von Dobschütz"
## [1] "Uniwersytet Jagielloński - 1489"
## [1] "Domenico Maria Novara da Ferrara"
## [1] "Università degli Studi di Firenze - 1483"
## [1] "Johannes  Hommel"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1543"
## [1] "Gemma (Jemme Reinerszoon) Frisius"
## [1] "Université Catholique de Louvain - 1529, 1536"
## [1] "Jan  Standonck"
## [1] "Collège Sainte-Barbe - 1474" "Collège de Montaigu - 1490" 
## [1] "Johann (Johannes Kapnion) Reuchlin"
## [1] "Universität Basel - 1477"      "Université de Poitiers - 1481"
## [1] "Matthaeus  Adrianus"
## [1] " - "
## [1] "Girolamo (Hieronymus Aleander) Aleandro"
## [1] "Università degli Studi di Padova - 1499, 1508"
## [1] "Jean  Tagault"
## [1] " - "
## [1] "François  Dubois"
## [1] "Université de Paris - 1516"
## [1] "Basilios  Bessarion"
## [1] "Mystras - 1436"
## [1] "Demetrios  Chalcocondyles"
## [1] "Mystras and Accademia Romana - 1452"
## [1] "Vittorino  da Feltre"
## [1] "Università degli Studi di Padova - 1416"
## [1] "Paolo (Nicoletti) da Venezia"
## [1] " - "
## [1] "Johannes Müller Regiomontanus"
## [1] "Universität Leipzig and Universität Wien - 1457"
## [1] "Luca  Pacioli"
## [1] " - "
## [1] "Erasmus  Reinhold"
## [1] "Martin-Luther-Universität Halle-Wittenberg - 1535"
## [1] "Philipp  Melanchthon"
## [1] "Ruprecht-Karls-Universität Heidelberg - 1511"
## [2] "Eberhard-Karls-Universität Tübingen - 1514"  
## [1] "Petrus (Pieter de Corte) Curtius"
## [1] "Université Catholique de Louvain - 1513, 1530"
## [1] "Johannes  Argyropoulos"
## [1] "Università degli Studi di Padova - 1444"
## [1] "Jacob ben Jehiel  Loans"
## [1] " - "
## [1] "Moses  Perez"
## [1] " - "
## [1] "Scipione  Fortiguerra"
## [1] "Università degli Studi di Firenze - 1493"
## [1] "Georgios Plethon Gemistos"
## [1] " - 1380, 1393"
## [1] "Theodoros  Gazes"
## [1] "Università di Mantova and Constantinople - 1433"
## [1] "Guarino  da Verona"
## [1] " - 1408"
## [1] "Georg  von Peuerbach"
## [1] "Universität Wien - 1440"
## [1] "Jakob  Milich"
## [1] "Albert-Ludwigs-Universität Freiburg im Breisgau - 1520"
## [2] "Universität Wien - 1524"                               
## [1] "Johannes  Stöffler"
## [1] "Universität Ingolstadt - 1476"
## [1] "Maarten (Martinus Dorpius) van Dorp"
## [1] "Université Catholique de Louvain - 1504, 1515"
## [1] "Angelo  Poliziano"
## [1] "Università degli Studi di Firenze - 1477"
## [1] "Demetrios  Kydones"
## [1] " - "
## [1] "Elissaeus  Judaeus"
## [1] " - "
## [1] "Manuel  Chrysoloras"
## [1] " - "
## [1] "Johannes   von Gmunden"
## [1] "Universität Wien - 1406"
## [1] "Desiderius  Erasmus"
## [1] "Collège de Montaigu - 1497 /1506" "University of Turin - 1506"      
## [1] "Ulrich  Zasius"
## [1] "Albert-Ludwigs-Universität Freiburg im Breisgau - 1501"
## [1] "Leo  Outers"
## [1] "Université Catholique de Louvain - 1485"
## [1] "Marsilio  Ficino"
## [1] "Università degli Studi di Firenze - 1462"
## [1] "Cristoforo  Landino"
## [1] " - "
## [1] "Nilos  Kabasilas"
## [1] " - 1363"
## [1] "Heinrich   von Langenstein"
## [1] "Université de Paris - 1363" "Université de Paris - 1375"
## [1] "Alexander  Hegius"
## [1] " - 1474"
## [1] "Gregory  Palamas"
## [1] " - "
## [1] "Nicole  Oresme"
## [1] " - "
## [1] "Rudolf  Agricola"
## [1] "Università degli Studi di Ferrara - 1478"
## [1] "Thomas von Kempen à Kempis"
## [1] " - "
## [1] "Theodore  Metochites"
## [1] " - 1315"
## [1] "Geert Gerardus Magnus Groote"
## [1] " - "
## [1] "Florens Florentius Radwyn Radewyns"
## [1] " - "
## [1] "Manuel  Bryennios"
## [1] " - "
## [1] "Gregory  Chioniadis"
## [1] "Ilkhans Court at Tabriz - 1296"
## [1] "Shams al‐Dīn  al‐Bukhārī"
## [1] "Maragheh Observatory - "
## [1] "Nasir al-Dīn  al-Ṭūsī"
## [1] " - "
## [1] "Kamāl al-Dīn  Ibn Yūnus"
## [1] " - "
## [1] "Sharaf al-Dīn  al-Ṭūsī"
## [1] " - "

Transformation into a tree

It remains to transform this list into a tree (or more generally a graph).

# define the tree as an igraph object
library(igraph)
#' Turn the crawled genealogy into a directed igraph object.
#'
#' @param Genealogy List whose first element is the data frame of individuals
#'   (columns `IdG`, `Name`, `Year` are used) and whose second element is the
#'   data frame of advisor links.
#' @return A directed igraph graph whose vertices carry a `label` attribute
#'   equal to their `name` attribute.
TransformIntoGraph <- function(Genealogy) {
  # The first column (`id`) is what the edge list is matched against; the
  # `name` column holds the display text "Name<backslash><newline>Year".
  Vertices <- dplyr::transmute(Genealogy[[1]],
                               id = IdG,
                               name = paste(Name, Year, sep = "\\\n"))
  GenealogyTree <- graph_from_data_frame(Genealogy[[2]],
                                         directed = TRUE,
                                         vertices = Vertices)
  V(GenealogyTree)$label <- V(GenealogyTree)$name
  GenealogyTree
}

# Build the graph from the crawled individuals and advisor links.
GenealogyTree <- TransformIntoGraph(Genealogy = Genealogy)

Tree

We can now use DiagrammeR to plot it in this HTML page.

library("igraph")
#' Export the genealogy graph to a dot file and render it with DiagrammeR.
#'
#' Writes `Tree_<IdG>.dot` in the current working directory as a side effect.
#'
#' @param GenealogyTree igraph graph with `label` and `name` vertex attributes.
#' @param IdG Identifier used to build the dot file name.
#' @return The value returned by `DiagrammeR::grViz()` for the dot file.
PlotTreeDiagrammeR <- function(GenealogyTree, IdG) {
  # NOTE(review): rewrites backslashes in the labels with a replacement that
  # is a lone backslash -- confirm this has the intended effect.
  V(GenealogyTree)$label <- gsub("\\\\", "\\", V(GenealogyTree)$label)
  # Prefix single quotes in labels and names with a backslash.
  V(GenealogyTree)$label <- gsub("\'", "\\'", V(GenealogyTree)$label, fixed = TRUE)
  V(GenealogyTree)$name <- gsub("\'", "\\'", V(GenealogyTree)$name, fixed = TRUE)
  # Build the file name once so that the writer and the renderer always agree.
  DotFile <- sprintf("Tree_%s.dot", IdG)
  write_graph(GenealogyTree, file = DotFile, format = "dot")
  DiagrammeR::grViz(DotFile, height = 2000, width = 1500)
}

# Render the tree; the third element of Genealogy is the starting id.
PlotTreeDiagrammeR(GenealogyTree, IdG = Genealogy[[3]])

If you want to obtain a pdf, just use the dot command from Graphviz on the produced dot file.

the visNetwork way

We can also obtain a (far from perfect) interactive display using visNetwork, a package based on vis.js.

library(visNetwork)
# Interactive hierarchical display: nodes are the individuals (with the `id`
# and `label` columns added), edges are the advisor links.
visNetwork(
  nodes = dplyr::mutate(
    Genealogy[[1]],
    id = IdG,
    label = paste(Name, Year, sep = "\n")
  ),
  edges = Genealogy[[2]],
  height = "800px"
) %>%
  visHierarchicalLayout(sortMethod = "directed", levelSeparation = 450) %>%
  visNodes(font = "30px", shape = "ellipse")
Professor of Applied Mathematics

Applied Math professor, my research and teaching interests range from Signal Processing to Data Science.

Related