# Revision cadc9fffed25f77f3dd80b03e6f5a8fad907e637 authored by François on 29 December 2015, 16:38:27 UTC, committed by François on 29 December 2015, 16:38:27 UTC
# 1 parent 3eb42da
# Raw File
# comm.r
# parse sponsor JSON profiles
# (lapply, not sapply: the result must stay a plain list of profiles, whereas
# sapply collapses it into a list-matrix when all profiles have equal length)
c = lapply(list.files("raw/sponsors", pattern = "json$", full.names = TRUE),
           fromJSON, flatten = TRUE)

# extract committee membership tables: one row per membership, keeping only
# chamber ids 1 and 2; profiles without a membership table contribute NULL,
# which bind_rows() skips
c = lapply(c, function(x) {
  memberships = x$committeeMemberships
  if (!is.data.frame(memberships)) {
    return(NULL)
  }
  data_frame(chamber = memberships$committee.council.id, id = x$id,
             legislature = memberships$entryDate,
             code = memberships$committee.code) %>%
    filter(chamber < 3)
}) %>% bind_rows

# convert chamber ids to letters (1 -> "cn", 2 -> "cs")
c$chamber = c("cn", "cs")[ c$chamber ]

# find legislature id: the legislature whose start date is the closest one
# strictly before the committee entry date
c$legislature = as.Date(c$legislature)
c$legislature = vapply(c$legislature, function(x) {
  d = as.numeric(x - as.Date(legislatures))
  # mask (rather than drop) start dates on or after the entry date, so that
  # which.min() returns a position in the full `legislatures` vector even
  # when that vector is not sorted chronologically
  d[ which(d <= 0) ] = NA
  k = which.min(d)
  # entry dates that precede every legislature get NA; those rows are then
  # dropped by the filter() below, which discards NA comparisons
  if (length(k) == 0) NA_character_ else names(legislatures)[ k ]
}, character(1))

# trim committee codes (strip a trailing underscore)
c$code = gsub("_$", "", c$code)

# remove baseline legislature
c = filter(c, legislature != "1991-1995")

# export committee membership counts:
# one row per chamber-legislature-committee, with its number of members
c %>%
  group_by(chamber, legislature, code) %>%
  summarise(members = n()) %>%
  write.csv("data/committees.csv", row.names = FALSE)

# unique legislature-committee pairs: chamber + legislature + committee code
c$uid = paste0(c$chamber, c$legislature, c$code)

# master committee membership dataset:
# one row per committee uid, one 0/1 column per sponsor id found in `s`
comm = unique(data_frame(uid = c$uid))
comm[, as.character(unique(s$id)) ] = 0

# flag, for each sponsor column, the committees that the sponsor sat on
for (i in colnames(comm)[ -1 ]) {
  comm[[ i ]] = as.integer(is.element(comm$uid, c$uid[ c$id == i ]))
}

# weight the edges of each net_ch_<chamber><year> network by the number of
# committees that both sponsors sat on, and build a parallel undirected
# conet_ch_<chamber><year> network carrying the same edge attribute

# chamber letters + first year of the legislature, e.g. the first six
# characters of the committee uid
comm$legislature = substr(comm$uid, 1, 6)
for (i in unique(comm$legislature)) {

  cat("Legislature", i)

  n = get(paste0("net_ch_", i))

  # vertex names, keyed by the sponsor id taken from the profile URL
  sp = network.vertex.names(n)
  names(sp) = gsub("http://www.parlament.ch/f/suche/pages/biografie.aspx\\?biografie_id=", "", n %v% "url")

  # every sponsor in the network must have a column in the membership matrix
  stopifnot(names(sp) %in% colnames(comm))

  # committees of this legislature (rows) by sponsors of this network (columns)
  m = comm[ grepl(paste0("^", i), comm$legislature), names(comm) %in% names(sp) ]
  cat(":", nrow(m), "committees", ncol(m), "MPs")
  M = m

  m = t(as.matrix(m)) # sponsors in rows, committees in columns
  m = m %*% t(m) # adjacency matrix: number of shared committees per pair

  # relabel from sponsor ids to vertex names, to match the edge list below
  colnames(m) = sp[ colnames(m) ]
  rownames(m) = sp[ rownames(m) ]

  e = data_frame(i = n %e% "source", j = n %e% "target")

  # look up all (source, target) cells at once through a two-column index
  # matrix; unlike a 1:nrow(e) loop, this is also well-defined when the
  # network has no edges
  e$committee = m[ cbind(e$i, e$j) ]

  cat(" co-memberships:",
      str_pad(paste0(range(e$committee), collapse = "-"), 6, "right"),
      sum(e$committee == 0), "null,",
      sum(e$committee == 1), "single,",
      sum(e$committee > 1), "> 1\n")

  # NOTE(review): the set.edge.attribute() calls below are not assigned back,
  # so they rely on the network package updating its argument in place
  nn = network(e[, 1:2], directed = FALSE)
  set.edge.attribute(nn, "committee", e$committee)

  print(table(nn %e% "committee", exclude = NULL))
  stopifnot(!is.na(nn %e% "committee"))

  set.edge.attribute(n, "committee", e$committee)
  assign(paste0("net_ch_", i), n)

  # per-committee member counts, restricted to sponsors present in the network
  nn %n% "committees" = as.table(rowSums(M))
  assign(paste0("conet_ch_", i), nn)

}
# back to top