Searching the R mailing list archives with gmane
By romain francois on Thursday, May 7 2009, 21:59 - Permalink
This thread on R-devel led me to write these functions for searching the R mailing list archives through gmane.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#' search on gmane.org mailing list archives
#'
#' Builds a search.gmane.org query URL from the arguments and opens it with
#' browseURL(). Every parameter value is percent-encoded on its own, so
#' characters such as "&", "#", "=", "+" or "%" in the query or the author
#' cannot alter the structure of the URL.
#'
#' @param string query, a single string; runs of spaces separate the words
#' @param group search on which gmane group
#' @param author author of the email
#' @param sort sort order criteria
#' @param op how to combine words
#' @param r.group R related group to use, the default "*" means all r related groups
#' @return whatever browseURL() returns, invisibly; called for its side effect
gmaneSearch <- function( string,
    group = paste( "gmane.comp.lang.r.", r.group, sep = ""),
    author = "", sort = c("relevance", "date", "revdate"),
    op = c("and", "or"),
    r.group = "*" ){
  sort <- match.arg( sort )
  op <- match.arg( op )

  string <- as.character( string )
  if( length( string ) != 1L ){
    stop( "'string' must be a single character string", call. = FALSE )
  }

  # reserved = TRUE escapes URL delimiters inside a value; repeated = TRUE
  # forces encoding even when the value already contains something that
  # looks like a %xx escape (URLencode() would otherwise return it untouched)
  encode <- function( x ){
    URLencode( as.character( x ), reserved = TRUE, repeated = TRUE )
  }

  # spaces are encoded as %20 above; the query separates its words with "+",
  # and a literal "+" typed by the user has already become %2B
  query <- gsub( "(%20)+", "+", encode( string ) )

  url <- sprintf(
    'http://search.gmane.org/?query=%s&author=%s&group=%s&sort=%s&DEFAULTOP=%s',
    query, encode( author ), encode( group ), sort, op )
  browseURL( url )
}
#' lists the gmane groups whose name starts with a prefix
#'
#' Reads the dir.gmane.org index page for the prefix and keeps the lines
#' that look like table rows holding a group link; each such line yields
#' one row of the result.
#'
#' @param prefix group prefix
#' @return a data frame with character columns url, group and description
gmaneGroups <- function( prefix = "gmane.comp.lang.r." ){
  listing_url <- URLencode( sprintf( "http://dir.gmane.org/index.php?prefix=%s", prefix) )
  page <- readLines( listing_url )
  rows <- grep( '^<tr.*<td align=right.*<a', page, value = TRUE )

  # one pattern, three capture groups: link target, link text, description cell
  row_rx <- '^.*?<a href="(.*?)">(.*?)</a>.*<td>(.*?)</td>.*$'
  capture <- function( backref ){
    gsub( row_rx, backref, rows )
  }

  groups <- data.frame(
    url = capture( "\\1" ),
    group = capture( "\\2" ),
    description = capture( "\\3" ),
    stringsAsFactors = FALSE
  )

  # the first literal "..." in a group name is rewritten as ".*"
  groups$group <- sub( "...", ".*", groups$group, fixed = TRUE )
  groups
}
So for example:
R> gmaneSearch ("browser prompt", author="romain" )
opens the corresponding gmane search results page in your browser.