#!/usr/bin/env python3
"""CGI script that searches PubMed via the NCBI E-utilities and renders the
matching articles as an HTML publication list.

Ported from Python 2: ``cgi.escape`` -> ``html.escape`` (the ``cgi`` module
was removed in Python 3.13), ``urllib.urlencode``/``urlopen`` ->
``urllib.parse``/``urllib.request``, and ``print`` statements -> functions.

NOTE(review): the HTML markup emitted by the original script was lost when
the source was extracted (tags were stripped from the string literals), so
the tags below are a plausible reconstruction -- confirm against the
deployed page template.
"""
import html
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET

# NCBI has required HTTPS for the E-utilities endpoints since late 2016.
EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"


def escapeToHTML(text, escapeQuotes=False):
    """Escape HTML metacharacters and encode non-ASCII characters as
    numeric character references (e.g. u"\u03b3" -> "&#947;").

    :param text: string to make HTML-safe.
    :param escapeQuotes: also escape quote characters when True.
    :return: pure-ASCII, HTML-safe string.
    """
    escaped = html.escape(text, quote=escapeQuotes)
    # xmlcharrefreplace turns every non-ASCII code point into &#NNNN;
    return escaped.encode("ascii", "xmlcharrefreplace").decode("ascii")


def checkXML(XML, path):
    """Return the text of the first element matching *path*, or "" when the
    element is missing or has no text.

    The original ran ``XML.find(path)`` up to three times; do it once.
    """
    node = XML.find(path)
    if node is None or node.text is None:
        return ""
    return node.text


def digest_authors(authors):
    """Map <Author> elements to "Initials LastName" strings."""
    return [checkXML(author, "./Initials") + " " + checkXML(author, "./LastName")
            for author in authors]


def digest_issue(issue):
    """Format a <JournalIssue> element as "Volume(Issue)".

    The parenthesised part is omitted when there is no issue number; going
    through checkXML() also guards against an empty <Issue/> element, which
    made the original crash concatenating None.
    """
    issue_string = checkXML(issue, "./Volume")
    number = checkXML(issue, "./Issue")
    if number:
        issue_string += "(" + number + ")"
    return issue_string


def digest_year(XML):
    """Extract a 4-character publication year from a <MedlineCitation>.

    Falls back to the first four characters of <MedlineDate> (e.g.
    "1998 Dec" -> "1998") and finally to "1900" when no date is present.
    Unlike the original, never returns None or slices a None text.
    """
    year = checkXML(XML, "./Article/Journal/JournalIssue/PubDate/Year")
    if year:
        return year
    medline_date = checkXML(XML, "./Article/Journal/JournalIssue/PubDate/MedlineDate")
    if medline_date:
        return medline_date[0:4]
    return "1900"


def search_pubmed(term, retmax=20):
    """Run an ESearch query for *term* and fetch the matching records.

    Uses the E-utilities history server (usehistory=y) so the EFetch call
    can reference the stored result set instead of re-sending IDs.

    :param term: PubMed query string.
    :param retmax: maximum number of records to return (was hard-coded).
    :return: raw EFetch XML as bytes.
    """
    params = {
        'db': 'pubmed',
        'tool': 'test',
        'email': 'test@test.com',
        'term': term,
        'usehistory': 'y',
        'retmax': retmax,
    }
    url = EUTILS_BASE + 'esearch.fcgi?' + urllib.parse.urlencode(params)
    # 'with' closes the HTTP response; the original leaked both sockets.
    with urllib.request.urlopen(url) as response:
        tree = ET.fromstring(response.read())
    params['query_key'] = tree.find("./QueryKey").text
    params['WebEnv'] = tree.find("./WebEnv").text
    params['retmode'] = 'xml'
    url = EUTILS_BASE + 'efetch.fcgi?' + urllib.parse.urlencode(params)
    with urllib.request.urlopen(url) as response:
        return response.read()


def xml_to_papers(data):
    """Parse EFetch XML into a list of per-article dicts with keys
    journal_name, title, authors, issue, year, page_num, pmid, doi."""
    tree = ET.fromstring(data)
    articles = tree.findall("./PubmedArticle/MedlineCitation")
    papers = []
    for article in articles:
        paper = dict()
        # checkXML() is used for fields that are frequently absent; the
        # remaining .find().text calls keep the original fail-fast behaviour
        # for fields PubMed always supplies.
        paper["journal_name"] = article.find("./Article/Journal/ISOAbbreviation").text
        paper["title"] = article.find("./Article/ArticleTitle").text
        paper["authors"] = digest_authors(article.findall("./Article/AuthorList/Author"))
        paper["issue"] = digest_issue(article.find("./Article/Journal/JournalIssue"))
        paper["year"] = digest_year(article)
        paper["page_num"] = checkXML(article, "./Article/Pagination/MedlinePgn")
        paper["pmid"] = article.find("./PMID").text
        paper["doi"] = checkXML(article, "./Article/ELocationID")
        papers.append(paper)
    return papers


def printCV(term="Connelly WM"):
    """Collect papers in dictionary structure for *term* (default keeps the
    original hard-coded author search)."""
    return xml_to_papers(search_pubmed(term))


def _format_authors(paper):
    """Return the comma-separated, escaped author list ending in "."."""
    authors = ", ".join(escapeToHTML(a) for a in paper["authors"])
    if authors:
        authors += "."
    return authors


def _format_citation(paper):
    """Return the "Journal. Year Issue:Pages [doi: ...]" line, HTML-escaped.

    The original interpolated year/issue/pages/doi into the page unescaped.
    """
    journal = paper["journal_name"]
    # Strip a single trailing dot; endswith() avoids the original's
    # IndexError on an empty journal name.
    if journal.endswith("."):
        journal = journal[0:-1]
    citation = (escapeToHTML(journal) + ". " + escapeToHTML(paper["year"]) + " "
                + escapeToHTML(paper["issue"]) + ":" + escapeToHTML(paper["page_num"]))
    if len(paper["doi"]) > 0:
        citation += " doi: " + escapeToHTML(paper["doi"])
    return citation


def main():
    """Fetch the publication list and emit the CGI response."""
    papers = printCV()
    # The CGI header must precede the body, separated by a blank line.
    print("Content-Type: text/html")
    print()
    print("<html>")
    print("<head><title>CGI script output</title></head>")
    print("<body>")
    for paper in papers:
        print("<p>")
        print("  <b>" + escapeToHTML(paper["title"]) + "</b><br>")
        print("  " + _format_authors(paper) + "<br>")
        print("  " + _format_citation(paper))
        print("</p>")
    print("</body>")
    print("</html>")


if __name__ == "__main__":
    # Guarding main() keeps importing this module free of network side
    # effects; a CGI server still executes the script as __main__.
    main()