When submitting to PNAS, you're required to suggest NAS members as editors. Since there are so many of them, manually browsing their profiles on the website is incredibly tedious. To solve this, I had AI write a Python script to collect information on members in relevant fields. The results were great—check out the code below. Beyond just writing code, AI is becoming an autonomous agent through tools like OpenClaw, bridging the gap between instructions and results.
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from concurrent.futures import ThreadPoolExecutor
# Root of the NAS member-search site; profile links are relative to it.
BASE = "https://nrc88.nas.edu"
# Search landing page; takes a ?disciplineID=N query parameter.
SEARCH_URL = "https://nrc88.nas.edu/pnas_search/default.aspx"
# Minimal browser-like User-Agent so the server does not reject the client.
HEADERS = {
    "User-Agent": "Mozilla/5.0"
}
def safe_request(url, retries=3):
    """GET *url*, retrying network failures up to *retries* times.

    Args:
        url: absolute URL to fetch.
        retries: maximum number of attempts (default 3).

    Returns:
        The requests.Response on success (note: HTTP error statuses such
        as 404/500 are still returned, not retried), or None when every
        attempt raised a network-level error.
    """
    for attempt in range(1, retries + 1):
        try:
            return requests.get(url, headers=HEADERS, timeout=20)
        except requests.RequestException as exc:
            # Narrowed from a bare `except Exception` so genuine bugs
            # (e.g. NameError) propagate instead of being retried; the
            # error itself is now reported, not just the attempt number.
            print(f"Retry {attempt}/{retries} for {url}: {exc}")
            time.sleep(2)
    return None
def get_hidden_fields(soup):
    """Return all ASP.NET hidden form fields (e.g. __VIEWSTATE) as a dict.

    Missing ``value`` attributes map to the empty string, matching what a
    browser would submit for an empty hidden input.
    """
    return {
        field.get("name"): field.get("value", "")
        for field in soup.select("input[type=hidden]")
    }
def get_page(session, discipline_id, page_index, soup):
    """Simulate clicking a results-pager link and return the new page.

    Args:
        session: requests.Session carrying the site's cookies.
        discipline_id: numeric discipline filter for the search URL.
        page_index: zero-based index of the paging link to "click".
        soup: BeautifulSoup of the current page, used as the source of the
            ASP.NET hidden fields (__VIEWSTATE etc.) the postback requires.

    Returns:
        BeautifulSoup of the requested results page.
    """
    data = get_hidden_fields(soup)
    # ASP.NET auto-generated control IDs are zero-padded to two digits
    # (ctl00, ctl01, ..., ctl10). The previous "ctl0{i}" form produced
    # "ctl010" for index 10 and silently broke on results with 10+ pages.
    data["__EVENTTARGET"] = (
        f"ucSearch$ucSearchResults$datalistPaging$ctl{page_index:02d}$linkButtonPage"
    )
    data["__EVENTARGUMENT"] = ""
    r = session.post(
        SEARCH_URL + f"?disciplineID={discipline_id}",
        data=data,
        headers=HEADERS,
        timeout=20,
    )
    return BeautifulSoup(r.text, "html.parser")
def collect_editor_links(discipline_id):
    """Walk every results page for one discipline.

    Returns:
        dict mapping each member's absolute profile URL to the member's
        display name, accumulated across all pager pages.
    """
    session = requests.Session()
    response = session.get(
        SEARCH_URL,
        params={"disciplineID": discipline_id},
        headers=HEADERS,
        timeout=20,
    )
    soup = BeautifulSoup(response.text, "html.parser")

    # One anchor per page in the pager widget; its length is the page count.
    pager_anchors = soup.select("#ucSearch_ucSearchResults_datalistPaging a")
    total_pages = len(pager_anchors)
    print("Total pages:", total_pages)

    editors = {}
    for page_number in range(total_pages):
        if page_number:
            # Page 0 is already loaded; later pages need an ASP.NET postback.
            soup = get_page(session, discipline_id, page_number, soup)
            time.sleep(1)
        for anchor in soup.select("a[href*='memberDetails.aspx']"):
            profile_url = BASE + "/pnas_search/" + anchor["href"]
            editors[profile_url] = anchor.text.strip()
    return editors
def parse_editor(link):
    """Fetch one memberDetails.aspx profile page and extract its fields.

    Returns a dict with name, location, primary/secondary field, election
    citation, research interests, and the profile link (empty strings where
    a field is absent on the page), or None when the request failed or any
    exception occurred during parsing.
    """
    try:
        r = safe_request(link)
        if not r:
            return None
        soup = BeautifulSoup(r.text, "html.parser")
        record = {
            "name": "",
            "location": "",
            "primary_field": "",
            "secondary_field": "",
            "election_citation": "",
            "research_interests": "",
            "profile_link": link
        }
        table = soup.find("table", class_="detailsTable")
        if not table:
            # Page layout missing/unexpected: return the empty skeleton
            # rather than None so the link is still recorded in the CSV.
            return record
        rows = table.find_all("tr")
        # The long-text sections (citation / interests) appear as a <th>
        # header row followed by a value row; track which header was seen
        # last so the following detailsLongValue cell lands in the right key.
        current_section = None
        for row in rows:
            # Simple key/value rows: <td class=detailsName> + <td class=detailsValue>.
            name_cell = row.find("td", class_="detailsName")
            value_cell = row.find("td", class_="detailsValue")
            if name_cell and value_cell:
                key = name_cell.get_text(strip=True)
                value = value_cell.get_text(" ", strip=True)
                if key == "Name":
                    record["name"] = value
                elif key == "Location":
                    record["location"] = value
                elif key == "Primary Field":
                    record["primary_field"] = value
                elif key == "Secondary Field":
                    record["secondary_field"] = value
            # Section header row: remember which long-text block follows.
            th = row.find("th")
            if th:
                title = th.get_text(strip=True)
                if "Election Citation" in title:
                    current_section = "election"
                elif "Research Interests" in title:
                    current_section = "research"
            # Long-text value row: assign to whichever section is active.
            long_td = row.find("td", class_="detailsLongValue")
            if long_td:
                text = long_td.get_text(" ", strip=True)
                if current_section == "election":
                    record["election_citation"] = text
                elif current_section == "research":
                    record["research_interests"] = text
        return record
    except Exception as e:
        # Broad by design: one bad profile page must not abort the whole run
        # (this function is mapped over many links in a thread pool).
        print("Error:", link, e)
        return None
def main(discipline_ids):
    """Scrape all given disciplines and write the combined profiles to CSV.

    Args:
        discipline_ids: iterable of numeric disciplineID values to scrape.

    Side effects:
        Writes ./pnas_member_editors_full.csv and prints progress.
    """
    all_links = {}
    for discipline_id in discipline_ids:
        print("Scraping discipline:", discipline_id)
        # dict.update de-duplicates members listed under several disciplines.
        all_links.update(collect_editor_links(discipline_id))
    # Dict keys are already unique, so the previous list(set(...)) was
    # redundant — and it destroyed insertion order, making the output row
    # order non-deterministic between runs.
    links = list(all_links)
    print("Total editors:", len(links))
    results = []
    # Small pool: the work is I/O-bound, and 3 workers keeps the load on
    # the NAS server polite.
    with ThreadPoolExecutor(max_workers=3) as executor:
        for record in executor.map(parse_editor, links):
            if record:
                results.append(record)
    df = pd.DataFrame(results)
    df.to_csv("./pnas_member_editors_full.csv", index=False)
    print("Saved:", len(df))
if __name__ == "__main__":
    # Discipline IDs correspond to the site's disciplineID query parameter.
    # NOTE(review): 24/28/52 are presumably the author's fields of interest —
    # verify against the site's discipline list before reusing.
    discipline_ids = [24, 28, 52]
    main(discipline_ids)