"""Citation Network Analysis with NetworkX.

Analyzing a citation network to identify influential papers and research trends.
"""

import networkx as nx
import matplotlib.pyplot as plt
import random

# Create a sample citation network: 20 papers, each assigned a random
# publication year between 2000 and 2023 (both ends inclusive).
G = nx.DiGraph()
papers = [f"Paper_{i}" for i in range(1, 21)]  # 20 papers
for paper in papers:
    G.add_node(paper, year=random.randint(2000, 2023))

# Add random citations. Each unordered pair of papers is considered once and
# linked with 30% probability. An edge u -> v means "u cites v", so exactly one
# edge is added per selected pair, pointing from the newer paper to the older
# one -- a paper cannot cite work published after it.
for i, first in enumerate(papers):
    for second in papers[i + 1:]:
        if random.random() >= 0.3:  # 30% chance of citation
            continue
        if G.nodes[first]['year'] < G.nodes[second]['year']:
            # `first` is older, so `second` is the citing paper.
            G.add_edge(second, first)
        else:
            # `first` is newer, or both share a year. For same-year pairs the
            # paper listed earlier cites the later-listed one; this fixed
            # tie-break keeps the graph free of citation cycles.
            G.add_edge(first, second)

# Count citations received per paper (in-degree) and report the five papers
# with the most; ties keep the graph's node order.
in_degree = dict(G.in_degree())
print("Top 5 most cited papers:")
most_cited = sorted(in_degree.items(), key=lambda entry: -entry[1])
for paper, citations in most_cited[:5]:
    print(f"{paper}: {citations} citations")

# Score every paper with PageRank (networkx defaults) and list the five
# highest-scoring ones as the most influential.
pagerank = nx.pagerank(G)
print("\nTop 5 influential papers (PageRank):")
top_by_pagerank = sorted(
    pagerank.items(),
    key=lambda entry: entry[1],
    reverse=True,
)[:5]
for paper, score in top_by_pagerank:
    print(f"{paper}: {score:.3f}")

# Identify research fronts: order papers by citations made (out-degree),
# breaking ties in favour of the more recent year, and keep the first five.
out_degree = dict(G.out_degree())
ranked_by_activity = sorted(
    papers,
    key=lambda name: (out_degree[name], G.nodes[name]['year']),
    reverse=True,
)
research_fronts = ranked_by_activity[:5]
print("\nTop 5 potential research fronts:")
for paper in research_fronts:
    year = G.nodes[paper]['year']
    print(f"{paper}: {out_degree[paper]} citations made, Year: {year}")

# Visualize the network
pos = nx.spring_layout(G)
plt.figure(figsize=(12, 8))

# Year range used for both the node colours and the colorbar, so the two
# always agree. It matches the range the sample years are drawn from.
year_min, year_max = 2000, 2023
year_cmap = plt.cm.viridis

# Color nodes based on year: scale the year to [0, 1] and look it up in the
# colormap, giving one RGBA tuple per node in G.nodes() order.
node_colors = [
    year_cmap((G.nodes[node]['year'] - year_min) / (year_max - year_min))
    for node in G.nodes()
]

# Adjust node size based on in-degree (citations received). Sizes are passed
# to the single draw call below rather than drawn as a second node layer: an
# overlay in the default colour would hide the year colours and the labels.
node_sizes = [300 + 100 * in_degree[node] for node in G.nodes()]

nx.draw(G, pos, with_labels=True, node_color=node_colors, node_size=node_sizes,
        font_size=8, font_weight='bold', arrows=True, arrowsize=10, alpha=0.7)

plt.title("Citation Network")

# Add a colorbar to show the year scale. The mappable is not attached to any
# artist, so the target Axes is given explicitly; set_array([]) is kept for
# older matplotlib releases that require a mappable to carry an array.
sm = plt.cm.ScalarMappable(cmap=year_cmap,
                           norm=plt.Normalize(vmin=year_min, vmax=year_max))
sm.set_array([])
cbar = plt.colorbar(sm, ax=plt.gca())
cbar.set_label('Year of Publication')