# Full source code: content-based article recommender (TF-IDF + cosine similarity).
import pandas as pd
from IPython.display import display
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Load the raw sharing log, then report its size and the first timestamps.
articles_df = pd.read_csv('shared_articles.csv')
print(articles_df.shape)
print(articles_df['timestamp'].head(5))

# Keep only 'CONTENT SHARED' events whose lang is 'en', and report the new size.
articles_df = articles_df[
    (articles_df['eventType'] == 'CONTENT SHARED')
    & (articles_df['lang'] == 'en')
]
print(articles_df.shape)

# Narrow the frame to the columns used downstream.
# NOTE(review): building a DataFrame with `columns=` does not raise for a name
# missing from the CSV; it yields an all-NaN column. Confirm 'content' exists.
articles_df = pd.DataFrame(
    articles_df,
    columns=['contentId', 'authorPersonId', 'content', 'title', 'text'],
)
def create_soup(x):
    """Build the text "soup" for one article row.

    Args:
        x: A row of ``articles_df`` (as passed by ``DataFrame.apply`` with
            ``axis=1``); only its ``'text'`` field is read.

    Returns:
        The article text as a single string. A string is returned unchanged,
        an iterable of string tokens is joined with single spaces, and a
        missing value (``None``/NaN) becomes the empty string.
    """
    text = x['text']
    # A str is itself an iterable of characters, so joining it would insert
    # a space between every character ("h e l l o"); return it as-is.
    if isinstance(text, str):
        return text
    # Missing text cannot be joined; treat it as an empty document.
    if pd.api.types.is_scalar(text) and pd.isna(text):
        return ''
    return ' '.join(text)


# The function must exist before it is applied row by row.
articles_df['soup'] = articles_df.apply(create_soup, axis=1)
# Vectorize the article text with TF-IDF, dropping English stop words.
tfidf = TfidfVectorizer(stop_words='english')
# A NaN document makes the vectorizer raise, so treat missing text as empty.
tfidf_matrix = tfidf.fit_transform(articles_df['text'].fillna(''))

# Pairwise cosine similarity between every pair of articles, as a dense array.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix, dense_output=True)
display(cosine_sim.shape)
display(cosine_sim)

# Reset the index so row positions line up with the rows of the matrices above.
metadata = articles_df.reset_index()

# Map each title to its row position. Series.drop_duplicates() compares the
# values (the already-unique row positions), so it never removed repeated
# titles; de-duplicate on the index instead, keeping the first occurrence, so
# that a title lookup always yields a single position.
indices = pd.Series(metadata.index, index=metadata['title'])
indices = indices[~indices.index.duplicated(keep='first')]
display(indices[:10])
def get_recommendations(title, indices, cosine_sim, data, top_n=10):
    """Return the titles of the articles most similar to the given one.

    Args:
        title: Title of the query article; must be a label of ``indices``.
        indices: Series mapping article title to row position.
        cosine_sim: Square pairwise-similarity matrix, indexed by the same
            row positions.
        data: DataFrame with a ``'title'`` column, in the same row order.
        top_n: Maximum number of recommendations to return (default 10).

    Returns:
        A Series of up to ``top_n`` titles taken from ``data['title']``,
        ordered from most to least similar; the query article is excluded.

    Raises:
        KeyError: If ``title`` is not present in ``indices``.
    """
    # Get the row position of the article that matches the title.
    idx = indices[title]
    # A title that occurs more than once yields a Series of positions;
    # fall back to the first occurrence.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Pair every other article with its similarity to the query article.
    # The query article is skipped explicitly: it is not guaranteed to sort
    # first when another article ties with it at the top score.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    # Most similar first; the sort is stable, so ties keep row order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Row positions of the top_n most similar articles.
    article_indices = [position for position, _ in sim_scores[:top_n]]
    return data['title'].iloc[article_indices]


# Demo query; the function must be defined before this call runs.
print(get_recommendations(
    'The Rise And Growth of Ethereum Gets Mainstream Coverage',
    indices,
    cosine_sim,
    metadata,
))
# Original article (author's write-up):
# https://medium.com/web-mining-is688-spring-2021/article-recommendation-system-using-python-8b0fec6e6de8
# Dataset download link (shared_articles.csv):
# https://www.kaggle.com/datasets/gspmoreira/articles-sharing-reading-from-cit-deskdrop?select=shared_articles.csv