/** @module ghazal */
import puppeteer from 'puppeteer';
import {
rekhta,
languages,
ghazalSortParams,
orderParams,
} from './constants.js';
import {
InvalidLanguageError,
InvalidCountError,
InvalidGhazalSortParamError,
InvalidOrderParamError,
} from './errors.js';
import { isValidCount } from './helpers.js';
/**
 * Fetch ghazals from a Rekhta URL using a specified selector.
 *
 * Opens the listing page in a headless Puppeteer browser, reads the `href` of
 * the `a:nth-child(2)` anchor inside every element matching `selector`, then
 * visits each of those links in turn and extracts the ghazal text (plus the
 * poet's name in title case when `isSinglePoet` is falsy). The browser is
 * always closed before this function settles, including when scraping throws.
 *
 * @async
 * @param {String} rekhtaUrl - URL to scrape
 * @param {String} selector - HTML selector to fetch data
 * @param {Boolean} isSinglePoet - Ghazals of a single poet or not
 * @param {Number} count - Count of ghazals to return; a falsy value returns every ghazal listed
 * @returns {Promise.<Array.<{ ghazal: String, poet: String, url: String }>> | Promise.<Array.<{ ghazal: String, url: String }>>}
 */
const getGhazals = async (rekhtaUrl, selector, isSinglePoet, count) => {
  const browser = await puppeteer.launch({
    headless: 'new',
  });

  try {
    const page = await browser.newPage();

    // Must be set before the first navigation: it only affects navigations
    // started after the call, so setting it later leaves earlier ones on the
    // default timeout.
    page.setDefaultNavigationTimeout(0);

    await page.goto(rekhtaUrl, {
      waitUntil: 'networkidle2',
    });

    // Retrieve ghazal links (runs inside the page, so only the passed
    // arguments are available there).
    const links = await page.evaluate(
      (listSelector, limit) => {
        let entries = Array.from(document.querySelectorAll(listSelector));
        if (limit) entries = entries.slice(0, limit);
        return entries.map((entry) => entry.querySelector('a:nth-child(2)').href);
      },
      selector,
      count,
    );

    // Fetch the ghazal content, one page at a time on the same tab.
    const ghazals = [];
    for (const link of links) {
      await page.goto(link, {
        waitUntil: 'networkidle2',
      });

      const result = await page.evaluate(
        (url, singlePoet) => {
          // Drop line breaks, then cut everything from the first "VIDEOS" /
          // "RECITATIONS" marker onwards.
          const ghazal = document
            .querySelector('.poemPageContentBody')
            .innerText.replace(/(\r\n|\n|\r)/gm, '')
            .split('VIDEOS')[0]
            .split('RECITATIONS')[0]
            .trim();

          if (singlePoet) {
            return { ghazal, url };
          }

          // Converting poet's name from upper case to title case. Empty
          // tokens (produced by consecutive spaces) are kept as-is instead of
          // being indexed, which would throw.
          const poet = document
            .querySelector('.ghazalAuthor')
            .innerText.toLowerCase()
            .split(' ')
            .map((word) => (word ? word[0].toUpperCase() + word.slice(1) : word))
            .join(' ');

          return { ghazal, poet, url };
        },
        link,
        isSinglePoet,
      );

      ghazals.push(result);
    }

    return ghazals;
  } finally {
    // Release the browser process even if a navigation or evaluation failed.
    await browser.close();
  }
};
/**
 * Fetch ghazals by a specific tag.
 *
 * Validates the arguments, turns the tag into a lower-case, hyphen-separated
 * slug and scrapes the matching Rekhta tag listing through `getGhazals`.
 *
 * @async
 * @param {String} tag - Tag to get ghazals of
 * @param {String} language - Language to get results in
 * @param {Number} count - Count of ghazals to return
 * @param {String} sort - Result sorting parameters
 * @param {String} order - Order of sorting
 * @throws {InvalidLanguageError}
 * @throws {InvalidCountError}
 * @throws {InvalidGhazalSortParamError}
 * @throws {InvalidOrderParamError}
 * @returns {Promise.<Array.<{ ghazal: String, poet: String, url: String }>>}
 */
const getGhazalsByTag = async (
  tag,
  language = 'en',
  count = false,
  sort = 'popularity',
  order = 'desc',
) => {
  // Argument checks run in a fixed order, before the tag is touched.
  if (!languages.includes(language)) {
    throw InvalidLanguageError;
  }
  if (count) {
    if (!isValidCount(count)) {
      throw InvalidCountError;
    }
  }
  if (!ghazalSortParams.includes(sort)) {
    throw InvalidGhazalSortParamError;
  }
  if (!orderParams.includes(order)) {
    throw InvalidOrderParamError;
  }

  // "Broken Heart" -> "broken-heart"
  const slug = tag.toLowerCase().split(' ').join('-');
  const listingUrl = `${rekhta}/tags/${slug}-shayari/ghazals?lang=${language}&sort=${sort}-${order}`;

  // Tag listings mix poets, so each result carries the poet's name.
  return await getGhazals(listingUrl, '.contentListBody', false, count);
};
/**
 * Fetch ghazals by a specific poet.
 *
 * Validates the arguments, turns the poet's name into a lower-case,
 * hyphen-separated slug and scrapes that poet's Rekhta ghazal listing through
 * `getGhazals` in single-poet mode, so results carry no `poet` field.
 *
 * @async
 * @param {String} poet - Poet to get ghazals of
 * @param {String} language - Language to get results in
 * @param {Number} count - Count of ghazals to return
 * @param {String} sort - Result sorting parameters
 * @param {String} order - Order of sorting
 * @throws {InvalidLanguageError}
 * @throws {InvalidCountError}
 * @throws {InvalidGhazalSortParamError}
 * @throws {InvalidOrderParamError}
 * @returns {Promise.<Array.<{ ghazal: String, url: String }>>}
 */
const getGhazalsByPoet = async (
  poet,
  language = 'en',
  count = false,
  sort = 'popularity',
  order = 'desc',
) => {
  if (!languages.includes(language)) throw InvalidLanguageError;
  // A falsy count means "no limit", so it is only validated when provided.
  if (count && !isValidCount(count)) throw InvalidCountError;
  if (!ghazalSortParams.includes(sort)) throw InvalidGhazalSortParamError;
  if (!orderParams.includes(order)) throw InvalidOrderParamError;

  // "Mirza Ghalib" -> "mirza-ghalib"; kept in a local so the argument is not reassigned.
  const poetSlug = poet.toLowerCase().replaceAll(' ', '-');
  const url = `${rekhta}/poets/${poetSlug}/ghazals?lang=${language}&sort=${sort}-${order}`;

  const ghazals = await getGhazals(url, '.rt_bodyTitle', true, count);
  return ghazals;
};
export { getGhazalsByTag, getGhazalsByPoet };