import { stopWords } from "../features/postTracker/stopWords";
import unidecode from "unidecode";

/**
 * Builds the keyword index used for search from a list of posts.
 *
 * Delegates entirely to `removeRepeatedWords`, so the result is the list of
 * distinct, non-stop-word keywords found in the posts' captions.
 *
 * @param {Array<{caption?: string}>} media - Posts to index.
 * @returns {string[]} Distinct keywords.
 */
export const indexKeywords = (media) => removeRepeatedWords(media);

/**
 * Collects every distinct keyword that appears in the captions of `media`.
 *
 * Posts without a (truthy) caption are skipped. Each caption is cleaned with
 * `cleanCopys`; empty tokens and stop words are discarded.
 *
 * @param {{forEach: Function}} media - Collection of posts exposing `forEach`.
 * @returns {string[]} Distinct keywords, in first-seen order.
 */
export const removeRepeatedWords = (media) => {
  // A Set de-duplicates words while preserving insertion order.
  const uniqueWords = new Set();
  const isKeyword = (token) => token !== "" && !isStopWord(token);

  media.forEach((post) => {
    if (!post.caption) {
      return;
    }
    cleanCopys(post)
      .filter(isKeyword)
      .forEach((token) => uniqueWords.add(token));
  });

  return [...uniqueWords];
};

/**
 * Normalizes a post caption into a list of lowercase word tokens.
 *
 * Pipeline: drop hashtag tokens (`deleteHashtags`), pass the remaining text
 * through `unidecode` (used here to strip accents/diacritics), then lowercase
 * and split on non-word characters (`deleteSpecialCharacters`).
 *
 * @param {{caption: string}} post - Post whose caption is cleaned.
 * @returns {string[]} Word tokens; may contain empty strings.
 */
export const cleanCopys = (post) => {
  const withoutHashtags = deleteHashtags(post);
  const asciiText = unidecode(withoutHashtags);
  return deleteSpecialCharacters(asciiText);
};

/**
 * Lowercases `text` and splits it on runs of non-word characters.
 *
 * "Word" characters are those matched by `\w` (ASCII letters, digits and
 * underscore). Leading/trailing separators produce empty-string tokens, which
 * callers are expected to filter out.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Lowercase tokens.
 */
export const deleteSpecialCharacters = (text) => {
  const nonWordRun = /\W+/;
  const lowered = text.toLowerCase();
  return lowered.split(nonWordRun);
};

/**
 * Removes every whitespace-delimited token that contains `#` from a caption.
 *
 * Tokens are delimited by ANY single whitespace character (space, newline,
 * tab, ...), not just a space. Splitting only on spaces would glue a hashtag
 * to the previous word when they are separated by a newline
 * (e.g. "day!\n#sun") and discard the real word together with the hashtag.
 *
 * Each kept token is re-attached using the whitespace character that preceded
 * it in the original caption, so text without hashtags is returned unchanged
 * and captions that only use spaces produce the same output as a plain
 * split(" ") / join(" ").
 *
 * @param {{caption: string}} post - Post whose caption is filtered.
 * @returns {string} Caption text without hashtag tokens.
 */
export const deleteHashtags = (post) => {
  // The capturing group keeps the separators in the result:
  // even indexes hold tokens, odd indexes hold the single whitespace
  // character that follows the previous token.
  const parts = post.caption.split(/(\s)/);
  let cleaned = "";
  let hasKeptToken = false;

  for (let i = 0; i < parts.length; i += 2) {
    const token = parts[i];
    if (token.includes("#")) {
      continue;
    }
    if (hasKeptToken) {
      // Re-insert the separator that originally preceded this token.
      cleaned += parts[i - 1];
    }
    cleaned += token;
    hasKeptToken = true;
  }

  return cleaned;
};

/**
 * Tells whether `word` is a stop word.
 *
 * The comparison is done against the lowercased word, so the check is
 * case-insensitive as long as the `stopWords` list itself is lowercase.
 *
 * @param {string} word - Word to check.
 * @returns {boolean} `true` when the lowercased word is in `stopWords`.
 */
export const isStopWord = (word) => {
  const normalized = word.toLowerCase();
  return stopWords.includes(normalized);
};

/**
 * Converts a `{ keyword: count }` object into `[keyword, count]` pairs sorted
 * by count, highest first.
 *
 * @param {Object<string, number>} obj - Frequency table.
 * @returns {Array<[string, number]>} Pairs ordered by descending count.
 */
export const sortKeywordsFrequency = (obj) => {
  // Comparator over [key, value] pairs: larger counts come first.
  const byCountDescending = ([, countA], [, countB]) => countB - countA;
  return Object.entries(obj).sort(byCountDescending);
};

/**
 * Counts how often each keyword appears across the captions of an account's
 * posts.
 *
 * Posts without a (truthy) caption are skipped. Each caption is cleaned with
 * `cleanCopys`; stop words and empty tokens are discarded before counting.
 *
 * @param {{forEach: Function}} media - Collection of posts exposing `forEach`.
 * @returns {Array<[string, number]>} `[keyword, count]` pairs sorted by
 *   descending count (see `sortKeywordsFrequency`).
 */
export const accountKeywordsFreq = (media) => {
  // Null-prototype object: keys come from free-form caption text, so a word
  // such as "constructor" or "__proto__" must not collide with members
  // inherited from Object.prototype (which would corrupt the count).
  const keywordsFrequency = Object.create(null);

  media.forEach((post) => {
    if (post.caption) {
      // Tokenize the caption, then drop stop words and empty tokens.
      const words = cleanCopys(post).filter(
        (word) => !isStopWord(word) && word !== ""
      );
      // Accumulate keyword -> occurrences.
      words.forEach((keyword) => {
        keywordsFrequency[keyword] = (keywordsFrequency[keyword] || 0) + 1;
      });
    }
  });

  // Array of [keyword, count] pairs in descending order of count.
  return sortKeywordsFrequency(keywordsFrequency);
};

/**
 * Counts how often each keyword appears in a category's keyword list.
 *
 * @param {{forEach: Function}} keywords - Collection of keyword strings
 *   (repetitions allowed) exposing `forEach`.
 * @returns {Array<[string, number]>} `[keyword, count]` pairs sorted by
 *   descending count (see `sortKeywordsFrequency`).
 */
export const categoryKeywordsFreq = (keywords) => {
  // Null-prototype object: keywords are arbitrary strings, so names such as
  // "constructor" or "__proto__" must not collide with members inherited
  // from Object.prototype (which would corrupt the count).
  const keywordsFrequency = Object.create(null);

  keywords.forEach((keyword) => {
    keywordsFrequency[keyword] = (keywordsFrequency[keyword] || 0) + 1;
  });

  return sortKeywordsFrequency(keywordsFrequency);
};

