Pastebin Search

wild

Tue May 05 2020
    /**
     *  Evaluates a query in which at least one term contains a wildcard.
     *
     *  kgIndex.Get_Wildcard_Query_Terms supplies a list of term sets
     *  (presumably one set per query position, holding the concrete terms
     *  that position expands to -- confirm against kgIndex). The term list of
     *  the supplied query is cleared as a side effect.
     *
     *  For RANKED_QUERY the union of all expanded terms is put back into the
     *  query (each with weight 1.0) and the query is handed to search().
     *  For every other query type, the postings of each term set are merged
     *  into one list per set, and those lists are then combined left to right
     *  with the intersection helper (INTERSECTION_QUERY) or the phrase helper
     *  (anything else). Duplicate docIDs are removed from the final list.
     *
     *  @param query        the wildcard query; its term list is modified
     *  @param queryType    selects ranked, intersection or phrase evaluation
     *  @param rankingType  forwarded to search() for ranked queries
     *  @param normType     forwarded to search() for ranked queries
     *  @return the resulting postings list, or null when the expansion is
     *          empty, a term set has no postings at all, or a helper
     *          returns null
     */
    private PostingsList Get_Wildcard_Postings_List(Query query, QueryType queryType, RankingType rankingType, NormalizationType normType)
    {
        ArrayList<HashSet<String>> query_terms_arr = kgIndex.Get_Wildcard_Query_Terms(query);
        query.queryterm.clear();

        // Nothing to evaluate: avoids the out-of-bounds pl_arr.get(0) below.
        if(query_terms_arr == null || query_terms_arr.isEmpty()) return null;

        if(queryType == QueryType.RANKED_QUERY)
        {
            // A ranked query only needs the bag of expanded terms, so the
            // per-position sets are flattened into a single set.
            HashSet<String> all_query_terms = new HashSet<>();
            for(HashSet<String> query_terms : query_terms_arr)
            {
                all_query_terms.addAll(query_terms);
            }
            for(String term : all_query_terms)
            {
                query.queryterm.add(query.new QueryTerm(term, 1.0));
            }
            return search(query, queryType, rankingType, normType);
        }

        ArrayList<PostingsList> pl_arr = new ArrayList<>();
        for(HashSet<String> query_terms : query_terms_arr)
        {
            // NOTE(review): the first non-null list returned by
            // index.getPostings is merged into and sorted directly; if that
            // object is owned by the index this modifies index data -- confirm
            // whether a copy is required.
            PostingsList merged = null;
            for(String term : query_terms)
            {
                PostingsList term_postings = index.getPostings(term);
                if(term_postings == null) continue;     // term has no postings
                if(merged == null) merged = term_postings;
                else merged.Merge_Postings_List(term_postings);
            }

            // A term set without any postings cannot match anything, so an
            // intersection or phrase over it has no result.
            if(merged == null) return null;

            merged.SortBasedOnDocIds();
            pl_arr.add(merged);
        }

        // pl_arr is non-empty here: every term set either added a list or
        // caused an early return.
        PostingsList pl = pl_arr.get(0);
        for(int a = 1; a < pl_arr.size(); a++)
        {
            if(queryType == QueryType.INTERSECTION_QUERY)
                pl = IntersectionAlgorithmHelper(pl, pl_arr.get(a));
            else
                pl = PhraseAlgorithmHelper(pl, pl_arr.get(a));
            if(pl == null) return null;
        }
        return Remove_Duplicates_From_Postings_List(pl);
    }

    /**
     *  Copies the given postings list into a new one, keeping only the first
     *  entry encountered for each docID. The input list is only read.
     *
     *  @param postings_list the list to filter
     *  @return a new postings list with at most one entry per docID, in the
     *          order the entries were first seen
     */
    private PostingsList Remove_Duplicates_From_Postings_List(PostingsList postings_list)
    {
        PostingsList unique_entries = new PostingsList();
        HashSet<Integer> seen_doc_ids = new HashSet<>();
        for(int idx = 0; idx < postings_list.size(); idx++)
        {
            PostingsEntry entry = postings_list.get(idx);
            // add() reports false when the docID was already recorded.
            if(!seen_doc_ids.add(entry.docID)) continue;
            unique_entries.AddPostingsEntry(entry);
        }
        return unique_entries;
    }

    /**
     *  Tells whether any term of the query contains the "*" character.
     *
     *  @param query the query whose terms are inspected
     *  @return true as soon as a term containing "*" is found, false if none is
     */
    private Boolean Is_Wildcard_Query(Query query)
    {
        int idx = 0;
        while(idx < query.queryterm.size())
        {
            boolean has_wildcard = query.queryterm.get(idx).term.contains("*");
            if(has_wildcard) return true;
            idx++;
        }
        return false;
    }

    /**
     *  Searches the index for postings matching the query.
     *
     *  A query with at least one term containing "*" is delegated to
     *  Get_Wildcard_Postings_List. Otherwise the query is dispatched on
     *  queryType and, for ranked queries, on rankingType.
     *
     *  @param query        the query to evaluate
     *  @param queryType    intersection, phrase or ranked evaluation
     *  @param rankingType  ranking scheme, used only for ranked queries
     *  @param normType     normalization passed to the TF-IDF and combination rankers
     *  @return A postings list representing the result of the query, or null
     *          when the query is null or has no terms, or when the query type
     *          or ranking type is not one of the handled values.
     */
    public PostingsList search(Query query, QueryType queryType, RankingType rankingType, NormalizationType normType)
    {
        // A missing query is treated like a query without terms.
        if(query == null || query.queryterm == null || query.queryterm.size() == 0) return null;

        if(Is_Wildcard_Query(query)) return Get_Wildcard_Postings_List(query, queryType, rankingType, normType);

        if(queryType == QueryType.INTERSECTION_QUERY) return IntersectionAlgorithm(query);
        if(queryType == QueryType.PHRASE_QUERY) return PhraseAlgorithm(query);
        if(queryType != QueryType.RANKED_QUERY) return null;

        if(rankingType == RankingType.TF_IDF)
        {
            PostingsList postings_list = TF_IDF_Algorithm(query, false, normType);
            // NDCG is computed for every TF-IDF result (the previous code
            // guarded this with a local flag that was always true).
            Caluclate_NDCG(postings_list);
            return postings_list;
        }
        if(rankingType == RankingType.PAGERANK) return PAGERANK_Algorithm(query, false);
        if(rankingType == RankingType.COMBINATION) return COMBINATION_Algorithm(query, normType);
        if(rankingType == RankingType.HITSRANKER) return HITSRanker_Algorithm(query);
        return null;
    }