Chapter 1 + Section 2.1 Introduction


attachment:InformationRetreivalProcess.jpg

Information Retrieval Process


Section 2.2 A taxonomy of Information Retrieval Models

Section 2.3 Retrieval: Ad hoc and Filtering

The following is the formal definition for IR from MIR p 23.

\usepackage{amsmath}%
\setcounter{MaxMatrixCols}{30}%
\usepackage{amsfonts}%

\usepackage{amssymb}%
\usepackage{graphicx}
\usepackage{geometry}
\newtheorem{theorem}{Theorem}
\newtheorem{acknowledgement}[theorem]{Acknowledgement}
\newtheorem{algorithm}[theorem]{Algorithm}
\newtheorem{axiom}[theorem]{Axiom}
\newtheorem{case}[theorem]{Case}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{conclusion}[theorem]{Conclusion}
\newtheorem{condition}[theorem]{Condition}
\newtheorem{conjecture}[theorem]{Conjecture}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{criterion}[theorem]{Criterion}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{exercise}[theorem]{Exercise}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{notation}[theorem]{Notation}
\newtheorem{problem}[theorem]{Problem}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{solution}[theorem]{Solution}
\newtheorem{summary}[theorem]{Summary}
\newenvironment{proof}[1][Proof]{\noindent\textbf{#1.} }{\ \rule{0.5em}{0.5em}}
\geometry{left=0.5in,right=0.5in,top=0.5in,bottom=0.5in}

%%end-prologue%%

\begin{definition}
An information retrieval model is a quadruple $D,Q,F,R(q_i , d_j))$ where 
\begin{enumerate}
\item $D$ is a set composed of logical views (or representations) for the {\bf documents} in the collection.
\item $Q$ is a set composed of logical views (or representations) for the user information needs. Such representations are called {\bf queries}
\item $F$ is a {\bf framework} for modeling document representations, queries and their relationships.
\item $R(q_i,d_j)$ is a {\bf ranking function} wich associates a real number with a query $q_i \in Q$ and a document represenation $d_j \in D$. Such ranking defines an ordering among the documents with regard to the query $q_i$.
\end{enumerate}
\end{definition}

Section 2.5.1 Basic Concepts of Classic IR

\usepackage{amsmath}%
\setcounter{MaxMatrixCols}{30}%
\usepackage{amsfonts}%
\usepackage{amssymb}%
\usepackage{graphicx}
\usepackage{geometry}
\newtheorem{theorem}{Theorem}
\newtheorem{acknowledgement}[theorem]{Acknowledgement}
\newtheorem{algorithm}[theorem]{Algorithm}
\newtheorem{axiom}[theorem]{Axiom}
\newtheorem{case}[theorem]{Case}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{conclusion}[theorem]{Conclusion}
\newtheorem{condition}[theorem]{Condition}
\newtheorem{conjecture}[theorem]{Conjecture}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{criterion}[theorem]{Criterion}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{exercise}[theorem]{Exercise}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{notation}[theorem]{Notation}
\newtheorem{problem}[theorem]{Problem}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{solution}[theorem]{Solution}
\newtheorem{summary}[theorem]{Summary}
\newenvironment{proof}[1][Proof]{\noindent\textbf{#1.} }{\ \rule{0.5em}{0.5em}}
\geometry{left=0.5in,right=0.5in,top=0.5in,bottom=0.5in}

%%end-prologue%%

\begin{definition}
Let $t$ be the number of index terms in the system and $k_i$ be a generic index term. $K={k_1,...,k_t}$ is the set of all index terms. A weight $w_{i,j} > 0$ is associated with each index term $k_i$ of  a document $d_j$. For an index term which does not appear in the document text, $w_{i,j}=0$. With document $d_j$ is associated an index term vector $\vec{d}_{j}=(w_{1,j},w_{2,j},...,w_{t,j})$. Further, let $g_{i}$ be a function that returns the wieght associated with the index term $k_{i}$ in any $t$-dimensional vector (i.e., $g_{i}(\vec{d}_{j})=w_{i,j}$).
\end{definition}

Section 2.5.2 Boolean Model

normal form as latex2($[\vec{q}_{dnf}=(1,1,1)\vee (1,1,0)\vee (1,0,0)]$).

\usepackage{amsmath}%
\setcounter{MaxMatrixCols}{30}%
\usepackage{amsfonts}%
\usepackage{amssymb}%
\usepackage{graphicx}
\usepackage{geometry}
\newtheorem{theorem}{Theorem}
\newtheorem{acknowledgement}[theorem]{Acknowledgement}
\newtheorem{algorithm}[theorem]{Algorithm}
\newtheorem{axiom}[theorem]{Axiom}
\newtheorem{case}[theorem]{Case}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{conclusion}[theorem]{Conclusion}
\newtheorem{condition}[theorem]{Condition}
\newtheorem{conjecture}[theorem]{Conjecture}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{criterion}[theorem]{Criterion}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{exercise}[theorem]{Exercise}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{notation}[theorem]{Notation}
\newtheorem{problem}[theorem]{Problem}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{solution}[theorem]{Solution}
\newtheorem{summary}[theorem]{Summary}
\newenvironment{proof}[1][Proof]{\noindent\textbf{#1.} }{\ \rule{0.5em}{0.5em}}
\geometry{left=0.5in,right=0.5in,top=0.5in,bottom=0.5in}

%%end-prologue%%

\begin{definition}
For the Boolean model, the index term weight variables are all binary ie.e, $w_{i,j} \in {0,1}$. A query $q$ is a conventional boolean expression. Let $\vec{q}_{dnf}$ be the disjunctive normal form for the query $q$. further, let $\vec{q}_{cc}$ be any of the conjunctive components of $\vec{q}_{dnf}$. The similarity of a document $d_j$ to the query $q$ is defined as 
\begin{equation}
sim(d_{j},q)= \left\{
  \begin{array}{ll}
  1 & if~\exists \vec{q}_{cc}~|~(\vec{q}_{cc}\in \vec{q}_{dnf})\wedge (\forall k_{i},g_{i}(\vec{d}_{j})=g_{i}(\vec{q}_{cc}) \\
  0 & otherwise
  \end{array}
\right.
\end{equation}
If $sim(d_j,q)=1$ then the Boolean model predicts that the document $d_j$ is relevant to the query $q$ (it might not be). Otherwise, the prediction is that the document is not relevant.

\end{definition}