IH/iihmsp13/ci2.tex

   1
   2 % It first synthesizes notations (Sec.~\ref{sec:ci2:notations}), and the scheme is
   3 % next formally presented as an iterative process (Sec.~\ref{sub:ci2:scheme}).
   4 % %A discussion (Sec.~\ref{sub:ci2:discussion})
   5 % %on the correctness and completeness of this algorithm follows these sections.
   6 % Finally, the last part shows how to decide whether an host image is watermarked or not.
   7
   8
   9
  10
  11 \subsection{Notations used for \CID}\label{sec:ci2:notations}
  12
  13
  14
  15
  16 \begin{itemize}
  17 \item $x^0 \in \mathbb{B}^\mathsf{N}$ is vector of the $\mathsf{N}$
  18   selected bits of a given host
  19   media where the watermark will be embedded.
  20   It is expressed as a vector of $\mathsf{N}$ Boolean values.
  21   In this work and in experimentations,  $x^0$ is defined as
  22   the LSBs of the host.
  23
  24   \item $m^0 \in \mathbb{B}^\mathsf{P}$ is the watermark message to embed into $x^0$.
  25     This is a  vector of $\mathsf{P}$ Boolean values.
  26   \item $S_p \in \mathbb{S}_\mathsf{N}$ is a strategy called \textbf{place strategy}. Intuitively, this sequence
  27     defines which element of $x$ is modified at each iteration.
  28   \item $S_c \in \mathbb{S}_\mathsf{P}$ is a strategy called \textbf{choice strategy}. It
  29     defines which element of the watermark is embedded at each iteration.
  30   \item Lastly, $S_m \in \mathbb{S}_\mathsf{P}$ is a strategy called \textbf{mixing strategy}. This sequence gives which element of the watermark is switched at each iteration.
  31 \end{itemize}
  32
  33 In what follows, $x^0$ and $m^0$ are sometimes replaced by
  34 $x$ and $m$ for the sake of brevity,
  35 when such abridge does not introduce confusion.
  36
  37
  38 \subsection{The $\CID$ scheme}\label{sub:ci2:scheme}
  39
  40
  41 With this material and for all
  42 $(n,i,j)$ in
  43 $\mathds{N}^{\ast} \times \llbracket 0;\mathsf{N}-1\rrbracket \times \llbracket
  44 0;\mathsf{P}-1\rrbracket$,
  45 the $\CID$ algorithm is defined by:
  46
  47 \begin{equation*}
  48 \left\{
  49 \begin{array}{l}
  50 x_i^n=\left\{
  51 \begin{array}{ll}
  52 x_i^{n-1} & \text{ if }S_p^n\neq i \\
  53 m_{S_c^n}^{n-1} & \text{ if }S_p^n=i.
  54 \end{array}
  55 \right.
  56 \\
  57 \\
  58 m_j^n=\left\{
  59 \begin{array}{ll}
  60 m_j^{n-1} & \text{ if }S_m^n\neq j \\
  61  & \\
  62 \overline{m_j^{n-1}} & \text{ if }S_m^n=j.
  63 \end{array}
  64 \right.
  65 \end{array}
  66 \right.
  67 \end{equation*}
  68 %\end{definition}
  69 \noindent where $\overline{m_j^{n-1}}$ is the Boolean negation of $m_j^{n-1}$.
  70 % It can be also remarked that simple are realized on the vector $m$ with the
  71 % strategy $S_m$ as explained in Section~\ref{sec:chaotic iterations}.
  72 The stego-content is the Boolean vector $y = x^l \in \mathbb{B}^{\mathsf{N}}$ provided the following constraints are applied:
  73
  74
  75 \begin{enumerate}
  76 \item The number $l$ of iterations is sufficiently large (see details
  77 below).
  78 \item \label{itm2:Sc} Let  $\Im(S_p)$ be the set
  79   $ \{S^1_p, S^2_p, \ldots,  S^l_p\}$ of cardinality $k$, $k \leq l$  (repetitions are removed in a
  80 set).
  81   This set contains all the elements of $x$ that have been modified
  82   along the iteration process.
  83   Let us consider $\Im(S_c)_{|D}$ defined by
  84   $\{S^{d_1}_c, S^{d_2}_c, \ldots,  S^{d_k}_c\}$
  85   where
  86   $d_i$ is the last iteration that has modified the element $i \in \Im(S_p)$.
  87   We require that this set is equal
  88   to $\llbracket 0 ;\mathsf{P} -1 \rrbracket$.
  89 \end{enumerate}
  90
  91
  92
  93 Let us discuss the constraints given above.
  94 The first one implies that the number of iterations is greater than a
  95 given threshold. This requirement has both practical and theoretical reasons.
  96 Theoretically speaking, the ability to
  97 successfully pass statistical tests is
  98 directly linked to this number of iterations.
  99 And, in practice, this value is bounded
 100 by the size of the host content.
 101 %\CG{Parler d'entropie, d'exposant de Lyapounov pour ce nombre d'itérations}
 102 %\JFC{en deduire une borne suffisante pour itérer. Pratiquement j'ai fais
 103 %4 fois la taille du support}.
 104 The second constrain, for its part, addresses the method's completeness and correctness, as detailed below.
 105
 106 \subsection{Correctness and Completeness Studies}\label{sub:ci2:discussion}
 107
 108
 109 Without attack, the scheme has to ensure that the user can always extract a
 110 message and that this latter is the watermark, provided the user has the
 111 correct keys. These two demands
 112 correspond respectively to the study
 113 of completeness and of correctness
 114 for the proposed approach.
 115 To achieve this study, let us firstly prove the following assessment.
 116
 117
 118 \begin{proposition}
 119 In section~\ref{sub:ci2:scheme}, item  \ref{itm2:Sc}
 120 is a  necessary and sufficient condition
 121 to allow message to be extracted
 122 from the host.
 123 \end{proposition}
 124 \begin{proof}
 125 For sufficiency, let $d_i$ be the last iteration (date) the element $i \in \Im(S_p)$
 126 of $x$ has been modified:% is defined by
 127 $$
 128 d_i = \max\{j | S^j_p = i \}.
 129 $$
 130 Let $D=\{d_i|i \in \Im(S_p) \}$.
 131 The set $\Im(S_c)_{|D}$ is thus
 132 the restriction of  the image of $S_c$ to $D$.
 133
 134
 135 The host that results from this iteration scheme is thus
 136 $(x^l_0,\ldots,x^l_{\mathsf{N}-1})$ where
 137 $x^l_i$ is either $x^{d_i}_i$ if $i$ belongs to $\Im(S_p)$ or $x^0_i$ otherwise.
 138 Moreover, for each $i \in \Im(S_p)$, the element $x^{d_i}_i$ is equal to
 139 $m^{d_i-1}_{S^{d_i}_c}$.
 140 Thanks to constraint \ref{itm2:Sc}, all the indexes
 141 $j \in \llbracket 0 ;\mathsf{P} -1 \rrbracket$ belong to
 142 $\Im(S_c)_{|D}$.
 143 Let then $j \in \llbracket 0 ;\mathsf{P} -1 \rrbracket$ s.t.
 144 $S^{d_i}_c=j$.
 145 Thus we have all the elements $m^._j$ of the vector $m$.
 146 Let us focus now on some  $m^{d_i-1}_j$.
 147 Thus the value of $m^0_j$ can be  immediately
 148 deduced by counting in $S_c$ how many
 149 times the component $j$ has been switched
 150 before $d_i-1$.
 151
 152 Let us focus now on necessity.
 153 If $\Im(S_c)_{|D} \subsetneq
 154 \llbracket 0 ;\mathsf{P} -1 \rrbracket$,
 155 there exist some $j \in \llbracket 0 ;\mathsf{P} -1 \rrbracket$ that
 156 do not belong to $\Im(S_c)_{|\Im(S_p)}$.
 157 Thus $m_j$ is  not present in $x^l$ and the message cannot be extracted.
 158 \end{proof}
 159
 160 When the constraint \ref{itm2:Sc} is satisfied, we obtain a scheme
 161 that always finds the original message provided the watermarked media
 162 has not been modified.
 163 In that context, correctness and completeness are established.
 164
 165
 166 Thanks to constraint~\ref{itm2:Sc}, the cardinality $k$ of
 167 $\Im(S_p)$ is larger than $\mathsf{P}$.
 168 Otherwise the cardinality of $D$  would be smaller than $\mathsf{P}$
 169 and similar to the cardinality of $\Im(S_c)_{|D}$,
 170 which is contradictory.
 171
 172 One bit of index $j$ of the original message $m^0$
 173 is thus embedded at least twice in $x^l$.
 174 By counting the number of times this bit has been switched in $S_m$, the value of
 175 $m_j$ can be deduced in many places.
 176 Without attack, all these values are equal and the message is immediately
 177 obtained.
 178  After an attack,  the value of $m_j$ is obtained as mean value of all
 179 its occurrences.
 180 The scheme is thus complete.
 181 Notice that if the cover is not attacked, the returned message is always equal to the original
 182 due to the definition of the mean function.
 183
 184
 185 \subsection{Deciding whether a Media is Watermarked}\label{sub:ci2:distances}
 186
 187 Let us consider  a media $y$ that is watermarked with a message $m$.
 188 Let us consider  $y'$ that is an altered version of $y$, \textit{i.e.}, where
 189 some bits have been modified.
 190 Let $m'$ be the message that is extracted from $y'$.
 191
 192 Let us now check how  far the extracted message $m'$ is from $m$.
 193 To achieve this, let us consider
 194 $M = \{ i | m_i = 1 \}$ of the Boolean vector message $m$ and similarly
 195 the set $M'$ for the message $m'$.
 196 Most of similarity measures %, e.g.~\cite{yule1950introduction}  %,Anderberg-Cluster-1973,Rifqi:2000:DPM:342947.342970},
 197 depend on the functions $a$, $b$, $c$, and
 198 $d$, all from
 199 $\mathbb{B}^{\mathsf{P}}  \times \mathbb{B}^{\mathsf{P}}$ to $\mathbb{N}$,
 200 and respectively equal to
 201 $a(m,m') = |M \cap M' |$,
 202 $b(m,m') = |M \setminus M' |$,
 203 $c(m,m') = |M' \setminus M|$, and
 204 $d(m,m') = |\overline{M} \cap \overline{M'}|$
 205 ($|S|$ and $\overline{S}$ respectively
 206 denote  the cardinality
 207 and the complementary
 208 of any set $S$).
 209 In what follows $a$, $b$, $c$, and $d$ respectively stand for
 210 $a(m,m')$, $b(m,m')$, $c(m,m')$, and $d(m,m')$.
 211
 212 According
 213 to~\cite{rifq/others03} %,DBLP:conf/ipmu/LesotR10}
 214 the Fermi-Dirac measure $S_{\textit{FD}}$
 215 is the one that has higher discrimination power, \textit{i.e.},
 216 which  allows a clear separation between correlated vectors and
 217 uncorrelated ones.
 218 The measure is recalled hereafter with
 219 respect to the previously defined scalars $a$, $b$, and $c$.
 220
 221 \begin{eqnarray*}
 222 S_{\textit{FD}}(\varphi) &= &
 223 \dfrac{F_{\textit{FD}}(\varphi) -F_{\textit{FD}}(\frac{\pi}{2})}
 224 {F_{\textit{FD}}(0) -F_{\textit{FD}}(\frac{\pi}{2})},\\
 225 F_{\textit{FD}}(\varphi) &= &
 226 \dfrac{1}{1+\exp(\dfrac{\varphi - \varphi_0}{\gamma})},\\
 227 %\varphi &= &\arctan(\dfrac{b+c}{a})
 228 \end{eqnarray*}
 229 \noindent where $\varphi = \arctan(\dfrac{b+c}{a})$, $\varphi_0$ is $\pi/4$, and $\gamma$ is 0.1.
 230
 231 The distance between $m$ and $m'$ is then computed as
 232 $1-S_{\textit{FD}}(m,m')$ and is thus a real number in $[0;1]$.
 233 If such a distance is behind a threshold, $y'$ will be
 234 declared as watermarked and not watermarked otherwise.
 235 Next section presents a practical evaluation of this approach.