From d461a7211f0df57f26735939703c7466e3274ccd Mon Sep 17 00:00:00 2001
From: Martin Goik <goik@hdm-stuttgart.de>
Date: Wed, 18 Jan 2023 15:31:46 +0100
Subject: [PATCH] Hash value clashes

---
 Doc/Sd1/CoreClasses/coreClasses.xml | 107 ++++++++++++++++++++--------
 1 file changed, 79 insertions(+), 28 deletions(-)

diff --git a/Doc/Sd1/CoreClasses/coreClasses.xml b/Doc/Sd1/CoreClasses/coreClasses.xml
index 9ae0e8da4..de66a8e2e 100644
--- a/Doc/Sd1/CoreClasses/coreClasses.xml
+++ b/Doc/Sd1/CoreClasses/coreClasses.xml
@@ -526,17 +526,18 @@ public boolean equals(Object anObject) {
             <para><code language="java">true == a.equals(b)</code> ⟹ <code
             language="java">a.hashCode() == b.hashCode()</code>.</para>
 
-            <para>An ideal/perfect <methodname>hashCode()</methodname> method
-            in addition will return different values whenever two instances
-            <code>a</code> and <code>b</code> differ in value with respect to
-            the underlying <methodname>equals()</methodname> method:</para>
+            <para>A so-called <emphasis>perfect</emphasis>
+            <methodname>hashCode()</methodname> method in addition will return
+            different values whenever two instances <code>a</code> and
+            <code>b</code> differ in value with respect to the underlying
+            <methodname>equals()</methodname> method:</para>
 
             <para><code language="java">false == a.equals(b)</code> ⟺ <code
             language="java">a.hashCode() != b.hashCode()</code>.</para>
 
-            <para>Combining these two statements a perfect hashCode() method
-            will have the following property with respect to its corresponding
-            <methodname>equals()</methodname> method:</para>
+            <para>Combining these two statements, a
+            <emphasis>perfect</emphasis> hashCode() method will have the
+            following property:</para>
 
             <para><code language="java">a.equals(b) == (a.hashCode() ==
             b.hashCode())</code></para>
@@ -565,6 +566,23 @@ public boolean equals(Object anObject) {
             So method 2 requiring just two additions offers (slightly) better
             runtime performance at the expense of a higher hash value
             collision rate.</para>
+
+            <note>
+              <para>Perfect hash functions are rare with respect to real-world
+              modeling problems. In the current example
+              <classname>TimePeriod</classname> instances are limited by 24
+              hours, 59 minutes and 59 seconds. This limit is equal to 89999
+              seconds fitting well into the count of <inlineequation>
+                  <m:math display="inline">
+                    <m:msup>
+                      <m:mi>2</m:mi>
+
+                      <m:mi>32</m:mi>
+                    </m:msup>
+                  </m:math>
+                </inlineequation> different <code language="java">int</code>
+              values.</para>
+            </note>
           </answer>
         </qandaentry>
       </qandadiv>
@@ -577,7 +595,7 @@ public boolean equals(Object anObject) {
       <qandadiv>
         <qandaentry>
           <question>
-            <para>In the previous exercise we found an ideal
+            <para>In the previous exercise we found a perfect
             <methodname>hashCode()</methodname> implementation:</para>
 
             <programlisting language="java">public class TimePeriod {
@@ -588,7 +606,9 @@ public boolean equals(Object anObject) {
   }
 }</programlisting>
 
-            <para>Is this possible for instances of String as well?</para>
+            <para>Does a perfect hash function exist for <link
+            xlink:href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html">String</link>
+            instances as well?</para>
 
             <tip>
               <para>Consider the possible number of different strings.</para>
@@ -596,19 +616,27 @@ public boolean equals(Object anObject) {
           </question>
 
           <answer>
-            <para>It is not possible to construct a perfect
-            <methodname>hashCode()</methodname> method acting on arbitrary
-            strings. A Java <classname
+            <para>It is not possible to construct a perfect <link
+            xlink:href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#hashCode()">hashCode()</link>
+            method acting on strings. A Java <classname
             xlink:href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html">String</classname>
             consists of individual <code language="java">char</code> elements
-            each requiring two bytes. Considering strings of fixed length we
-            have the following number of different strings:</para>
+            each requiring two bytes representing <inlineequation>
+                <m:math display="inline">
+                  <m:msup>
+                    <m:mi>2</m:mi>
+
+                    <m:mi>16</m:mi>
+                  </m:msup>
+                </m:math>
+              </inlineequation> different characters. Depending on a string's
+            length we have:</para>
 
             <informaltable border="1">
               <tr>
-                <th>Number of chars</th>
+                <th>String length (number of characters)</th>
 
-                <th>Number of possible strings</th>
+                <th>Number of different strings</th>
               </tr>
 
               <tr>
@@ -666,22 +694,45 @@ public boolean equals(Object anObject) {
               </tr>
             </informaltable>
 
-            <para>A four byte <code language="java">int</code> only offers
-            <inlineequation>
+            <para>Thus considering just the union of zero- (empty), one- and
+            two-character strings we have <inlineequation>
+                <m:math display="inline">
+                  <m:mrow>
+                    <m:mi>1</m:mi>
+
+                    <m:mo>+</m:mo>
+
+                    <m:msup>
+                      <m:mi>2</m:mi>
+
+                      <m:mi>16</m:mi>
+                    </m:msup>
+
+                    <m:mo>+</m:mo>
+
+                    <m:msup>
+                      <m:mi>2</m:mi>
+
+                      <m:mi>32</m:mi>
+                    </m:msup>
+                  </m:mrow>
+                </m:math>
+              </inlineequation> possibilities exceeding the <inlineequation>
                 <m:math display="inline">
                   <m:msup>
                     <m:mi>2</m:mi>
 
-                    <m:mrow>
-                      <m:mi>32</m:mi>
-                    </m:mrow>
+                    <m:mi>32</m:mi>
                   </m:msup>
                 </m:math>
-              </inlineequation> different values. Thus even mapping just one-
-            and two-<xref linkend="glo_unicode"/> character strings exceeds
-            the number of different <code language="java">int</code> values
-            thus requiring different string instances being mapped to
-            identical hash values. Consider for example:</para>
+              </inlineequation> count of different <code
+            language="java">int</code> values. Thus hash value clashes are
+            inevitable.</para>
+
+            <para>The <xref linkend="glo_JDK"/>'s <link
+            xlink:href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/String.html#hashCode()">String.hashCode()</link>
+            implementation already reveals conflicts for <xref
+            linkend="glo_ASCII"/> strings of length 2:</para>
 
             <informaltable border="1">
               <tr>
@@ -691,10 +742,10 @@ public boolean equals(Object anObject) {
               </tr>
 
               <tr>
-                <td valign="top"><programlisting language="java">System.out.println("hashcode of AA: " + "Aa".hashCode());
+                <td valign="top"><programlisting language="java">System.out.println("hashcode of Aa: " + "Aa".hashCode());
 System.out.println("hashcode of BB: " + "BB".hashCode());</programlisting></td>
 
-                <td valign="top"><screen>hashcode of AA: 2112
+                <td valign="top"><screen>hashcode of Aa: 2112
 hashcode of BB: 2112</screen></td>
               </tr>
             </informaltable>
-- 
GitLab