<!DOCTYPE html>
<!-- ========================================== kroc camen of camen design ============================================= -->
<title>code · Improved Title Case Function for PHP</title>
<link rel="stylesheet" type="text/css" href="/design/design.css" />
<meta name="viewport" content="width=device-width, maximum-scale=1.0, user-scalable=no" />
<link rel="alternate" type="application/rss+xml" href="/code/rss" title="Just code" />
<link rel="canonical" href="/code/title-case" />
<!-- =================================================================================================================== -->
<header>
	<h1><a href="/" rel="index">
		Camen Design
	</a></h1>
	<nav><ul>
		<li><a href="/">all</a></li>
		<li><a href="/projects">projects</a></li>
		<li><a href="http://forum.camendesign.com">forum</a></li>
	</ul><ul>
		<li><a href="/quote/">quote</a></li>
		<li><a href="/writing/">writing</a></li>
		<li><a href="/blog/">blog</a></li>
		<li><a href="/photo/">photo</a></li>
		<li><a href="/code/" rel="tag">code</a></li>
		<li><a href="/art/">art</a></li>
		<li><a href="/link/">link</a></li>
		<li><a href="/poem/">poem</a></li>
		<li><a href="/audio/">audio</a></li>
	</ul><ul>
		<li><a href="/web-dev/">web-dev</a></li>
		<li><a href="/annoyances/">annoyances</a></li>
		<li><a href="/eve/">eve</a></li>
		<li><a href="/code-is-art/">code-is-art</a></li>
		<li><a href="/inspiration/">inspiration</a></li>
		<li><a href="/windows/">windows</a></li>
		<li><a href="/gift/">gift</a></li>
		<li><a href="/gaming/">gaming</a></li>
		<li><a href="/mac/">mac</a></li>
		<li><a href="/osnews/">osnews</a></li>
		<li><a href="/c64/">c64</a></li>
		<li><a href="/linux/">linux</a></li>
	</ul>
	<a rel="previous" href="/code/it_figures">
		older article →
	</a><a rel="next" href="/code/end_of_vfe">
		← newer article
	</a></nav>
</header>
<!-- =================================================================================================================== -->
<article><header>
	<!-- date published or updated -->
	<time pubdate datetime="2010-01-01T19:38:00+00:00">
		<sup>7:38<abbr>pm</abbr> • 2010</sup>
		<abbr title="January">Jan</abbr> 1
	</time>
	<!-- categories -->
	<ul>
		<li><a href="/code/title-case" rel="bookmark tag">code</a></li>
		<li><a href="/web-dev/title-case">web-dev</a></li>
	</ul>
	<!-- licence -->
	<small>
		<a rel="license" href="http://creativecommons.org/licenses/by/3.0/deed.en_GB">c</a>
		share + remix
	</small>
</header>
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<section>
<h1>Improved Title Case Function for PHP</h1>
<aside>
	<strong>Update:</strong><br />
	Small words (“in”, “and” <abbr title="et cetera">&amp;c.</abbr>) now capitalise after em or en-dash.
</aside>
<p>
	<strong>John Gruber</strong> originally <a href="http://daringfireball.net/2008/05/title_case" rel="external">made
	available</a> his script to Title Case text, working around the fringe-cases.
</p><p>
	From this, a <a href="http://daringfireball.net/2008/08/title_case_update" rel="external">number of ports</a> were
	made of the script of which particularly noteworthy
	<a href="http://individed.com/code/to-title-case/" rel="external">David Gouch’s Javascript port</a> that was
	smaller, simpler and handled more
	<a href="http://individed.com/code/to-title-case/tests.html" rel="external">fringe cases</a>.
</p><p>
	I’ve ported this to PHP and put it to use on this site. My version is based on David Gouch’s Javascript port,
	<del>unlike the <a href="http://files.nanovivid.com/wordpress/title-case.php" rel="external">WordPress port</a>
	which is, frankly, crap</del>. Ironically, now there’s a
	<a href="http://wordpress.org/extend/plugins/to-title-case/" rel="external">WordPress port</a> that uses my port.
	The circle is complete! <samp>:P</samp>
	<br /><br />
	Code below.
</p>

<pre><code>//original Title Case script © John Gruber &lt;daringfireball.net&gt;
//javascript port © David Gouch &lt;individed.com&gt;
//PHP port of the above by Kroc Camen &lt;camendesign.com&gt;

function titleCase ($title) {
	//remove HTML, storing it for later
	//       HTML elements to ignore    | tags  | entities
	$regx = '/&lt;(code|var)[^&gt;]*&gt;.*?&lt;\/\1&gt;|&lt;[^&gt;]+&gt;|&amp;\S+;/';
	preg_match_all ($regx, $title, $html, PREG_OFFSET_CAPTURE);
	$title = preg_replace ($regx, '', $title);
	
	//find each word (including punctuation attached)
	preg_match_all ('/[\w\p{L}&amp;`\'‘’"“\.@:\/\{\(\[&lt;&gt;_]+-? */u', $title, $m1, PREG_OFFSET_CAPTURE);
	foreach ($m1[0] as &amp;$m2) {
		//shorthand these- "match" and "index"
		list ($m, $i) = $m2;
		
		//correct offsets for multi-byte characters (`PREG_OFFSET_CAPTURE` returns *byte*-offset)
		//we fix this by recounting the text before the offset using multi-byte aware `strlen`
		$i = mb_strlen (substr ($title, 0, $i), 'UTF-8');
		
		//find words that should always be lowercase…
		//(never on the first word, and never if preceded by a colon)
		$m = $i&gt;0 &amp;&amp; mb_substr ($title, max (0, $i-2), 1, 'UTF-8') !== ':' &amp;&amp; 
			!preg_match ('/[\x{2014}\x{2013}] ?/u', mb_substr ($title, max (0, $i-2), 2, 'UTF-8')) &amp;&amp; 
			 preg_match ('/^(a(nd?|s|t)?|b(ut|y)|en|for|i[fn]|o[fnr]|t(he|o)|vs?\.?|via)[ \-]/i', $m)
		?	//…and convert them to lowercase
			mb_strtolower ($m, 'UTF-8')
			
		//else:	brackets and other wrappers
		: (	preg_match ('/[\'"_{(\[‘“]/u', mb_substr ($title, max (0, $i-1), 3, 'UTF-8'))
		?	//convert first letter within wrapper to uppercase
			mb_substr ($m, 0, 1, 'UTF-8').
			mb_strtoupper (mb_substr ($m, 1, 1, 'UTF-8'), 'UTF-8').
			mb_substr ($m, 2, mb_strlen ($m, 'UTF-8')-2, 'UTF-8')
			
		//else:	do not uppercase these cases
		: (	preg_match ('/[\])}]/', mb_substr ($title, max (0, $i-1), 3, 'UTF-8')) ||
			preg_match ('/[A-Z]+|&amp;|\w+[._]\w+/u', mb_substr ($m, 1, mb_strlen ($m, 'UTF-8')-1, 'UTF-8'))
		?	$m
			//if all else fails, then no more fringe-cases; uppercase the word
		:	mb_strtoupper (mb_substr ($m, 0, 1, 'UTF-8'), 'UTF-8').
			mb_substr ($m, 1, mb_strlen ($m, 'UTF-8'), 'UTF-8')
		));
		
		//resplice the title with the change (`substr_replace` is not multi-byte aware)
		$title = mb_substr ($title, 0, $i, 'UTF-8').$m.
			 mb_substr ($title, $i+mb_strlen ($m, 'UTF-8'), mb_strlen ($title, 'UTF-8'), 'UTF-8')
		;
	}
	
	//restore the HTML
	foreach ($html[0] as &amp;$tag) $title = substr_replace ($title, $tag[0], $tag[1], 0);
	return $title;
}</code></pre>

<p>
	Anything broken, please let me know.<br />
	Kind regards,
</p>
</section>
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
</article>
<footer>
	<nav><a href="http://forum.camendesign.com">‹ Discuss this in the Forum ›</a></nav>
		
	<a href="mailto:kroc@camendesign.com">kroc@camendesign.com</a>
	<nav>view-source:
		<a href="/code/title-case.rem">Rem</a> •
		<a href="/code/title-case.html">HTML</a> •
		<a href="/design/">CSS</a> •
		<a href="/.system/">PHP</a> •
		<a href="/.htaccess">.htaccess</a>
	</nav>
	<form method="get" action="https://duckduckgo.com">
		<input type="hidden" name="sites" value="camendesign.com" />
		<input type="search" name="q" placeholder="search…" />
		<input type="submit" value="Go" />
	</form>
</footer>
<!-- =================================================================================================== code is art === -->