[Home] [By Thread] [By Date] [Recent Entries]
At 2008-12-10 14:15 -0600, Quinn Dombrowski wrote:
> I'm trying to calculate all of the groups of 2+ elements (in the sample data below, words) that appear together in more than one place. Ideally, I'd like to be able to sort descending both by length of group (5-word groups, 4-word groups, etc.), and by number of places the groups occur (100 places, 99 places, etc.). I also need to be able to list the place numbers where they occur.

You don't show how these places are to be listed, so I guessed.

> I started doing it manually this way but the number of possible combinations quickly became too big a task:

Not sure where you are going with the intersects, so I approached this as a grouping problem.

> Here's a sample of the data. Almost every word appears in multiple places, but each appears only once in the index, which I've used in other applications for matching to avoid re-calculating stats for the word over and over. Any help would be wonderful!

I hope the code below helps, though I am a bit unclear on what you want, so my comments should reveal what I think you want.

. . . . . . . Ken
<atlas> <place> <place_number>1</place_number> <words> <word>Aa</word> <word>C</word> <word>Qqq</word> </words> </place> <place> <place_number>2</place_number> <words> <word>Aa</word> <word>Bbbb</word> <word>C</word> <word>W</word> <word>Zz</word> </words> </place> <place> <place_number>3</place_number> <words> <word>Aa</word> <word>C</word> <word>Bb</word> <word>Qqq</word> <word>Wwww</word> <word>Zz</word> </words> </place> </atlas>
T:\ftemp>type quinn.xsl
<?xml version="1.0" encoding="US-ASCII"?>
<!--
  quinn.xsl: copies the input document through unchanged and then emits
  an <index> element after it.

  The index groups every <word> in the document by its first character
  (treated here as the "underlying word").  Each <index_entry> lists the
  distinct words in the group and the <place_number> of every place that
  contains at least one of them.  Entries are ordered by group size and
  then by number of places, both descending.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                version="2.0">

  <xsl:output indent="yes"/>

  <!--
    Document element: hand it to the identity template below so it is
    copied as-is, then write the index as a following sibling.  The
    result therefore has two top-level elements, <atlas> and <index>.
  -->
  <xsl:template match="atlas">
    <xsl:next-match/>
    <index>
      <!-- One group per distinct first character of a word. -->
      <xsl:for-each-group select="//word" group-by="substring(., 1, 1)">
        <!--
          Both sort keys are computed from current-group(), so they always
          follow the group-by expression above; no separate key index has
          to be kept in step with it.
        -->
        <!-- Primary: number of word occurrences in the group, largest first. -->
        <xsl:sort select="count(current-group())" order="descending"/>
        <!--
          Secondary: number of distinct grandparent elements of those
          words (the <place> elements in the sample data), largest first.
          The path step removes duplicates.
        -->
        <xsl:sort select="count(current-group()/../..)" order="descending"/>

        <index_entry>
          <!--
            Diagnostic comment.  The items of the select sequence are
            joined with single spaces, e.g. "A = Words: 3 Places: 3".
          -->
          <xsl:comment select="current-grouping-key(), '=',
                               'Words:', count(current-group()),
                               'Places:', count(current-group()/../..)"/>
          <!-- Explicit line feed after the comment; written as a character
               reference so reformatting the stylesheet cannot alter it. -->
          <xsl:text>&#10;</xsl:text>

          <!-- The grouping key, i.e. the shared first character. -->
          <underlying_word>
            <xsl:value-of select="current-grouping-key()"/>
          </underlying_word>

          <!-- Each distinct word spelling in the group, in order of first
               appearance. -->
          <xsl:for-each-group select="current-group()" group-by=".">
            <word><xsl:value-of select="."/></word>
          </xsl:for-each-group>

          <!-- Every place containing a word of this group, in document
               order.  current-group() is the outer group again here. -->
          <places>
            <xsl:for-each select="current-group()/../..">
              <place><xsl:value-of select="place_number"/></place>
            </xsl:for-each>
          </places>
        </index_entry>
      </xsl:for-each-group>
    </index>
  </xsl:template>

  <!-- Identity template: copies every other node and attribute unchanged. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

</xsl:stylesheet>
T:\ftemp>call xslt2 quinn.xml quinn.xsl quinn.out
T:\ftemp>type quinn.out
<?xml version="1.0" encoding="UTF-8"?>
<atlas>
<place>
<place_number>1</place_number>
<words>
<word>Aa</word>
<word>C</word>
<word>Qqq</word>
</words>
</place> <place>
<place_number>2</place_number>
<words>
<word>Aa</word>
<word>Bbbb</word>
<word>C</word>
<word>W</word>
<word>Zz</word>
</words>
</place> <place>
<place_number>3</place_number>
<words>
<word>Aa</word>
<word>C</word>
<word>Bb</word>
<word>Qqq</word>
<word>Wwww</word>
<word>Zz</word>
</words>
</place>
</atlas>
<index>
<index_entry><!--A = Words: 3 Places: 3-->
<underlying_word>A</underlying_word>
<word>Aa</word>
<places>
<place>1</place>
<place>2</place>
<place>3</place>
</places>
</index_entry>
<index_entry><!--C = Words: 3 Places: 3-->
<underlying_word>C</underlying_word>
<word>C</word>
<places>
<place>1</place>
<place>2</place>
<place>3</place>
</places>
</index_entry>
<index_entry><!--Q = Words: 2 Places: 2-->
<underlying_word>Q</underlying_word>
<word>Qqq</word>
<places>
<place>1</place>
<place>3</place>
</places>
</index_entry>
<index_entry><!--B = Words: 2 Places: 2-->
<underlying_word>B</underlying_word>
<word>Bbbb</word>
<word>Bb</word>
<places>
<place>2</place>
<place>3</place>
</places>
</index_entry>
<index_entry><!--W = Words: 2 Places: 2-->
<underlying_word>W</underlying_word>
<word>W</word>
<word>Wwww</word>
<places>
<place>2</place>
<place>3</place>
</places>
</index_entry>
<index_entry><!--Z = Words: 2 Places: 2-->
<underlying_word>Z</underlying_word>
<word>Zz</word>
<places>
<place>2</place>
<place>3</place>
</places>
</index_entry>
</index>
|

Cart



