Esempio: classificare curricula
Supponiamo di avere un sito per la ricerca di lavoro e di voler classificare i curricula inseriti, per mostrarli agli utenti che cercano determinate figure. Dai curricula si può estrarre un'enorme quantità di dati, ma supponiamo di voler estrarre solo questi:
- se il candidato ha delle certificazioni
- se è una persona loquace
- se è iscritto al golf club
- se ha una laurea o un master
/**
 * Features extracted from one curriculum, plus the job it is labelled with.
 *
 * @param hasCertifications whether the candidate holds certifications
 * @param isTalkative       whether the candidate is a talkative person
 * @param golfClub          whether the candidate is a golf club member
 * @param hasMasterDegree   whether the candidate has a degree or a master
 * @param job               job label attached to this curriculum
 */
case class Person(
  hasCertifications : Boolean,
  isTalkative : Boolean,
  golfClub : Boolean,
  hasMasterDegree : Boolean,
  job : String
)
// Training set: nine labelled curricula, in the order they are fed to the learner.
val persons = List(
  Person(hasCertifications = true, isTalkative = false, golfClub = false, hasMasterDegree = true, job = "Programmer"),
  Person(hasCertifications = false, isTalkative = false, golfClub = false, hasMasterDegree = true, job = "Junior Programmer"),
  Person(hasCertifications = true, isTalkative = false, golfClub = false, hasMasterDegree = false, job = "Programmer"),
  Person(hasCertifications = false, isTalkative = true, golfClub = false, hasMasterDegree = true, job = "Seller"),
  Person(hasCertifications = false, isTalkative = true, golfClub = false, hasMasterDegree = false, job = "Seller"),
  Person(hasCertifications = true, isTalkative = true, golfClub = false, hasMasterDegree = false, job = "Seller"),
  Person(hasCertifications = false, isTalkative = true, golfClub = true, hasMasterDegree = true, job = "CEO"),
  Person(hasCertifications = false, isTalkative = false, golfClub = true, hasMasterDegree = false, job = "CEO"),
  Person(hasCertifications = false, isTalkative = false, golfClub = true, hasMasterDegree = false, job = "CEO")
)
Nella lista persons ho messo il mio training set. Per usare l'algoritmo ID3, basta importare l'oggetto giusto e usare la lista come training set:
import org.scalarecog.decisiontree._
def toVector(p : Person) = Vector(p.hasCertifications, p.isTalkative, p.golfClub, p.hasMasterDegree)
val dataset = persons map (p => (toVector(p), p.job))
val tree = new ID3[Boolean,String] buildTree dataset
La funzione toVector serve perché la classe ID3 ha bisogno di un Vector.
Ora tree può classificare una nuova persona:
val newPerson = Person(false, false, false, false, "?")
assert( tree.classify(toVector(newPerson)) == "Junior Programmer" )
Ma sarebbe carino visualizzare l'albero di decisione creato da ID3. Con JGraph è semplicissimo e otteniamo questo:
Ehi! È proprio l'algoritmo usato nella vita reale! ^_^
Ecco qui il codice completo:
package scalarecoggraph
import org.scalarecog.decisiontree._
import javax.swing.JFrame
import com.mxgraph.swing.mxGraphComponent
import com.mxgraph.view.mxGraph
/**
 * Swing frame titled "ScalaRecog" that renders a decision tree with JGraph (mxGraph).
 *
 * The diagram is built once, during construction, by calling `draw()`.
 *
 * @param tree          decision tree to display
 * @param propertyNames vertex labels for branch nodes, looked up by the branch's `index`
 */
class Program(tree : DecisionTree[Vector[Boolean], String], propertyNames : Vector[String]) extends JFrame("ScalaRecog") {

  type Tree = DecisionTree[Vector[Boolean], String]

  /** A graph cell paired with the (x, y) position it was inserted at. */
  type Vertex = (AnyRef, (Double, Double))

  draw()

  /** Builds the graph for `tree` and adds the resulting component to the content pane. */
  def draw(): Unit = {
    val graph: mxGraph = new mxGraph
    val root = graph.getDefaultParent

    // Every vertex is inserted with the same fixed size.
    val vertexWidth = 100.0
    val vertexHeight = 30.0

    /**
     * Inserts the vertex for `t` at `parentPos + offset`, then recursively inserts its children
     * and the edges leading to them. Returns the vertex created for `t`.
     */
    def drawNode(t : Tree, parentPos : (Double, Double), offset : (Int, Int)) : Vertex = {

      // Inserts a vertex, then runs `action` on it (used to attach the children of a branch).
      def createVertex(label : String, action : Vertex => Unit = _ => ()) : Vertex = {
        val newPos = (parentPos._1 + offset._1, parentPos._2 + offset._2)
        val cell = graph.insertVertex(root, null, label, newPos._1, newPos._2, vertexWidth, vertexHeight)
        val created = (cell, newPos)
        action(created)
        created
      }

      def createEdge(label : String, from : Vertex, to : Vertex) =
        graph.insertEdge(root, null, label, from._1, to._1)

      // NOTE(review): the type arguments in these patterns are erased at runtime, so the match
      // effectively discriminates on the node class only.
      t match {
        case a : DecisionLeaf[Vector[Boolean],String] =>
          createVertex(a.label)
        case a : DecisionBranchVector[String,Boolean] =>
          createVertex(propertyNames(a.index), n => {
            // The i-th child is shifted 120 * i horizontally from this node; the vertical
            // step is inherited unchanged from the current offset.
            for ( ((label, child), index) <- a.branches.zipWithIndex )
              createEdge(label.toString, n, drawNode(child, n._2, (120*index, offset._2)))
          })
      }
    }

    // Batch all insertions into a single model update; always close it, even on failure.
    graph.getModel.beginUpdate()
    try {
      drawNode(tree, (0.0, 0.0), (120, 120))
    }
    finally {
      graph.getModel.endUpdate()
    }

    getContentPane.add(new mxGraphComponent(graph))
  }
}
/**
 * Entry point of the example: trains an ID3 decision tree on a small set of labelled
 * curricula, checks one classification and shows the resulting tree in a window.
 */
object Program {

  /**
   * Features extracted from one curriculum, plus the job it is labelled with.
   *
   * @param hasCertifications whether the candidate holds certifications
   * @param isTalkative       whether the candidate is a talkative person
   * @param golfClub          whether the candidate is a golf club member
   * @param hasMasterDegree   whether the candidate has a degree or a master
   * @param job               job label attached to this curriculum
   */
  case class Person(
    hasCertifications : Boolean,
    isTalkative : Boolean,
    golfClub : Boolean,
    hasMasterDegree : Boolean,
    job : String
  )

  /**
   * Builds the tree from the training set, asserts the classification of a sample
   * person and opens the visualisation frame.
   *
   * @param args command-line arguments (unused)
   */
  def main(args : Array[String]) : Unit = {
    // Training set: nine labelled curricula.
    val persons = List(
      Person(hasCertifications = true, isTalkative = false, golfClub = false, hasMasterDegree = true, job = "Programmer"),
      Person(hasCertifications = false, isTalkative = false, golfClub = false, hasMasterDegree = true, job = "Junior Programmer"),
      Person(hasCertifications = true, isTalkative = false, golfClub = false, hasMasterDegree = false, job = "Programmer"),
      Person(hasCertifications = false, isTalkative = true, golfClub = false, hasMasterDegree = true, job = "Seller"),
      Person(hasCertifications = false, isTalkative = true, golfClub = false, hasMasterDegree = false, job = "Seller"),
      Person(hasCertifications = true, isTalkative = true, golfClub = false, hasMasterDegree = false, job = "Seller"),
      Person(hasCertifications = false, isTalkative = true, golfClub = true, hasMasterDegree = true, job = "CEO"),
      Person(hasCertifications = false, isTalkative = false, golfClub = true, hasMasterDegree = false, job = "CEO"),
      Person(hasCertifications = false, isTalkative = false, golfClub = true, hasMasterDegree = false, job = "CEO")
    )

    // Feature vector in the same order as the property names passed to the frame below.
    def toVector(p : Person) = Vector(p.hasCertifications, p.isTalkative, p.golfClub, p.hasMasterDegree)

    val dataset = persons.map(p => (toVector(p), p.job))
    val tree = (new ID3[Boolean,String]).buildTree(dataset)

    // Sanity check: a person with every feature false is expected to be a "Junior Programmer".
    val newPerson = Person(false, false, false, false, "?")
    assert(tree.classify(toVector(newPerson)) == "Junior Programmer")

    val propertyNames = Vector("Has certifications?", "Is talkative?", "Likes playing golf?", "Has a master degree?")

    // Swing components must be created and shown on the event dispatch thread.
    javax.swing.SwingUtilities.invokeLater(new Runnable {
      def run(): Unit = {
        val frame = new Program(tree, propertyNames)
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE)
        frame.setSize(400, 320)
        frame.setVisible(true)
      }
    })
  }
}

Nessun commento:
Posta un commento