Thursday, 4 August 2011

Error Handling in SOA 11g :Introducing Error Report: Part 3

Please read my previous two error-handling posts first: http://shrikworld.blogspot.com/2011/03/errorhandling-in-soa-11g.html and http://shrikworld.blogspot.com/2011/04/error-handling-in-soa-11g-part-2.html. In the previous post I discussed how to enqueue the ECID to a JMS queue from the fault policy on any error. Instead of enqueueing only the ECID, we can also include the complete fault metadata, such as the fault details and the composite ID. You need to modify your custom Java code a little; here is the modified code:

package com.shrik.world.fault;

import com.collaxa.cube.engine.fp.BPELFaultRecoveryContextImpl;

import java.util.Map;
import java.util.UUID;

import oracle.integration.platform.faultpolicy.IFaultRecoveryContext;
import oracle.integration.platform.faultpolicy.IFaultRecoveryJavaClass;

import javax.jms.Connection;
import javax.jms.JMSException;
import javax.jms.MessageProducer;
import javax.jms.Queue;
import javax.jms.QueueConnectionFactory;
import javax.jms.Session;
import javax.jms.TextMessage;

import javax.naming.Context;
import javax.naming.InitialContext;
import javax.naming.NamingException;

import oracle.integration.platform.faulthandling.recovery.RejectedMsgRecoveryContext;

public class CustomFaultHandler implements IFaultRecoveryJavaClass {
  
    Map props;

    public CustomFaultHandler() {
        super();
    }

    public void handleRetrySuccess(IFaultRecoveryContext iFaultRecoveryContext) {
        System.out.println("Retry Success");
        handleFault(iFaultRecoveryContext);
    }

    public String handleFault(IFaultRecoveryContext iFaultRecoveryContext) {
        //Print Fault Meta Data to Console
        System.out.println("****************Fault Metadata********************************");
        System.out.println("Fault policy id: " +
                           iFaultRecoveryContext.getPolicyId());
        System.out.println("Fault type: " + iFaultRecoveryContext.getType());
        System.out.println("Partnerlink: " +
                           iFaultRecoveryContext.getReferenceName());
        System.out.println("Port type: " +
                           iFaultRecoveryContext.getPortType());
        System.out.println("**************************************************************");
        //print all properties defined in the fault-policy file
        System.out.println("Properties Set for the Fault");
        props = iFaultRecoveryContext.getProperties();
        for (Object key : props.keySet()) {
            System.out.println("Key : " + key.toString() + " Value : " +
                               props.get(key).toString());
        }
        //Print Fault Details to Console if it exists
        System.out.println("****************Fault Details********************************");
        BPELFaultRecoveryContextImpl bpelCtx =
            (BPELFaultRecoveryContextImpl)iFaultRecoveryContext;
        if (iFaultRecoveryContext instanceof BPELFaultRecoveryContextImpl) {

            System.out.println("Fault: " + bpelCtx.getFault());
            System.out.println("Activity: " + bpelCtx.getActivityName());
            System.out.println("Composite Instance: " +
                               bpelCtx.getCompositeInstanceId());
            System.out.println("Composite Name: " +
                               bpelCtx.getCompositeName());
            System.out.println("***********************************************************");
        }
        //enqueueing Error Details
        System.out.println("Enqueueing Data into ErrorQ.....");
        try {
            enqueueAqEvent(iFaultRecoveryContext);
        } catch (JMSException e) {
            e.printStackTrace();
        } catch (NamingException e) {
            e.printStackTrace();
        }
        return bpelCtx.getFault().getMessage().contains("env:Server") ? "Terminate":"Manual";
       
    }

    private void enqueueAqEvent(IFaultRecoveryContext iFaultRecoveryContext) throws NamingException,
                                                                                    JMSException {
      
        UUID uuid = UUID.randomUUID();
        Session session = null;
        MessageProducer publisher = null;
        TextMessage message = null;
        InitialContext context = new InitialContext();
        QueueConnectionFactory connectionFactory =(QueueConnectionFactory)context.lookup("error.qcf");
        Connection connection =connectionFactory.createConnection();
        Queue errQueue =(Queue)context.lookup("error.q");
        session =connection.createSession(false, Session.AUTO_ACKNOWLEDGE);
        publisher = session.createProducer(errQueue);
        message =session.createTextMessage(createEventPayload(iFaultRecoveryContext));
        message.setJMSCorrelationID(uuid.toString());
        connection.start();
        publisher.send(message);
        connection.stop();
        connection.close();
    }

    private String createEventPayload(IFaultRecoveryContext iFaultRecoveryContext) {
        String eventPayload =
            "<SOAFault xmlns=\"http://www.shrik.world.com/\">\n" +
            " <ecid>UNKNOWN_ECID</ecid>\n" +
            " <policyID>"+ iFaultRecoveryContext.getPolicyId() + "</policyID>\n" +
            " <type>"+ iFaultRecoveryContext.getType() + "</type>\n" +
            " <partnerLink>"+ iFaultRecoveryContext.getReferenceName() + "</partnerLink>\n" +
            " <port>"+ iFaultRecoveryContext.getPortType() + "</port>\n" +
            " <faultDetails>UNKNOWN_FAULT_DETAILS</faultDetails>\n" +
            " <activity>UNKNOWN_ACTIVITY</activity>\n" +
            " <compositeID>UNKNOWN_INSTANCE_ID</compositeID>\n" +
            " <compositeName>UNKNOWN_COMPOSITE_NAME</compositeName>\n" +
            " <compositeName>UNKNOWN_COMPONENT_NAME</compositeName>\n" +
            "</SOAFault>";

        if (iFaultRecoveryContext instanceof RejectedMsgRecoveryContext) {

            RejectedMsgRecoveryContext rejectedMessageContext =
                (RejectedMsgRecoveryContext)iFaultRecoveryContext;
            String ecid = null;
            if (rejectedMessageContext.getRejectedMessage() != null &&
                rejectedMessageContext.getRejectedMessage().getEcid() !=
                null) {
                ecid = rejectedMessageContext.getRejectedMessage().getEcid();
            } else if (rejectedMessageContext.getFault() != null &&
                       rejectedMessageContext.getFault().getECID() != null) {
                ecid = rejectedMessageContext.getFault().getECID();
            }
            eventPayload = eventPayload.replace("UNKNOWN_ECID", ecid);
        } else if (iFaultRecoveryContext instanceof
                   BPELFaultRecoveryContextImpl) {
            BPELFaultRecoveryContextImpl bpelFaultRecoveryContextImpl =
                (BPELFaultRecoveryContextImpl)iFaultRecoveryContext;
       
            eventPayload =eventPayload.replace("UNKNOWN_ECID", bpelFaultRecoveryContextImpl.getECID());
            eventPayload =eventPayload.replace("UNKNOWN_FAULT_DETAILS", bpelFaultRecoveryContextImpl.getFault().getMessage());
            eventPayload =eventPayload.replace("UNKNOWN_ACTIVITY", bpelFaultRecoveryContextImpl.getActivityName());
            eventPayload =eventPayload.replace("UNKNOWN_INSTANCE_ID", bpelFaultRecoveryContextImpl.getComponentInstanceId());
            eventPayload =eventPayload.replace("UNKNOWN_COMPOSITE_NAME", bpelFaultRecoveryContextImpl.getCompositeName());
            eventPayload =eventPayload.replace("UNKNOWN_COMPONENT_NAME", bpelFaultRecoveryContextImpl.getComponentName());
        }
        System.out.println(eventPayload);
        return eventPayload;
    }
}

So whenever an error occurs in a composite, a message like the one below is put into the queue:

image

<SOAFault xmlns="http://www.shrik.world.com/">
<ecid>11d1def534ea1be0:1ba57489:13197ef8107:-8000-0000000000000347</ecid>
<policyID>CompositeFaultPolicy</policyID>
<type>bpel</type>
<partnerLink>Service1</partnerLink>
<port>{
http://xmlns.oracle.com/ErrorHandlingApp/HelloWorld/sayHello}sayHello</port>
<faultDetails>faultName: {{
http://schemas.oracle.com/bpel/extension}remoteFault}
messageType: {{http://schemas.oracle.com/bpel/extension}RuntimeFaultMessage}
parts: {{
summary=<summary>Message Router for shrik/HelloWorld!1.0*soa_a94e595b-965e-48d9-8b15-6735c29a2805 is not able to process messages. The composite state is set to "off".  The composite can be turned "on" by using the administrative consoles.</summary>
,detail=<detail>&lt;exception>Message Router for shrik/HelloWorld!1.0*soa_a94e595b-965e-48d9-8b15-6735c29a2805 is not able to process messages. The composite state is set to "off".  The composite can be turned "on" by using the administrative consoles.&lt;/exception>
</detail>
,code=<code>env:Server</code>}
</faultDetails>
<activity>Invoke1</activity>
<compositeID>bpel:260001</compositeID>
<compositeName>CallHelloWorld</compositeName>
<compositeName>BPELProcess1</compositeName>
</SOAFault>

Now we can have a composite say SOAErrorNotificationProcess to dequeue the data from this error queue and send notification to concerned group along with auditing the same.Here is the design of that composite,

image

It's pretty simple. For auditing purposes I created the table below in the soainfra database to store the error details; here is the DDL and sample data:

-- Audit table for composite faults: one row per ECID, holding the fault policy,
-- partner link, port, fault summary/details/code, activity, composite id/name
-- and the time of the error.
-- NOTE(review): ECID is the primary key, so a second fault carrying the same
-- ECID cannot be inserted - confirm this matches how the table is loaded.
CREATE TABLE "DEV_SOAINFRA"."XX_COMPOSITE_ERRORS"
  (
    "ECID"           VARCHAR2(1000 BYTE) NOT NULL ENABLE,
    "POLICY_ID"      VARCHAR2(100 BYTE),
    "TYPE"           VARCHAR2(20 BYTE),
    "PARTNERLINK"    VARCHAR2(50 BYTE),
    "PORT"           VARCHAR2(500 BYTE),
    "FAULT_SUMMARY"  VARCHAR2(1000 BYTE),
    "FAULT_DETAILS"  VARCHAR2(1000 BYTE),
    "FAULT_CODE"     VARCHAR2(100 BYTE),
    "ACTIVITY"       VARCHAR2(20 BYTE),
    "COMPOSITE_ID"   VARCHAR2(100 BYTE),
    "COMPOSITE_NAME" VARCHAR2(1000 BYTE),
    "ERROR_TIME" DATE,
    -- Primary key on ECID; the index and the table both live in the DEV_SOAINFRA tablespace.
    CONSTRAINT "XX_COMPOSITE_ERRORS_PK" PRIMARY KEY ("ECID") USING INDEX PCTFREE 10 INITRANS 2 MAXTRANS 255 COMPUTE STATISTICS STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DEV_SOAINFRA" ENABLE
  )
  SEGMENT CREATION IMMEDIATE PCTFREE 10 PCTUSED 40 INITRANS 1 MAXTRANS 255 NOCOMPRESS LOGGING STORAGE
  (
    INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT
  )
  TABLESPACE "DEV_SOAINFRA" ;

Make the routing parallel in the mediator so that you can incorporate your custom fault policy there,

image

I’m not going into more details of this composite.Now just make one process faulted and our custom framework will do the following things sequentially,

1. It parses the custom policy file. 2. It retrieves the fault metadata and details from EM. 3. It enqueues all the details to the error queue. 4. SOAErrorNotificationProcess keeps polling that queue and retrieves the data as soon as it arrives in the error queue. 5. It audits all the error details into a table and sends an email with all the fault details to a group.

Now what's next? In EM you can see all the faulted instances awaiting manual recovery. But couldn't we have a report that shows all the error details and all the recoverable instances, and lets us retry or abort from there instead of going to EM? Such a report would also give business users a bird's-eye view.

I used ADF here for creating the report.To get started create a view(using entity) based on the error table that you created in soainfra like below,

image

Register your VO with the AM to make it accessible to your report as a data source. Create a page and drag and drop your data source onto it as an ADF Query panel. Here is my UI,

image

In the menu I just incorporated print page and export as a excel functionality,

There is an Instance Statistics button in the toolbar which is bound to an ADF popup. The data source for the popup is different and is based on a SQL query; here is the query:

/* Counts the rows of CUBE_INSTANCE per STATE value and shows each numeric
   state as a readable label; states outside 1-10 are shown as the number. */
SELECT (CASE WHEN STATE=1 THEN 'OPEN AND RUNNING'
WHEN STATE=2 THEN 'OPEN AND SUSPENDED'
WHEN STATE=3 THEN 'OPEN AND FAULTED'
WHEN STATE=4 THEN 'CLOSED AND PENDING'
WHEN STATE=5 THEN 'CLOSED AND COMPLETED'
WHEN STATE=6 THEN 'CLOSED AND FAULTED'
WHEN STATE=7 THEN 'CLOSED AND CANCELLED'
WHEN STATE=8 THEN 'CLOSED AND ABORTED'
WHEN STATE=9 THEN 'CLOSED AND STALE'
WHEN STATE=10 THEN 'NON-RECOVERABLE'
ELSE STATE || ''
END) AS STATE, COUNT(*) AS NUM_OF_CUBE_INST FROM CUBE_INSTANCE GROUP BY STATE

image

Register your view to am.Now create a popup and bind the popup id to your command button popup behaviour.

image

Now create a pie chart based on SOAInstancesV as below in your popup,

image

Now just run the page to check the added functionality,Run the query and click on command button on toolbar.

image

image

Now we need to build left hand navigation.There are two links with popup behaviour, RetryInstances and SOAErrorByDay.

RetryInstances pops up all the recoverable instances, read from the SOA MBean, that went to manual intervention in EM, and offers options to retry or terminate those instances without logging in to EM. The underlying data source for RetryInstances is based on a Java bean. First create the classes below in your model,

FaultDetails.java basically contains all the getter and setter methods; it will be used later as a table.

package com.shrik.world.model.bean;

/**
 * Simple bean describing one fault: where it happened (composite, component,
 * activity), which fault it is (id, name, message) and whether it is
 * recoverable. Every attribute has a getter and a setter.
 */
public class FaultDetails {

    private String compositeDN;
    private String compositeInstanceID;
    private String componentName;
    private String componentInstanceID;
    private String activityName;
    private String faultID;
    private String faultName;
    private boolean recoverableFlag;
    private String faultMessage;

    /** Creates an empty bean; all strings are null and the recoverable flag is false. */
    public FaultDetails() {
    }

    /** Creates a bean with every attribute set from the matching argument. */
    public FaultDetails(String compositeDN, String compositeInstanceID, String componentName, String componentInstanceID,
                 String activityName, String faultID, String faultName,boolean recoverableFlag,String faultMessage) {
        this.compositeDN = compositeDN;
        this.compositeInstanceID = compositeInstanceID;
        this.componentName = componentName;
        this.componentInstanceID = componentInstanceID;
        this.activityName = activityName;
        this.faultID = faultID;
        this.faultName = faultName;
        this.recoverableFlag = recoverableFlag;
        this.faultMessage = faultMessage;
    }

    /** @return the composite DN */
    public String getCompositeDN() {
        return this.compositeDN;
    }

    public void setCompositeDN(String compositeDN) {
        this.compositeDN = compositeDN;
    }

    /** @return the composite instance id */
    public String getCompositeInstanceID() {
        return this.compositeInstanceID;
    }

    public void setCompositeInstanceID(String compositeInstanceID) {
        this.compositeInstanceID = compositeInstanceID;
    }

    /** @return the component name */
    public String getComponentName() {
        return this.componentName;
    }

    public void setComponentName(String componentName) {
        this.componentName = componentName;
    }

    /** @return the component instance id */
    public String getComponentInstanceID() {
        return this.componentInstanceID;
    }

    public void setComponentInstanceID(String componentInstanceID) {
        this.componentInstanceID = componentInstanceID;
    }

    /** @return the activity name */
    public String getActivityName() {
        return this.activityName;
    }

    public void setActivityName(String activityName) {
        this.activityName = activityName;
    }

    /** @return the fault id */
    public String getFaultID() {
        return this.faultID;
    }

    public void setFaultID(String faultID) {
        this.faultID = faultID;
    }

    /** @return the fault name */
    public String getFaultName() {
        return this.faultName;
    }

    public void setFaultName(String faultName) {
        this.faultName = faultName;
    }

    /** @return the fault message */
    public String getFaultMessage() {
        return this.faultMessage;
    }

    public void setFaultMessage(String faultMessage) {
        this.faultMessage = faultMessage;
    }

    /** @return the recoverable flag */
    public boolean isRecoverableFlag() {
        return this.recoverableFlag;
    }

    public void setRecoverableFlag(boolean recoverableFlag) {
        this.recoverableFlag = recoverableFlag;
    }
}

FaultReport.java that populates FaultDetails with all required information,

package com.shrik.world.model.bean;

import java.util.ArrayList;
import java.util.Hashtable;

import java.util.List;

import javax.naming.Context;

import oracle.soa.management.facade.Fault;
import oracle.soa.management.facade.FaultRecoveryActionTypeConstants;
import oracle.soa.management.facade.Locator;
import oracle.soa.management.facade.LocatorFactory;
import oracle.soa.management.facade.bpel.BPELServiceEngine;
import oracle.soa.management.util.FaultFilter;

public class FaultReport {
    private Locator locator = null;
    private BPELServiceEngine mBPELServiceEngine;
    private List<Fault> faultList;
    private List<FaultDetails> myfaults = new ArrayList();

    public List<FaultDetails> findAllRecoverableFaults() {
        return myfaults;
    }

    public FaultReport() {
        locator = this.getLocator();
        try {
            mBPELServiceEngine = (BPELServiceEngine)locator.getServiceEngine(Locator.SE_BPEL);

            FaultFilter filter = new FaultFilter();
            filter.setFaultName("{http://schemas.oracle.com/bpel/extension}remoteFault");
            filter.setRecoverable(true);

            //Get faults using defined filter
            faultList = mBPELServiceEngine.getFaults(filter);
            for (Fault fault : faultList) {
                myfaults.add(new FaultDetails(
                                                fault.getCompositeDN().getStringDN(),
                                                fault.getCompositeInstanceId(),
                                                fault.getComponentName(),
                                                fault.getComponentInstanceId(),
                                                fault.getLabel(),
                                                fault.getId(),
                                                fault.getName().toString(),
                                                fault.isRecoverable(),
                                                fault.getMessage().toString()));
            }

        } catch (Exception e) {
            e.printStackTrace();
        }
    }


    public Locator getLocator() {

        try {
            return LocatorFactory.createLocator(getJndiProps());
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    public Hashtable getJndiProps() {
        Hashtable jndiProps = new Hashtable();
        jndiProps.put(Context.PROVIDER_URL, "t3://localhost:8001/soa-infra");
        jndiProps.put(Context.INITIAL_CONTEXT_FACTORY, "weblogic.jndi.WLInitialContextFactory");
        jndiProps.put(Context.SECURITY_PRINCIPAL, "weblogic");
        jndiProps.put(Context.SECURITY_CREDENTIALS, "welcome1");
        jndiProps.put("dedicated.connection", "true");
        return jndiProps;
    }
   
    public void retryRecoverableInstances(){
        try {
            mBPELServiceEngine.recoverFaults(faultList.toArray(new Fault[faultList.size()]), FaultRecoveryActionTypeConstants.ACTION_RETRY);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
   
    public void terminateRecoverableInstances(){
        try {
            mBPELServiceEngine.recoverFaults(faultList.toArray(new Fault[faultList.size()]), FaultRecoveryActionTypeConstants.ACTION_ABORT);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
Here you can also retrieve the server properties from an external file instead of hard-coding them.

Now right click on FaultReport and generate the data source.After sometime you should be able to see the datasource,

image

Now in the popup drag drop that datasource in a panel collection as below,

image

In the menu I added two options,Retry and Abort and here is the action listener binding

image

image

Methods are written as below,

package com.shrik.world.bean;
import  com.shrik.world.model.bean.FaultReport;

import javax.faces.event.ActionEvent;

public class Reconcile {
    public Reconcile() {
    }

    public void RetrySOARecoverable(ActionEvent actionEvent) {
       new FaultReport().retryRecoverableInstances();
      
    }

    public void TerminateSOAInstances(ActionEvent actionEvent) {
        new FaultReport().terminateRecoverableInstances();
    }

   
}
Now to get error details per day just create a SQL query based vo and register to am as below,

image

Now create a bar chart in popup window as below,

image

Now run the page and click on RetryInstances in left navigator,check the Retry and Abort functionality and verify the same from em console.

image

Now click on SOAErrorByDay and a bar chart would popup as below,

image

Now you can wrap up the whole code into an EAR and deploy it to EM. You can customize your GUI as per your needs.